GSoC/GCI Archive
Google Code-in 2010 Parrot Foundation and The Perl Foundation

Implement Chunked receive for LWP;Protocol;http

completed by: David Czech

mentors: whiteknight

Task Description: The LWP library is used with Perl5 to send data over the internet. Parrot has a port of LWP written in PIR (the low-level Parrot assembly). One of the features of LWP is to support HTTP requests over the network. The Parrot version of it does this too.

One thing that the Parrot version does not do is support chunked transfers. Chunked transfer encoding allows the sender to break up the message body into small pieces (chunks), each prefixed with its length, so that the receiver can handle them one at a time. This is important for LWP to support, because many servers return their responses in chunks.

Steps to Complete:

  1. Create a fork of parrot/parrot on GitHub
  2. Find the "request" method in the LWP;Protocol;http class in runtime/parrot/library/LWP/Protocol.pir
  3. Update this method to receive chunked transfers
  4. Build Parrot and run all tests to verify that nothing has broken
  5. Open a pull request on GitHub to have your work merged into Parrot

Links: https://github.com/parrot/parrot/blob/master/runtime/parrot/library/LWP/Protocol.pir, http://en.wikipedia.org/wiki/Chunked_transfer_encoding

Example: This is an example implementation of the algorithm in Winxed. Winxed is a language that compiles to PIR. You should be able to borrow and use ideas from this example in the LWP source code.

#! winxed
// ajax.winxed

// A simplified implementation of the W3C XMLHttpRequest specification.
// See: http://www.w3.org/TR/XMLHttpRequest/

//**********************************************************************

// Simplified XMLHttpRequest: performs a GET or HEAD request through
// Parrot's LWP;UserAgent and exposes the result through the readyState,
// status and responseText properties. The request itself is always
// performed synchronously inside send(); the async flag only controls
// whether the intermediate state changes are reported.
class XMLHttpRequest
{
    // Private
    // Internal usage variables and methods

    var ua; // Parrot LWP;UserAgent
    // HTTP method, URL and async mode specified in open.
    var method;
    var url;
    var async;
    // Constants for readyState values
    const int UNSENT = 0;
    const int OPENED = 1;
    const int HEADERS_RECEIVED = 2;
    const int LOADING = 3;
    const int DONE = 4;

    // Sets the initial property values and loads the LWP.UserAgent library.
    function init [vtable]()
    {
        self.readyState = UNSENT;
        self.status = 0;
        self.responseText = '';
        using extern LWP.UserAgent;
    }

    // Stores the new readyState value and calls the onreadystatechange
    // handler, if one has been set.
    function changeState(int state)
    {
        self.readyState =: state;
        var onreadystatechange = self.onreadystatechange;
        if (onreadystatechange != null)
            onreadystatechange();
    }

    // Decodes a body that uses HTTP chunked transfer encoding and returns
    // the concatenated chunk data.
    // Each chunk is: <hex length>[;extensions] CRLF <data> CRLF.
    // A chunk of length 0 ends the body; trailing headers are ignored.
    // Decoding stops quietly at the first line that does not start with a
    // chunk header, and a truncated final chunk contributes whatever data
    // is available.
    function decodeChunked(string body)
    {
        string rest = body;
        string decoded = '';
        int pos;
        while ((pos = indexof(rest, "\r\n")) > 0) {
            string sizeField = substr(rest, 0, pos);
            // Drop optional chunk extensions before parsing the length.
            int ext = indexof(sizeField, ';');
            if (ext > 0)
                sizeField = substr(sizeField, 0, ext);
            int size = sizeField.to_int(16);
            // A chunk of length 0 signals the end
            if (size == 0)
                break;

            int dataStart = pos + 2;
            int avail = length(rest) - dataStart;
            if (size >= avail) {
                // Truncated body, or last data without its closing CRLF:
                // keep what is there and stop.
                if (avail > 0)
                    decoded += substr(rest, dataStart, avail);
                break;
            }
            decoded += substr(rest, dataStart, size);

            // Skip the chunk data AND the CRLF that terminates it, so the
            // next iteration starts at the next chunk header. Without the
            // extra 2 the remainder starts with CRLF, indexof returns 0
            // and only the first chunk would ever be decoded.
            int next = dataStart + size + 2;
            if (next >= length(rest))
                break;
            rest = substr(rest, next);
        }
        return decoded;
    }

    // Public
    // Properties and methods defined by the specification.

    var readyState;
    var status;
    var responseText;
    var onreadystatechange;

    // Records the method, URL and async flag and moves to the OPENED state.
    // Throws an Error for any method other than GET or HEAD
    // (the method name is compared case-insensitively).
    function open(string method, string url, int async)
    {
        self.async = async;
        string m = upcase(method);
        if (m != 'GET' && m != 'HEAD')
            throw Error("Unsupported method '" + method + "'");

        self.method = m;
        self.url = url;
        self.changeState(OPENED);
    }

    // Performs the request prepared by open() and stores the status code
    // and response body. The data argument is not used.
    // Throws an Error('INVALID_STATE_ERR') unless the state is OPENED.
    function send(var data)
    {
        if (self.readyState != OPENED)
            throw Error('INVALID_STATE_ERR');
        string url = self.url;

        if (self.async)
            self.changeState(OPENED);

        self.ua = new LWP.UserAgent();
        var nargs = {
            'Connection' : 'close'
        };
        var resp;
        switch (self.method) {
          case 'GET':
            resp = self.ua.get(url, nargs:[named,flat]);
            break;
          case 'HEAD':
            resp = self.ua.head(url, nargs:[named,flat]);
            break;
        }

        // The whole response is already available here; in async mode the
        // intermediate states are reported one after the other.
        if (self.async) {
            self.changeState(HEADERS_RECEIVED);
            self.changeState(LOADING);
        }
        self.status =: resp.code();

        string rt = resp.content();

        // Check for chunked transfer encoding, LWP currently
        // does not handle it.
        // See http://en.wikipedia.org/wiki/Chunked_transfer_encoding
        var h = resp.headers();
        string s = h['Transfer-Encoding'];
        if (s == 'chunked')
            rt = self.decodeChunked(rt);

        self.responseText = rt;
        self.changeState(DONE);
    }
}

//**********************************************************************

// main function for testing.

// Invokable listener used as onreadystatechange handler: reports every
// state change of the request stored in 'req' and prints the response
// once readyState reaches 4.
class StChange
{
    var req;

    // Called when the object is invoked as a function.
    function invoke[vtable]()
    {
        var request = self.req;
        say('Status changed to ', request.readyState);
        // Nothing more to report until readyState is 4.
        if (request.readyState != 4)
            return;
        say('Status: ', request.status);
        self.showresp();
    }

    // Prints the response text of the stored request after a
    // "Response:" heading line.
    function showresp()
    {
        say("Response:");
        print(self.req.responseText);
    }
}

// Test driver.
// Usage: ajax.winxed [--async] [--method=GET|HEAD] URL
// Requests the URL with XMLHttpRequest and prints the response.
// With --async every state change is reported through a StChange
// handler; otherwise the response is printed once send() returns.
// Throws an Error when no URL is given.
function main(var argv)
{
    const string optAsync = 'async';
    const string optMethod = 'method';

    using extern Getopt.Obj;
    var getopts = new Getopt.Obj();
    getopts.notOptStop(1);
    getopts.push_string(optAsync);
    getopts.push_string(optMethod + '=s');
    // Discard the first element of argv before parsing the options.
    argv.shift();
    var opts = getopts.get_options(argv);

    int use_async = false;
    string method;
    if (opts[optMethod] != null)
        method = opts[optMethod];
    else
        method = 'GET';
    if (opts[optAsync] != null)
        use_async = true;

    int argc = elements(argv);
    // The error must actually be thrown: merely constructing it would let
    // execution continue and fail later on argv[0].
    if (argc < 1)
        throw Error('No URL');

    var req = new XMLHttpRequest();
    var stchange = new StChange();
    stchange.req = req;

    if (use_async)
        req.onreadystatechange = stchange;

    req.open(method, argv[0], use_async);
    req.send(null);
    if (! use_async)
        stchange.showresp();
}