From e5a9c7844fafce44105e064a1733dcb00cb46d5f Mon Sep 17 00:00:00 2001 From: Srivardhan Hebbar Date: Mon, 9 Nov 2015 15:52:11 -0800 Subject: [PATCH] ecore_con: add http_parser static lib. Summary: This lib would be used in efl_network_websocket. Signed-off-by: Srivardhan Hebbar Reviewers: cedric Reviewed By: cedric Differential Revision: https://phab.enlightenment.org/D3244 Signed-off-by: Cedric BAIL --- Makefile.am | 1 + licenses/COPYING.NGINX-MIT | 23 + src/Makefile_Ecore_Con.am | 12 +- src/static_libs/http-parser/AUTHORS | 41 + src/static_libs/http-parser/CONTRIBUTIONS | 4 + src/static_libs/http-parser/README.md | 183 + .../http-parser/contrib/parsertrace.c | 156 + .../http-parser/contrib/url_parser.c | 44 + src/static_libs/http-parser/http_parser.c | 2209 +++++++++++ src/static_libs/http-parser/http_parser.h | 318 ++ src/static_libs/http-parser/test.c | 3476 +++++++++++++++++ 11 files changed, 6466 insertions(+), 1 deletion(-) create mode 100644 licenses/COPYING.NGINX-MIT create mode 100644 src/static_libs/http-parser/AUTHORS create mode 100644 src/static_libs/http-parser/CONTRIBUTIONS create mode 100644 src/static_libs/http-parser/README.md create mode 100644 src/static_libs/http-parser/contrib/parsertrace.c create mode 100644 src/static_libs/http-parser/contrib/url_parser.c create mode 100644 src/static_libs/http-parser/http_parser.c create mode 100644 src/static_libs/http-parser/http_parser.h create mode 100644 src/static_libs/http-parser/test.c diff --git a/Makefile.am b/Makefile.am index 56c298cb0b..c1d0a84300 100644 --- a/Makefile.am +++ b/Makefile.am @@ -61,6 +61,7 @@ licenses/COPYING.GPL \ licenses/COPYING.BSD \ licenses/COPYING.SMALL \ licenses/COPYING.FTL \ +licenses/COPYING.NGINX-MIT \ old/ChangeLog.ecore \ old/ChangeLog.edje \ old/ChangeLog.eet \ diff --git a/licenses/COPYING.NGINX-MIT b/licenses/COPYING.NGINX-MIT new file mode 100644 index 0000000000..58010b3889 --- /dev/null +++ b/licenses/COPYING.NGINX-MIT @@ -0,0 +1,23 @@ +http_parser.c is based 
on src/http/ngx_http_parse.c from NGINX copyright +Igor Sysoev. + +Additional changes are licensed under the same terms as NGINX and +copyright Joyent, Inc. and other Node contributors. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. 
diff --git a/src/Makefile_Ecore_Con.am b/src/Makefile_Ecore_Con.am index d5badfc83a..25569ab295 100644 --- a/src/Makefile_Ecore_Con.am +++ b/src/Makefile_Ecore_Con.am @@ -53,6 +53,8 @@ lib/ecore_con/ecore_con_ssl.c \ lib/ecore_con/ecore_con_url.c \ lib/ecore_con/ecore_con_url_curl.c \ lib/ecore_con/ecore_con_url_curl.h \ +static_libs/http-parser/http_parser.c \ +static_libs/http-parser/http_parser.h \ lib/ecore_con/ecore_con_private.h EXTRA_DIST += lib/ecore_con/ecore_con_legacy.c @@ -78,9 +80,17 @@ lib_ecore_con_libecore_con_la_LIBADD = @ECORE_CON_LIBS@ @EVIL_LIBS@ lib_ecore_con_libecore_con_la_DEPENDENCIES = @ECORE_CON_INTERNAL_LIBS@ lib_ecore_con_libecore_con_la_LDFLAGS = @EFL_LTLIBRARY_FLAGS@ +lib_ecore_con_libecore_con_la_CPPFLAGS += -I$(top_srcdir)/src/static_libs/http-parser + EXTRA_DIST += \ tests/ecore_con/server.key \ -tests/ecore_con/server.pem +tests/ecore_con/server.pem \ +static_libs/http-parser/AUTHORS \ +static_libs/http-parser/CONTRIBUTIONS \ +static_libs/http-parser/README.md \ +static_libs/http-parser/test.c \ +static_libs/http-parser/contrib/parsertrace.c \ +static_libs/http-parser/contrib/url_parser.c ### Unit tests diff --git a/src/static_libs/http-parser/AUTHORS b/src/static_libs/http-parser/AUTHORS new file mode 100644 index 0000000000..92ee45cad6 --- /dev/null +++ b/src/static_libs/http-parser/AUTHORS @@ -0,0 +1,41 @@ +# Authors ordered by first contribution. 
+Ryan Dahl +Jeremy Hinegardner +Sergey Shepelev +Joe Damato +tomika +Phoenix Sol +Cliff Frey +Ewen Cheslack-Postava +Santiago Gala +Tim Becker +Jeff Terrace +Ben Noordhuis +Nathan Rajlich +Mark Nottingham +Aman Gupta +Tim Becker +Sean Cunningham +Peter Griess +Salman Haq +Cliff Frey +Jon Kolb +Fouad Mardini +Paul Querna +Felix Geisendörfer +koichik +Andre Caron +Ivo Raisr +James McLaughlin +David Gwynne +Thomas LE ROUX +Randy Rizun +Andre Louis Caron +Simon Zimmermann +Erik Dubbelboer +Martell Malone +Bertrand Paquet +BogDan Vatra +Peter Faiman +Corey Richardson +Tóth Tamás diff --git a/src/static_libs/http-parser/CONTRIBUTIONS b/src/static_libs/http-parser/CONTRIBUTIONS new file mode 100644 index 0000000000..11ba31e4b9 --- /dev/null +++ b/src/static_libs/http-parser/CONTRIBUTIONS @@ -0,0 +1,4 @@ +Contributors must agree to the Contributor License Agreement before patches +can be accepted. + +http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ diff --git a/src/static_libs/http-parser/README.md b/src/static_libs/http-parser/README.md new file mode 100644 index 0000000000..0bf5d359ac --- /dev/null +++ b/src/static_libs/http-parser/README.md @@ -0,0 +1,183 @@ +HTTP Parser +=========== + +[![Build Status](https://travis-ci.org/joyent/http-parser.png?branch=master)](https://travis-ci.org/joyent/http-parser) + +This is a parser for HTTP messages written in C. It parses both requests and +responses. The parser is designed to be used in performance HTTP +applications. It does not make any syscalls nor allocations, it does not +buffer data, it can be interrupted at anytime. Depending on your +architecture, it only requires about 40 bytes of data per message +stream (in a web server that is per connection). + +Features: + + * No dependencies + * Handles persistent streams (keep-alive). + * Decodes chunked encoding. + * Upgrade support + * Defends against buffer overflow attacks. 
+ +The parser extracts the following information from HTTP messages: + + * Header fields and values + * Content-Length + * Request method + * Response status code + * Transfer-Encoding + * HTTP version + * Request URL + * Message body + + +Usage +----- + +One `http_parser` object is used per TCP connection. Initialize the struct +using `http_parser_init()` and set the callbacks. That might look something +like this for a request parser: +```c +http_parser_settings settings; +settings.on_url = my_url_callback; +settings.on_header_field = my_header_field_callback; +/* ... */ + +http_parser *parser = malloc(sizeof(http_parser)); +http_parser_init(parser, HTTP_REQUEST); +parser->data = my_socket; +``` + +When data is received on the socket execute the parser and check for errors. + +```c +size_t len = 80*1024, nparsed; +char buf[len]; +ssize_t recved; + +recved = recv(fd, buf, len, 0); + +if (recved < 0) { + /* Handle error. */ +} + +/* Start up / continue the parser. + * Note we pass recved==0 to signal that EOF has been received. + */ +nparsed = http_parser_execute(parser, &settings, buf, recved); + +if (parser->upgrade) { + /* handle new protocol */ +} else if (nparsed != recved) { + /* Handle error. Usually just close the connection. */ +} +``` + +HTTP needs to know where the end of the stream is. For example, sometimes +servers send responses without Content-Length and expect the client to +consume input (for the body) until EOF. To tell http_parser about EOF, give +`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors +can still be encountered during an EOF, so one must still be prepared +to receive them. + +Scalar valued message information such as `status_code`, `method`, and the +HTTP version are stored in the parser structure. This data is only +temporarily stored in `http_parser` and gets reset on each new message. If +this information is needed later, copy it out of the structure during the +`headers_complete` callback. 
+ +The parser decodes the transfer-encoding for both requests and responses +transparently. That is, a chunked encoding is decoded before being sent to +the on_body callback. + + +The Special Problem of Upgrade +------------------------------ + +HTTP supports upgrading the connection to a different protocol. An +increasingly common example of this is the Web Socket protocol which sends +a request like + + GET /demo HTTP/1.1 + Upgrade: WebSocket + Connection: Upgrade + Host: example.com + Origin: http://example.com + WebSocket-Protocol: sample + +followed by non-HTTP data. + +(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more +information on the Web Socket protocol.) + +To support this, the parser will treat this as a normal HTTP message without a +body, issuing both on_headers_complete and on_message_complete callbacks. However +http_parser_execute() will stop parsing at the end of the headers and return. + +The user is expected to check if `parser->upgrade` has been set to 1 after +`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied +offset by the return value of `http_parser_execute()`. + + +Callbacks +--------- + +During the `http_parser_execute()` call, the callbacks set in +`http_parser_settings` will be executed. The parser maintains state and +never looks behind, so buffering the data is not necessary. If you need to +save certain data for later usage, you can do that from the callbacks. + +There are two types of callbacks: + +* notification `typedef int (*http_cb) (http_parser*);` + Callbacks: on_message_begin, on_headers_complete, on_message_complete. +* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` + Callbacks: (requests only) on_url, + (common) on_header_field, on_header_value, on_body; + +Callbacks must return 0 on success. Returning a non-zero value indicates +error to the parser, making it exit immediately. + +In case you parse HTTP message in chunks (i.e. 
`read()` request line +from socket, parse, read half headers, parse, etc) your data callbacks +may be called more than once. Http-parser guarantees that data pointer is only +valid for the lifetime of callback. You can also `read()` into a heap allocated +buffer to avoid copying memory around if this fits your application. + +Reading headers may be a tricky task if you read/parse headers partially. +Basically, you need to remember whether last header callback was field or value +and apply following logic: + + (on_header_field and on_header_value shortened to on_h_*) + ------------------------ ------------ -------------------------------------------- + | State (prev. callback) | Callback | Description/action | + ------------------------ ------------ -------------------------------------------- + | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | + | | | into it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_field | New header started. | + | | | Copy current name,value buffers to headers | + | | | list and allocate new buffer for new name | + ------------------------ ------------ -------------------------------------------- + | field | on_h_field | Previous name continues. Reallocate name | + | | | buffer and append callback data to it | + ------------------------ ------------ -------------------------------------------- + | field | on_h_value | Value for current header started. Allocate | + | | | new buffer and copy callback data to it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_value | Value continues. Reallocate value buffer | + | | | and append callback data to it | + ------------------------ ------------ -------------------------------------------- + + +Parsing URLs +------------ + +A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. 
+Users of this library may wish to use it to parse URLs constructed from +consecutive `on_url` callbacks. + +See examples of reading in headers: + +* [partial example](http://gist.github.com/155877) in C +* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C +* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript diff --git a/src/static_libs/http-parser/contrib/parsertrace.c b/src/static_libs/http-parser/contrib/parsertrace.c new file mode 100644 index 0000000000..c9bc71ec01 --- /dev/null +++ b/src/static_libs/http-parser/contrib/parsertrace.c @@ -0,0 +1,156 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +/* Dump what the parser finds to stdout as it happens */ + +#include "http_parser.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int on_message_begin(http_parser* _) { + (void)_; + printf("\n***MESSAGE BEGIN***\n\n"); + return 0; +} + +int on_headers_complete(http_parser* _) { + (void)_; + printf("\n***HEADERS COMPLETE***\n\n"); + return 0; +} + +int on_message_complete(http_parser* _) { + (void)_; + printf("\n***MESSAGE COMPLETE***\n\n"); + return 0; +} + +int on_url(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Url: %.*s\n", (int)length, at); + return 0; +} + +int on_header_field(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Header field: %.*s\n", (int)length, at); + return 0; +} + +int on_header_value(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Header value: %.*s\n", (int)length, at); + return 0; +} + +int on_body(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Body: %.*s\n", (int)length, at); + return 0; +} + +void usage(const char* name) { + fprintf(stderr, + "Usage: %s $type $filename\n" + " type: -x, where x is one of {r,b,q}\n" + " parses file as a Response, reQuest, or Both\n", + name); + exit(EXIT_FAILURE); +} + +int main(int argc, char* argv[]) { + enum http_parser_type file_type; + + if (argc != 3) { + usage(argv[0]); + } + + char* type = argv[1]; + if (type[0] != '-') { + usage(argv[0]); + } + + switch (type[1]) { + /* in the case of "-", type[1] will be NUL */ + case 'r': + file_type = HTTP_RESPONSE; + break; + case 'q': + file_type = HTTP_REQUEST; + break; + case 'b': + file_type = HTTP_BOTH; + break; + default: + usage(argv[0]); + } + + char* filename = argv[2]; + FILE* file = fopen(filename, "rb"); /* binary mode: the ftell() size below must equal the fread() count */ + if (file == NULL) { + perror("fopen"); + return EXIT_FAILURE; + } + + fseek(file, 0, SEEK_END); + long file_length = ftell(file); + if (file_length == -1) { + perror("ftell"); + return EXIT_FAILURE; + } + fseek(file, 0, SEEK_SET); + + char* data = 
malloc(file_length); + if (fread(data, 1, file_length, file) != (size_t)file_length) { + fprintf(stderr, "couldn't read entire file\n"); + free(data); + return EXIT_FAILURE; + } + + http_parser_settings settings; + memset(&settings, 0, sizeof(settings)); + settings.on_message_begin = on_message_begin; + settings.on_url = on_url; + settings.on_header_field = on_header_field; + settings.on_header_value = on_header_value; + settings.on_headers_complete = on_headers_complete; + settings.on_body = on_body; + settings.on_message_complete = on_message_complete; + + http_parser parser; + http_parser_init(&parser, file_type); + size_t nparsed = http_parser_execute(&parser, &settings, data, file_length); + free(data); + + if (nparsed != (size_t)file_length) { + fprintf(stderr, + "Error: %s (%s)\n", + http_errno_description(HTTP_PARSER_ERRNO(&parser)), + http_errno_name(HTTP_PARSER_ERRNO(&parser))); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/src/static_libs/http-parser/contrib/url_parser.c b/src/static_libs/http-parser/contrib/url_parser.c new file mode 100644 index 0000000000..b1f9c979f2 --- /dev/null +++ b/src/static_libs/http-parser/contrib/url_parser.c @@ -0,0 +1,44 @@ +#include "http_parser.h" +#include <stdio.h> +#include <string.h> + +void +dump_url (const char *url, const struct http_parser_url *u) /* print field_set, port and every field of u; each part is a slice of url */ +{ + unsigned int i; + + printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port); + for (i = 0; i < UF_MAX; i++) { + if ((u->field_set & (1 << i)) == 0) { + printf("\tfield_data[%u]: unset\n", i); + continue; + } + + printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\"\n", + i, + u->field_data[i].off, + u->field_data[i].len, + u->field_data[i].len, + url + u->field_data[i].off); + } +} + +int main(int argc, char ** argv) { + if (argc != 3) { + printf("Syntax : %s connect|get url\n", argv[0]); + return 1; + } + struct http_parser_url u; + int len = strlen(argv[2]); + int connect = strcmp("connect", argv[1]) == 0 ? 
1 : 0; + printf("Parsing %s, connect %d\n", argv[2], connect); + + int result = http_parser_parse_url(argv[2], len, connect, &u); + if (result != 0) { + printf("Parse error : %d\n", result); + return result; + } + printf("Parse ok, result : \n"); + dump_url(argv[2], &u); + return 0; +} \ No newline at end of file diff --git a/src/static_libs/http-parser/http_parser.c b/src/static_libs/http-parser/http_parser.c new file mode 100644 index 0000000000..9695525b2c --- /dev/null +++ b/src/static_libs/http-parser/http_parser.c @@ -0,0 +1,2209 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include "http_parser.h" +#include <assert.h> +#include <stddef.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#ifndef ULLONG_MAX +# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ +#endif + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#ifndef ELEM_AT +# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v)) +#endif + +#define SET_ERRNO(e) \ +do { \ + parser->http_errno = (e); \ +} while(0) + + +/* Run the notify callback FOR, returning ER if it fails */ +#define CALLBACK_NOTIFY_(FOR, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser)) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ + } \ + } \ +} while (0) + +/* Run the notify callback FOR and consume the current byte */ +#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) + +/* Run the notify callback FOR and don't consume the current byte */ +#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) + +/* Run data callback FOR with LEN bytes, returning ER if it fails */ +#define CALLBACK_DATA_(FOR, LEN, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (FOR##_mark) { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ + } \ + } \ + FOR##_mark = NULL; \ + } \ +} while (0) + +/* Run the data callback FOR and consume the current byte */ +#define CALLBACK_DATA(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + +/* Run 
the data callback FOR and don't consume the current byte */ +#define CALLBACK_DATA_NOADVANCE(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { +#define XX(num, name, string) #string, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1*<any CHAR except CTLs or separators> + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" | "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0, '!', 0, '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, '*', '+', 0, '-', '.', 0, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', 0, '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 
47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_first_http_major + , s_res_http_major + , s_res_first_http_minor + , s_res_http_minor + , s_res_first_status_code + , s_res_status_code + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_first_http_major + , s_req_http_major + , s_req_first_http_minor + , s_req_http_minor + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + 
, s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + + +#define PARSING_HEADER(state) (state <= s_headers_done) + + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_chunked + , h_matching_connection_keep_alive + , h_matching_connection_close + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + }; + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' 
|| (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#if HTTP_PARSER_STRICT +#define TOKEN(c) (tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) \ +do { \ + if (cond) { \ + SET_ERRNO(HPE_STRICT); \ + goto error; \ + } \ +} while (0) +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +/* Map errno values to strings for human-readable output */ +#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, +static struct { + const char *name; + const char *description; +} http_strerror_tab[] = { + HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) +}; +#undef HTTP_STRERROR_GEN + +int http_message_needs_eof(const http_parser *parser); + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. 
+ */ +static enum state +parse_url_char(enum state s, const char ch) /* returns the state reached after consuming ch in state s, or s_dead if ch is not accepted there */ +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { /* SP, CR and LF are rejected in every state */ + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { /* strict builds also reject TAB and form feed */ + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { /* still inside the scheme */ + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { /* an '@' directly after another '@' is invalid */ + return s_dead; + } + + /* FALLTHROUGH */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { /* remember the '@' so an immediately following one is rejected */ + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { /* '?' opens the query string, '#' the fragment */ + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' 
in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': /* a repeated '#' keeps us in s_req_fragment_start */ + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { /* '?' and '#' are tolerated inside a fragment */ + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + int8_t unhex_val; + const char *p = data; + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *url_mark = 0; + const char *body_mark = 0; + + /* We're in an error state. Don't bother doing anything. */ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return 0; + } + + if (len == 0) { + switch (parser->state) { + case s_body_identity_eof: + /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if + * we got paused. 
+ */ + CALLBACK_NOTIFY_NOADVANCE(message_complete); + return 0; + + case s_dead: + case s_start_req_or_res: + case s_start_res: + case s_start_req: + return 0; + + default: + SET_ERRNO(HPE_INVALID_EOF_STATE); + return 1; + } + } + + + if (parser->state == s_header_field) + header_field_mark = data; + if (parser->state == s_header_value) + header_value_mark = data; + switch (parser->state) { + case s_req_path: + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_server: + case s_req_server_with_at: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + url_mark = data; + break; + } + + for (p=data; p != data + len; p++) { + ch = *p; + + if (PARSING_HEADER(parser->state)) { + ++parser->nread; + /* Don't allow the total size of the HTTP headers (including the status + * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect + * embedders against denial-of-service attacks where the attacker feeds + * us a never-ending header that the embedder keeps buffering. + * + * This check is arguably the responsibility of embedders but we're doing + * it on the embedder's behalf because most won't bother and this way we + * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger + * than any reasonable request or response so this should never affect + * day-to-day operation. 
+ */ + if (parser->nread > HTTP_MAX_HEADER_SIZE) { + SET_ERRNO(HPE_HEADER_OVERFLOW); + goto error; + } + } + + reexecute_byte: + switch (parser->state) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + if (ch == CR || ch == LF) + break; + + SET_ERRNO(HPE_CLOSED_CONNECTION); + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + parser->state = s_res_or_resp_H; + + CALLBACK_NOTIFY(message_begin); + } else { + parser->type = HTTP_REQUEST; + parser->state = s_start_req; + goto reexecute_byte; + } + + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + parser->state = s_res_HT; + } else { + if (ch != 'E') { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + parser->index = 2; + parser->state = s_req_method; + } + break; + + case s_start_res: + { + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + switch (ch) { + case 'H': + parser->state = s_res_H; + break; + + case CR: + case LF: + break; + + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + CALLBACK_NOTIFY(message_begin); + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + parser->state = s_res_HT; + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + parser->state = s_res_HTT; + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + parser->state = s_res_HTTP; + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + parser->state = s_res_first_http_major; + break; + + case s_res_first_http_major: + if (ch < '0' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + parser->state = s_res_http_major; + break; + + /* major HTTP version or dot */ + case s_res_http_major: + { + if (ch == '.') { + parser->state = 
s_res_first_http_minor; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* first digit of minor HTTP version */ + case s_res_first_http_minor: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + parser->state = s_res_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_res_http_minor: + { + if (ch == ' ') { + parser->state = s_res_first_status_code; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + case s_res_first_status_code: + { + if (!IS_NUM(ch)) { + if (ch == ' ') { + break; + } + + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + parser->status_code = ch - '0'; + parser->state = s_res_status_code; + break; + } + + case s_res_status_code: + { + if (!IS_NUM(ch)) { + switch (ch) { + case ' ': + parser->state = s_res_status; + break; + case CR: + parser->state = s_res_line_almost_done; + break; + case LF: + parser->state = s_header_field_start; + break; + default: + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (parser->status_code > 999) { + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + + break; + } + + case s_res_status: + /* the human readable status. e.g. 
"NOT FOUND" + * we are not humans so just ignore this */ + if (ch == CR) { + parser->state = s_res_line_almost_done; + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + break; + } + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + parser->state = s_header_field_start; + CALLBACK_NOTIFY(status_complete); + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (!IS_ALPHA(ch)) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + parser->method = (enum http_method) 0; + parser->index = 1; + switch (ch) { + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; + case 'N': parser->method = HTTP_NOTIFY; break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; + /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ + break; + case 'R': parser->method = HTTP_REPORT; break; + case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + parser->state = s_req_method; + + CALLBACK_NOTIFY(message_begin); + + break; + } + + case s_req_method: + { + const char *matcher; + if (ch == '\0') { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + matcher = method_strings[parser->method]; + if (ch == ' ' && matcher[parser->index] == '\0') { + parser->state = s_req_spaces_before_url; + } else if (ch == matcher[parser->index]) { + ; /* nada */ + } else if (parser->method == HTTP_CONNECT) { + if (parser->index == 1 && ch == 'H') { + parser->method = 
HTTP_CHECKOUT; + } else if (parser->index == 2 && ch == 'P') { + parser->method = HTTP_COPY; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else if (parser->method == HTTP_MKCOL) { + if (parser->index == 1 && ch == 'O') { + parser->method = HTTP_MOVE; + } else if (parser->index == 1 && ch == 'E') { + parser->method = HTTP_MERGE; + } else if (parser->index == 1 && ch == '-') { + parser->method = HTTP_MSEARCH; + } else if (parser->index == 2 && ch == 'A') { + parser->method = HTTP_MKACTIVITY; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else if (parser->method == HTTP_SUBSCRIBE) { + if (parser->index == 1 && ch == 'E') { + parser->method = HTTP_SEARCH; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else if (parser->index == 1 && parser->method == HTTP_POST) { + if (ch == 'R') { + parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ + } else if (ch == 'U') { + parser->method = HTTP_PUT; /* or HTTP_PURGE */ + } else if (ch == 'A') { + parser->method = HTTP_PATCH; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else if (parser->index == 2) { + if (parser->method == HTTP_PUT) { + if (ch == 'R') { + parser->method = HTTP_PURGE; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else if (parser->method == HTTP_UNLOCK) { + if (ch == 'S') { + parser->method = HTTP_UNSUBSCRIBE; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { + parser->method = HTTP_PROPPATCH; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + ++parser->index; + break; + } + + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + MARK(url); + if (parser->method == HTTP_CONNECT) { + parser->state = s_req_server_start; + } + + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + 
SET_ERRNO(HPE_INVALID_URL); + goto error; + } + + break; + } + + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + { + switch (ch) { + /* No whitespace allowed here */ + case ' ': + case CR: + case LF: + SET_ERRNO(HPE_INVALID_URL); + goto error; + default: + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + + break; + } + + case s_req_server: + case s_req_server_with_at: + case s_req_path: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + { + switch (ch) { + case ' ': + parser->state = s_req_http_start; + CALLBACK_DATA(url); + break; + case CR: + case LF: + parser->http_major = 0; + parser->http_minor = 9; + parser->state = (ch == CR) ? + s_req_line_almost_done : + s_header_field_start; + CALLBACK_DATA(url); + break; + default: + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + parser->state = s_req_http_H; + break; + case ' ': + break; + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + parser->state = s_req_http_HT; + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + parser->state = s_req_http_HTT; + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + parser->state = s_req_http_HTTP; + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + parser->state = s_req_first_http_major; + break; + + /* first digit of major HTTP version */ + case s_req_first_http_major: + if (ch < '1' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + parser->state = s_req_http_major; + break; + + /* major HTTP version or dot */ + case s_req_http_major: + 
{ + if (ch == '.') { + parser->state = s_req_first_http_minor; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* first digit of minor HTTP version */ + case s_req_first_http_minor: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + parser->state = s_req_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_req_http_minor: + { + if (ch == CR) { + parser->state = s_req_line_almost_done; + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + break; + } + + /* XXX allow spaces after digit? */ + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (ch != LF) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + parser->state = s_header_field_start; + break; + } + + case s_header_field_start: + { + if (ch == CR) { + parser->state = s_headers_almost_done; + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + parser->state = s_headers_almost_done; + goto reexecute_byte; + } + + c = TOKEN(ch); + + if (!c) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + MARK(header_field); + + parser->index = 0; + parser->state = s_header_field; + + switch (c) { + case 'c': + parser->header_state = h_C; + break; + + case 'p': + parser->header_state = h_matching_proxy_connection; + break; + + case 't': + parser->header_state = h_matching_transfer_encoding; + break; + + case 'u': + parser->header_state = 
h_matching_upgrade; + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + c = TOKEN(ch); + + if (c) { + switch (parser->header_state) { + case h_general: + break; + + case h_C: + parser->index++; + parser->header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + parser->index++; + parser->header_state = (c == 'n' ? h_CON : h_general); + break; + + case h_CON: + parser->index++; + switch (c) { + case 'n': + parser->header_state = h_matching_connection; + break; + case 't': + parser->header_state = h_matching_content_length; + break; + default: + parser->header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + parser->index++; + if (parser->index > sizeof(CONNECTION)-1 + || c != CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + parser->index++; + if (parser->index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + parser->index++; + if (parser->index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { + parser->header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + parser->index++; + if (parser->index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { + parser->header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case 
h_matching_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE)-1 + || c != UPGRADE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(UPGRADE)-2) { + parser->header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') parser->header_state = h_general; + break; + + default: + assert(0 && "Unknown header_state"); + break; + } + break; + } + + if (ch == ':') { + parser->state = s_header_value_start; + CALLBACK_DATA(header_field); + break; + } + + if (ch == CR) { + parser->state = s_header_almost_done; + CALLBACK_DATA(header_field); + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + CALLBACK_DATA(header_field); + break; + } + + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + case s_header_value_start: + { + if (ch == ' ' || ch == '\t') break; + + MARK(header_value); + + parser->state = s_header_value; + parser->index = 0; + + if (ch == CR) { + parser->header_state = h_general; + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + CALLBACK_DATA(header_value); + break; + } + + c = LOWER(ch); + + switch (parser->header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + parser->header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + parser->header_state = h_matching_transfer_encoding_chunked; + } else { + parser->header_state = h_general; + } + break; + + case h_content_length: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = ch - '0'; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + parser->header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + 
parser->header_state = h_matching_connection_close; + } else { + parser->header_state = h_general; + } + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + + if (ch == CR) { + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + parser->state = s_header_almost_done; + CALLBACK_DATA_NOADVANCE(header_value); + goto reexecute_byte; + } + + c = LOWER(ch); + + switch (parser->header_state) { + case h_general: + break; + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + { + uint64_t t; + + if (ch == ' ') break; + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + t = parser->content_length; + t *= 10; + t += ch - '0'; + + /* Overflow? */ + if (t < parser->content_length || t == ULLONG_MAX) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CHUNKED)-2) { + parser->header_state = h_transfer_encoding_chunked; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + parser->header_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CLOSE)-2) { + parser->header_state = h_connection_close; + } + 
break; + + case h_transfer_encoding_chunked: + case h_connection_keep_alive: + case h_connection_close: + if (ch != ' ') parser->header_state = h_general; + break; + + default: + parser->state = s_header_value; + parser->header_state = h_general; + break; + } + break; + } + + case s_header_almost_done: + { + STRICT_CHECK(ch != LF); + + parser->state = s_header_value_lws; + + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + + break; + } + + case s_header_value_lws: + { + if (ch == ' ' || ch == '\t') + parser->state = s_header_value_start; + else + { + parser->state = s_header_field_start; + goto reexecute_byte; + } + break; + } + + case s_headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + break; + } + + parser->state = s_headers_done; + + /* Set this here so that on_headers_complete() callbacks can see it */ + parser->upgrade = + (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of receiving a response to a HEAD + * request. + * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. 
+ */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + SET_ERRNO(HPE_CB_headers_complete); + return p - data; /* Error */ + } + } + + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return p - data; + } + + goto reexecute_byte; + } + + case s_headers_done: + { + STRICT_CHECK(ch != LF); + + parser->nread = 0; + + /* Exit, the rest of the connect is in a different protocol. */ + if (parser->upgrade) { + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + return (p - data) + 1; + } + + if (parser->flags & F_SKIPBODY) { + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + parser->state = s_chunk_size_start; + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else if (parser->content_length != ULLONG_MAX) { + /* Content-Length header given and non-zero */ + parser->state = s_body_identity; + } else { + if (parser->type == HTTP_REQUEST || + !http_message_needs_eof(parser)) { + /* Assume content-length 0 - read the next */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else { + /* Read body until EOF */ + parser->state = s_body_identity_eof; + } + } + } + + break; + } + + case s_body_identity: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* The difference between advancing content_length and p is because + * the latter will automatically advance on the next loop iteration. + * Further, if content_length ends up at 0, we want to see the last + * byte again for our message complete callback. 
+ */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + parser->state = s_message_done; + + /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. + * + * The alternative to doing this is to wait for the next byte to + * trigger the data callback, just as in every other case. The + * problem with this is that this makes it difficult for the test + * harness to distinguish between complete-on-EOF and + * complete-on-length. It's not clear that this distinction is + * important for applications, but let's keep it for now. + */ + CALLBACK_DATA_(body, p - body_mark + 1, p - data); + goto reexecute_byte; + } + + break; + } + + /* read until EOF */ + case s_body_identity_eof: + MARK(body); + p = data + len - 1; + + break; + + case s_message_done: + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + break; + + case s_chunk_size_start: + { + assert(parser->nread == 1); + assert(parser->flags & F_CHUNKED); + + unhex_val = unhex[(unsigned char)ch]; + if (unhex_val == -1) { + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + parser->content_length = unhex_val; + parser->state = s_chunk_size; + break; + } + + case s_chunk_size: + { + uint64_t t; + + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + parser->state = s_chunk_size_almost_done; + break; + } + + unhex_val = unhex[(unsigned char)ch]; + + if (unhex_val == -1) { + if (ch == ';' || ch == ' ') { + parser->state = s_chunk_parameters; + break; + } + + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + t = parser->content_length; + t *= 16; + t += unhex_val; + + /* Overflow? */ + if (t < parser->content_length || t == ULLONG_MAX) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. 
TODO check for overflow */ + if (ch == CR) { + parser->state = s_chunk_size_almost_done; + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + parser->nread = 0; + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + parser->state = s_header_field_start; + } else { + parser->state = s_chunk_data; + } + break; + } + + case s_chunk_data: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->flags & F_CHUNKED); + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* See the explanation in s_body_identity for why the content + * length and data pointers are managed this way. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + parser->state = s_chunk_data_almost_done; + } + + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + assert(parser->content_length == 0); + STRICT_CHECK(ch != CR); + parser->state = s_chunk_data_done; + CALLBACK_DATA(body); + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + parser->nread = 0; + parser->state = s_chunk_size_start; + break; + + default: + assert(0 && "unhandled state"); + SET_ERRNO(HPE_INVALID_INTERNAL_STATE); + goto error; + } + } + + /* Run callbacks for any marks that we have leftover after we ran out of + * bytes. There should be at most one of these set, so it's OK to invoke + * them in series (unset marks will not result in callbacks). + * + * We use the NOADVANCE() variety of callbacks here because 'p' has already + * overflowed 'data' and this allows us to correct for the off-by-one that + * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' + * value that's in-bounds). + */ + + assert(((header_field_mark ? 1 : 0) + + (header_value_mark ? 1 : 0) + + (url_mark ? 
1 : 0) + + (body_mark ? 1 : 0)) <= 1); + + CALLBACK_DATA_NOADVANCE(header_field); + CALLBACK_DATA_NOADVANCE(header_value); + CALLBACK_DATA_NOADVANCE(url); + CALLBACK_DATA_NOADVANCE(body); + + return len; + +error: + if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { + SET_ERRNO(HPE_UNKNOWN); + } + + return (p - data); +} + + +/* Does the parser need to see an EOF to find the end of the message? + * + * Returns 0 for requests, for 1xx/204/304 responses, when F_SKIPBODY is set, + * and when the body is delimited by chunking or a Content-Length; returns 1 + * otherwise. + */ +int +http_message_needs_eof (const http_parser *parser) +{ + if (parser->type == HTTP_REQUEST) { + return 0; + } + + /* See RFC 2616 section 4.4 */ + if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 || /* Not Modified */ + parser->flags & F_SKIPBODY) { /* response to a HEAD request */ + return 0; + } + + /* content_length == ULLONG_MAX means no Content-Length header was seen */ + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { + return 0; + } + + return 1; +} + + +/* Returns non-zero when the connection can carry another message after the + * current one, 0 when it has to be closed. + * + * With both version numbers above zero the connection is kept unless + * F_CONNECTION_CLOSE was recorded; otherwise it is kept only if + * F_CONNECTION_KEEP_ALIVE was recorded. In either case a message that is + * terminated by EOF rules out reuse. + */ +int +http_should_keep_alive (const http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } + } else { + /* HTTP/1.0 or earlier */ + if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { + return 0; + } + } + + return !http_message_needs_eof(parser); +} + + +/* Returns the method_strings entry for m. ELEM_AT is defined outside this + * section; presumably it yields the "" argument when m is out of range - + * TODO confirm against the macro. + */ +const char * +http_method_str (enum http_method m) +{ + return ELEM_AT(method_strings, m, ""); +} + + +/* Reset parser for a new stream of type t. + * + * Every field is zeroed except parser->data, which is carried over. The + * initial state is chosen from t: s_start_req for HTTP_REQUEST, s_start_res + * for HTTP_RESPONSE, s_start_req_or_res for anything else. + */ +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + void *data = parser->data; /* preserve application data */ + memset(parser, 0, sizeof(*parser)); + parser->data = data; + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); + parser->http_errno = HPE_OK; +} + +/* Returns the "HPE_..." name recorded for err in http_strerror_tab. + * err must index into the table; this is checked by assert() only. + */ +const char * +http_errno_name(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].name; +} + +/* Returns the description recorded for err in http_strerror_tab. + * err must index into the table; this is checked by assert() only. + */ +const char * +http_errno_description(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].description; +} + +/* One step of the host state machine: given the current state s and the next + * character ch, return the following state, or s_http_host_dead when ch is + * not acceptable in s. + */ +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* FALLTHROUGH */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + u->field_data[UF_HOST].len = 0; + + s = found_at ? 
s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = p - buf; + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = p - buf ; + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + u->port = u->field_set = 0; + s = is_connect ? 
s_req_server_start : s_req_spaces_before_url; + uf = old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* FALLTROUGH */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + /* Don't bother with endp; we've already validated the string */ + unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + + u->port = (uint16_t) v; + } + + return 0; +} + +void +http_parser_pause(http_parser *parser, int paused) { + /* Users should only be pausing/unpausing a parser that is not in an error + * state. In non-debug builds, there's not much that we can do about this + * other than ignore it. 
+ */ + if (HTTP_PARSER_ERRNO(parser) == HPE_OK || + HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { + SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); + } else { + assert(0 && "Attempting to pause parser in error state"); + } +} + +int +http_body_is_final(const struct http_parser *parser) { + return parser->state == s_message_done; +} + +unsigned long +http_parser_version(void) { + return HTTP_PARSER_VERSION_MAJOR * 0x10000 | + HTTP_PARSER_VERSION_MINOR * 0x00100 | + HTTP_PARSER_VERSION_PATCH * 0x00001; +} diff --git a/src/static_libs/http-parser/http_parser.h b/src/static_libs/http-parser/http_parser.h new file mode 100644 index 0000000000..4810cdcd24 --- /dev/null +++ b/src/static_libs/http-parser/http_parser.h @@ -0,0 +1,318 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
/* Library version; http_parser_version() packs these three numbers.
 * Also update SONAME in the Makefile whenever you change these.
 */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 1
#define HTTP_PARSER_VERSION_PATCH 0

/* size_t and the fixed-width integer types used by the declarations below.
 * On Windows with a non-MinGW compiler older than _MSC_VER 1600 the integer
 * types are spelled out by hand instead of coming from <stdint.h>.
 *
 * NOTE(review): the header names on the #include lines were missing in this
 * copy of the patch and are restored from upstream http-parser; confirm
 * against the original commit.
 */
#include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
#include <BaseTsd.h>
#include <stddef.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

/* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run
 * faster
 */
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#endif

/* Maximum header size allowed */
#define HTTP_MAX_HEADER_SIZE (80*1024)


/* Short names for the two public structs, defined further down. */
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;
+ */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); + + +/* Request Methods */ +#define HTTP_METHOD_MAP(XX) \ + XX(0, DELETE, DELETE) \ + XX(1, GET, GET) \ + XX(2, HEAD, HEAD) \ + XX(3, POST, POST) \ + XX(4, PUT, PUT) \ + /* pathological */ \ + XX(5, CONNECT, CONNECT) \ + XX(6, OPTIONS, OPTIONS) \ + XX(7, TRACE, TRACE) \ + /* webdav */ \ + XX(8, COPY, COPY) \ + XX(9, LOCK, LOCK) \ + XX(10, MKCOL, MKCOL) \ + XX(11, MOVE, MOVE) \ + XX(12, PROPFIND, PROPFIND) \ + XX(13, PROPPATCH, PROPPATCH) \ + XX(14, SEARCH, SEARCH) \ + XX(15, UNLOCK, UNLOCK) \ + /* subversion */ \ + XX(16, REPORT, REPORT) \ + XX(17, MKACTIVITY, MKACTIVITY) \ + XX(18, CHECKOUT, CHECKOUT) \ + XX(19, MERGE, MERGE) \ + /* upnp */ \ + XX(20, MSEARCH, M-SEARCH) \ + XX(21, NOTIFY, NOTIFY) \ + XX(22, SUBSCRIBE, SUBSCRIBE) \ + XX(23, UNSUBSCRIBE, UNSUBSCRIBE) \ + /* RFC-5789 */ \ + XX(24, PATCH, PATCH) \ + XX(25, PURGE, PURGE) \ + +enum http_method + { +#define XX(num, name, string) HTTP_##name = num, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + +/* Flag values for http_parser.flags field */ +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_TRAILING = 1 << 3 + , F_UPGRADE = 1 << 4 + , F_SKIPBODY = 1 << 5 + }; + + +/* Map for errno-related constants + * + * The provided argument should be a macro that takes 2 arguments. 
/* Map for errno-related constants
 *
 * The provided argument should be a macro that takes 2 arguments: the
 * error's name suffix and its description string.
 */
#define HTTP_ERRNO_MAP(XX) \
  /* No error */ \
  XX(OK, "success") \
  \
  /* Callback-related errors */ \
  XX(CB_message_begin, "the on_message_begin callback failed") \
  XX(CB_status_complete, "the on_status_complete callback failed") \
  XX(CB_url, "the on_url callback failed") \
  XX(CB_header_field, "the on_header_field callback failed") \
  XX(CB_header_value, "the on_header_value callback failed") \
  XX(CB_headers_complete, "the on_headers_complete callback failed") \
  XX(CB_body, "the on_body callback failed") \
  XX(CB_message_complete, "the on_message_complete callback failed") \
  \
  /* Parsing-related errors */ \
  XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \
  XX(HEADER_OVERFLOW, "too many header bytes seen; overflow detected") \
  XX(CLOSED_CONNECTION, "data received after completed connection: close message") \
  XX(INVALID_VERSION, "invalid HTTP version") \
  XX(INVALID_STATUS, "invalid HTTP status code") \
  XX(INVALID_METHOD, "invalid HTTP method") \
  XX(INVALID_URL, "invalid URL") \
  XX(INVALID_HOST, "invalid host") \
  XX(INVALID_PORT, "invalid port") \
  XX(INVALID_PATH, "invalid path") \
  XX(INVALID_QUERY_STRING, "invalid query string") \
  XX(INVALID_FRAGMENT, "invalid fragment") \
  XX(LF_EXPECTED, "LF character expected") \
  XX(INVALID_HEADER_TOKEN, "invalid character in header") \
  XX(INVALID_CONTENT_LENGTH, "invalid character in content-length header") \
  XX(INVALID_CHUNK_SIZE, "invalid character in chunk size header") \
  XX(INVALID_CONSTANT, "invalid constant string") \
  XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state") \
  XX(STRICT, "strict mode assertion failed") \
  XX(PAUSED, "parser is paused") \
  XX(UNKNOWN, "an unknown error occurred")


/* Define HPE_* values for each errno value above, numbered in table order
 * starting from HPE_OK = 0.
 */
#define HTTP_ERRNO_GEN(n, s) HPE_##n,
enum http_errno {
  HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
};
#undef HTTP_ERRNO_GEN
/* Get an http_errno value from an http_parser: reads the http_errno
 * bit-field and casts it back to enum http_errno.
 */
#define HTTP_PARSER_ERRNO(p)            ((enum http_errno) (p)->http_errno)


/* Parser instance. The members are grouped by who is meant to touch them
 * (PRIVATE / READ-ONLY / PUBLIC markers below). Several members are
 * bit-fields, so their order and widths are part of the layout and must not
 * be rearranged.
 */
struct http_parser {
  /** PRIVATE **/
  unsigned int type : 2;         /* enum http_parser_type */
  unsigned int flags : 6;        /* F_* values from 'flags' enum; semi-public */
  unsigned int state : 8;        /* enum state from http_parser.c */
  unsigned int header_state : 8; /* enum header_state from http_parser.c */
  unsigned int index : 8;        /* index into current matcher */

  uint32_t nread;          /* # bytes read in various scenarios */
  uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */

  /** READ-ONLY **/
  unsigned short http_major;
  unsigned short http_minor;
  unsigned int status_code : 16; /* responses only */
  unsigned int method : 8;       /* requests only */
  unsigned int http_errno : 7;   /* enum http_errno; read via HTTP_PARSER_ERRNO() */

  /* 1 = Upgrade header was present and the parser has exited because of that.
   * 0 = No upgrade header present.
   * Should be checked when http_parser_execute() returns in addition to
   * error checking.
   */
  unsigned int upgrade : 1;

  /** PUBLIC **/
  void *data; /* A pointer to get hook to the "connection" or "socket" object */
};


/* Callback table passed to http_parser_execute(). Members of type
 * http_data_cb receive a data span (`at`, `length`); members of type http_cb
 * receive only the parser.
 */
struct http_parser_settings {
  http_cb      on_message_begin;
  http_data_cb on_url;
  http_cb      on_status_complete;
  http_data_cb on_header_field;
  http_data_cb on_header_value;
  http_cb      on_headers_complete;
  http_data_cb on_body;
  http_cb      on_message_complete;
};


/* URL components. Each value is both an index into
 * http_parser_url.field_data[] and the bit position (1 << UF_*) used in
 * http_parser_url.field_set; UF_MAX is the number of components.
 */
enum http_parser_url_fields
  { UF_SCHEMA           = 0
  , UF_HOST             = 1
  , UF_PORT             = 2
  , UF_PATH             = 3
  , UF_QUERY            = 4
  , UF_FRAGMENT         = 5
  , UF_USERINFO         = 6
  , UF_MAX              = 7
  };
/* Result structure for http_parser_parse_url().
 *
 * Callers should index into field_data[] with UF_* values iff field_set
 * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
 * because we probably have padding left over), we convert any port to
 * a uint16_t.
 */
struct http_parser_url {
  uint16_t field_set;           /* Bitmask of (1 << UF_*) values */
  uint16_t port;                /* Converted UF_PORT string */

  struct {
    uint16_t off;               /* Offset into buffer in which field starts */
    uint16_t len;               /* Length of run in buffer */
  } field_data[UF_MAX];
};


/* Returns the library version. Bits 16-23 contain the major version number,
 * bits 8-15 the minor version number and bits 0-7 the patch level.
 * Usage example:
 *
 *   unsigned long version = http_parser_version();
 *   unsigned major = (version >> 16) & 255;
 *   unsigned minor = (version >> 8) & 255;
 *   unsigned patch = version & 255;
 *   printf("http_parser v%u.%u.%u\n", major, minor, patch);
 */
unsigned long http_parser_version(void);

/* Prepare `parser` for messages of the given `type`. */
void http_parser_init(http_parser *parser, enum http_parser_type type);


/* Feed `len` bytes at `data` to `parser`, invoking the callbacks in
 * `settings`.
 * NOTE(review): the return value is presumably the number of bytes consumed;
 * confirm against the implementation, which is outside this excerpt.
 */
size_t http_parser_execute(http_parser *parser,
                           const http_parser_settings *settings,
                           const char *data,
                           size_t len);


/* If http_should_keep_alive() in the on_headers_complete or
 * on_message_complete callback returns 0, then this should be
 * the last message on the connection.
 * If you are the server, respond with the "Connection: close" header.
 * If you are the client, close the connection.
 */
int http_should_keep_alive(const http_parser *parser);

/* Returns a string version of the HTTP method. */
const char *http_method_str(enum http_method m);

/* Return a string name of the given error */
const char *http_errno_name(enum http_errno err);

/* Return a string description of the given error */
const char *http_errno_description(enum http_errno err);

/* Parse a URL; return nonzero on failure */
int http_parser_parse_url(const char *buf, size_t buflen,
                          int is_connect,
                          struct http_parser_url *u);

/* Pause or un-pause the parser; a nonzero value pauses */
void http_parser_pause(http_parser *parser, int paused);
*/ +int http_body_is_final(const http_parser *parser); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/static_libs/http-parser/test.c b/src/static_libs/http-parser/test.c new file mode 100644 index 0000000000..656bc9f86f --- /dev/null +++ b/src/static_libs/http-parser/test.c @@ -0,0 +1,3476 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include +#include +#include /* rand */ +#include +#include + +#undef TRUE +#define TRUE 1 +#undef FALSE +#define FALSE 0 + +#define MAX_HEADERS 13 +#define MAX_ELEMENT_SIZE 2048 + +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) + +static http_parser *parser; + +struct message { + const char *name; // for debugging purposes + const char *raw; + enum http_parser_type type; + enum http_method method; + int status_code; + char request_path[MAX_ELEMENT_SIZE]; + char request_url[MAX_ELEMENT_SIZE]; + char fragment[MAX_ELEMENT_SIZE]; + char query_string[MAX_ELEMENT_SIZE]; + char body[MAX_ELEMENT_SIZE]; + size_t body_size; + const char *host; + const char *userinfo; + uint16_t port; + int num_headers; + enum { NONE=0, FIELD, VALUE } last_header_element; + char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE]; + int should_keep_alive; + + const char *upgrade; // upgraded body + + unsigned short http_major; + unsigned short http_minor; + + int message_begin_cb_called; + int headers_complete_cb_called; + int message_complete_cb_called; + int message_complete_on_eof; + int body_is_final; +}; + +static int currently_parsing_eof; + +static struct message messages[5]; +static int num_messages; +static http_parser_settings *current_pause_parser; + +/* * R E Q U E S T S * */ +const struct message requests[] = +#define CURL_GET 0 +{ {.name= "curl get" + ,.type= HTTP_REQUEST + ,.raw= "GET /test HTTP/1.1\r\n" + "User-Agent: curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1\r\n" + "Host: 0.0.0.0=5000\r\n" + "Accept: */*\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 3 + ,.headers= + { { "User-Agent", "curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1" } + , { "Host", "0.0.0.0=5000" } + , { "Accept", "*/*" } + } + ,.body= "" + } + +#define FIREFOX_GET 1 +, {.name= "firefox get" + ,.type= HTTP_REQUEST + ,.raw= "GET /favicon.ico HTTP/1.1\r\n" + "Host: 0.0.0.0=5000\r\n" + "User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 
Firefox/3.0\r\n" + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" + "Accept-Language: en-us,en;q=0.5\r\n" + "Accept-Encoding: gzip,deflate\r\n" + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" + "Keep-Alive: 300\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/favicon.ico" + ,.request_url= "/favicon.ico" + ,.num_headers= 8 + ,.headers= + { { "Host", "0.0.0.0=5000" } + , { "User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0" } + , { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" } + , { "Accept-Language", "en-us,en;q=0.5" } + , { "Accept-Encoding", "gzip,deflate" } + , { "Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7" } + , { "Keep-Alive", "300" } + , { "Connection", "keep-alive" } + } + ,.body= "" + } + +#define DUMBFUCK 2 +, {.name= "dumbfuck" + ,.type= HTTP_REQUEST + ,.raw= "GET /dumbfuck HTTP/1.1\r\n" + "aaaaaaaaaaaaa:++++++++++\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/dumbfuck" + ,.request_url= "/dumbfuck" + ,.num_headers= 1 + ,.headers= + { { "aaaaaaaaaaaaa", "++++++++++" } + } + ,.body= "" + } + +#define FRAGMENT_IN_URI 3 +, {.name= "fragment in url" + ,.type= HTTP_REQUEST + ,.raw= "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "page=1" + ,.fragment= "posts-17408" + ,.request_path= "/forums/1/topics/2375" + /* XXX request url does include fragment? 
*/ + ,.request_url= "/forums/1/topics/2375?page=1#posts-17408" + ,.num_headers= 0 + ,.body= "" + } + +#define GET_NO_HEADERS_NO_BODY 4 +, {.name= "get no headers no body" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_no_headers_no_body/world HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE /* would need Connection: close */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_no_headers_no_body/world" + ,.request_url= "/get_no_headers_no_body/world" + ,.num_headers= 0 + ,.body= "" + } + +#define GET_ONE_HEADER_NO_BODY 5 +, {.name= "get one header no body" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_one_header_no_body HTTP/1.1\r\n" + "Accept: */*\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE /* would need Connection: close */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_one_header_no_body" + ,.request_url= "/get_one_header_no_body" + ,.num_headers= 1 + ,.headers= + { { "Accept" , "*/*" } + } + ,.body= "" + } + +#define GET_FUNKY_CONTENT_LENGTH 6 +, {.name= "get funky content length body hello" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_funky_content_length_body_hello HTTP/1.0\r\n" + "conTENT-Length: 5\r\n" + "\r\n" + "HELLO" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_funky_content_length_body_hello" + ,.request_url= "/get_funky_content_length_body_hello" + ,.num_headers= 1 + ,.headers= + { { "conTENT-Length" , "5" } + } + ,.body= "HELLO" + } + +#define POST_IDENTITY_BODY_WORLD 7 +, {.name= "post identity body world" + ,.type= HTTP_REQUEST + ,.raw= "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n" + "Accept: */*\r\n" + "Transfer-Encoding: identity\r\n" + "Content-Length: 5\r\n" + "\r\n" + "World" + ,.should_keep_alive= TRUE 
+ ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "q=search" + ,.fragment= "hey" + ,.request_path= "/post_identity_body_world" + ,.request_url= "/post_identity_body_world?q=search#hey" + ,.num_headers= 3 + ,.headers= + { { "Accept", "*/*" } + , { "Transfer-Encoding", "identity" } + , { "Content-Length", "5" } + } + ,.body= "World" + } + +#define POST_CHUNKED_ALL_YOUR_BASE 8 +, {.name= "post - chunked body: all your base are belong to us" + ,.type= HTTP_REQUEST + ,.raw= "POST /post_chunked_all_your_base HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "1e\r\nall your base are belong to us\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/post_chunked_all_your_base" + ,.request_url= "/post_chunked_all_your_base" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding" , "chunked" } + } + ,.body= "all your base are belong to us" + } + +#define TWO_CHUNKS_MULT_ZERO_END 9 +, {.name= "two chunks ; triple zero ending" + ,.type= HTTP_REQUEST + ,.raw= "POST /two_chunks_mult_zero_end HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5\r\nhello\r\n" + "6\r\n world\r\n" + "000\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/two_chunks_mult_zero_end" + ,.request_url= "/two_chunks_mult_zero_end" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body= "hello world" + } + +#define CHUNKED_W_TRAILING_HEADERS 10 +, {.name= "chunked with trailing headers. blech." 
+ ,.type= HTTP_REQUEST + ,.raw= "POST /chunked_w_trailing_headers HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5\r\nhello\r\n" + "6\r\n world\r\n" + "0\r\n" + "Vary: *\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/chunked_w_trailing_headers" + ,.request_url= "/chunked_w_trailing_headers" + ,.num_headers= 3 + ,.headers= + { { "Transfer-Encoding", "chunked" } + , { "Vary", "*" } + , { "Content-Type", "text/plain" } + } + ,.body= "hello world" + } + +#define CHUNKED_W_BULLSHIT_AFTER_LENGTH 11 +, {.name= "with bullshit after the length" + ,.type= HTTP_REQUEST + ,.raw= "POST /chunked_w_bullshit_after_length HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5; ihatew3;whatthefuck=aretheseparametersfor\r\nhello\r\n" + "6; blahblah; blah\r\n world\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/chunked_w_bullshit_after_length" + ,.request_url= "/chunked_w_bullshit_after_length" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body= "hello world" + } + +#define WITH_QUOTES 12 +, {.name= "with quotes" + ,.type= HTTP_REQUEST + ,.raw= "GET /with_\"stupid\"_quotes?foo=\"bar\" HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "foo=\"bar\"" + ,.fragment= "" + ,.request_path= "/with_\"stupid\"_quotes" + ,.request_url= "/with_\"stupid\"_quotes?foo=\"bar\"" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define APACHEBENCH_GET 13 +/* The server receiving this request SHOULD NOT wait for EOF + * to know that content-length == 0. + * How to represent this in a unit test? 
message_complete_on_eof + * Compare with NO_CONTENT_LENGTH_RESPONSE. + */ +, {.name = "apachebench get" + ,.type= HTTP_REQUEST + ,.raw= "GET /test HTTP/1.0\r\n" + "Host: 0.0.0.0:5000\r\n" + "User-Agent: ApacheBench/2.3\r\n" + "Accept: */*\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 3 + ,.headers= { { "Host", "0.0.0.0:5000" } + , { "User-Agent", "ApacheBench/2.3" } + , { "Accept", "*/*" } + } + ,.body= "" + } + +#define QUERY_URL_WITH_QUESTION_MARK_GET 14 +/* Some clients include '?' characters in query strings. + */ +, {.name = "query url with question mark" + ,.type= HTTP_REQUEST + ,.raw= "GET /test.cgi?foo=bar?baz HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "foo=bar?baz" + ,.fragment= "" + ,.request_path= "/test.cgi" + ,.request_url= "/test.cgi?foo=bar?baz" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define PREFIX_NEWLINE_GET 15 +/* Some clients, especially after a POST in a keep-alive connection, + * will send an extra CRLF before the next request + */ +, {.name = "newline prefix get" + ,.type= HTTP_REQUEST + ,.raw= "\r\nGET /test HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define UPGRADE_REQUEST 16 +, {.name = "upgrade request" + ,.type= HTTP_REQUEST + ,.raw= "GET /demo HTTP/1.1\r\n" + "Host: example.com\r\n" + "Connection: Upgrade\r\n" + "Sec-WebSocket-Key2: 12998 5 Y3 1 .P00\r\n" + "Sec-WebSocket-Protocol: sample\r\n" + "Upgrade: WebSocket\r\n" + "Sec-WebSocket-Key1: 4 @1 46546xW%0l 1 5\r\n" + "Origin: 
http://example.com\r\n" + "\r\n" + "Hot diggity dogg" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 7 + ,.upgrade="Hot diggity dogg" + ,.headers= { { "Host", "example.com" } + , { "Connection", "Upgrade" } + , { "Sec-WebSocket-Key2", "12998 5 Y3 1 .P00" } + , { "Sec-WebSocket-Protocol", "sample" } + , { "Upgrade", "WebSocket" } + , { "Sec-WebSocket-Key1", "4 @1 46546xW%0l 1 5" } + , { "Origin", "http://example.com" } + } + ,.body= "" + } + +#define CONNECT_REQUEST 17 +, {.name = "connect request" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT 0-home0.netscape.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + "some data\r\n" + "and yet even more data" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "0-home0.netscape.com:443" + ,.num_headers= 2 + ,.upgrade="some data\r\nand yet even more data" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } + +#define REPORT_REQ 18 +, {.name= "report request" + ,.type= HTTP_REQUEST + ,.raw= "REPORT /test HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_REPORT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define NO_HTTP_VERSION 19 +, {.name= "request with no http version" + ,.type= HTTP_REQUEST + ,.raw= "GET /\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 0 + ,.http_minor= 9 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + 
,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define MSEARCH_REQ 20 +, {.name= "m-search request" + ,.type= HTTP_REQUEST + ,.raw= "M-SEARCH * HTTP/1.1\r\n" + "HOST: 239.255.255.250:1900\r\n" + "MAN: \"ssdp:discover\"\r\n" + "ST: \"ssdp:all\"\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_MSEARCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "*" + ,.request_url= "*" + ,.num_headers= 3 + ,.headers= { { "HOST", "239.255.255.250:1900" } + , { "MAN", "\"ssdp:discover\"" } + , { "ST", "\"ssdp:all\"" } + } + ,.body= "" + } + +#define LINE_FOLDING_IN_HEADER 21 +, {.name= "line folding in header value" + ,.type= HTTP_REQUEST + ,.raw= "GET / HTTP/1.1\r\n" + "Line1: abc\r\n" + "\tdef\r\n" + " ghi\r\n" + "\t\tjkl\r\n" + " mno \r\n" + "\t \tqrs\r\n" + "Line2: \t line2\t\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 2 + ,.headers= { { "Line1", "abcdefghijklmno qrs" } + , { "Line2", "line2\t" } + } + ,.body= "" + } + + +#define QUERY_TERMINATED_HOST 22 +, {.name= "host terminated by a query string" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org?hail=all HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org?hail=all" + ,.host= "hypnotoad.org" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define QUERY_TERMINATED_HOSTPORT 23 +, {.name= "host:port terminated by a query string" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org:1234?hail=all HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 
1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org:1234?hail=all" + ,.host= "hypnotoad.org" + ,.port= 1234 + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define SPACE_TERMINATED_HOSTPORT 24 +, {.name= "host:port terminated by a space" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org:1234 HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org:1234" + ,.host= "hypnotoad.org" + ,.port= 1234 + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define PATCH_REQ 25 +, {.name = "PATCH request" + ,.type= HTTP_REQUEST + ,.raw= "PATCH /file.txt HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Content-Type: application/example\r\n" + "If-Match: \"e0023aa4e\"\r\n" + "Content-Length: 10\r\n" + "\r\n" + "cccccccccc" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_PATCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/file.txt" + ,.request_url= "/file.txt" + ,.num_headers= 4 + ,.headers= { { "Host", "www.example.com" } + , { "Content-Type", "application/example" } + , { "If-Match", "\"e0023aa4e\"" } + , { "Content-Length", "10" } + } + ,.body= "cccccccccc" + } + +#define CONNECT_CAPS_REQUEST 26 +, {.name = "connect caps request" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT HOME0.NETSCAPE.COM:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "HOME0.NETSCAPE.COM:443" + ,.num_headers= 2 + ,.upgrade="" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { 
"Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } + +#if !HTTP_PARSER_STRICT +#define UTF8_PATH_REQ 27 +, {.name= "utf-8 path request" + ,.type= HTTP_REQUEST + ,.raw= "GET /δ¶/δt/pope?q=1#narf HTTP/1.1\r\n" + "Host: github.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "q=1" + ,.fragment= "narf" + ,.request_path= "/δ¶/δt/pope" + ,.request_url= "/δ¶/δt/pope?q=1#narf" + ,.num_headers= 1 + ,.headers= { {"Host", "github.com" } + } + ,.body= "" + } + +#define HOSTNAME_UNDERSCORE 28 +, {.name = "hostname underscore" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT home_0.netscape.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "home_0.netscape.com:443" + ,.num_headers= 2 + ,.upgrade="" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } +#endif /* !HTTP_PARSER_STRICT */ + +/* see https://github.com/ry/http-parser/issues/47 */ +#define EAT_TRAILING_CRLF_NO_CONNECTION_CLOSE 29 +, {.name = "eat CRLF between requests, no \"Connection: close\" header" + ,.raw= "POST / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Content-Type: application/x-www-form-urlencoded\r\n" + "Content-Length: 4\r\n" + "\r\n" + "q=42\r\n" /* note the trailing CRLF */ + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 3 + ,.upgrade= 0 + ,.headers= { { "Host", "www.example.com" } + , { "Content-Type", "application/x-www-form-urlencoded" } + , { "Content-Length", "4" } + } + ,.body= 
"q=42" + } + +/* see https://github.com/ry/http-parser/issues/47 */ +#define EAT_TRAILING_CRLF_WITH_CONNECTION_CLOSE 30 +, {.name = "eat CRLF between requests even if \"Connection: close\" is set" + ,.raw= "POST / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Content-Type: application/x-www-form-urlencoded\r\n" + "Content-Length: 4\r\n" + "Connection: close\r\n" + "\r\n" + "q=42\r\n" /* note the trailing CRLF */ + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE /* input buffer isn't empty when on_message_complete is called */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 4 + ,.upgrade= 0 + ,.headers= { { "Host", "www.example.com" } + , { "Content-Type", "application/x-www-form-urlencoded" } + , { "Content-Length", "4" } + , { "Connection", "close" } + } + ,.body= "q=42" + } + +#define PURGE_REQ 31 +, {.name = "PURGE request" + ,.type= HTTP_REQUEST + ,.raw= "PURGE /file.txt HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_PURGE + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/file.txt" + ,.request_url= "/file.txt" + ,.num_headers= 1 + ,.headers= { { "Host", "www.example.com" } } + ,.body= "" + } + +#define SEARCH_REQ 32 +, {.name = "SEARCH request" + ,.type= HTTP_REQUEST + ,.raw= "SEARCH / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_SEARCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 1 + ,.headers= { { "Host", "www.example.com" } } + ,.body= "" + } + +#define PROXY_WITH_BASIC_AUTH 33 +, {.name= "host:port and basic_auth" + ,.type= HTTP_REQUEST + ,.raw= "GET http://a%12:b!&*$@hypnotoad.org:1234/toto HTTP/1.1\r\n" + "\r\n" + 
,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.fragment= "" + ,.request_path= "/toto" + ,.request_url= "http://a%12:b!&*$@hypnotoad.org:1234/toto" + ,.host= "hypnotoad.org" + ,.userinfo= "a%12:b!&*$" + ,.port= 1234 + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + + +, {.name= NULL } /* sentinel */ +}; + +/* * R E S P O N S E S * */ +const struct message responses[] = +#define GOOGLE_301 0 +{ {.name= "google 301" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301 Moved Permanently\r\n" + "Location: http://www.google.com/\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Date: Sun, 26 Apr 2009 11:11:49 GMT\r\n" + "Expires: Tue, 26 May 2009 11:11:49 GMT\r\n" + "X-$PrototypeBI-Version: 1.6.0.3\r\n" /* $ char in header field */ + "Cache-Control: public, max-age=2592000\r\n" + "Server: gws\r\n" + "Content-Length: 219 \r\n" + "\r\n" + "\n" + "301 Moved\n" + "

301 Moved

\n" + "The document has moved\n" + "here.\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.num_headers= 8 + ,.headers= + { { "Location", "http://www.google.com/" } + , { "Content-Type", "text/html; charset=UTF-8" } + , { "Date", "Sun, 26 Apr 2009 11:11:49 GMT" } + , { "Expires", "Tue, 26 May 2009 11:11:49 GMT" } + , { "X-$PrototypeBI-Version", "1.6.0.3" } + , { "Cache-Control", "public, max-age=2592000" } + , { "Server", "gws" } + , { "Content-Length", "219 " } + } + ,.body= "\n" + "301 Moved\n" + "

301 Moved

\n" + "The document has moved\n" + "here.\r\n" + "\r\n" + } + +#define NO_CONTENT_LENGTH_RESPONSE 1 +/* The client should wait for the server's EOF. That is, when content-length + * is not specified, and "Connection: close", the end of body is specified + * by the EOF. + * Compare with APACHEBENCH_GET + */ +, {.name= "no content-length response" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Date: Tue, 04 Aug 2009 07:59:32 GMT\r\n" + "Server: Apache\r\n" + "X-Powered-By: Servlet/2.5 JSP/2.1\r\n" + "Content-Type: text/xml; charset=utf-8\r\n" + "Connection: close\r\n" + "\r\n" + "\n" + "\n" + " \n" + " \n" + " SOAP-ENV:Client\n" + " Client Error\n" + " \n" + " \n" + "" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 5 + ,.headers= + { { "Date", "Tue, 04 Aug 2009 07:59:32 GMT" } + , { "Server", "Apache" } + , { "X-Powered-By", "Servlet/2.5 JSP/2.1" } + , { "Content-Type", "text/xml; charset=utf-8" } + , { "Connection", "close" } + } + ,.body= "\n" + "\n" + " \n" + " \n" + " SOAP-ENV:Client\n" + " Client Error\n" + " \n" + " \n" + "" + } + +#define NO_HEADERS_NO_BODY_404 2 +, {.name= "404 no headers no body" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 404 Not Found\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 404 + ,.num_headers= 0 + ,.headers= {} + ,.body_size= 0 + ,.body= "" + } + +#define NO_REASON_PHRASE 3 +, {.name= "301 no response phrase" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301\r\n\r\n" + ,.should_keep_alive = FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define TRAILING_SPACE_ON_CHUNKED_BODY 4 +, {.name="200 trailing space on chunked body" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + 
"25 \r\n" + "This is the data in the first chunk\r\n" + "\r\n" + "1C\r\n" + "and this is the second one\r\n" + "\r\n" + "0 \r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 2 + ,.headers= + { {"Content-Type", "text/plain" } + , {"Transfer-Encoding", "chunked" } + } + ,.body_size = 37+28 + ,.body = + "This is the data in the first chunk\r\n" + "and this is the second one\r\n" + + } + +#define NO_CARRIAGE_RET 5 +, {.name="no carriage ret" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\n" + "Content-Type: text/html; charset=utf-8\n" + "Connection: close\n" + "\n" + "these headers are from http://news.ycombinator.com/" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 2 + ,.headers= + { {"Content-Type", "text/html; charset=utf-8" } + , {"Connection", "close" } + } + ,.body= "these headers are from http://news.ycombinator.com/" + } + +#define PROXY_CONNECTION 6 +, {.name="proxy connection" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Content-Length: 11\r\n" + "Proxy-Connection: close\r\n" + "Date: Thu, 31 Dec 2009 20:55:48 +0000\r\n" + "\r\n" + "hello world" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 4 + ,.headers= + { {"Content-Type", "text/html; charset=UTF-8" } + , {"Content-Length", "11" } + , {"Proxy-Connection", "close" } + , {"Date", "Thu, 31 Dec 2009 20:55:48 +0000"} + } + ,.body= "hello world" + } + +#define UNDERSTORE_HEADER_KEY 7 + // shown by + // curl -o /dev/null -v "http://ad.doubleclick.net/pfadx/DARTSHELLCONFIGXML;dcmt=text/xml;" +, {.name="underscore header key" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Server: DCLK-AdSvr\r\n" + "Content-Type: text/xml\r\n" + "Content-Length: 0\r\n" + "DCLK_imp: 
v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 4 + ,.headers= + { {"Server", "DCLK-AdSvr" } + , {"Content-Type", "text/xml" } + , {"Content-Length", "0" } + , {"DCLK_imp", "v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o" } + } + ,.body= "" + } + +#define BONJOUR_MADAME_FR 8 +/* The client should not merge two headers fields when the first one doesn't + * have a value. + */ +, {.name= "bonjourmadame.fr" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 301 Moved Permanently\r\n" + "Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n" + "Server: Apache/2.2.3 (Red Hat)\r\n" + "Cache-Control: public\r\n" + "Pragma: \r\n" + "Location: http://www.bonjourmadame.fr/\r\n" + "Vary: Accept-Encoding\r\n" + "Content-Length: 0\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 301 + ,.num_headers= 9 + ,.headers= + { { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" } + , { "Server", "Apache/2.2.3 (Red Hat)" } + , { "Cache-Control", "public" } + , { "Pragma", "" } + , { "Location", "http://www.bonjourmadame.fr/" } + , { "Vary", "Accept-Encoding" } + , { "Content-Length", "0" } + , { "Content-Type", "text/html; charset=UTF-8" } + , { "Connection", "keep-alive" } + } + ,.body= "" + } + +#define RES_FIELD_UNDERSCORE 9 +/* Should handle spaces in header fields */ +, {.name= "field underscore" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Date: Tue, 28 Sep 2010 01:14:13 GMT\r\n" + "Server: Apache\r\n" + "Cache-Control: no-cache, must-revalidate\r\n" + "Expires: Mon, 26 Jul 1997 05:00:00 GMT\r\n" + ".et-Cookie: PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com\r\n" + "Vary: Accept-Encoding\r\n" + "_eep-Alive: timeout=45\r\n" /* 
semantic value ignored */ + "_onnection: Keep-Alive\r\n" /* semantic value ignored */ + "Transfer-Encoding: chunked\r\n" + "Content-Type: text/html\r\n" + "Connection: close\r\n" + "\r\n" + "0\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 11 + ,.headers= + { { "Date", "Tue, 28 Sep 2010 01:14:13 GMT" } + , { "Server", "Apache" } + , { "Cache-Control", "no-cache, must-revalidate" } + , { "Expires", "Mon, 26 Jul 1997 05:00:00 GMT" } + , { ".et-Cookie", "PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com" } + , { "Vary", "Accept-Encoding" } + , { "_eep-Alive", "timeout=45" } + , { "_onnection", "Keep-Alive" } + , { "Transfer-Encoding", "chunked" } + , { "Content-Type", "text/html" } + , { "Connection", "close" } + } + ,.body= "" + } + +#define NON_ASCII_IN_STATUS_LINE 10 +/* Should handle non-ASCII in status line */ +, {.name= "non-ASCII in status line" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 500 Oriëntatieprobleem\r\n" + "Date: Fri, 5 Nov 2010 23:07:12 GMT+2\r\n" + "Content-Length: 0\r\n" + "Connection: close\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 500 + ,.num_headers= 3 + ,.headers= + { { "Date", "Fri, 5 Nov 2010 23:07:12 GMT+2" } + , { "Content-Length", "0" } + , { "Connection", "close" } + } + ,.body= "" + } + +#define HTTP_VERSION_0_9 11 +/* Should handle HTTP/0.9 */ +, {.name= "http version 0.9" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/0.9 200 OK\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 0 + ,.http_minor= 9 + ,.status_code= 200 + ,.num_headers= 0 + ,.headers= + {} + ,.body= "" + } + +#define NO_CONTENT_LENGTH_NO_TRANSFER_ENCODING_RESPONSE 12 +/* The client should wait for the server's EOF. That is, when neither + * content-length nor transfer-encoding is specified, the end of body + * is specified by the EOF. 
+ */ +, {.name= "neither content-length nor transfer-encoding response" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + "hello world" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 1 + ,.headers= + { { "Content-Type", "text/plain" } + } + ,.body= "hello world" + } + +#define NO_BODY_HTTP10_KA_200 13 +, {.name= "HTTP/1.0 with keep-alive and EOF-terminated 200 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 200 OK\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 200 + ,.num_headers= 1 + ,.headers= + { { "Connection", "keep-alive" } + } + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP10_KA_204 14 +, {.name= "HTTP/1.0 with keep-alive and a 204 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 204 No content\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 204 + ,.num_headers= 1 + ,.headers= + { { "Connection", "keep-alive" } + } + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_KA_200 15 +, {.name= "HTTP/1.1 with an EOF-terminated 200 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 0 + ,.headers={} + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_KA_204 16 +, {.name= "HTTP/1.1 with a 204 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 204 No content\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 204 + ,.num_headers= 0 + ,.headers={} + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_NOKA_204 17 +, {.name= "HTTP/1.1 with a 204 
status and keep-alive disabled" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 204 No content\r\n" + "Connection: close\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 204 + ,.num_headers= 1 + ,.headers= + { { "Connection", "close" } + } + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_KA_CHUNKED_200 18 +, {.name= "HTTP/1.1 with chunked endocing and a 200 response" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body_size= 0 + ,.body= "" + } + +#if !HTTP_PARSER_STRICT +#define SPACE_IN_FIELD_RES 19 +/* Should handle spaces in header fields */ +, {.name= "field space" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Server: Microsoft-IIS/6.0\r\n" + "X-Powered-By: ASP.NET\r\n" + "en-US Content-Type: text/xml\r\n" /* this is the problem */ + "Content-Type: text/xml\r\n" + "Content-Length: 16\r\n" + "Date: Fri, 23 Jul 2010 18:45:38 GMT\r\n" + "Connection: keep-alive\r\n" + "\r\n" + "hello" /* fake body */ + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 7 + ,.headers= + { { "Server", "Microsoft-IIS/6.0" } + , { "X-Powered-By", "ASP.NET" } + , { "en-US Content-Type", "text/xml" } + , { "Content-Type", "text/xml" } + , { "Content-Length", "16" } + , { "Date", "Fri, 23 Jul 2010 18:45:38 GMT" } + , { "Connection", "keep-alive" } + } + ,.body= "hello" + } +#endif /* !HTTP_PARSER_STRICT */ + +#define AMAZON_COM 20 +, {.name= "amazon.com" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301 MovedPermanently\r\n" + "Date: Wed, 15 May 2013 17:06:33 GMT\r\n" + "Server: Server\r\n" + "x-amz-id-1: 0GPHKXSJQ826RK7GZEB2\r\n" + "p3p: 
policyref=\"http://www.amazon.com/w3c/p3p.xml\",CP=\"CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC \"\r\n" + "x-amz-id-2: STN69VZxIFSz9YJLbz1GDbxpbjG6Qjmmq5E3DxRhOUw+Et0p4hr7c/Q8qNcx4oAD\r\n" + "Location: http://www.amazon.com/Dan-Brown/e/B000AP9DSU/ref=s9_pop_gw_al1?_encoding=UTF8&refinementId=618073011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0SHYY5BZXN3KR20BNFAY&pf_rd_t=101&pf_rd_p=1263340922&pf_rd_i=507846\r\n" + "Vary: Accept-Encoding,User-Agent\r\n" + "Content-Type: text/html; charset=ISO-8859-1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "1\r\n" + "\n\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.num_headers= 9 + ,.headers= { { "Date", "Wed, 15 May 2013 17:06:33 GMT" } + , { "Server", "Server" } + , { "x-amz-id-1", "0GPHKXSJQ826RK7GZEB2" } + , { "p3p", "policyref=\"http://www.amazon.com/w3c/p3p.xml\",CP=\"CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC \"" } + , { "x-amz-id-2", "STN69VZxIFSz9YJLbz1GDbxpbjG6Qjmmq5E3DxRhOUw+Et0p4hr7c/Q8qNcx4oAD" } + , { "Location", "http://www.amazon.com/Dan-Brown/e/B000AP9DSU/ref=s9_pop_gw_al1?_encoding=UTF8&refinementId=618073011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0SHYY5BZXN3KR20BNFAY&pf_rd_t=101&pf_rd_p=1263340922&pf_rd_i=507846" } + , { "Vary", "Accept-Encoding,User-Agent" } + , { "Content-Type", "text/html; charset=ISO-8859-1" } + , { "Transfer-Encoding", "chunked" } + } + ,.body= "\n" + } + +, {.name= NULL } /* sentinel */ +}; + +/* strnlen() is a POSIX.2008 addition. Can't rely on it being available so + * define it ourselves. 
+ */ +size_t +strnlen(const char *s, size_t maxlen) +{ + const char *p; + + p = memchr(s, '\0', maxlen); + if (p == NULL) + return maxlen; + + return p - s; +} + +size_t +strlncat(char *dst, size_t len, const char *src, size_t n) +{ + size_t slen; + size_t dlen; + size_t rlen; + size_t ncpy; + + slen = strnlen(src, n); + dlen = strnlen(dst, len); + + if (dlen < len) { + rlen = len - dlen; + ncpy = slen < rlen ? slen : (rlen - 1); + memcpy(dst + dlen, src, ncpy); + dst[dlen + ncpy] = '\0'; + } + + assert(len > slen + dlen); + return slen + dlen; +} + +size_t +strlcat(char *dst, const char *src, size_t len) +{ + return strlncat(dst, len, src, (size_t) -1); +} + +size_t +strlncpy(char *dst, size_t len, const char *src, size_t n) +{ + size_t slen; + size_t ncpy; + + slen = strnlen(src, n); + + if (len > 0) { + ncpy = slen < len ? slen : (len - 1); + memcpy(dst, src, ncpy); + dst[ncpy] = '\0'; + } + + assert(len > slen); + return slen; +} + +size_t +strlcpy(char *dst, const char *src, size_t len) +{ + return strlncpy(dst, len, src, (size_t) -1); +} + +int +request_url_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strlncat(messages[num_messages].request_url, + sizeof(messages[num_messages].request_url), + buf, + len); + return 0; +} + +int +status_complete_cb (http_parser *p) { + assert(p == parser); + p->data++; + return 0; +} + +int +header_field_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + struct message *m = &messages[num_messages]; + + if (m->last_header_element != FIELD) + m->num_headers++; + + strlncat(m->headers[m->num_headers-1][0], + sizeof(m->headers[m->num_headers-1][0]), + buf, + len); + + m->last_header_element = FIELD; + + return 0; +} + +int +header_value_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + struct message *m = &messages[num_messages]; + + strlncat(m->headers[m->num_headers-1][1], + sizeof(m->headers[m->num_headers-1][1]), + buf, + len); + + 
m->last_header_element = VALUE; + + return 0; +} + +void +check_body_is_final (const http_parser *p) +{ + if (messages[num_messages].body_is_final) { + fprintf(stderr, "\n\n *** Error http_body_is_final() should return 1 " + "on last on_body callback call " + "but it doesn't! ***\n\n"); + assert(0); + abort(); + } + messages[num_messages].body_is_final = http_body_is_final(p); +} + +int +body_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strlncat(messages[num_messages].body, + sizeof(messages[num_messages].body), + buf, + len); + messages[num_messages].body_size += len; + check_body_is_final(p); + // printf("body_cb: '%s'\n", requests[num_messages].body); + return 0; +} + +int +count_body_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + assert(buf); + messages[num_messages].body_size += len; + check_body_is_final(p); + return 0; +} + +int +message_begin_cb (http_parser *p) +{ + assert(p == parser); + messages[num_messages].message_begin_cb_called = TRUE; + return 0; +} + +int +headers_complete_cb (http_parser *p) +{ + assert(p == parser); + messages[num_messages].method = parser->method; + messages[num_messages].status_code = parser->status_code; + messages[num_messages].http_major = parser->http_major; + messages[num_messages].http_minor = parser->http_minor; + messages[num_messages].headers_complete_cb_called = TRUE; + messages[num_messages].should_keep_alive = http_should_keep_alive(parser); + return 0; +} + +int +message_complete_cb (http_parser *p) +{ + assert(p == parser); + if (messages[num_messages].should_keep_alive != http_should_keep_alive(parser)) + { + fprintf(stderr, "\n\n *** Error http_should_keep_alive() should have same " + "value in both on_message_complete and on_headers_complete " + "but it doesn't! 
***\n\n"); + assert(0); + abort(); + } + + if (messages[num_messages].body_size && + http_body_is_final(p) && + !messages[num_messages].body_is_final) + { + fprintf(stderr, "\n\n *** Error http_body_is_final() should return 1 " + "on last on_body callback call " + "but it doesn't! ***\n\n"); + assert(0); + abort(); + } + + messages[num_messages].message_complete_cb_called = TRUE; + + messages[num_messages].message_complete_on_eof = currently_parsing_eof; + + num_messages++; + return 0; +} + +/* These dontcall_* callbacks exist so that we can verify that when we're + * paused, no additional callbacks are invoked */ +int +dontcall_message_begin_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_message_begin() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_header_field_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_header_field() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_header_value_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_header_value() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_request_url_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_request_url() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_body_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_body_cb() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_headers_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_headers_complete() called on paused " + "parser ***\n\n"); + abort(); +} + +int +dontcall_message_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_message_complete() called on paused " + "parser ***\n\n"); + abort(); +} + +static 
http_parser_settings settings_dontcall = + {.on_message_begin = dontcall_message_begin_cb + ,.on_header_field = dontcall_header_field_cb + ,.on_header_value = dontcall_header_value_cb + ,.on_url = dontcall_request_url_cb + ,.on_body = dontcall_body_cb + ,.on_headers_complete = dontcall_headers_complete_cb + ,.on_message_complete = dontcall_message_complete_cb + }; + +/* These pause_* callbacks always pause the parser and just invoke the regular + * callback that tracks content. Before returning, we overwrite the parser + * settings to point to the _dontcall variety so that we can verify that + * the pause actually did, you know, pause. */ +int +pause_message_begin_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return message_begin_cb(p); +} + +int +pause_header_field_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return header_field_cb(p, buf, len); +} + +int +pause_header_value_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return header_value_cb(p, buf, len); +} + +int +pause_request_url_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return request_url_cb(p, buf, len); +} + +int +pause_body_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return body_cb(p, buf, len); +} + +int +pause_headers_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return headers_complete_cb(p); +} + +int +pause_message_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return message_complete_cb(p); +} + +static http_parser_settings settings_pause = + {.on_message_begin = pause_message_begin_cb + ,.on_header_field = 
pause_header_field_cb + ,.on_header_value = pause_header_value_cb + ,.on_url = pause_request_url_cb + ,.on_body = pause_body_cb + ,.on_headers_complete = pause_headers_complete_cb + ,.on_message_complete = pause_message_complete_cb + }; + +static http_parser_settings settings = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_url = request_url_cb + ,.on_body = body_cb + ,.on_headers_complete = headers_complete_cb + ,.on_message_complete = message_complete_cb + }; + +static http_parser_settings settings_count_body = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_url = request_url_cb + ,.on_body = count_body_cb + ,.on_headers_complete = headers_complete_cb + ,.on_message_complete = message_complete_cb + }; + +static http_parser_settings settings_null = + {.on_message_begin = 0 + ,.on_header_field = 0 + ,.on_header_value = 0 + ,.on_url = 0 + ,.on_body = 0 + ,.on_headers_complete = 0 + ,.on_message_complete = 0 + }; + +void +parser_init (enum http_parser_type type) +{ + num_messages = 0; + + assert(parser == NULL); + + parser = malloc(sizeof(http_parser)); + + http_parser_init(parser, type); + + memset(&messages, 0, sizeof messages); + +} + +void +parser_free () +{ + assert(parser); + free(parser); + parser = NULL; +} + +size_t parse (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(parser, &settings, buf, len); + return nparsed; +} + +size_t parse_count_body (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(parser, &settings_count_body, buf, len); + return nparsed; +} + +size_t parse_pause (const char *buf, size_t len) +{ + size_t nparsed; + http_parser_settings s = settings_pause; + + currently_parsing_eof = (len == 0); + current_pause_parser = &s; + nparsed = 
http_parser_execute(parser, current_pause_parser, buf, len); + return nparsed; +} + +static inline int +check_str_eq (const struct message *m, + const char *prop, + const char *expected, + const char *found) { + if ((expected == NULL) != (found == NULL)) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected %s\n", (expected == NULL) ? "NULL" : expected); + printf(" found %s\n", (found == NULL) ? "NULL" : found); + return 0; + } + if (expected != NULL && 0 != strcmp(expected, found)) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected '%s'\n", expected); + printf(" found '%s'\n", found); + return 0; + } + return 1; +} + +static inline int +check_num_eq (const struct message *m, + const char *prop, + int expected, + int found) { + if (expected != found) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected %d\n", expected); + printf(" found %d\n", found); + return 0; + } + return 1; +} + +#define MESSAGE_CHECK_STR_EQ(expected, found, prop) \ + if (!check_str_eq(expected, #prop, expected->prop, found->prop)) return 0 + +#define MESSAGE_CHECK_NUM_EQ(expected, found, prop) \ + if (!check_num_eq(expected, #prop, expected->prop, found->prop)) return 0 + +#define MESSAGE_CHECK_URL_EQ(u, expected, found, prop, fn) \ +do { \ + char ubuf[256]; \ + \ + if ((u)->field_set & (1 << (fn))) { \ + memcpy(ubuf, (found)->request_url + (u)->field_data[(fn)].off, \ + (u)->field_data[(fn)].len); \ + ubuf[(u)->field_data[(fn)].len] = '\0'; \ + } else { \ + ubuf[0] = '\0'; \ + } \ + \ + check_str_eq(expected, #prop, expected->prop, ubuf); \ +} while(0) + +int +message_eq (int index, const struct message *expected) +{ + int i; + struct message *m = &messages[index]; + + MESSAGE_CHECK_NUM_EQ(expected, m, http_major); + MESSAGE_CHECK_NUM_EQ(expected, m, http_minor); + + if (expected->type == HTTP_REQUEST) { + MESSAGE_CHECK_NUM_EQ(expected, m, method); + } else { + MESSAGE_CHECK_NUM_EQ(expected, m, status_code); 
+ } + + MESSAGE_CHECK_NUM_EQ(expected, m, should_keep_alive); + MESSAGE_CHECK_NUM_EQ(expected, m, message_complete_on_eof); + + assert(m->message_begin_cb_called); + assert(m->headers_complete_cb_called); + assert(m->message_complete_cb_called); + + + MESSAGE_CHECK_STR_EQ(expected, m, request_url); + + /* Check URL components; we can't do this w/ CONNECT since it doesn't + * send us a well-formed URL. + */ + if (*m->request_url && m->method != HTTP_CONNECT) { + struct http_parser_url u; + + if (http_parser_parse_url(m->request_url, strlen(m->request_url), 0, &u)) { + fprintf(stderr, "\n\n*** failed to parse URL %s ***\n\n", + m->request_url); + abort(); + } + + if (expected->host) { + MESSAGE_CHECK_URL_EQ(&u, expected, m, host, UF_HOST); + } + + if (expected->userinfo) { + MESSAGE_CHECK_URL_EQ(&u, expected, m, userinfo, UF_USERINFO); + } + + m->port = (u.field_set & (1 << UF_PORT)) ? + u.port : 0; + + MESSAGE_CHECK_URL_EQ(&u, expected, m, query_string, UF_QUERY); + MESSAGE_CHECK_URL_EQ(&u, expected, m, fragment, UF_FRAGMENT); + MESSAGE_CHECK_URL_EQ(&u, expected, m, request_path, UF_PATH); + MESSAGE_CHECK_NUM_EQ(expected, m, port); + } + + if (expected->body_size) { + MESSAGE_CHECK_NUM_EQ(expected, m, body_size); + } else { + MESSAGE_CHECK_STR_EQ(expected, m, body); + } + + MESSAGE_CHECK_NUM_EQ(expected, m, num_headers); + + int r; + for (i = 0; i < m->num_headers; i++) { + r = check_str_eq(expected, "header field", expected->headers[i][0], m->headers[i][0]); + if (!r) return 0; + r = check_str_eq(expected, "header value", expected->headers[i][1], m->headers[i][1]); + if (!r) return 0; + } + + MESSAGE_CHECK_STR_EQ(expected, m, upgrade); + + return 1; +} + +/* Given a sequence of varargs messages, return the number of them that the + * parser should successfully parse, taking into account that upgraded + * messages prevent all subsequent messages from being parsed. + */ +size_t +count_parsed_messages(const size_t nmsgs, ...) 
{
  size_t i;
  va_list ap;

  va_start(ap, nmsgs);

  for (i = 0; i < nmsgs; i++) {
    struct message *m = va_arg(ap, struct message *);

    /* The first message carrying an upgrade is the last one counted. */
    if (m->upgrade) {
      va_end(ap);
      return i + 1;
    }
  }

  va_end(ap);
  return nmsgs;
}

/* Given a sequence of bytes and the number of these that we were able to
 * parse, verify that upgrade bodies are correct.
 *
 * `body` is the concatenated input, `nread` the number of bytes consumed,
 * and the varargs are the `nmsgs` messages (struct message *) that made up
 * `body`.  Aborts if the upgrade data does not match or if none of the
 * messages has an upgrade.
 */
void
upgrade_message_fix(char *body, const size_t nread, const size_t nmsgs, ...) {
  va_list ap;
  size_t i;
  size_t off = 0;

  va_start(ap, nmsgs);

  for (i = 0; i < nmsgs; i++) {
    struct message *m = va_arg(ap, struct message *);

    /* `off` accumulates the raw length of every message up to and including
     * this one. */
    off += strlen(m->raw);

    if (m->upgrade) {
      /* Step back over the upgrade payload so `off` is where it starts. */
      off -= strlen(m->upgrade);

      /* Check the portion of the response after its specified upgrade */
      if (!check_str_eq(m, "upgrade", body + off, body + nread)) {
        abort();
      }

      /* Fix up the response so that message_eq() will verify the beginning
       * of the upgrade */
      *(body + nread + strlen(m->upgrade)) = '\0';
      messages[num_messages -1 ].upgrade = body + nread;

      va_end(ap);
      return;
    }
  }

  va_end(ap);
  printf("\n\n*** Error: expected a message with upgrade ***\n");

  abort();
}

/* Print the current error description of the global `parser` to stderr,
 * then echo `raw` with CR and LF made visible as "\r" and "\n", and finally
 * a caret under the column corresponding to byte offset `error_location`.
 */
static void
print_error (const char *raw, size_t error_location)
{
  fprintf(stderr, "\n*** %s ***\n\n",
          http_errno_description(HTTP_PARSER_ERRNO(parser)));

  /* this_line: set once the offending byte has been reached.
   * error_location_line: printed width of the current line before that byte. */
  int this_line = 0, char_len = 0;
  size_t i, j, len = strlen(raw), error_location_line = 0;
  for (i = 0; i < len; i++) {
    if (i == error_location) this_line = 1;
    switch (raw[i]) {
      case '\r':
        /* Rendered as two characters, so it is two columns wide. */
        char_len = 2;
        fprintf(stderr, "\\r");
        break;

      case '\n':
        char_len = 2;
        fprintf(stderr, "\\n\n");

        /* Finish the line holding the error, then draw the caret. */
        if (this_line) goto print;

        /* A new output line starts: reset the column counter. */
        error_location_line = 0;
        continue;

      default:
        char_len = 1;
        fputc(raw[i], stderr);
        break;
    }
    if (!this_line) error_location_line += char_len;
  }

  fprintf(stderr, "[eof]\n");

 print:
  for (j = 0; j < error_location_line; j++) {
    fputc(' ', stderr);
  }
  fprintf(stderr, "^\n\nerror location: %u\n",
(unsigned int)error_location); +} + +void +test_preserve_data (void) +{ + char my_data[] = "application-specific data"; + http_parser parser; + parser.data = my_data; + http_parser_init(&parser, HTTP_REQUEST); + if (parser.data != my_data) { + printf("\n*** parser.data not preserved accross http_parser_init ***\n\n"); + abort(); + } +} + +struct url_test { + const char *name; + const char *url; + int is_connect; + struct http_parser_url u; + int rv; +}; + +const struct url_test url_tests[] = +{ {.name="proxy request" + ,.url="http://hostname/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 7, 8 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 15, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="proxy request with port" + ,.url="http://hostname:444/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PORT) | (1 << UF_PATH) + ,.port=444 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 7, 8 } /* UF_HOST */ + ,{ 16, 3 } /* UF_PORT */ + ,{ 19, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="CONNECT request" + ,.url="hostname:443" + ,.is_connect=1 + ,.u= + {.field_set=(1 << UF_HOST) | (1 << UF_PORT) + ,.port=443 + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 0, 8 } /* UF_HOST */ + ,{ 9, 3 } /* UF_PORT */ + ,{ 0, 0 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="CONNECT request but not connect" + ,.url="hostname:443" + ,.is_connect=0 + ,.rv=1 + } + +, {.name="proxy ipv6 request" + ,.url="http://[1:2::3:4]/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 8 } /* UF_HOST */ + 
,{ 0, 0 } /* UF_PORT */ + ,{ 17, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="proxy ipv6 request with port" + ,.url="http://[1:2::3:4]:67/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PORT) | (1 << UF_PATH) + ,.port=67 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 8 } /* UF_HOST */ + ,{ 18, 2 } /* UF_PORT */ + ,{ 20, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="CONNECT ipv6 address" + ,.url="[1:2::3:4]:443" + ,.is_connect=1 + ,.u= + {.field_set=(1 << UF_HOST) | (1 << UF_PORT) + ,.port=443 + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 1, 8 } /* UF_HOST */ + ,{ 11, 3 } /* UF_PORT */ + ,{ 0, 0 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="ipv4 in ipv6 address" + ,.url="http://[2001:0000:0000:0000:0000:0000:1.9.1.1]/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 37 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 46, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="extra ? 
in query string" + ,.url="http://a.tbcdn.cn/p/fp/2010c/??fp-header-min.css,fp-base-min.css," + "fp-channel-min.css,fp-product-min.css,fp-mall-min.css,fp-category-min.css," + "fp-sub-min.css,fp-gdp4p-min.css,fp-css3-min.css,fp-misc-min.css?t=20101022.css" + ,.is_connect=0 + ,.u= + {.field_set=(1<field_set, u->port); + for (i = 0; i < UF_MAX; i++) { + if ((u->field_set & (1 << i)) == 0) { + printf("\tfield_data[%u]: unset\n", i); + continue; + } + + printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n\"", + i, + u->field_data[i].off, + u->field_data[i].len, + u->field_data[i].len, + url + u->field_data[i].off); + } +} + +void +test_parse_url (void) +{ + struct http_parser_url u; + const struct url_test *test; + unsigned int i; + int rv; + + for (i = 0; i < (sizeof(url_tests) / sizeof(url_tests[0])); i++) { + test = &url_tests[i]; + memset(&u, 0, sizeof(u)); + + rv = http_parser_parse_url(test->url, + strlen(test->url), + test->is_connect, + &u); + + if (test->rv == 0) { + if (rv != 0) { + printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, " + "unexpected rv %d ***\n\n", test->url, test->name, rv); + abort(); + } + + if (memcmp(&u, &test->u, sizeof(u)) != 0) { + printf("\n*** http_parser_parse_url(\"%s\") \"%s\" failed ***\n", + test->url, test->name); + + printf("target http_parser_url:\n"); + dump_url(test->url, &test->u); + printf("result http_parser_url:\n"); + dump_url(test->url, &u); + + abort(); + } + } else { + /* test->rv != 0 */ + if (rv == 0) { + printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, " + "unexpected rv %d ***\n\n", test->url, test->name, rv); + abort(); + } + } + } +} + +void +test_method_str (void) +{ + assert(0 == strcmp("GET", http_method_str(HTTP_GET))); + assert(0 == strcmp("", http_method_str(1337))); +} + +void +test_message (const struct message *message) +{ + size_t raw_len = strlen(message->raw); + size_t msg1len; + for (msg1len = 0; msg1len < raw_len; msg1len++) { + parser_init(message->type); + + size_t 
read; + const char *msg1 = message->raw; + const char *msg2 = msg1 + msg1len; + size_t msg2len = raw_len - msg1len; + + if (msg1len) { + read = parse(msg1, msg1len); + + if (message->upgrade && parser->upgrade) { + messages[num_messages - 1].upgrade = msg1 + read; + goto test; + } + + if (read != msg1len) { + print_error(msg1, read); + abort(); + } + } + + + read = parse(msg2, msg2len); + + if (message->upgrade && parser->upgrade) { + messages[num_messages - 1].upgrade = msg2 + read; + goto test; + } + + if (read != msg2len) { + print_error(msg2, read); + abort(); + } + + read = parse(NULL, 0); + + if (read != 0) { + print_error(message->raw, read); + abort(); + } + + test: + + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); + abort(); + } + + if(!message_eq(0, message)) abort(); + + parser_free(); + } +} + +void +test_message_count_body (const struct message *message) +{ + parser_init(message->type); + + size_t read; + size_t l = strlen(message->raw); + size_t i, toread; + size_t chunk = 4024; + + for (i = 0; i < l; i+= chunk) { + toread = MIN(l-i, chunk); + read = parse_count_body(message->raw + i, toread); + if (read != toread) { + print_error(message->raw, read); + abort(); + } + } + + + read = parse_count_body(NULL, 0); + if (read != 0) { + print_error(message->raw, read); + abort(); + } + + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); + abort(); + } + + if(!message_eq(0, message)) abort(); + + parser_free(); +} + +void +test_simple (const char *buf, enum http_errno err_expected) +{ + parser_init(HTTP_REQUEST); + + size_t parsed; + int pass; + enum http_errno err; + + parsed = parse(buf, strlen(buf)); + pass = (parsed == strlen(buf)); + err = HTTP_PARSER_ERRNO(parser); + parsed = parse(NULL, 0); + pass &= (parsed == 0); + + parser_free(); + + /* In strict mode, allow us to pass with an unexpected HPE_STRICT as + * long as the caller isn't 
expecting success.
   */
#if HTTP_PARSER_STRICT
  if (err_expected != err && err_expected != HPE_OK && err != HPE_STRICT) {
#else
  if (err_expected != err) {
#endif
    fprintf(stderr, "\n*** test_simple expected %s, but saw %s ***\n\n%s\n",
        http_errno_name(err_expected), http_errno_name(err), buf);
    abort();
  }
}

/* Send a start line followed by the same header line up to 10000 times and
 * require the parser to stop with HPE_HEADER_OVERFLOW before the loop ends.
 * A non-zero `req` exercises a request parser, zero a response parser.
 *
 * NOTE(review): main() passes HTTP_REQUEST / HTTP_RESPONSE as `req`; whether
 * those constants are non-zero/zero in the intended way depends on the enum
 * values, which are not visible here - confirm.
 */
void
test_header_overflow_error (int req)
{
  http_parser parser;
  http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE);
  size_t parsed;
  const char *buf;
  buf = req ? "GET / HTTP/1.1\r\n" : "HTTP/1.0 200 OK\r\n";
  parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf));
  assert(parsed == strlen(buf));

  buf = "header-key: header-value\r\n";
  size_t buflen = strlen(buf);

  int i;
  for (i = 0; i < 10000; i++) {
    parsed = http_parser_execute(&parser, &settings_null, buf, buflen);
    /* A short count means the parser stopped; it must be the overflow. */
    if (parsed != buflen) {
      //fprintf(stderr, "error found on iter %d\n", i);
      assert(HTTP_PARSER_ERRNO(&parser) == HPE_HEADER_OVERFLOW);
      return;
    }
  }

  fprintf(stderr, "\n*** Error expected but none in header overflow test ***\n");
  abort();
}

/* Parse `buf` (`buflen` bytes) as a response and assert the resulting error
 * code: HPE_OK when `expect_ok` is non-zero, HPE_INVALID_CONTENT_LENGTH
 * otherwise.  The checks are assert()s, so they are compiled out when NDEBUG
 * is defined.
 */
static void
test_content_length_overflow (const char *buf, size_t buflen, int expect_ok)
{
  http_parser parser;
  http_parser_init(&parser, HTTP_RESPONSE);
  http_parser_execute(&parser, &settings_null, buf, buflen);

  if (expect_ok)
    assert(HTTP_PARSER_ERRNO(&parser) == HPE_OK);
  else
    assert(HTTP_PARSER_ERRNO(&parser) == HPE_INVALID_CONTENT_LENGTH);
}

/* Content-Length values around 2^64: 2^64-2 must be accepted, 2^64-1 and
 * 2^64 must be rejected.  X() builds a response whose Content-Length is the
 * stringified argument.
 */
void
test_header_content_length_overflow_error (void)
{
#define X(size) \
  "HTTP/1.1 200 OK\r\n" \
  "Content-Length: " #size "\r\n" \
  "\r\n"
  const char a[] = X(18446744073709551614); /* 2^64-2 */
  const char b[] = X(18446744073709551615); /* 2^64-1 */
  const char c[] = X(18446744073709551616); /* 2^64 */
#undef X
  /* sizeof - 1 drops the string terminator from the byte count. */
  test_content_length_overflow(a, sizeof(a) - 1, 1); /* expect ok */
  test_content_length_overflow(b, sizeof(b) - 1, 0); /* expect failure */
  test_content_length_overflow(c, sizeof(c) - 1, 0); /*
expect failure */ +} + +void +test_chunk_content_length_overflow_error (void) +{ +#define X(size) \ + "HTTP/1.1 200 OK\r\n" \ + "Transfer-Encoding: chunked\r\n" \ + "\r\n" \ + #size "\r\n" \ + "..." + const char a[] = X(FFFFFFFFFFFFFFFE); /* 2^64-2 */ + const char b[] = X(FFFFFFFFFFFFFFFF); /* 2^64-1 */ + const char c[] = X(10000000000000000); /* 2^64 */ +#undef X + test_content_length_overflow(a, sizeof(a) - 1, 1); /* expect ok */ + test_content_length_overflow(b, sizeof(b) - 1, 0); /* expect failure */ + test_content_length_overflow(c, sizeof(c) - 1, 0); /* expect failure */ +} + +void +test_no_overflow_long_body (int req, size_t length) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + size_t i; + char buf1[3000]; + size_t buf1len = sprintf(buf1, "%s\r\nConnection: Keep-Alive\r\nContent-Length: %lu\r\n\r\n", + req ? "POST / HTTP/1.0" : "HTTP/1.0 200 OK", (unsigned long)length); + parsed = http_parser_execute(&parser, &settings_null, buf1, buf1len); + if (parsed != buf1len) + goto err; + + for (i = 0; i < length; i++) { + char foo = 'a'; + parsed = http_parser_execute(&parser, &settings_null, &foo, 1); + if (parsed != 1) + goto err; + } + + parsed = http_parser_execute(&parser, &settings_null, buf1, buf1len); + if (parsed != buf1len) goto err; + return; + + err: + fprintf(stderr, + "\n*** error in test_no_overflow_long_body %s of length %lu ***\n", + req ? 
"REQUEST" : "RESPONSE", + (unsigned long)length); + abort(); +} + +void +test_multiple3 (const struct message *r1, const struct message *r2, const struct message *r3) +{ + int message_count = count_parsed_messages(3, r1, r2, r3); + + char total[ strlen(r1->raw) + + strlen(r2->raw) + + strlen(r3->raw) + + 1 + ]; + total[0] = '\0'; + + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); + + parser_init(r1->type); + + size_t read; + + read = parse(total, strlen(total)); + + if (parser->upgrade) { + upgrade_message_fix(total, read, 3, r1, r2, r3); + goto test; + } + + if (read != strlen(total)) { + print_error(total, read); + abort(); + } + + read = parse(NULL, 0); + + if (read != 0) { + print_error(total, read); + abort(); + } + +test: + + if (message_count != num_messages) { + fprintf(stderr, "\n\n*** Parser didn't see 3 messages only %d *** \n", num_messages); + abort(); + } + + if (!message_eq(0, r1)) abort(); + if (message_count > 1 && !message_eq(1, r2)) abort(); + if (message_count > 2 && !message_eq(2, r3)) abort(); + + parser_free(); +} + +/* SCAN through every possible breaking to make sure the + * parser can handle getting the content in any chunks that + * might come from the socket + */ +void +test_scan (const struct message *r1, const struct message *r2, const struct message *r3) +{ + char total[80*1024] = "\0"; + char buf1[80*1024] = "\0"; + char buf2[80*1024] = "\0"; + char buf3[80*1024] = "\0"; + + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); + + size_t read; + + int total_len = strlen(total); + + int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2; + int ops = 0 ; + + size_t buf1_len, buf2_len, buf3_len; + int message_count = count_parsed_messages(3, r1, r2, r3); + + int i,j,type_both; + for (type_both = 0; type_both < 2; type_both ++ ) { + for (j = 2; j < total_len; j ++ ) { + for (i = 1; i < j; i ++ ) { + + if (ops % 1000 == 0) { + printf("\b\b\b\b%3.0f%%", 100 * (float)ops 
/(float)total_ops); + fflush(stdout); + } + ops += 1; + + parser_init(type_both ? HTTP_BOTH : r1->type); + + buf1_len = i; + strlncpy(buf1, sizeof(buf1), total, buf1_len); + buf1[buf1_len] = 0; + + buf2_len = j - i; + strlncpy(buf2, sizeof(buf1), total+i, buf2_len); + buf2[buf2_len] = 0; + + buf3_len = total_len - j; + strlncpy(buf3, sizeof(buf1), total+j, buf3_len); + buf3[buf3_len] = 0; + + read = parse(buf1, buf1_len); + + if (parser->upgrade) goto test; + + if (read != buf1_len) { + print_error(buf1, read); + goto error; + } + + read += parse(buf2, buf2_len); + + if (parser->upgrade) goto test; + + if (read != buf1_len + buf2_len) { + print_error(buf2, read); + goto error; + } + + read += parse(buf3, buf3_len); + + if (parser->upgrade) goto test; + + if (read != buf1_len + buf2_len + buf3_len) { + print_error(buf3, read); + goto error; + } + + parse(NULL, 0); + +test: + if (parser->upgrade) { + upgrade_message_fix(total, read, 3, r1, r2, r3); + } + + if (message_count != num_messages) { + fprintf(stderr, "\n\nParser didn't see %d messages only %d\n", + message_count, num_messages); + goto error; + } + + if (!message_eq(0, r1)) { + fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n"); + goto error; + } + + if (message_count > 1 && !message_eq(1, r2)) { + fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n"); + goto error; + } + + if (message_count > 2 && !message_eq(2, r3)) { + fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n"); + goto error; + } + + parser_free(); + } + } + } + puts("\b\b\b\b100%"); + return; + + error: + fprintf(stderr, "i=%d j=%d\n", i, j); + fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1); + fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned int)buf2_len , buf2); + fprintf(stderr, "buf3 (%u) %s\n", (unsigned int)buf3_len, buf3); + abort(); +} + +// user required to free the result +// string terminated by \0 +char * +create_large_chunked_message (int body_size_in_kb, const char* headers) 
+{ + int i; + size_t wrote = 0; + size_t headers_len = strlen(headers); + size_t bufsize = headers_len + (5+1024+2)*body_size_in_kb + 6; + char * buf = malloc(bufsize); + + memcpy(buf, headers, headers_len); + wrote += headers_len; + + for (i = 0; i < body_size_in_kb; i++) { + // write 1kb chunk into the body. + memcpy(buf + wrote, "400\r\n", 5); + wrote += 5; + memset(buf + wrote, 'C', 1024); + wrote += 1024; + strcpy(buf + wrote, "\r\n"); + wrote += 2; + } + + memcpy(buf + wrote, "0\r\n\r\n", 6); + wrote += 6; + assert(wrote == bufsize); + + return buf; +} + +void +test_status_complete (void) +{ + parser_init(HTTP_RESPONSE); + parser->data = 0; + http_parser_settings settings = settings_null; + settings.on_status_complete = status_complete_cb; + + char *response = "don't mind me, just a simple response"; + http_parser_execute(parser, &settings, response, strlen(response)); + assert(parser->data == (void*)0); // the status_complete callback was never called + assert(parser->http_errno == HPE_INVALID_CONSTANT); // the errno for an invalid status line +} + +/* Verify that we can pause parsing at any of the bytes in the + * message and still get the result that we're expecting. */ +void +test_message_pause (const struct message *msg) +{ + char *buf = (char*) msg->raw; + size_t buflen = strlen(msg->raw); + size_t nread; + + parser_init(msg->type); + + do { + nread = parse_pause(buf, buflen); + + // We can only set the upgrade buffer once we've gotten our message + // completion callback. 
if (messages[0].message_complete_cb_called &&
        msg->upgrade &&
        parser->upgrade) {
      /* Whatever was not consumed is the upgrade data. */
      messages[0].upgrade = buf + nread;
      goto test;
    }

    if (nread < buflen) {

      // Not much to do if we failed a strict-mode check
      if (HTTP_PARSER_ERRNO(parser) == HPE_STRICT) {
        parser_free();
        return;
      }

      // Any other short read must be because the parser is paused.
      assert (HTTP_PARSER_ERRNO(parser) == HPE_PAUSED);
    }

    // Skip what was consumed, un-pause, and continue with the remainder.
    buf += nread;
    buflen -= nread;
    http_parser_pause(parser, 0);
  } while (buflen > 0);

  nread = parse_pause(NULL, 0);
  assert (nread == 0);

test:
  if (num_messages != 1) {
    printf("\n*** num_messages != 1 after testing '%s' ***\n\n", msg->name);
    abort();
  }

  if(!message_eq(0, msg)) abort();

  parser_free();
}

/* Test driver: prints the library version, then runs the API tests, the
 * overflow tests, the response tests and finally the request tests.
 * A failing test aborts the process; returns 0 when everything passed.
 */
int
main (void)
{
  parser = NULL;
  int i, j, k;
  int request_count;
  int response_count;
  unsigned long version;
  unsigned major;
  unsigned minor;
  unsigned patch;

  /* The version is unpacked as one byte each for major, minor and patch. */
  version = http_parser_version();
  major = (version >> 16) & 255;
  minor = (version >> 8) & 255;
  patch = version & 255;
  printf("http_parser v%u.%u.%u (0x%06lx)\n", major, minor, patch, version);

  printf("sizeof(http_parser) = %u\n", (unsigned int)sizeof(http_parser));

  /* Both tables end with an entry whose name is NULL; count the real ones. */
  for (request_count = 0; requests[request_count].name; request_count++);
  for (response_count = 0; responses[response_count].name; response_count++);

  //// API
  test_preserve_data();
  test_parse_url();
  test_method_str();

  //// OVERFLOW CONDITIONS

  test_header_overflow_error(HTTP_REQUEST);
  test_no_overflow_long_body(HTTP_REQUEST, 1000);
  test_no_overflow_long_body(HTTP_REQUEST, 100000);

  test_header_overflow_error(HTTP_RESPONSE);
  test_no_overflow_long_body(HTTP_RESPONSE, 1000);
  test_no_overflow_long_body(HTTP_RESPONSE, 100000);

  test_header_content_length_overflow_error();
  test_chunk_content_length_overflow_error();

  //// RESPONSES

  for (i = 0; i < response_count; i++) {
    test_message(&responses[i]);
  }

  for (i = 0; i < response_count; i++) {
    test_message_pause(&responses[i]);
  }

  for (i
= 0; i < response_count; i++) {
    /* Only keep-alive responses may be followed by another message, so the
     * first two of each triple must be keep-alive; the third may be any. */
    if (!responses[i].should_keep_alive) continue;
    for (j = 0; j < response_count; j++) {
      if (!responses[j].should_keep_alive) continue;
      for (k = 0; k < response_count; k++) {
        test_multiple3(&responses[i], &responses[j], &responses[k]);
      }
    }
  }

  test_message_count_body(&responses[NO_HEADERS_NO_BODY_404]);
  test_message_count_body(&responses[TRAILING_SPACE_ON_CHUNKED_BODY]);

  // test very large chunked response
  {
    char * msg = create_large_chunked_message(31337,
      "HTTP/1.0 200 OK\r\n"
      "Transfer-Encoding: chunked\r\n"
      "Content-Type: text/plain\r\n"
      "\r\n");
    /* body_size is set, so message_eq() compares sizes only. */
    struct message large_chunked =
      {.name= "large chunked"
      ,.type= HTTP_RESPONSE
      ,.raw= msg
      ,.should_keep_alive= FALSE
      ,.message_complete_on_eof= FALSE
      ,.http_major= 1
      ,.http_minor= 0
      ,.status_code= 200
      ,.num_headers= 2
      ,.headers=
        { { "Transfer-Encoding", "chunked" }
        , { "Content-Type", "text/plain" }
        }
      ,.body_size= 31337*1024
      };
    test_message_count_body(&large_chunked);
    free(msg);
  }



  printf("response scan 1/2 ");
  test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY]
           , &responses[NO_BODY_HTTP10_KA_204]
           , &responses[NO_REASON_PHRASE]
           );

  printf("response scan 2/2 ");
  test_scan( &responses[BONJOUR_MADAME_FR]
           , &responses[UNDERSTORE_HEADER_KEY]
           , &responses[NO_CARRIAGE_RET]
           );

  puts("responses okay");


  /// REQUESTS

  /* "HTP" is a misspelled protocol name on purpose. */
  test_simple("GET / HTP/1.1\r\n\r\n", HPE_INVALID_VERSION);

  // Well-formed but incomplete
  test_simple("GET / HTTP/1.1\r\n"
              "Content-Type: text/plain\r\n"
              "Content-Length: 6\r\n"
              "\r\n"
              "fooba",
              HPE_OK);

  /* NULL-terminated list of method names that must be accepted. */
  static const char *all_methods[] = {
    "DELETE",
    "GET",
    "HEAD",
    "POST",
    "PUT",
    //"CONNECT", //CONNECT can't be tested like other methods, it's a tunnel
    "OPTIONS",
    "TRACE",
    "COPY",
    "LOCK",
    "MKCOL",
    "MOVE",
    "PROPFIND",
    "PROPPATCH",
    "UNLOCK",
    "REPORT",
    "MKACTIVITY",
    "CHECKOUT",
    "MERGE",
    "M-SEARCH",
    "NOTIFY",
    "SUBSCRIBE",
    "UNSUBSCRIBE",
    "PATCH",
    0 };
  const char **this_method;
  for (this_method = all_methods; *this_method; this_method++) {
    char buf[200];
    sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method);
    test_simple(buf, HPE_OK);
  }

  /* NULL-terminated list of strings that must be rejected as methods. */
  static const char *bad_methods[] = {
      "ASDF",
      "C******",
      "COLA",
      "GEM",
      "GETA",
      "M****",
      "MKCOLA",
      "PROPPATCHA",
      "PUN",
      "PX",
      "SA",
      "hello world",
      0 };
  for (this_method = bad_methods; *this_method; this_method++) {
    char buf[200];
    sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method);
    test_simple(buf, HPE_INVALID_METHOD);
  }

  /* A request with one header whose value continues over many lines, each
   * continuation line starting with a tab; it must parse without error. */
  const char *dumbfuck2 =
    "GET / HTTP/1.1\r\n"
    "X-SSL-Bullshit: -----BEGIN CERTIFICATE-----\r\n"
    "\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n"
    "\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n"
    "\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n"
    "\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n"
    "\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n"
    "\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n"
    "\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n"
    "\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n"
    "\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n"
    "\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n"
    "\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n"
    "\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n"
    "\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n"
    "\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n"
    "\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n"
    "\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n"
    "\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n"
    "\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n"
    "\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n"
    "\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n"
    "\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n"
    "\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n"
    "\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n"
    "\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n"
    "\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n"
    "\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n"
    "\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n"
    "\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n"
    "\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n"
    "\tRA==\r\n"
    "\t-----END CERTIFICATE-----\r\n"
    "\r\n";
  test_simple(dumbfuck2, HPE_OK);

#if 0
  // NOTE(Wed Nov 18 11:57:27 CET 2009) this seems okay. we just read body
  // until EOF.
  //
  // no content-length
  // error if there is a body without content length
  const char *bad_get_no_headers_no_body = "GET /bad_get_no_headers_no_body/world HTTP/1.1\r\n"
                                           "Accept: */*\r\n"
                                           "\r\n"
                                           "HELLO";
  test_simple(bad_get_no_headers_no_body, 0);
#endif
  /* TODO sending junk and large headers gets rejected */


  /* check to make sure our predefined requests are okay */
  /* NOTE(review): this loop stops at the NULL-named sentinel directly while
   * the loops below use request_count; both visit the same entries. */
  for (i = 0; requests[i].name; i++) {
    test_message(&requests[i]);
  }

  for (i = 0; i < request_count; i++) {
    test_message_pause(&requests[i]);
  }

  /* Same triple scheme as for the responses above. */
  for (i = 0; i < request_count; i++) {
    if (!requests[i].should_keep_alive) continue;
    for (j = 0; j < request_count; j++) {
      if (!requests[j].should_keep_alive) continue;
      for (k = 0; k < request_count; k++) {
        test_multiple3(&requests[i], &requests[j], &requests[k]);
      }
    }
  }

  printf("request scan 1/4 ");
  test_scan( &requests[GET_NO_HEADERS_NO_BODY]
           , &requests[GET_ONE_HEADER_NO_BODY]
           , &requests[GET_NO_HEADERS_NO_BODY]
           );

  printf("request scan 2/4 ");
  test_scan( &requests[POST_CHUNKED_ALL_YOUR_BASE]
           , &requests[POST_IDENTITY_BODY_WORLD]
           , &requests[GET_FUNKY_CONTENT_LENGTH]
           );

  printf("request scan 3/4 ");
  test_scan( &requests[TWO_CHUNKS_MULT_ZERO_END]
           , &requests[CHUNKED_W_TRAILING_HEADERS]
           , &requests[CHUNKED_W_BULLSHIT_AFTER_LENGTH]
           );

  printf("request scan 4/4 ");
  test_scan( &requests[QUERY_URL_WITH_QUESTION_MARK_GET]
           , &requests[PREFIX_NEWLINE_GET ]
           , &requests[CONNECT_REQUEST]
           );

  test_status_complete();

  puts("requests okay");

  return 0;
}