From 2f77284843ad94e52bf26ffe31481d203c109385 Mon Sep 17 00:00:00 2001 From: Eugene K Date: Thu, 27 Jun 2019 12:41:27 -0400 Subject: [PATCH 1/4] add cmake --- CMakeLists.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..6d2d0537 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.14) +project(http_parser C) + +set(CMAKE_C_STANDARD 11) + +include_directories(.) + +add_library(http_parser STATIC + http_parser.c) +target_include_directories(http_parser + PUBLIC ./) + +add_executable(test test.c) +target_link_libraries(test PUBLIC http_parser) + +add_executable(bench bench.c) +target_link_libraries(bench PUBLIC http_parser) + +add_executable(url_parser contrib/url_parser.c) +target_link_libraries(url_parser PUBLIC http_parser) \ No newline at end of file From c7edd845f9ad1d2576242c0d6aafba21f7d26bab Mon Sep 17 00:00:00 2001 From: Eugene K Date: Thu, 27 Jun 2019 12:42:08 -0400 Subject: [PATCH 2/4] support file, opague, and query-only(magnet:) URLS --- http_parser.c | 26 +++++++++--- http_parser.h | 3 +- test.c | 108 +++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 122 insertions(+), 15 deletions(-) diff --git a/http_parser.c b/http_parser.c index 48963853..fd3f4d8d 100644 --- a/http_parser.c +++ b/http_parser.c @@ -311,6 +311,7 @@ enum state , s_req_query_string , s_req_fragment_start , s_req_fragment + , s_req_opague , s_req_http_start , s_req_http_H , s_req_http_HT @@ -532,7 +533,18 @@ parse_url_char(enum state s, const char ch) return s_req_schema_slash_slash; } - break; + if (ch == '?') { + return s_req_query_string_start; + } + + return s_req_opague; + + case s_req_opague: + if (ch == '?') { + return s_req_query_string_start; + } + + return s; case s_req_schema_slash_slash: if (ch == '/') { @@ -2399,6 +2411,10 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, uf = UF_FRAGMENT; break; + case s_req_opague: + uf = UF_OPAQUE; + break; + default: assert(!"Unexpected state"); return 1; @@ -2419,10 +2435,10 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, /* host must be present if there is a schema */ /* parsing http:///toto will fail */ - if ((u->field_set & (1 << UF_SCHEMA)) && - (u->field_set & (1 << UF_HOST)) == 0) { - return 1; - } +// if ((u->field_set & (1 << UF_SCHEMA)) && +// (u->field_set & (1 << UF_HOST)) == 0) { +// return 1; +// } if (u->field_set & (1 << UF_HOST)) { if (http_parse_host(buf, u, found_at) != 0) { diff --git a/http_parser.h b/http_parser.h index 16b5281d..4d7a669b 100644 --- a/http_parser.h +++ b/http_parser.h @@ -346,7 +346,8 @@ enum http_parser_url_fields , UF_QUERY = 4 , UF_FRAGMENT = 5 , UF_USERINFO = 6 - , UF_MAX = 7 + , UF_OPAQUE = 7 + , UF_MAX = 8 }; diff --git a/test.c b/test.c index 0140a18b..0c2ff3e2 100644 --- a/test.c +++ b/test.c @@ -2801,6 +2801,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2820,6 +2821,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2839,16 +2841,17 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 } -, {.name="CONNECT request but not connect" - ,.url="hostname:443" - ,.is_connect=0 - ,.rv=1 - } +//, {.name="CONNECT request but not connect" +// ,.url="hostname:443" +// ,.is_connect=0 +// ,.rv=1 +// } , {.name="proxy ipv6 request" ,.url="http://[1:2::3:4]/" @@ -2864,6 +2867,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2883,6 +2887,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2902,6 +2907,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2921,6 +2927,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2942,6 +2949,7 @@ const struct url_test url_tests[] = ,{ 30,187 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2961,6 +2969,7 @@ const struct url_test url_tests[] = ,{ 11, 10 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -2981,6 +2990,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 11, 4 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -3002,6 +3012,7 @@ const struct url_test url_tests[] = ,{ 36, 69 } /* UF_QUERY */ ,{106, 7 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -3022,6 +3033,7 @@ const struct url_test url_tests[] = ,{ 29, 12 } /* UF_QUERY */ ,{ 42, 4 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ } } ,.rv=0 @@ -3042,6 +3054,84 @@ const struct url_test url_tests[] = ,{ 33, 12 } /* UF_QUERY */ ,{ 46, 4 } /* UF_FRAGMENT */ ,{ 7, 3 } /* UF_USERINFO */ + ,{ 0, 0 } /* UF_OPAQUE */ + } + } + ,.rv=0 + } +, {.name="opaque URL: see https://golang.org/src/net/url/url_test.go#L136" + ,.url="http:www.google.com/?q=go+language" + ,.is_connect=0 + ,.u= + {.field_set= (1< Date: Tue, 2 Jul 2019 10:37:58 -0400 Subject: [PATCH 3/4] support full scheme spec https://tools.ietf.org/html/rfc3986#section-3.1 --- http_parser.c | 5 ++++- test.c | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/http_parser.c b/http_parser.c index fd3f4d8d..78b9c000 100644 --- a/http_parser.c +++ b/http_parser.c @@ -426,6 +426,8 @@ enum http_host_state (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ (c) == '$' || (c) == ',') +#define IS_SCHEME_CHAR(c) (IS_ALPHANUM(c) || c == '.' || c == '+' || c == '-') + #define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c]) #if HTTP_PARSER_STRICT @@ -518,7 +520,8 @@ parse_url_char(enum state s, const char ch) break; case s_req_schema: - if (IS_ALPHA(ch)) { + // scheme spec: https://tools.ietf.org/html/rfc3986#section-3.1 + if (IS_SCHEME_CHAR(ch)) { return s; } diff --git a/test.c b/test.c index 0c2ff3e2..a7d3600f 100644 --- a/test.c +++ b/test.c @@ -3137,6 +3137,33 @@ const struct url_test url_tests[] = ,.rv=0 } + , {.name="scheme full test. https://tools.ietf.org/html/rfc3986#section-3.1" + ,.url="sch3m3+full-test.v21:somethig_here" + ,.is_connect=0 + ,.u= + {.field_set= (1< Date: Wed, 3 Jul 2019 10:43:00 -0400 Subject: [PATCH 4/4] Rename UF_OPAQUE -> UF_OPAQ due to conflict on MacOS/BSD (defined in sys/stat.h) --- http_parser.c | 2 +- http_parser.h | 2 +- test.c | 42 +++++++++++++++++++++--------------------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/http_parser.c b/http_parser.c index 78b9c000..f8a4d589 100644 --- a/http_parser.c +++ b/http_parser.c @@ -2415,7 +2415,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, break; case s_req_opague: - uf = UF_OPAQUE; + uf = UF_OPAQ; break; default: diff --git a/http_parser.h b/http_parser.h index 4d7a669b..7785a5cf 100644 --- a/http_parser.h +++ b/http_parser.h @@ -346,7 +346,7 @@ enum http_parser_url_fields , UF_QUERY = 4 , UF_FRAGMENT = 5 , UF_USERINFO = 6 - , UF_OPAQUE = 7 + , UF_OPAQ = 7 , UF_MAX = 8 }; diff --git a/test.c b/test.c index a7d3600f..02af33c9 100644 --- a/test.c +++ b/test.c @@ -2801,7 +2801,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2821,7 +2821,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2841,7 +2841,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2867,7 +2867,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2887,7 +2887,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2907,7 +2907,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2927,7 +2927,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2949,7 +2949,7 @@ const struct url_test url_tests[] = ,{ 30,187 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2969,7 +2969,7 @@ const struct url_test url_tests[] = ,{ 11, 10 } /* UF_QUERY */ ,{ 0, 0 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -2990,7 +2990,7 @@ const struct url_test url_tests[] = ,{ 0, 0 } /* UF_QUERY */ ,{ 11, 4 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -3012,7 +3012,7 @@ const struct url_test url_tests[] = ,{ 36, 69 } /* UF_QUERY */ ,{106, 7 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -3033,7 +3033,7 @@ const struct url_test url_tests[] = ,{ 29, 12 } /* UF_QUERY */ ,{ 42, 4 } /* UF_FRAGMENT */ ,{ 0, 0 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -3054,7 +3054,7 @@ const struct url_test url_tests[] = ,{ 33, 12 } /* UF_QUERY */ ,{ 46, 4 } /* UF_FRAGMENT */ ,{ 7, 3 } /* UF_USERINFO */ - ,{ 0, 0 } /* UF_OPAQUE */ + ,{ 0, 0 } /* UF_OPAQ */ } } ,.rv=0 @@ -3063,7 +3063,7 @@ const struct url_test url_tests[] = ,.url="http:www.google.com/?q=go+language" ,.is_connect=0 ,.u= - {.field_set= (1<