Skip to content

Commit 09ac7ac

Browse files
nnsee and ringabout authored
Content-Type header parser (#259)
Co-authored-by: ringabout <[email protected]>
1 parent a3436c9 commit 09ac7ac

File tree

4 files changed

+335
-27
lines changed

4 files changed

+335
-27
lines changed

src/prologue/core/contenttype.nim

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import std/[strutils, tables, strformat, sequtils]
2+
3+
type
  MediaType* = object
    ## A media type split into its components, e.g. the value of a
    ## ``Content-Type`` header such as ``text/plain; charset=utf-8``.
    mainType*: string                  ## part before the ``/`` (``text``)
    subType*: string                   ## part after the ``/`` (``plain``)
    parameters*: Table[string, string] ## ``name=value`` pairs keyed by name
8+
9+
const
  # Characters allowed in a "token" as per RFC 7230 section 3.2.6:
  # ASCII letters, digits and a fixed list of punctuation characters.
  # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6
  VALID_TOKEN_CHARACTERS = Letters + Digits + {
    '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~'}
14+
15+
proc skipWhitespace(headerValue: string, i: var int) =
  ## Advances ``i`` past any run of whitespace characters in ``headerValue``
  ## that starts at index ``i``. ``i`` is modified in place and is left
  ## untouched when it already points at a non-whitespace character or at
  ## the end of the string.
  let stop = headerValue.len
  while i < stop:
    if headerValue[i] notin Whitespace:
      break
    i += 1
20+
21+
proc parseContentType*(headerValue: string): MediaType =
  ## Parses a Content-Type header according to RFC 7230, RFC 2045, and RFC 2046.
  ## Returns a MediaType object containing the main type, sub type, and parameters.
  ##
  ## * The main type, the sub type and parameter names are lower-cased;
  ##   parameter values keep their original case.
  ## * Quoted values are stored without the surrounding quotes. Backslash
  ##   escapes inside them are kept verbatim (they are not unescaped).
  ## * A quoted value without a closing quote extends to the end of the header.
  ## * Malformed parameters (no ``=``, or an empty name) are skipped.
  ## * When a parameter name occurs more than once, the last value wins.
  ## * Parsing stops at the first thing following the media type or a
  ##   parameter value that is neither whitespace nor ``;``.
  ##
  ## Raises ``ValueError`` when the media type is not of the form
  ## ``type/subtype`` with both parts non-empty.
  runnableExamples:
    import std/tables

    let mediaType = parseContentType("text/plain; charset=\"utf-8\"")
    doAssert mediaType.mainType == "text"
    doAssert mediaType.subType == "plain"
    doAssert mediaType.parameters.len == 1
    doAssert mediaType.parameters["charset"] == "utf-8"

  result = MediaType(parameters: initTable[string, string]())
  let headerLen = headerValue.len
  var i = 0

  headerValue.skipWhitespace(i)

  # media type
  let mediaTypeStart = i
  while i < headerLen and headerValue[i] notin {';', ' ', '\t'}:
    inc i

  let mediaType = headerValue[mediaTypeStart..<i].strip()
  let typeParts = mediaType.split('/')

  # Exactly one '/' is required and neither side may be empty
  # ("/", "text/" and "/plain" are not media types).
  if typeParts.len != 2 or typeParts[0].len == 0 or typeParts[1].len == 0:
    raise newException(ValueError, &"Invalid media type: {mediaType}")

  result.mainType = typeParts[0].toLowerAscii
  result.subType = typeParts[1].toLowerAscii

  headerValue.skipWhitespace(i)

  # params
  while i < headerLen and headerValue[i] == ';':
    inc i

    headerValue.skipWhitespace(i)

    # param name
    let paramNameStart = i
    while i < headerLen and headerValue[i] notin {'=', ';', ' ', '\t'}:
      inc i

    if i >= headerLen or headerValue[i] != '=':
      # this is a malformed parameter - skip it
      while i < headerLen and headerValue[i] != ';':
        inc i
      continue

    let paramName = headerValue[paramNameStart..<i].strip().toLowerAscii()
    inc i # step over '='

    headerValue.skipWhitespace(i)

    # param value
    var paramValue: string

    if i < headerLen and headerValue[i] == '"':
      # quoted value
      inc i
      let valueStart = i
      # If no closing quote is found the value runs to the end of the header.
      var valueEnd = headerLen

      while i < headerLen:
        if headerValue[i] == '\\' and i + 1 < headerLen:
          # quoted-pair: the backslash escapes whatever character follows,
          # so `\\` must not be mistaken for the start of an escaped quote.
          inc i, 2
        elif headerValue[i] == '"':
          valueEnd = i
          inc i
          break
        else:
          inc i

      paramValue = headerValue[valueStart..<valueEnd]
    else:
      # unquoted value
      let valueStart = i
      while i < headerLen and headerValue[i] notin {';', ' ', '\t'}:
        inc i

      paramValue = headerValue[valueStart..<i]

    # The value is always consumed so the scan stays in sync, but a
    # parameter without a name is malformed and is not stored.
    if paramName.len > 0:
      result.parameters[paramName] = paramValue
    headerValue.skipWhitespace(i)
109+
110+
proc `$`*(mediaType: MediaType): string =
  ## Convert MediaType to string representation
  ##
  ## The result is ``mainType/subType`` followed by one ``; name=value``
  ## segment per parameter, in the iteration order of the parameter table.
  ## A value is written bare when it is a non-empty run of token characters
  ## and wrapped in double quotes otherwise. The value is inserted verbatim:
  ## no backslash escaping is added here.
  result = mediaType.mainType & "/" & mediaType.subType

  for name, value in mediaType.parameters:
    # A token needs at least one character, so an empty value has to be
    # written as the quoted-string "" rather than as a dangling `name=`.
    if value.len == 0 or value.anyIt(it notin VALID_TOKEN_CHARACTERS):
      result.add(&"; {name}=\"{value}\"")
    else:
      result.add(&"; {name}={value}")

src/prologue/core/form.nim

+13-5
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,30 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
1615
import std/[strtabs, strutils, strformat, parseutils, tables]
1716
from std/uri import decodeQuery
1817

1918
import ./httpcore/httplogue
2019
from ./types import FormPart, initFormPart, `[]=`
2120
import ./request
21+
import ./contenttype
2222

2323

2424
func parseFormPart*(body, contentType: string): FormPart =
2525
## Parses form part of the body of the request.
2626
let
27-
sep = contentType[contentType.rfind("boundary") + 9 .. ^1]
27+
mediaType = parseContentType(contentType)
28+
sep = if "boundary" in mediaType.parameters: mediaType.parameters["boundary"] else: ""
2829
startSep = fmt"--{sep}"
2930
endSep = fmt"--{sep}--"
3031
startPos = find(body, startSep)
3132
endPos = rfind(body, endSep)
33+
34+
# make sure we found valid boundaries
35+
if startPos < 0 or endPos < 0 or startPos >= endPos:
36+
return initFormPart()
37+
38+
let
3239
formData = body[startPos ..< endPos]
3340
formDataSeq = formData.split(startSep & "\c\L")
3441

@@ -89,18 +96,19 @@ func parseFormPart*(body, contentType: string): FormPart =
8996

9097
func parseFormParams*(request: var Request, contentType: string) =
  ## Parses get or post or query parameters.
  ##
  ## ``contentType`` is the raw Content-Type header value. For
  ## ``application/x-www-form-urlencoded`` the form parameters are reset and,
  ## on POST requests, filled from the body (also mirrored into
  ## ``postParams``). For ``multipart/form-data`` with a ``boundary``
  ## parameter the body is handed to ``parseFormPart``. The query string is
  ## always decoded into ``queryParams``.
  let mediaType =
    try:
      parseContentType(contentType)
    except ValueError:
      # Missing or malformed Content-Type: there is no body format to decode,
      # but the query string below must still be processed. The empty
      # MediaType matches neither branch.
      MediaType()

  if mediaType.mainType == "application" and
      mediaType.subType == "x-www-form-urlencoded":
    request.formParams = initFormPart()
    if request.reqMethod == HttpPost:
      for (key, value) in decodeQuery(request.body):
        # formParams and postParams for secret event
        request.formParams[key] = value
        request.postParams[key] = value
  elif mediaType.mainType == "multipart" and
      mediaType.subType == "form-data" and
      "boundary" in mediaType.parameters:
    request.formParams = parseFormPart(request.body, contentType)

  # /student?name=simon&age=sixteen
  # query -> name=simon&age=sixteen
  for (key, value) in decodeQuery(request.query):
    request.queryParams[key] = value
+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import ../../../src/prologue/core/contenttype
2+
import std/[tables, strutils, strformat]
3+
4+
const noParams: array[0, (string, string)] = []

proc expectParsed(header, mainType, subType: string,
                  params: openArray[(string, string)]) =
  ## Parses ``header`` and checks the main type, the sub type and that the
  ## parameter table holds exactly the given ``params``.
  let mediaType = parseContentType(header)
  doAssert mediaType.mainType == mainType
  doAssert mediaType.subType == subType
  doAssert mediaType.parameters.len == params.len
  for (key, value) in params:
    doAssert key in mediaType.parameters
    doAssert mediaType.parameters[key] == value

# bare media type
expectParsed("text/plain", "text", "plain", noParams)

# unquoted parameters
expectParsed("text/plain; charset=utf-8", "text", "plain",
             {"charset": "utf-8"})
expectParsed("text/plain; charset=utf-8; format=flowed", "text", "plain",
             {"charset": "utf-8", "format": "flowed"})

# quoted parameters
expectParsed("text/plain; charset=\"utf-8\"", "text", "plain",
             {"charset": "utf-8"})
expectParsed("application/json; charset=\"utf 8\"", "application", "json",
             {"charset": "utf 8"})

# multipart boundaries
expectParsed(
  "multipart/form-data; boundary=---------------------------263701891623491983764541468",
  "multipart", "form-data",
  {"boundary": "---------------------------263701891623491983764541468"})
expectParsed("multipart/form-data; boundary=\"simple-boundary\"",
             "multipart", "form-data", {"boundary": "simple-boundary"})
expectParsed(
  "multipart/form-data; boundary=\"boundary with spaces and-dashes\"",
  "multipart", "form-data",
  {"boundary": "boundary with spaces and-dashes"})

# escaped quotes inside a quoted value are kept together with their backslash
expectParsed(
  "text/plain; description=\"This is a \\\"quoted\\\" description\"",
  "text", "plain",
  {"description": "This is a \\\"quoted\\\" description"})

# type, sub type and parameter names are lower-cased, values are not
expectParsed("TeXt/PlAiN; ChArSeT=UTF-8", "text", "plain",
             {"charset": "UTF-8"})

# surrounding whitespace is ignored
expectParsed(" text/plain ; charset=utf-8 ; format=flowed ", "text", "plain",
             {"charset": "utf-8", "format": "flowed"})

# a parameter without '=' is dropped
expectParsed("text/plain; charset", "text", "plain", noParams)

# a media type without '/' is rejected
doAssertRaises(ValueError):
  discard parseContentType("text")

# string conversion of a parsed value
block:
  let mediaType = parseContentType("text/plain; charset=utf-8; format=flowed")
  let str = $mediaType
  doAssert str.startsWith("text/plain")
  doAssert "; charset=utf-8" in str
  doAssert "; format=flowed" in str

# values that are not plain tokens get quoted
block:
  let mediaType = MediaType(
    mainType: "multipart",
    subType: "form-data",
    parameters: {"boundary": "simple boundary with spaces"}.toTable
  )
  let str = $mediaType
  doAssert str == "multipart/form-data; boundary=\"simple boundary with spaces\""

block:
  let mediaType = MediaType(
    mainType: "text",
    subType: "plain",
    parameters: {
      "charset": "utf-8",
      "description": "This is a description with spaces"
    }.toTable
  )
  let str = $mediaType
  doAssert str.startsWith("text/plain")
  doAssert "; charset=utf-8" in str
  doAssert "; description=\"This is a description with spaces\"" in str

# some real-world examples
block:
  let examples = [
    "text/html; charset=UTF-8",
    "application/json",
    "application/x-www-form-urlencoded",
    "multipart/form-data; boundary=something",
    "image/jpeg",
    "application/octet-stream",
    "text/css; charset=utf-8",
    "application/javascript",
    "multipart/mixed; boundary=\"frontier\"",
    "application/pdf",
    "text/plain; charset=us-ascii"
  ]

  for example in examples:
    let mediaType = parseContentType(example)
    let roundTrip = $mediaType

    let roundTripMediaType = parseContentType(roundTrip)
    doAssert roundTripMediaType.mainType == mediaType.mainType
    doAssert roundTripMediaType.subType == mediaType.subType
    doAssert roundTripMediaType.parameters.len == mediaType.parameters.len

    for key, value in mediaType.parameters:
      doAssert key in roundTripMediaType.parameters
      doAssert roundTripMediaType.parameters[key] == value

0 commit comments

Comments
 (0)