Skip to content

Commit 874a605

Browse files
committed
net/url: add RawPath field, a hint at the desired encoding of Path
Historically we have declined to try to provide real support for URLs that contain %2F in the path, but they seem to be popping up more often, especially in (arguably ill-considered) REST APIs that shoehorn entire paths into individual path elements. The obvious thing to do is to introduce a URL.RawPath field that records the original encoding of Path and then consult it during URL.String and URL.RequestURI. The problem with the obvious thing is that it breaks backward compatibility: if someone parses a URL into u, modifies u.Path, and calls u.String, they expect the result to use the modified u.Path and not the original raw encoding. Split the difference by treating u.RawPath as a hint: the observation is that there are many valid encodings of u.Path. If u.RawPath is one of them, use it. Otherwise compute the encoding of u.Path as before. If a client does not use RawPath, the only change will be that String selects a different valid encoding sometimes (the original passed to Parse). This ensures that, for example, HTTP requests use the exact encoding passed to http.Get (or http.NewRequest, etc). Also add new URL.EscapedPath method for access to the actual escaped path. Clients should use EscapedPath instead of reading RawPath directly. All the old workarounds remain valid. Fixes #5777. Might help #9859. Fixes #7356. Fixes #8767. Fixes #8292. Fixes #8450. Fixes #4860. Fixes #10887. Fixes #3659. Fixes #8248. Fixes #6658. Reduces need for #2782. Change-Id: I77b88f14631883a7d74b72d1cf19b0073d4f5473 Reviewed-on: https://go-review.googlesource.com/11302 Reviewed-by: Brad Fitzpatrick <[email protected]>
1 parent 794c01b commit 874a605

File tree

3 files changed

+124
-12
lines changed

3 files changed

+124
-12
lines changed

src/net/url/example_test.go

+15
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,21 @@ func ExampleURL() {
4343
// Output: https://google.com/search?q=golang
4444
}
4545

46+
func ExampleURL_roundtrip() {
47+
// Parse + String preserve the original encoding.
48+
u, err := url.Parse("https://example.com/foo%2fbar")
49+
if err != nil {
50+
log.Fatal(err)
51+
}
52+
fmt.Println(u.Path)
53+
fmt.Println(u.RawPath)
54+
fmt.Println(u.String())
55+
// Output:
56+
// /foo/bar
57+
// /foo%2fbar
58+
// https://example.com/foo%2fbar
59+
}
60+
4661
func ExampleURL_opaque() {
4762
// Sending a literal '%' in an HTTP request's Path
4863
req := &http.Request{

src/net/url/url.go

+48-7
Original file line numberDiff line numberDiff line change
@@ -239,16 +239,24 @@ func escape(s string, mode encoding) string {
239239
// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
240240
// A consequence is that it is impossible to tell which slashes in the Path were
241241
// slashes in the raw URL and which were %2f. This distinction is rarely important,
242-
// but when it is a client must use other routines to parse the raw URL or construct
243-
// the parsed URL. For example, an HTTP server can consult req.RequestURI, and
244-
// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"}
245-
// instead of URL{Host: "example.com", Path: "/Go/"}.
242+
// but when it is, code must not use Path directly.
243+
//
244+
// Go 1.5 introduced the RawPath field to hold the encoded form of Path.
245+
// The Parse function sets both Path and RawPath in the URL it returns,
246+
// and URL's String method uses RawPath if it is a valid encoding of Path,
247+
// by calling the EscapedPath method.
248+
//
249+
// In earlier versions of Go, the more indirect workarounds were that an
250+
// HTTP server could consult req.RequestURI and an HTTP client could
251+
// construct a URL struct directly and set the Opaque field instead of Path.
252+
// These still work as well.
246253
type URL struct {
247254
Scheme string
248255
Opaque string // encoded opaque data
249256
User *Userinfo // username and password information
250257
Host string // host or host:port
251258
Path string
259+
RawPath string // encoded path hint (Go 1.5 and later only; see EscapedPath method)
252260
RawQuery string // encoded query values, without '?'
253261
Fragment string // fragment for references, without '#'
254262
}
@@ -417,6 +425,7 @@ func parse(rawurl string, viaRequest bool) (url *URL, err error) {
417425
goto Error
418426
}
419427
}
428+
url.RawPath = rest
420429
if url.Path, err = unescape(rest, encodePath); err != nil {
421430
goto Error
422431
}
@@ -501,6 +510,36 @@ func parseHost(host string) (string, error) {
501510
return host, nil
502511
}
503512

513+
// EscapedPath returns the escaped form of u.Path.
514+
// In general there are multiple possible escaped forms of any path.
515+
// EscapedPath returns u.RawPath when it is a valid escaping of u.Path.
516+
// Otherwise EscapedPath ignores u.RawPath and computes an escaped
517+
// form on its own.
518+
// The String and RequestURI methods use EscapedPath to construct
519+
// their results.
520+
// In general, code should call EscapedPath instead of
521+
// reading u.RawPath directly.
522+
func (u *URL) EscapedPath() string {
523+
if u.RawPath != "" && validEncodedPath(u.RawPath) {
524+
p, err := unescape(u.RawPath, encodePath)
525+
if err == nil && p == u.Path {
526+
return u.RawPath
527+
}
528+
}
529+
return escape(u.Path, encodePath)
530+
}
531+
532+
// validEncodedPath reports whether s is a valid encoded path.
533+
// It must not contain any bytes that require escaping during path encoding.
534+
func validEncodedPath(s string) bool {
535+
for i := 0; i < len(s); i++ {
536+
if s[i] != '%' && shouldEscape(s[i], encodePath) {
537+
return false
538+
}
539+
}
540+
return true
541+
}
542+
504543
// String reassembles the URL into a valid URL string.
505544
// The general form of the result is one of:
506545
//
@@ -509,6 +548,7 @@ func parseHost(host string) (string, error) {
509548
//
510549
// If u.Opaque is non-empty, String uses the first form;
511550
// otherwise it uses the second form.
551+
// To obtain the path, String uses u.EscapedPath().
512552
//
513553
// In the second form, the following rules apply:
514554
// - if u.Scheme is empty, scheme: is omitted.
@@ -539,10 +579,11 @@ func (u *URL) String() string {
539579
buf.WriteString(escape(h, encodeHost))
540580
}
541581
}
542-
if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
582+
path := u.EscapedPath()
583+
if path != "" && path[0] != '/' && u.Host != "" {
543584
buf.WriteByte('/')
544585
}
545-
buf.WriteString(escape(u.Path, encodePath))
586+
buf.WriteString(path)
546587
}
547588
if u.RawQuery != "" {
548589
buf.WriteByte('?')
@@ -764,7 +805,7 @@ func (u *URL) Query() Values {
764805
func (u *URL) RequestURI() string {
765806
result := u.Opaque
766807
if result == "" {
767-
result = escape(u.Path, encodePath)
808+
result = u.EscapedPath()
768809
if result == "" {
769810
result = "/"
770811
}

src/net/url/url_test.go

+61-5
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313

1414
type URLTest struct {
1515
in string
16-
out *URL
16+
out *URL // expected parse; RawPath="" means same as Path
1717
roundtrip string // expected result of reserializing the URL; empty means same as "in".
1818
}
1919

@@ -41,11 +41,12 @@ var urltests = []URLTest{
4141
{
4242
"http://www.google.com/file%20one%26two",
4343
&URL{
44-
Scheme: "http",
45-
Host: "www.google.com",
46-
Path: "/file one&two",
44+
Scheme: "http",
45+
Host: "www.google.com",
46+
Path: "/file one&two",
47+
RawPath: "/file%20one%26two",
4748
},
48-
"http://www.google.com/file%20one&two",
49+
"",
4950
},
5051
// user
5152
{
@@ -98,6 +99,7 @@ var urltests = []URLTest{
9899
Scheme: "http",
99100
Host: "www.google.com",
100101
Path: "/a b",
102+
RawPath: "/a%20b",
101103
RawQuery: "q=c+d",
102104
},
103105
"",
@@ -369,6 +371,18 @@ var urltests = []URLTest{
369371
},
370372
"http://[fe80::1%25en01-._~]:8080/",
371373
},
374+
// alternate escapings of path survive round trip
375+
{
376+
"http://rest.rsc.io/foo%2fbar/baz%2Fquux?alt=media",
377+
&URL{
378+
Scheme: "http",
379+
Host: "rest.rsc.io",
380+
Path: "/foo/bar/baz/quux",
381+
RawPath: "/foo%2fbar/baz%2Fquux",
382+
RawQuery: "alt=media",
383+
},
384+
"",
385+
},
372386
}
373387

374388
// more useful string for debugging than fmt's struct printer
@@ -391,6 +405,9 @@ func DoTest(t *testing.T, parse func(string) (*URL, error), name string, tests [
391405
t.Errorf("%s(%q) returned error %s", name, tt.in, err)
392406
continue
393407
}
408+
if tt.out.RawPath == "" {
409+
tt.out.RawPath = tt.out.Path
410+
}
394411
if !reflect.DeepEqual(u, tt.out) {
395412
t.Errorf("%s(%q):\n\thave %v\n\twant %v\n",
396413
name, tt.in, ufmt(u), ufmt(tt.out))
@@ -973,6 +990,25 @@ var requritests = []RequestURITest{
973990
},
974991
"http://other.example.com/%2F/%2F/",
975992
},
993+
// better fix for issue 4860
994+
{
995+
&URL{
996+
Scheme: "http",
997+
Host: "example.com",
998+
Path: "/////",
999+
RawPath: "/%2F/%2F/",
1000+
},
1001+
"/%2F/%2F/",
1002+
},
1003+
{
1004+
&URL{
1005+
Scheme: "http",
1006+
Host: "example.com",
1007+
Path: "/////",
1008+
RawPath: "/WRONG/", // ignored because doesn't match Path
1009+
},
1010+
"/////",
1011+
},
9761012
{
9771013
&URL{
9781014
Scheme: "http",
@@ -982,6 +1018,26 @@ var requritests = []RequestURITest{
9821018
},
9831019
"/a%20b?q=go+language",
9841020
},
1021+
{
1022+
&URL{
1023+
Scheme: "http",
1024+
Host: "example.com",
1025+
Path: "/a b",
1026+
RawPath: "/a b", // ignored because invalid
1027+
RawQuery: "q=go+language",
1028+
},
1029+
"/a%20b?q=go+language",
1030+
},
1031+
{
1032+
&URL{
1033+
Scheme: "http",
1034+
Host: "example.com",
1035+
Path: "/a?b",
1036+
RawPath: "/a?b", // ignored because invalid
1037+
RawQuery: "q=go+language",
1038+
},
1039+
"/a%3Fb?q=go+language",
1040+
},
9851041
{
9861042
&URL{
9871043
Scheme: "myschema",

0 commit comments

Comments
 (0)