Skip to content

Commit 068f673

Browse files
authored
Merge pull request #459 from jarthod/iso-encoding-problem
fix `invalid byte sequence in UTF-8` exception when unencoding URLs containing non-UTF-8 characters
2 parents 08d27e8 + b4c9882 commit 068f673

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

lib/addressable/uri.rb

+3 -9
Original file line number | Diff line number | Diff line change
@@ -468,19 +468,13 @@ def self.unencode(uri, return_type=String, leave_encoded='')
468468
"Expected Class (String or Addressable::URI), " +
469469
"got #{return_type.inspect}"
470470
end
471-
uri = uri.dup
472-
# Seriously, only use UTF-8. I'm really not kidding!
473-
uri.force_encoding("utf-8")
474471

475-
unless leave_encoded.empty?
476-
leave_encoded = leave_encoded.dup.force_encoding("utf-8")
477-
end
478-
479-
result = uri.gsub(/%[0-9a-f]{2}/iu) do |sequence|
472+
result = uri.gsub(/%[0-9a-f]{2}/i) do |sequence|
480473
c = sequence[1..3].to_i(16).chr
481-
c.force_encoding("utf-8")
474+
c.force_encoding(sequence.encoding)
482475
leave_encoded.include?(c) ? sequence : c
483476
end
477+
484478
result.force_encoding("utf-8")
485479
if return_type == String
486480
return result

spec/addressable/uri_spec.rb

+5
Original file line number | Diff line number | Diff line change
@@ -5992,6 +5992,11 @@ def to_str
59925992
expect(Addressable::URI.unencode_component("ski=%BA%DAɫ")).to eq("ski=\xBA\xDAɫ")
59935993
end
59945994

5995+
it "should not fail with UTF-8 incompatible string" do
5996+
url = "/M%E9/\xE9?p=\xFC".b
5997+
expect(Addressable::URI.unencode_component(url)).to eq("/M\xE9/\xE9?p=\xFC")
5998+
end
5999+
59956000
it "should result in correct percent encoded sequence as a URI" do
59966001
expect(Addressable::URI.unencode(
59976002
"/path?g%C3%BCnther", ::Addressable::URI

0 commit comments

Comments
 (0)