-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
adds base64 decoding (fixes #5656) #9157
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
9d4d0c3
f529f93
dbe0b02
487c349
14e2ced
58956c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,25 @@ | ||
module Base64 | ||
import Base: read, write, close | ||
export Base64Pipe, base64 | ||
import Base: read, write, close, eof | ||
export Base64EncodePipe, Base64DecodePipe, base64encode, base64decode | ||
|
||
# Base64Pipe is a pipe-like IO object, which converts writes (and | ||
# someday reads?) into base64 encoded (decoded) data send to a stream. | ||
# (You must close the pipe to complete the encode, separate from | ||
# closing the target stream). We also have a function base64(f, | ||
# Base64EncodePipe is a pipe-like IO object, which converts writes into base64-encoded | ||
# data sent to a stream. (You must close the pipe to complete the encode, separate from | ||
# closing the target stream). We also have a function base64encode(f, | ||
# args...) which works like sprint except that it produces | ||
# base64-encoded data, along with base64(args...) which is equivalent | ||
# to base64(write, args...), to return base64 strings. | ||
|
||
# base64-encoded data, along with base64encode(args...) which is equivalent | ||
# to base64encode(write, args...), to return base64 strings. | ||
# A Base64DecodePipe object can be used to decode base64-encoded data read from a | ||
# stream, while the function base64decode is useful for decoding strings. | ||
############################################################################# | ||
|
||
type Base64Pipe <: IO | ||
type Base64EncodePipe <: IO | ||
io::IO | ||
# writing works in groups of 3, so we need to cache last two bytes written | ||
b0::UInt8 | ||
b1::UInt8 | ||
nb::UInt8 # number of bytes in cache: 0, 1, or 2 | ||
|
||
function Base64Pipe(io::IO) | ||
function Base64EncodePipe(io::IO) | ||
b = new(io,0,0,0) | ||
finalizer(b, close) | ||
return b | ||
|
@@ -32,6 +32,8 @@ end | |
|
||
const b64chars = ['A':'Z','a':'z','0':'9','+','/'] | ||
|
||
const base64_pad = uint8('=') | ||
|
||
function b64(x::UInt8, y::UInt8, z::UInt8) | ||
n = int(x)<<16 | int(y)<<8 | int(z) | ||
b64chars[(n >> 18) + 1], | ||
|
@@ -42,17 +44,44 @@ end | |
|
||
function b64(x::UInt8, y::UInt8) | ||
a, b, c = b64(x, y, 0x0) | ||
a, b, c, '=' | ||
a, b, c, base64_pad | ||
end | ||
|
||
function b64(x::UInt8) | ||
a, b = b64(x, 0x0, 0x0) | ||
a, b, '=', '=' | ||
a, b, base64_pad, base64_pad | ||
end | ||
|
||
# Reverse lookup table used for decoding. The slot indexed by a base64
# character's byte value holds that character's 6-bit value (0-63); every
# other slot holds `sentinel`, marking "not part of the alphabet".
# The table has 256 slots indexed from 1, so byte value 0x00 has no slot.
const sentinel = typemax(UInt8)
const revb64chars = fill(sentinel, 256)
for idx = 1:length(b64chars)
    revb64chars[uint8(b64chars[idx])] = uint8(idx - 1)
end
|
||
# Map one encoded character to its 6-bit value.
# Raises an error for any byte outside the base64 alphabet. 0x00 is checked
# explicitly because revb64chars is indexed by byte value and has no slot 0.
function b64value(c::UInt8)
    v = c == 0x00 ? sentinel : revb64chars[c]
    if v == sentinel
        error("Incorrect base64 format")
    end
    v
end

# Decode a block of at least 2 and at most 4 encoded characters.
#
# `encvec` holds the *encoded* characters (padding already stripped).
# 2, 3 or 4 characters yield 1, 2 or 3 decoded bytes respectively.
# Characters beyond the fourth are ignored.
# Raises an error ("Incorrect base64 format") when fewer than 2 characters
# are given or when a character is not part of the base64 alphabet.
function b64decode(encvec::Vector{UInt8})
    n = min(length(encvec), 4)
    if n < 2
        error("Incorrect base64 format")
    end
    u = b64value(encvec[1])
    v = b64value(encvec[2])
    # allocate the result once at its final size instead of growing it
    decvec = Array(UInt8, n - 1)
    decvec[1] = (u << 2) | (v >> 4)
    if n > 2
        w = b64value(encvec[3])
        decvec[2] = (v << 4) | (w >> 2)
        if n > 3
            z = b64value(encvec[4])
            decvec[3] = (w << 6) | z
        end
    end
    decvec
end
|
||
|
||
############################################################################# | ||
|
||
function write(b::Base64Pipe, x::AbstractVector{UInt8}) | ||
function write(b::Base64EncodePipe, x::AbstractVector{UInt8}) | ||
n = length(x) | ||
s = 1 # starting index | ||
# finish any cached data to write: | ||
|
@@ -93,7 +122,7 @@ function write(b::Base64Pipe, x::AbstractVector{UInt8}) | |
end | ||
end | ||
|
||
function write(b::Base64Pipe, x::UInt8) | ||
function write(b::Base64EncodePipe, x::UInt8) | ||
if b.nb == 0 | ||
b.b0 = x | ||
b.nb = 1 | ||
|
@@ -106,7 +135,7 @@ function write(b::Base64Pipe, x::UInt8) | |
end | ||
end | ||
|
||
function close(b::Base64Pipe) | ||
function close(b::Base64EncodePipe) | ||
if b.nb > 0 | ||
# write leftover bytes + padding | ||
if b.nb == 1 | ||
|
@@ -119,19 +148,64 @@ function close(b::Base64Pipe) | |
end | ||
|
||
# like sprint, but returns base64 string | ||
function base64(f::Function, args...) | ||
function base64encode(f::Function, args...) | ||
s = IOBuffer() | ||
b = Base64Pipe(s) | ||
b = Base64EncodePipe(s) | ||
f(b, args...) | ||
close(b) | ||
takebuf_string(s) | ||
end | ||
base64(x...) = base64(write, x...) | ||
base64encode(x...) = base64encode(write, x...) | ||
|
||
############################################################################# | ||
|
||
# read(b::Base64Pipe, ::Type{UInt8}) = # TODO: decode base64 | ||
|
||
############################################################################# | ||
|
||
# Base64DecodePipe is a pipe-like IO object that decodes base64 data as it is
# read from the wrapped stream `io`.
type Base64DecodePipe <: IO
    io::IO
    # reading consumes blocks of up to 4 characters that decode into up to
    # 3 bytes; the first byte is returned and the remaining ones are cached here
    cache::Vector{UInt8}

    function Base64DecodePipe(io::IO)
        # typed empty literal: avoids creating a Vector{Any} that then has to
        # be converted to the field type
        b = new(io, UInt8[])
        finalizer(b, close)
        return b
    end
end
|
||
# Read one decoded byte from the pipe.
#
# Cached bytes from a previously decoded block are served first. Otherwise up
# to 4 characters of the base64 alphabet are collected from the underlying
# stream (anything else, such as newlines or '=' padding, is skipped), the
# block is decoded, its first byte is returned and the rest is cached.
# Raises an error (from b64decode) when fewer than 2 alphabet characters are
# available before the underlying stream ends.
# NOTE(review): ignorable characters that follow a complete final block (for
# example a trailing newline) are only discovered here, after eof(b) has
# already returned false, so such input ends in a format error -- confirm
# whether eof should skip them.
function read(b::Base64DecodePipe, t::Type{UInt8})
    if length(b.cache) > 0
        return shift!(b.cache)
    end
    # still-encoded characters of the next block; they are the input of the
    # decoder, nothing is being encoded here
    chars = Array(UInt8, 0)
    while !eof(b.io) && length(chars) < 4
        c::UInt8 = read(b.io, t)
        # revb64chars is indexed by byte value and has no slot for 0x00, so
        # that byte is rejected before the table lookup
        if c != 0x00 && revb64chars[c] != sentinel
            push!(chars, c)
        end
    end
    decoded = b64decode(chars)
    val = shift!(decoded)
    # what is left of the decoded block becomes the cache (no extra copy)
    b.cache = decoded
    val
end
|
||
# The pipe is exhausted once no decoded bytes remain cached and the underlying
# stream reports end-of-file.
eof(b::Base64DecodePipe) = length(b.cache) == 0 && eof(b.io)
|
||
# Closing a decode pipe does nothing; in particular the underlying stream is
# not closed here.
close(b::Base64DecodePipe) = nothing
|
||
# Decode the base64-encoded data in `s` and return everything that
# `readall` produces from the decoding pipe.
# NOTE(review): reviewers of this change considered `readall` wrong here,
# since base64 is mostly used to store binary data rather than Unicode text;
# returning the raw bytes instead would change what callers receive, so the
# behavior is left as is -- confirm the intended return type.
function base64decode(s)
    src = IOBuffer(s)
    pipe = Base64DecodePipe(src)
    text = readall(pipe)
    close(src)
    return text
end
|
||
end # module |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -240,3 +240,7 @@ const Uint128 = UInt128 | |
@deprecate ifloor{T}(::Type{T},x) floor(T,x) | ||
@deprecate iround(x) round(Integer,x) | ||
@deprecate iround{T}(::Type{T},x) round(T,x) | ||
|
||
export Base64Pipe, base64 | ||
const Base64Pipe = Base64EncodePipe | ||
const base64 = base64encode | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this be a |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
|
||
const inputText = "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure." | ||
const encodedMaxLine76 = | ||
"""TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz | ||
IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg | ||
dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu | ||
dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo | ||
ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=""" | ||
|
||
# Encode to a temporary file and decode it again.
# The do-block form of open closes each file handle even when a write or a
# failing @test throws, and the finally clause removes the temporary file.
fname = tempname()
try
    open(fname, "w") do f
        opipe = Base64EncodePipe(f)
        write(opipe, inputText)
        # the pipe must be closed before the file so that the cached bytes
        # and the padding are written out
        close(opipe)
    end
    open(fname, "r") do f
        ipipe = Base64DecodePipe(f)
        @test readall(ipipe) == inputText
        close(ipipe)
    end
finally
    # the file may not exist if opening it for writing failed
    isfile(fname) && rm(fname)
end
|
||
# Encode to string and decode | ||
@test base64decode(base64encode(inputText)) == inputText | ||
|
||
# Decode with max line chars = 76 and padding | ||
ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76)) | ||
@test readall(ipipe) == inputText | ||
|
||
# Decode with max line chars = 76 and no padding | ||
ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76[1:end-1])) | ||
@test readall(ipipe) == inputText | ||
|
||
# Decode with two padding characters ("==") | ||
ipipe = Base64DecodePipe(IOBuffer(string(encodedMaxLine76[1:end-2],"=="))) | ||
@test readall(ipipe) == inputText[1:end-1] | ||
|
||
# Test incorrect format | ||
ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76[1:end-3])) | ||
@test_throws ErrorException readall(ipipe) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this mean that a whole array needs to be allocated for every 4 bytes? That seems unlikely to have good performance.