-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
wip: string overhaul #24439
wip: string overhaul #24439
Changes from all commits
1917811
80cb480
40ac089
8ba3bba
2aa2c8c
806441e
5ab5eef
08656a3
b8f7306
747ce23
d1e83e8
1972d46
0024056
a84e666
358ce5d
912779e
cbbee08
8a22a96
829aba2
b2d231b
f82c793
c55cca0
68467ad
29dc1c3
d68eb07
2fadfb0
5aad731
cad41c5
f849593
61dbb90
931b289
0794487
b674bc1
b802606
8d414fd
120f9ca
1861238
1c722f1
e54e4c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -149,6 +149,26 @@ function read(f::File, ::Type{UInt8}) | |
return ret % UInt8 | ||
end | ||
|
||
function read(f::File, ::Type{Char}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is annoying: |
||
b0 = read(f, UInt8) | ||
l = 8(4-leading_ones(b0)) | ||
c = UInt32(b0) << 24 | ||
if l < 24 | ||
s = 16 | ||
while s ≥ l && !eof(f) | ||
p = position(f) | ||
b = read(f, UInt8) | ||
if b & 0xc0 != 0x80 | ||
seek(f, p) | ||
break | ||
end | ||
c |= UInt32(b) << s | ||
s -= 8 | ||
end | ||
end | ||
return reinterpret(Char, c) | ||
end | ||
|
||
function unsafe_read(f::File, p::Ptr{UInt8}, nel::UInt) | ||
check_open(f) | ||
ret = ccall(:jl_fs_read, Int32, (Int32, Ptr{Void}, Csize_t), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -315,12 +315,13 @@ end | |
|
||
## low-level calls ## | ||
|
||
write(s::IOStream, b::UInt8) = Int(ccall(:ios_putc, Cint, (Cint, Ptr{Void}), b, s.ios)) | ||
function write(s::IOStream, b::UInt8) | ||
iswritable(s) || throw(ArgumentError("write failed, IOStream is not writeable")) | ||
Int(ccall(:ios_putc, Cint, (Cint, Ptr{Void}), b, s.ios)) | ||
end | ||
|
||
function unsafe_write(s::IOStream, p::Ptr{UInt8}, nb::UInt) | ||
if !iswritable(s) | ||
throw(ArgumentError("write failed, IOStream is not writeable")) | ||
end | ||
iswritable(s) || throw(ArgumentError("write failed, IOStream is not writeable")) | ||
return Int(ccall(:ios_write, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t), s.ios, p, nb)) | ||
end | ||
|
||
|
@@ -353,14 +354,6 @@ end | |
|
||
## text I/O ## | ||
|
||
function write(s::IOStream, c::Char) | ||
if !iswritable(s) | ||
throw(ArgumentError("write failed, IOStream is not writeable")) | ||
end | ||
Int(ccall(:ios_pututf8, Cint, (Ptr{Void}, UInt32), s.ios, c)) | ||
end | ||
read(s::IOStream, ::Type{Char}) = Char(ccall(:jl_getutf8, UInt32, (Ptr{Void},), s.ios)) | ||
|
||
take!(s::IOStream) = | ||
ccall(:jl_take_buffer, Vector{UInt8}, (Ptr{Void},), s.ios) | ||
|
||
|
@@ -452,14 +445,23 @@ function read(s::IOStream, nb::Integer; all::Bool=true) | |
end | ||
|
||
## Character streams ## | ||
const _chtmp = Ref{Char}() | ||
|
||
function peekchar(s::IOStream) | ||
if ccall(:ios_peekutf8, Cint, (Ptr{Void}, Ptr{Char}), s, _chtmp) < 0 | ||
chref = Ref{UInt32}() | ||
if ccall(:ios_peekutf8, Cint, (Ptr{Void}, Ptr{UInt32}), s, chref) < 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. current implementation of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems a bit wasteful for
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, this stuff should be optimized to avoid converting back and forth. This PR is WIP and the first step is just to get everything working and all tests passing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is still in my new PR, which is annoying but I think that all the |
||
return typemax(Char) | ||
end | ||
return _chtmp[] | ||
return Char(chref[]) | ||
end | ||
|
||
function peek(s::IOStream) | ||
ccall(:ios_peekc, Cint, (Ptr{Void},), s) | ||
end | ||
|
||
function peek(s::IO) | ||
mark(s) | ||
try read(s, UInt8) | ||
finally | ||
reset(s) | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1148,6 +1148,14 @@ unmark(x::LibuvStream) = unmark(x.buffer) | |
reset(x::LibuvStream) = reset(x.buffer) | ||
ismarked(x::LibuvStream) = ismarked(x.buffer) | ||
|
||
function peek(s::LibuvStream) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
mark(s) | ||
try read(s, UInt8) | ||
finally | ||
reset(s) | ||
end | ||
end | ||
|
||
# BufferStream's are non-OS streams, backed by a regular IOBuffer | ||
mutable struct BufferStream <: LibuvStream | ||
buffer::IOBuffer | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
According to
BenchmarkTools
this is 5 times slower than the oldhash(x::Char)
. Should we useor similar instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps. I worry about that xor being symmetric. I also worry that we're using this kind of pattern all over Base and it is quite inefficient. We really need a better way to express this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Went with your suggested definition for now. If there's some hash collision issue with the existing definition, that's a totally independent issue to this representation change so can be addressed separately.