Skip to content

Commit 2c327d3

Browse files
committed
tryparse: parse string to Nullable
Introduces the tryparse method:
- tryparse{T<:Integer}(::Type{T}, s::AbstractString)
- tryparse(::Type{Float..}, s::AbstractString)
- a few variants of the above

And:
- tryparse(Float.., ...) calls the corresponding C functions jl_try_strtof, jl_try_substrtof, jl_try_strtod and jl_try_substrtod.
- The parseint, parsefloat, float64_isvalid and float32_isvalid methods wrap the corresponding tryparse methods.
- The jl_strtod, jl_strtof, ... functions are wrappers over the jl_try_str... functions.

This should fix JuliaLang#10498 as well.

Ref: discussions at JuliaLang#9316, JuliaLang#3631, JuliaLang#5704
1 parent 2a4fd44 commit 2c327d3

10 files changed

+277
-130
lines changed

base/base.jl

-1
Original file line numberDiff line numberDiff line change
@@ -277,4 +277,3 @@ immutable Nullable{T}
277277
Nullable() = new(true)
278278
Nullable(value::T) = new(false, value)
279279
end
280-

base/combinatorics.jl

-21
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,6 @@ const _fact_table64 =
33
87178291200,1307674368000,20922789888000,355687428096000,6402373705728000,
44
121645100408832000,2432902008176640000]
55

6-
const _fact_table128 =
7-
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
8-
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
9-
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
10-
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
11-
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
12-
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
13-
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
14-
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
15-
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
16-
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
17-
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
18-
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
19-
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
20-
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
21-
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
22-
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
23-
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
24-
256
function factorial_lookup(n::Integer, table, lim)
267
n < 0 && throw(DomainError())
278
n > lim && throw(OverflowError())
@@ -30,8 +11,6 @@ function factorial_lookup(n::Integer, table, lim)
3011
return oftype(n, f)
3112
end
3213

33-
factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
34-
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
3514
factorial(n::Union(Int64,UInt64)) = factorial_lookup(n, _fact_table64, 20)
3615

3716
if Int === Int32

base/exports.jl

+1
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ export
343343
fldmod,
344344
flipsign,
345345
float,
346+
tryparse,
346347
floor,
347348
fma,
348349
frexp,

base/gmp.jl

+13-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ export BigInt
55
import Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), ($),
66
binomial, cmp, convert, div, divrem, factorial, fld, gcd, gcdx, lcm, mod,
77
ndigits, promote_rule, rem, show, isqrt, string, isprime, powermod,
8-
sum, trailing_zeros, trailing_ones, count_ones, base, parseint,
8+
sum, trailing_zeros, trailing_ones, count_ones, base, parseint, tryparse_internal,
99
serialize, deserialize, bin, oct, dec, hex, isequal, invmod,
1010
prevpow2, nextpow2, ndigits0z, widen, signed
1111

@@ -76,15 +76,23 @@ signed(x::BigInt) = x
7676
BigInt(x::BigInt) = x
7777
BigInt(s::AbstractString) = parseint(BigInt,s)
7878

79-
function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int)
79+
function tryparse_internal(::Type{BigInt}, s::AbstractString, base::Int, raise::Bool)
80+
_n = Nullable{BigInt}()
8081
s = bytestring(s)
8182
sgn, base, i = Base.parseint_preamble(true,s,base)
83+
if i == 0
84+
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
85+
return _n
86+
end
8287
z = BigInt()
8388
err = ccall((:__gmpz_set_str, :libgmp),
8489
Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32),
8590
&z, SubString(s,i), base)
86-
err == 0 || throw(ArgumentError("invalid BigInt: $(repr(s))"))
87-
return sgn < 0 ? -z : z
91+
if err != 0
92+
raise && throw(ArgumentError("invalid BigInt: $(repr(s))"))
93+
return _n
94+
end
95+
Nullable(sgn < 0 ? -z : z)
8896
end
8997

9098
function BigInt(x::Union(Clong,Int32))
@@ -217,7 +225,7 @@ function serialize(s, n::BigInt)
217225
serialize(s, base(62,n))
218226
end
219227

220-
deserialize(s, ::Type{BigInt}) = Base.parseint_nocheck(BigInt, deserialize(s), 62)
228+
deserialize(s, ::Type{BigInt}) = get(tryparse_internal(BigInt, deserialize(s), 62, true))
221229

222230
# Binary ops
223231
for (fJ, fC) in ((:+, :add), (:-,:sub), (:*, :mul),

base/nullable.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ convert( ::Type{Nullable }, ::Void) = Nullable{Union()}()
1717

1818
function show{T}(io::IO, x::Nullable{T})
1919
if x.isnull
20-
@printf(io, "Nullable{%s}()", repr(T))
20+
println(io, "Nullable{$(repr(T))}()")
2121
else
22-
@printf(io, "Nullable(%s)", repr(x.value))
22+
println(io, "Nullable($(repr(x.value)))")
2323
end
2424
end
2525

base/string.jl

+81-48
Original file line numberDiff line numberDiff line change
@@ -1487,27 +1487,33 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b
14871487
parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c))
14881488

14891489
function parseint_next(s::AbstractString, i::Int=start(s))
1490-
done(s,i) && throw(ArgumentError("premature end of integer: $(repr(s))"))
1490+
done(s,i) && (return Char(0), 0, 0)
14911491
j = i
14921492
c, i = next(s,i)
14931493
c, i, j
14941494
end
14951495

14961496
function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
14971497
c, i, j = parseint_next(s)
1498+
14981499
while isspace(c)
14991500
c, i, j = parseint_next(s,i)
15001501
end
1502+
(j == 0) && (return 0, 0, 0)
1503+
15011504
sgn = 1
15021505
if signed
15031506
if c == '-' || c == '+'
15041507
(c == '-') && (sgn = -1)
15051508
c, i, j = parseint_next(s,i)
15061509
end
15071510
end
1511+
15081512
while isspace(c)
15091513
c, i, j = parseint_next(s,i)
15101514
end
1515+
(j == 0) && (return 0, 0, 0)
1516+
15111517
if base == 0
15121518
if c == '0' && !done(s,i)
15131519
c, i = next(s,i)
@@ -1522,94 +1528,121 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
15221528
return sgn, base, j
15231529
end
15241530

1525-
function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int)
1531+
safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2)
1532+
safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) :
1533+
(n2 < -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) :
1534+
((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2)
1535+
1536+
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int, raise::Bool)
1537+
_n = Nullable{T}()
15261538
sgn, base, i = parseint_preamble(T<:Signed,s,base)
1539+
if i == 0
1540+
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
1541+
return _n
1542+
end
15271543
c, i = parseint_next(s,i)
1544+
if i == 0
1545+
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
1546+
return _n
1547+
end
1548+
15281549
base = convert(T,base)
1529-
## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt
1530-
m::T = T===UInt128 || T===Int128 ? typemax(T) : div(typemax(T)-base+1,base)
1550+
m::T = div(typemax(T)-base+1,base)
15311551
n::T = 0
15321552
while n <= m
15331553
d::T = '0' <= c <= '9' ? c-'0' :
15341554
'A' <= c <= 'Z' ? c-'A'+10 :
15351555
'a' <= c <= 'z' ? c-'a'+a : base
1536-
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1556+
if d >= base
1557+
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1558+
return _n
1559+
end
15371560
n *= base
15381561
n += d
15391562
if done(s,i)
15401563
n *= sgn
1541-
return n
1564+
return Nullable{T}(n)
15421565
end
15431566
c, i = next(s,i)
15441567
isspace(c) && break
15451568
end
15461569
(T <: Signed) && (n *= sgn)
15471570
while !isspace(c)
15481571
d::T = '0' <= c <= '9' ? c-'0' :
1549-
'A' <= c <= 'Z' ? c-'A'+10 :
1550-
'a' <= c <= 'z' ? c-'a'+a : base
1551-
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1572+
'A' <= c <= 'Z' ? c-'A'+10 :
1573+
'a' <= c <= 'z' ? c-'a'+a : base
1574+
if d >= base
1575+
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1576+
return _n
1577+
end
15521578
(T <: Signed) && (d *= sgn)
1553-
n = checked_mul(n,base)
1554-
n = checked_add(n,d)
1555-
done(s,i) && return n
1579+
1580+
safe_n = safe_mul(n, base)
1581+
isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
1582+
if isnull(safe_n)
1583+
raise && throw(OverflowError())
1584+
return _n
1585+
end
1586+
n = get(safe_n)
1587+
done(s,i) && return Nullable{T}(n)
15561588
c, i = next(s,i)
15571589
end
15581590
while !done(s,i)
15591591
c, i = next(s,i)
1560-
isspace(c) || throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
1592+
if !isspace(c)
1593+
raise && throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
1594+
return _n
1595+
end
15611596
end
1562-
return n
1597+
return Nullable{T}(n)
15631598
end
1564-
parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
1565-
parseint_nocheck(T, s, base, base <= 36 ? 10 : 36)
1599+
tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool) =
1600+
tryparse_internal(T, s, base, base <= 36 ? 10 : 36, raise)
1601+
tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
1602+
2 <= base <= 62 ? tryparse_internal(T,s,Int(base),false) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
1603+
tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false)
15661604

1567-
parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) =
1568-
2 <= base <= 62 ? parseint_nocheck(T,s,Int(base)) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
1569-
parseint{T<:Integer}(::Type{T}, s::AbstractString) = parseint_nocheck(T,s,0)
1605+
function parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
1606+
(2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
1607+
get(tryparse_internal(T, s, base, true))
1608+
end
1609+
parseint{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
15701610
parseint(s::AbstractString, base::Integer) = parseint(Int,s,base)
1571-
parseint(s::AbstractString) = parseint_nocheck(Int,s,0)
1611+
parseint(s::AbstractString) = parseint(Int,s)
15721612

15731613
## stringifying integers more efficiently ##
15741614

15751615
string(x::Union(Int8,Int16,Int32,Int64,Int128)) = dec(x)
15761616

15771617
## string to float functions ##
15781618

1579-
float64_isvalid(s::AbstractString, out::Array{Float64,1}) =
1580-
ccall(:jl_strtod, Int32, (Ptr{UInt8},Ptr{Float64}), s, out) == 0
1581-
float32_isvalid(s::AbstractString, out::Array{Float32,1}) =
1582-
ccall(:jl_strtof, Int32, (Ptr{UInt8},Ptr{Float32}), s, out) == 0
1583-
1584-
float64_isvalid(s::SubString, out::Array{Float64,1}) =
1585-
ccall(:jl_substrtod, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float64}), s.string, s.offset, s.endof, out) == 0
1586-
float32_isvalid(s::SubString, out::Array{Float32,1}) =
1587-
ccall(:jl_substrtof, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float32}), s.string, s.offset, s.endof, out) == 0
1588-
1589-
begin
1590-
local tmp::Array{Float64,1} = Array(Float64,1)
1591-
local tmpf::Array{Float32,1} = Array(Float32,1)
1592-
global parsefloat
1593-
function parsefloat(::Type{Float64}, s::AbstractString)
1594-
if !float64_isvalid(s, tmp)
1595-
throw(ArgumentError("parsefloat(Float64,::AbstractString): invalid number format $(repr(s))"))
1596-
end
1597-
return tmp[1]
1598-
end
1619+
tryparse(::Type{Float64}, s::AbstractString) = ccall(:jl_try_strtod, Nullable{Float64}, (Ptr{UInt8},), s)
1620+
tryparse(::Type{Float64}, s::SubString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)
15991621

1600-
function parsefloat(::Type{Float32}, s::AbstractString)
1601-
if !float32_isvalid(s, tmpf)
1602-
throw(ArgumentError("parsefloat(Float32,::AbstractString): invalid number format $(repr(s))"))
1603-
end
1604-
return tmpf[1]
1605-
end
1622+
tryparse(::Type{Float32}, s::AbstractString) = ccall(:jl_try_strtof, Nullable{Float32}, (Ptr{UInt8},), s)
1623+
tryparse(::Type{Float32}, s::SubString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)
1624+
1625+
function parse{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString)
1626+
nf = tryparse(T, s)
1627+
isnull(nf) ? throw(ArgumentError("invalid number format $(repr(s)) for $T")) : get(nf)
16061628
end
16071629

1608-
float(x::AbstractString) = parsefloat(x)
1609-
parsefloat(x::AbstractString) = parsefloat(Float64,x)
1630+
parsefloat{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) = parse(T,s)
1631+
1632+
float(x::AbstractString) = parse(Float64,x)
1633+
parsefloat(x::AbstractString) = parse(Float64,x)
16101634

16111635
float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)
16121636

1637+
function float_isvalid{T<:Union(Float32,Float64)}(s::AbstractString, out::Array{T,1})
1638+
tf = tryparse(T, s)
1639+
isnull(tf) || (out[1] = get(tf))
1640+
!isnull(tf)
1641+
end
1642+
1643+
float32_isvalid(s::AbstractString, out::Array{Float32,1}) = float_isvalid(s, out)
1644+
float64_isvalid(s::AbstractString, out::Array{Float64,1}) = float_isvalid(s, out)
1645+
16131646
# find the index of the first occurrence of a value in a byte array
16141647

16151648
typealias ByteArray Union(Array{UInt8,1},Array{Int8,1})

base/sysimg.jl

+23-3
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ include("env.jl")
107107
include("path.jl")
108108
include("intfuncs.jl")
109109

110+
# nullable types
111+
include("nullable.jl")
110112

111113
# I/O
112114
include("task.jl")
@@ -176,6 +178,27 @@ big(n::Integer) = convert(BigInt,n)
176178
big(x::FloatingPoint) = convert(BigFloat,x)
177179
big(q::Rational) = big(num(q))//big(den(q))
178180

181+
const _fact_table128 =
182+
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
183+
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
184+
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
185+
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
186+
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
187+
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
188+
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
189+
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
190+
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
191+
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
192+
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
193+
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
194+
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
195+
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
196+
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
197+
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
198+
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
199+
factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
200+
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
201+
179202
# more hashing definitions
180203
include("hashing2.jl")
181204

@@ -188,9 +211,6 @@ importall .Random
188211
include("printf.jl")
189212
importall .Printf
190213

191-
# nullable types
192-
include("nullable.jl")
193-
194214
# concurrency and parallelism
195215
include("serialize.jl")
196216
include("multi.jl")

0 commit comments

Comments
 (0)