Skip to content

Commit 86d7f2a

Browse files
committed
tryparse: parse string to Nullable
Introduces the `tryparse` methods:
- `tryparse{T<:Integer}(::Type{T}, s::AbstractString)`
- `tryparse(::Type{Float..}, s::AbstractString)` (for `Float32` and `Float64`)
- a few variants of the above

And:
- `tryparse(Float.., ...)` calls the corresponding C functions `jl_try_strtof`, `jl_try_substrtof`, `jl_try_strtod` and `jl_try_substrtod`.
- The `parseint`, `parsefloat`, `float64_isvalid` and `float32_isvalid` methods wrap the corresponding `tryparse` methods.
- The `jl_strtod`, `jl_strtof`, ... functions are wrappers over the `jl_try_str...` functions.

This should fix JuliaLang#10498 as well.
Ref: discussions at JuliaLang#9316, JuliaLang#3631, JuliaLang#5704
1 parent 5195cc8 commit 86d7f2a

10 files changed

+286
-136
lines changed

base/base.jl

+3-1
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ type AssertionError <: Exception
125125
AssertionError(msg) = new(msg)
126126
end
127127

128+
immutable NullException <: Exception
129+
end
130+
128131
# For passing constants through type inference
129132
immutable Val{T}
130133
end
@@ -277,4 +280,3 @@ immutable Nullable{T}
277280
Nullable() = new(true)
278281
Nullable(value::T) = new(false, value)
279282
end
280-

base/combinatorics.jl

-21
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,6 @@ const _fact_table64 =
33
87178291200,1307674368000,20922789888000,355687428096000,6402373705728000,
44
121645100408832000,2432902008176640000]
55

6-
const _fact_table128 =
7-
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
8-
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
9-
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
10-
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
11-
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
12-
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
13-
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
14-
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
15-
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
16-
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
17-
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
18-
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
19-
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
20-
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
21-
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
22-
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
23-
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
24-
256
function factorial_lookup(n::Integer, table, lim)
267
n < 0 && throw(DomainError())
278
n > lim && throw(OverflowError())
@@ -30,8 +11,6 @@ function factorial_lookup(n::Integer, table, lim)
3011
return oftype(n, f)
3112
end
3213

33-
factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
34-
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
3514
factorial(n::Union(Int64,UInt64)) = factorial_lookup(n, _fact_table64, 20)
3615

3716
if Int === Int32

base/exports.jl

+1
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ export
355355
fldmod,
356356
flipsign,
357357
float,
358+
tryparse,
358359
floor,
359360
fma,
360361
frexp,

base/gmp.jl

+13-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ export BigInt
55
import Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), ($),
66
binomial, cmp, convert, div, divrem, factorial, fld, gcd, gcdx, lcm, mod,
77
ndigits, promote_rule, rem, show, isqrt, string, isprime, powermod,
8-
sum, trailing_zeros, trailing_ones, count_ones, base, parseint,
8+
sum, trailing_zeros, trailing_ones, count_ones, base, parseint, tryparse_internal,
99
serialize, deserialize, bin, oct, dec, hex, isequal, invmod,
1010
prevpow2, nextpow2, ndigits0z, widen, signed
1111

@@ -76,15 +76,23 @@ signed(x::BigInt) = x
7676
BigInt(x::BigInt) = x
7777
BigInt(s::AbstractString) = parseint(BigInt,s)
7878

79-
function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int)
79+
function tryparse_internal(::Type{BigInt}, s::AbstractString, base::Int, raise::Bool)
80+
_n = Nullable{BigInt}()
8081
s = bytestring(s)
8182
sgn, base, i = Base.parseint_preamble(true,s,base)
83+
if i == 0
84+
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
85+
return _n
86+
end
8287
z = BigInt()
8388
err = ccall((:__gmpz_set_str, :libgmp),
8489
Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32),
8590
&z, SubString(s,i), base)
86-
err == 0 || throw(ArgumentError("invalid BigInt: $(repr(s))"))
87-
return sgn < 0 ? -z : z
91+
if err != 0
92+
raise && throw(ArgumentError("invalid BigInt: $(repr(s))"))
93+
return _n
94+
end
95+
Nullable(sgn < 0 ? -z : z)
8896
end
8997

9098
function BigInt(x::Union(Clong,Int32))
@@ -217,7 +225,7 @@ function serialize(s, n::BigInt)
217225
serialize(s, base(62,n))
218226
end
219227

220-
deserialize(s, ::Type{BigInt}) = Base.parseint_nocheck(BigInt, deserialize(s), 62)
228+
deserialize(s, ::Type{BigInt}) = get(tryparse_internal(BigInt, deserialize(s), 62, true))
221229

222230
# Binary ops
223231
for (fJ, fC) in ((:+, :add), (:-,:sub), (:*, :mul),

base/nullable.jl

+4-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
immutable NullException <: Exception
2-
end
3-
41
Nullable{T}(value::T) = Nullable{T}(value)
52
Nullable() = Nullable{Union()}()
63

@@ -17,18 +14,17 @@ convert( ::Type{Nullable }, ::Void) = Nullable{Union()}()
1714

1815
# Write the constructor-style representation of `x` to `io`:
# `Nullable{T}()` when `x` is null, `Nullable(value)` otherwise.
#
# String interpolation is used instead of `@printf` because nullable.jl is
# included before printf.jl in sysimg.jl. `print` (not `println`) is required:
# `show` must not append a trailing newline, otherwise `repr`, `string` and
# the display of containers holding a Nullable all gain a stray line break.
function show{T}(io::IO, x::Nullable{T})
    if x.isnull
        print(io, "Nullable{$(repr(T))}()")
    else
        print(io, "Nullable($(repr(x.value)))")
    end
end
2522

26-
get(x::Nullable) = x.isnull ? throw(NullException()) : x.value
23+
isnull(x::Nullable) = x.isnull
2724

25+
get(x::Nullable) = x.isnull ? throw(NullException()) : x.value
2826
get{T}(x::Nullable{T}, y) = x.isnull ? convert(T, y) : x.value
2927

30-
isnull(x::Nullable) = x.isnull
31-
3228
function isequal(x::Nullable, y::Nullable)
3329
if x.isnull && y.isnull
3430
return true

base/string.jl

+85-48
Original file line numberDiff line numberDiff line change
@@ -1487,27 +1487,33 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b
14871487
parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c))
14881488

14891489
function parseint_next(s::AbstractString, i::Int=start(s))
1490-
done(s,i) && throw(ArgumentError("premature end of integer: $(repr(s))"))
1490+
done(s,i) && (return Char(0), 0, 0)
14911491
j = i
14921492
c, i = next(s,i)
14931493
c, i, j
14941494
end
14951495

14961496
function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
14971497
c, i, j = parseint_next(s)
1498+
14981499
while isspace(c)
14991500
c, i, j = parseint_next(s,i)
15001501
end
1502+
(j == 0) && (return 0, 0, 0)
1503+
15011504
sgn = 1
15021505
if signed
15031506
if c == '-' || c == '+'
15041507
(c == '-') && (sgn = -1)
15051508
c, i, j = parseint_next(s,i)
15061509
end
15071510
end
1511+
15081512
while isspace(c)
15091513
c, i, j = parseint_next(s,i)
15101514
end
1515+
(j == 0) && (return 0, 0, 0)
1516+
15111517
if base == 0
15121518
if c == '0' && !done(s,i)
15131519
c, i = next(s,i)
@@ -1522,94 +1528,125 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
15221528
return sgn, base, j
15231529
end
15241530

1525-
function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int)
1531+
# Overflow-aware addition for fixed-width integers.
# Returns `Nullable{T}(n1 + n2)` when the sum is representable in `T`, and an
# empty `Nullable{T}()` when it would exceed typemax(T) or fall below
# typemin(T). The bounds are tested by rearranging the inequality so that the
# check itself never overflows.
function safe_add{T<:Integer}(n1::T, n2::T)
    overflows = if n2 > 0
        # adding a positive value can only overflow upwards
        n1 > (typemax(T) - n2)
    else
        # adding zero or a negative value can only overflow downwards
        n1 < (typemin(T) - n2)
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 + n2)
end
1532+
# Overflow-aware multiplication for fixed-width integers.
# Returns `Nullable{T}(n1 * n2)` when the product is representable in `T`, and
# an empty `Nullable{T}()` otherwise. The admissible range for `n1` is derived
# by dividing the type bounds by `n2`, so the check itself never overflows.
function safe_mul{T<:Integer}(n1::T, n2::T)
    overflows = if n2 > 0
        (n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))
    elseif n2 < -1
        # a negative multiplier swaps which bound limits which side
        (n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))
    else
        # n2 is 0 or -1: only typemin(T) * -1 is unrepresentable
        (n2 == -1) && n1 == typemin(T)
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 * n2)
end
1535+
1536+
#safe_sub{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 < (typemin(T) + n2)) : (n1 > (typemax(T) + n2))) ? Nullable{T}() : Nullable{T}(n1 - n2)
1537+
#safe_div{T<:Integer}(n1::T, n2::T) = ((n1 == typemin(T)) && (n2 == T(-1))) ? Nullable{T}() : Nullable{T}(div(n1, n2))
1538+
#safe_abs{T<:Integer}(n::T) = (n == typemin(T)) ? Nullable{T}() : abs(n)
1539+
1540+
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int, raise::Bool)
1541+
_n = Nullable{T}()
15261542
sgn, base, i = parseint_preamble(T<:Signed,s,base)
1543+
if i == 0
1544+
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
1545+
return _n
1546+
end
15271547
c, i = parseint_next(s,i)
1548+
if i == 0
1549+
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
1550+
return _n
1551+
end
1552+
15281553
base = convert(T,base)
1529-
## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt
1530-
m::T = T===UInt128 || T===Int128 ? typemax(T) : div(typemax(T)-base+1,base)
1554+
m::T = div(typemax(T)-base+1,base)
15311555
n::T = 0
15321556
while n <= m
15331557
d::T = '0' <= c <= '9' ? c-'0' :
15341558
'A' <= c <= 'Z' ? c-'A'+10 :
15351559
'a' <= c <= 'z' ? c-'a'+a : base
1536-
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1560+
if d >= base
1561+
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1562+
return _n
1563+
end
15371564
n *= base
15381565
n += d
15391566
if done(s,i)
15401567
n *= sgn
1541-
return n
1568+
return Nullable{T}(n)
15421569
end
15431570
c, i = next(s,i)
15441571
isspace(c) && break
15451572
end
15461573
(T <: Signed) && (n *= sgn)
15471574
while !isspace(c)
15481575
d::T = '0' <= c <= '9' ? c-'0' :
1549-
'A' <= c <= 'Z' ? c-'A'+10 :
1550-
'a' <= c <= 'z' ? c-'a'+a : base
1551-
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1576+
'A' <= c <= 'Z' ? c-'A'+10 :
1577+
'a' <= c <= 'z' ? c-'a'+a : base
1578+
if d >= base
1579+
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
1580+
return _n
1581+
end
15521582
(T <: Signed) && (d *= sgn)
1553-
n = checked_mul(n,base)
1554-
n = checked_add(n,d)
1555-
done(s,i) && return n
1583+
1584+
safe_n = safe_mul(n, base)
1585+
isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
1586+
if isnull(safe_n)
1587+
raise && throw(OverflowError())
1588+
return _n
1589+
end
1590+
n = get(safe_n)
1591+
done(s,i) && return Nullable{T}(n)
15561592
c, i = next(s,i)
15571593
end
15581594
while !done(s,i)
15591595
c, i = next(s,i)
1560-
isspace(c) || throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
1596+
if !isspace(c)
1597+
raise && throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
1598+
return _n
1599+
end
15611600
end
1562-
return n
1601+
return Nullable{T}(n)
15631602
end
1564-
parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
1565-
parseint_nocheck(T, s, base, base <= 36 ? 10 : 36)
1603+
tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool) =
1604+
tryparse_internal(T, s, base, base <= 36 ? 10 : 36, raise)
1605+
# Try to parse `s` as an integer of type `T` written in the given `base`.
# Returns the `Nullable{T}` produced by tryparse_internal with raise=false,
# i.e. parse failures are reported through the return value, not by throwing.
# An out-of-range `base` is a caller error rather than a parse failure, so it
# still throws an ArgumentError.
# `base` accepts any Integer (matching parseint) and is converted to Int,
# because tryparse_internal only has methods taking `base::Int`.
tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) =
    2 <= base <= 62 ? tryparse_internal(T,s,Int(base),false) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
1607+
tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false)
15661608

1567-
parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) =
1568-
2 <= base <= 62 ? parseint_nocheck(T,s,Int(base)) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
1569-
parseint{T<:Integer}(::Type{T}, s::AbstractString) = parseint_nocheck(T,s,0)
1609+
# Parse `s` as an integer of type `T` written in the given `base`.
# Throws an ArgumentError when `base` is outside 2..62; parse failures are
# raised by tryparse_internal itself (raise=true), so the `get` below unwraps
# a non-null result.
function parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    # tryparse_internal only has methods taking `base::Int`; convert here so
    # that other Integer types (e.g. UInt8, or Int32 on a 64-bit build) do not
    # fail with a MethodError. The range check above makes the conversion safe.
    get(tryparse_internal(T, s, Int(base), true))
end
1613+
parseint{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
15701614
parseint(s::AbstractString, base::Integer) = parseint(Int,s,base)
1571-
parseint(s::AbstractString) = parseint_nocheck(Int,s,0)
1615+
parseint(s::AbstractString) = parseint(Int,s)
15721616

15731617
## stringifying integers more efficiently ##
15741618

15751619
string(x::Union(Int8,Int16,Int32,Int64,Int128)) = dec(x)
15761620

15771621
## string to float functions ##
15781622

1579-
float64_isvalid(s::AbstractString, out::Array{Float64,1}) =
1580-
ccall(:jl_strtod, Int32, (Ptr{UInt8},Ptr{Float64}), s, out) == 0
1581-
float32_isvalid(s::AbstractString, out::Array{Float32,1}) =
1582-
ccall(:jl_strtof, Int32, (Ptr{UInt8},Ptr{Float32}), s, out) == 0
1583-
1584-
float64_isvalid(s::SubString, out::Array{Float64,1}) =
1585-
ccall(:jl_substrtod, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float64}), s.string, s.offset, s.endof, out) == 0
1586-
float32_isvalid(s::SubString, out::Array{Float32,1}) =
1587-
ccall(:jl_substrtof, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float32}), s.string, s.offset, s.endof, out) == 0
1588-
1589-
begin
1590-
local tmp::Array{Float64,1} = Array(Float64,1)
1591-
local tmpf::Array{Float32,1} = Array(Float32,1)
1592-
global parsefloat
1593-
function parsefloat(::Type{Float64}, s::AbstractString)
1594-
if !float64_isvalid(s, tmp)
1595-
throw(ArgumentError("parsefloat(Float64,::AbstractString): invalid number format $(repr(s))"))
1596-
end
1597-
return tmp[1]
1598-
end
1623+
tryparse(::Type{Float64}, s::AbstractString) = ccall(:jl_try_strtod, Nullable{Float64}, (Ptr{UInt8},), s)
1624+
tryparse(::Type{Float64}, s::SubString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)
15991625

1600-
function parsefloat(::Type{Float32}, s::AbstractString)
1601-
if !float32_isvalid(s, tmpf)
1602-
throw(ArgumentError("parsefloat(Float32,::AbstractString): invalid number format $(repr(s))"))
1603-
end
1604-
return tmpf[1]
1605-
end
1626+
tryparse(::Type{Float32}, s::AbstractString) = ccall(:jl_try_strtof, Nullable{Float32}, (Ptr{UInt8},), s)
1627+
tryparse(::Type{Float32}, s::SubString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)
1628+
1629+
# Parse `s` as a floating-point number of type `T` (Float32 or Float64).
# Delegates to `tryparse` and turns a null result into an ArgumentError that
# names both the offending string and the requested type.
function parse{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString)
    result = tryparse(T, s)
    if isnull(result)
        throw(ArgumentError("invalid number format $(repr(s)) for $T"))
    end
    return get(result)
end
16071633

1608-
float(x::AbstractString) = parsefloat(x)
1609-
parsefloat(x::AbstractString) = parsefloat(Float64,x)
1634+
parsefloat{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) = parse(T,s)
1635+
1636+
float(x::AbstractString) = parse(Float64,x)
1637+
parsefloat(x::AbstractString) = parse(Float64,x)
16101638

16111639
float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)
16121640

1641+
# Check whether `s` parses as a float of type `T` (the element type of `out`).
# On success the parsed value is stored in `out[1]` and `true` is returned;
# on failure `out` is left untouched and `false` is returned.
function float_isvalid{T<:Union(Float32,Float64)}(s::AbstractString, out::Array{T,1})
    parsed = tryparse(T, s)
    if isnull(parsed)
        return false
    end
    out[1] = get(parsed)
    return true
end
1646+
1647+
float32_isvalid(s::AbstractString, out::Array{Float32,1}) = float_isvalid(s, out)
1648+
float64_isvalid(s::AbstractString, out::Array{Float64,1}) = float_isvalid(s, out)
1649+
16131650
# find the index of the first occurrence of a value in a byte array
16141651

16151652
typealias ByteArray Union(Array{UInt8,1},Array{Int8,1})

base/sysimg.jl

+23-3
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ using .Errno
111111
include("path.jl")
112112
include("intfuncs.jl")
113113

114+
# nullable types
115+
include("nullable.jl")
114116

115117
# I/O
116118
include("task.jl")
@@ -180,6 +182,27 @@ big(n::Integer) = convert(BigInt,n)
180182
big(x::FloatingPoint) = convert(BigFloat,x)
181183
big(q::Rational) = big(num(q))//big(den(q))
182184

185+
const _fact_table128 =
186+
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
187+
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
188+
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
189+
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
190+
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
191+
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
192+
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
193+
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
194+
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
195+
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
196+
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
197+
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
198+
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
199+
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
200+
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
201+
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
202+
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
203+
factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
204+
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
205+
183206
# more hashing definitions
184207
include("hashing2.jl")
185208

@@ -192,9 +215,6 @@ importall .Random
192215
include("printf.jl")
193216
importall .Printf
194217

195-
# nullable types
196-
include("nullable.jl")
197-
198218
# concurrency and parallelism
199219
include("serialize.jl")
200220
include("multi.jl")

0 commit comments

Comments
 (0)