tryparse: parse string to Nullable

tanmaykm · tanmaykm · commit 86d7f2a48f17 · 2015-03-17T12:10:29.000+05:30
Introduces the tryparse methods:
- tryparse{T<:Integer}(::Type{T}, s::AbstractString)
- tryparse(::Type{Float32}, s::AbstractString) and tryparse(::Type{Float64}, s::AbstractString)
- a few variants of the above (an explicit base argument for integers, SubString input for floats)

And:
- The tryparse(Float32/Float64, ...) methods call the corresponding C functions jl_try_strtof, jl_try_substrtof, jl_try_strtod and jl_try_substrtod.
- The parseint, parsefloat, float64_isvalid and float32_isvalid methods now wrap the corresponding tryparse methods.
- The jl_strtod, jl_strtof, ... functions are now wrappers over the jl_try_str... functions.

This should fix JuliaLang#10498 as well.
Ref: discussions at JuliaLang#9316, JuliaLang#3631, JuliaLang#5704
diff --git a/base/base.jl b/base/base.jl
@@ -125,6 +125,9 @@ type AssertionError <: Exception
     AssertionError(msg) = new(msg)
 end
 
+immutable NullException <: Exception
+end
+
 # For passing constants through type inference
 immutable Val{T}
 end
@@ -277,4 +280,3 @@ immutable Nullable{T}
     Nullable() = new(true)
     Nullable(value::T) = new(false, value)
 end
-
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
@@ -3,25 +3,6 @@ const _fact_table64 =
           87178291200,1307674368000,20922789888000,355687428096000,6402373705728000,
           121645100408832000,2432902008176640000]
 
-const _fact_table128 =
-    UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
-            0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
-            0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
-            0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
-            0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
-            0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
-            0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
-            0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
-            0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
-            0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
-            0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
-            0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
-            0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
-            0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
-            0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
-            0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
-            0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
-
 function factorial_lookup(n::Integer, table, lim)
     n < 0 && throw(DomainError())
     n > lim && throw(OverflowError())
@@ -30,8 +11,6 @@ function factorial_lookup(n::Integer, table, lim)
     return oftype(n, f)
 end
 
-factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
-factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
 factorial(n::Union(Int64,UInt64)) = factorial_lookup(n, _fact_table64, 20)
 
 if Int === Int32
diff --git a/base/exports.jl b/base/exports.jl
@@ -355,6 +355,7 @@ export
     fldmod,
     flipsign,
     float,
+    tryparse,
     floor,
     fma,
     frexp,
diff --git a/base/gmp.jl b/base/gmp.jl
@@ -5,7 +5,7 @@ export BigInt
 import Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), ($),
              binomial, cmp, convert, div, divrem, factorial, fld, gcd, gcdx, lcm, mod,
              ndigits, promote_rule, rem, show, isqrt, string, isprime, powermod,
-             sum, trailing_zeros, trailing_ones, count_ones, base, parseint,
+             sum, trailing_zeros, trailing_ones, count_ones, base, parseint, tryparse_internal,
              serialize, deserialize, bin, oct, dec, hex, isequal, invmod,
              prevpow2, nextpow2, ndigits0z, widen, signed
 
@@ -76,15 +76,23 @@ signed(x::BigInt) = x
 BigInt(x::BigInt) = x
 BigInt(s::AbstractString) = parseint(BigInt,s)
 
-function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int)
+function tryparse_internal(::Type{BigInt}, s::AbstractString, base::Int, raise::Bool)
+    _n = Nullable{BigInt}()
     s = bytestring(s)
     sgn, base, i = Base.parseint_preamble(true,s,base)
+    if i == 0
+        raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
+        return _n
+    end
     z = BigInt()
     err = ccall((:__gmpz_set_str, :libgmp),
                Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32),
                &z, SubString(s,i), base)
-    err == 0 || throw(ArgumentError("invalid BigInt: $(repr(s))"))
-    return sgn < 0 ? -z : z
+    if err != 0
+        raise && throw(ArgumentError("invalid BigInt: $(repr(s))"))
+        return _n
+    end
+    Nullable(sgn < 0 ? -z : z)
 end
 
 function BigInt(x::Union(Clong,Int32))
@@ -217,7 +225,7 @@ function serialize(s, n::BigInt)
     serialize(s, base(62,n))
 end
 
-deserialize(s, ::Type{BigInt}) = Base.parseint_nocheck(BigInt, deserialize(s), 62)
+deserialize(s, ::Type{BigInt}) = get(tryparse_internal(BigInt, deserialize(s), 62, true))
 
 # Binary ops
 for (fJ, fC) in ((:+, :add), (:-,:sub), (:*, :mul),
diff --git a/base/nullable.jl b/base/nullable.jl
@@ -1,6 +1,3 @@
-immutable NullException <: Exception
-end
-
 Nullable{T}(value::T) = Nullable{T}(value)
 Nullable() = Nullable{Union()}()
 
@@ -17,18 +14,17 @@ convert(   ::Type{Nullable   }, ::Void) = Nullable{Union()}()
 
 function show{T}(io::IO, x::Nullable{T})
     if x.isnull
-        @printf(io, "Nullable{%s}()", repr(T))
+        print(io, "Nullable{$(repr(T))}()")  # print, not println: show must not append a newline
     else
-        @printf(io, "Nullable(%s)", repr(x.value))
+        print(io, "Nullable($(repr(x.value)))")  # same output as the removed @printf form
     end
 end
 
-get(x::Nullable) = x.isnull ? throw(NullException()) : x.value
+isnull(x::Nullable) = x.isnull
 
+get(x::Nullable) = x.isnull ? throw(NullException()) : x.value
 get{T}(x::Nullable{T}, y) = x.isnull ? convert(T, y) : x.value
 
-isnull(x::Nullable) = x.isnull
-
 function isequal(x::Nullable, y::Nullable)
     if x.isnull && y.isnull
         return true
diff --git a/base/string.jl b/base/string.jl
@@ -1487,27 +1487,33 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b
 parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c))
 
 function parseint_next(s::AbstractString, i::Int=start(s))
-    done(s,i) && throw(ArgumentError("premature end of integer: $(repr(s))"))
+    done(s,i) && (return Char(0), 0, 0)
     j = i
     c, i = next(s,i)
     c, i, j
 end
 
 function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
     c, i, j = parseint_next(s)
+
     while isspace(c)
         c, i, j = parseint_next(s,i)
     end
+    (j == 0) && (return 0, 0, 0)
+
     sgn = 1
     if signed
         if c == '-' || c == '+'
             (c == '-') && (sgn = -1)
             c, i, j = parseint_next(s,i)
         end
     end
+
     while isspace(c)
         c, i, j = parseint_next(s,i)
     end
+    (j == 0) && (return 0, 0, 0)
+
     if base == 0
         if c == '0' && !done(s,i)
             c, i = next(s,i)
@@ -1522,94 +1528,125 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
     return sgn, base, j
 end
 
-function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int)
+safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2)
+safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 >   0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) :
+                                      (n2 <  -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) :
+                                      ((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2)
+
+#safe_sub{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 < (typemin(T) + n2)) : (n1 > (typemax(T) + n2))) ? Nullable{T}() : Nullable{T}(n1 - n2)
+#safe_div{T<:Integer}(n1::T, n2::T) = ((n1 == typemin(T)) && (n2 == T(-1))) ? Nullable{T}() : Nullable{T}(div(n1, n2))
+#safe_abs{T<:Integer}(n::T) = (n == typemin(T)) ? Nullable{T}() : abs(n)
+
+function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int, raise::Bool)
+    _n = Nullable{T}()
     sgn, base, i = parseint_preamble(T<:Signed,s,base)
+    if i == 0
+        raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
+        return _n
+    end
     c, i = parseint_next(s,i)
+    if i == 0
+        raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
+        return _n
+    end
+
     base = convert(T,base)
-    ## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt
-    m::T = T===UInt128 || T===Int128 ? typemax(T) : div(typemax(T)-base+1,base)
+    m::T = div(typemax(T)-base+1,base)
     n::T = 0
     while n <= m
         d::T = '0' <= c <= '9' ? c-'0'    :
                'A' <= c <= 'Z' ? c-'A'+10 :
                'a' <= c <= 'z' ? c-'a'+a  : base
-        d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
+        if d >= base
+            raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
+            return _n
+        end
         n *= base
         n += d
         if done(s,i)
             n *= sgn
-            return n
+            return Nullable{T}(n)
         end
         c, i = next(s,i)
         isspace(c) && break
     end
     (T <: Signed) && (n *= sgn)
     while !isspace(c)
         d::T = '0' <= c <= '9' ? c-'0'    :
-               'A' <= c <= 'Z' ? c-'A'+10 :
-               'a' <= c <= 'z' ? c-'a'+a  : base
-        d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
+        'A' <= c <= 'Z' ? c-'A'+10 :
+            'a' <= c <= 'z' ? c-'a'+a  : base
+        if d >= base
+            raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
+            return _n
+        end
         (T <: Signed) && (d *= sgn)
-        n = checked_mul(n,base)
-        n = checked_add(n,d)
-        done(s,i) && return n
+
+        safe_n = safe_mul(n, base)
+        isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
+        if isnull(safe_n)
+            raise && throw(OverflowError())
+            return _n
+        end
+        n = get(safe_n)
+        done(s,i) && return Nullable{T}(n)
         c, i = next(s,i)
     end
     while !done(s,i)
         c, i = next(s,i)
-        isspace(c) || throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
+        if !isspace(c)
+            raise && throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
+            return _n
+        end
     end
-    return n
+    return Nullable{T}(n)
 end
-parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
-    parseint_nocheck(T, s, base, base <= 36 ? 10 : 36)
+tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool) =
+    tryparse_internal(T, s, base, base <= 36 ? 10 : 36, raise)
+tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) =
+    2 <= base <= 62 ? tryparse_internal(T,s,Int(base),false) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base")) # accepts any Integer base, like parseint; converted to Int for tryparse_internal
+tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false)
 
-parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) =
-    2 <= base <= 62 ? parseint_nocheck(T,s,Int(base)) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
-parseint{T<:Integer}(::Type{T}, s::AbstractString) = parseint_nocheck(T,s,0)
+function parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
+    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
+    get(tryparse_internal(T, s, Int(base), true))  # tryparse_internal only has methods for base::Int; convert so any Integer base works
+end
+parseint{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
 parseint(s::AbstractString, base::Integer) = parseint(Int,s,base)
-parseint(s::AbstractString) = parseint_nocheck(Int,s,0)
+parseint(s::AbstractString) = parseint(Int,s)
 
 ## stringifying integers more efficiently ##
 
 string(x::Union(Int8,Int16,Int32,Int64,Int128)) = dec(x)
 
 ## string to float functions ##
 
-float64_isvalid(s::AbstractString, out::Array{Float64,1}) =
-    ccall(:jl_strtod, Int32, (Ptr{UInt8},Ptr{Float64}), s, out) == 0
-float32_isvalid(s::AbstractString, out::Array{Float32,1}) =
-    ccall(:jl_strtof, Int32, (Ptr{UInt8},Ptr{Float32}), s, out) == 0
-
-float64_isvalid(s::SubString, out::Array{Float64,1}) =
-    ccall(:jl_substrtod, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float64}), s.string, s.offset, s.endof, out) == 0
-float32_isvalid(s::SubString, out::Array{Float32,1}) =
-    ccall(:jl_substrtof, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float32}), s.string, s.offset, s.endof, out) == 0
-
-begin
-    local tmp::Array{Float64,1} = Array(Float64,1)
-    local tmpf::Array{Float32,1} = Array(Float32,1)
-    global parsefloat
-    function parsefloat(::Type{Float64}, s::AbstractString)
-        if !float64_isvalid(s, tmp)
-            throw(ArgumentError("parsefloat(Float64,::AbstractString): invalid number format $(repr(s))"))
-        end
-        return tmp[1]
-    end
+tryparse(::Type{Float64}, s::AbstractString) = ccall(:jl_try_strtod, Nullable{Float64}, (Ptr{UInt8},), s)
+tryparse(::Type{Float64}, s::SubString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)
 
-    function parsefloat(::Type{Float32}, s::AbstractString)
-        if !float32_isvalid(s, tmpf)
-            throw(ArgumentError("parsefloat(Float32,::AbstractString): invalid number format $(repr(s))"))
-        end
-        return tmpf[1]
-    end
+tryparse(::Type{Float32}, s::AbstractString) = ccall(:jl_try_strtof, Nullable{Float32}, (Ptr{UInt8},), s)
+tryparse(::Type{Float32}, s::SubString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)
+
+function parse{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString)
+    nf = tryparse(T, s)
+    isnull(nf) ? throw(ArgumentError("invalid number format $(repr(s)) for $T")) : get(nf)
 end
 
-float(x::AbstractString) = parsefloat(x)
-parsefloat(x::AbstractString) = parsefloat(Float64,x)
+parsefloat{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) = parse(T,s)
+
+float(x::AbstractString) = parse(Float64,x)
+parsefloat(x::AbstractString) = parse(Float64,x)
 
 float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)
 
+function float_isvalid{T<:Union(Float32,Float64)}(s::AbstractString, out::Array{T,1})
+    tf = tryparse(T, s)
+    isnull(tf) || (out[1] = get(tf))
+    !isnull(tf)
+end
+
+float32_isvalid(s::AbstractString, out::Array{Float32,1}) = float_isvalid(s, out)
+float64_isvalid(s::AbstractString, out::Array{Float64,1}) = float_isvalid(s, out)
+
 # find the index of the first occurrence of a value in a byte array
 
 typealias ByteArray Union(Array{UInt8,1},Array{Int8,1})
diff --git a/base/sysimg.jl b/base/sysimg.jl
@@ -111,6 +111,8 @@ using .Errno
 include("path.jl")
 include("intfuncs.jl")
 
+# nullable types
+include("nullable.jl")
 
 # I/O
 include("task.jl")
@@ -180,6 +182,27 @@ big(n::Integer) = convert(BigInt,n)
 big(x::FloatingPoint) = convert(BigFloat,x)
 big(q::Rational) = big(num(q))//big(den(q))
 
+const _fact_table128 =
+    UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
+            0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
+            0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
+            0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
+            0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
+            0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
+            0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
+            0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
+            0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
+            0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
+            0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
+            0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
+            0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
+            0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
+            0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
+            0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
+            0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
+factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
+factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
+
 # more hashing definitions
 include("hashing2.jl")
 
@@ -192,9 +215,6 @@ importall .Random
 include("printf.jl")
 importall .Printf
 
-# nullable types
-include("nullable.jl")
-
 # concurrency and parallelism
 include("serialize.jl")
 include("multi.jl")
diff --git a/src/builtins.c b/src/builtins.c
diff --git a/src/julia.h b/src/julia.h
diff --git a/test/strings.jl b/test/strings.jl