diff --git a/base/dates/io.jl b/base/dates/io.jl index 778e7e99641bc..29170630395e1 100644 --- a/base/dates/io.jl +++ b/base/dates/io.jl @@ -175,17 +175,17 @@ Delim(d::Char) = Delim{Char, 1}(d) Delim(d::String) = Delim{String, length(d)}(d) @inline function tryparsenext{N}(d::Delim{Char, N}, str, i::Int, len) - R = Nullable{Int64} + R = Nullable{Bool} for j=1:N i > len && return (R(), i) c, i = next(str, i) c != d.d && return (R(), i) end - return R(0), i + return R(true), i end @inline function tryparsenext{N}(d::Delim{String, N}, str, i::Int, len) - R = Nullable{Int64} + R = Nullable{Bool} i1 = i i2 = start(d.d) for j = 1:N @@ -198,7 +198,7 @@ end return R(), i1 end end - return R(0), i1 + return R(true), i1 end @inline function format(io, d::Delim, dt, locale) @@ -206,7 +206,7 @@ end end function _show_content{N}(io::IO, d::Delim{Char, N}) - if d.d in keys(SLOT_RULE) + if d.d in keys(CONVERSION_SPECIFIERS) for i = 1:N write(io, '\\', d.d) end @@ -219,7 +219,7 @@ end function _show_content(io::IO, d::Delim) for c in d.d - if c in keys(SLOT_RULE) + if c in keys(CONVERSION_SPECIFIERS) write(io, '\\') end write(io, c) @@ -236,8 +236,9 @@ end abstract type DayOfWeekToken end # special addition to Period types -# mapping format specifiers to period types -const SLOT_RULE = Dict{Char, Type}( +# Map conversion specifiers or character codes to tokens. 
+# Note: Allow addition of new character codes added by packages +const CONVERSION_SPECIFIERS = Dict{Char, Type}( 'y' => Year, 'Y' => Year, 'm' => Month, @@ -252,13 +253,26 @@ const SLOT_RULE = Dict{Char, Type}( 's' => Millisecond, ) -slot_order(::Type{Date}) = (Year, Month, Day) -slot_order(::Type{DateTime}) = (Year, Month, Day, Hour, Minute, Second, Millisecond) - -slot_defaults(::Type{Date}) = map(Int64, (1, 1, 1)) -slot_defaults(::Type{DateTime}) = map(Int64, (1, 1, 1, 0, 0, 0, 0)) +# Default values are needed when a conversion specifier is used in a DateFormat for parsing +# and we have reached the end of the input string. +# Note: Allow `Any` value as a default to support extensibility +const CONVERSION_DEFAULTS = Dict{Type, Any}( + Year => Int64(1), + Month => Int64(1), + DayOfWeekToken => Int64(0), + Day => Int64(1), + Hour => Int64(0), + Minute => Int64(0), + Second => Int64(0), + Millisecond => Int64(0), +) -slot_types{T<:TimeType}(::Type{T}) = typeof(slot_defaults(T)) +# Specifies the required fields in order to parse a TimeType +# Note: Allows for addition of new TimeTypes +const CONVERSION_TRANSLATIONS = Dict{Type{<:TimeType}, Tuple}( + Date => (Year, Month, Day), + DateTime => (Year, Month, Day, Hour, Minute, Second, Millisecond), +) """ DateFormat(format::AbstractString, locale="english") -> DateFormat @@ -300,13 +314,13 @@ function DateFormat(f::AbstractString, locale::DateLocale=ENGLISH) prev = () prev_offset = 1 - letters = String(collect(keys(Base.Dates.SLOT_RULE))) + letters = String(collect(keys(CONVERSION_SPECIFIERS))) for m in eachmatch(Regex("(? DateTime diff --git a/base/dates/parse.jl b/base/dates/parse.jl index e638319f10b09..84b3c475717a2 100644 --- a/base/dates/parse.jl +++ b/base/dates/parse.jl @@ -1,97 +1,161 @@ ### Parsing utilities -@generated function tryparse_internal{T<:TimeType, S, F}(::Type{T}, str::AbstractString, df::DateFormat{S, F}, raise::Bool=false) - token_types = Type[dp <: DatePart ? 
SLOT_RULE[first(dp.parameters)] : Void for dp in F.parameters] - N = length(F.parameters) - - types = slot_order(T) - num_types = length(types) - order = Vector{Int}(num_types) - for i = 1:num_types - order[i] = findfirst(token_types, types[i]) +_directives{S,T}(::Type{DateFormat{S,T}}) = T.parameters + +character_codes{S,T}(df::Type{DateFormat{S,T}}) = character_codes(_directives(df)) +function character_codes(directives::SimpleVector) + letters = sizehint!(Char[], length(directives)) + for directive in directives + if directive <: DatePart + letter = first(directive.parameters) + push!(letters, letter) + end end + return letters +end - field_defaults = slot_defaults(T) - field_order = tuple(order...) - tuple_type = slot_types(T) - - # `slot_order`, `slot_defaults`, and `slot_types` return tuples of the same length - assert(num_types == length(field_order) == length(field_defaults)) +genvar(t::DataType) = Symbol(lowercase(string(Base.datatype_name(t)))) + +""" + tryparsenext_core(str::AbstractString, pos::Int, len::Int, df::DateFormat, raise=false) + +Parses the string according to the directives within the DateFormat. Parsing will start at +character index `pos` and will stop when all directives are used or we have parsed up to +the end of the string, `len`. When a directive cannot be parsed, the returned value tuple +will be null if `raise` is false; otherwise an exception will be thrown. + +Returns a 3-element tuple `(values, pos, num_parsed)`: +* `values::Nullable{Tuple}`: A tuple which contains a value for each `DatePart` within the + `DateFormat` in the order in which they occur. If the string ends before we finish parsing + all the directives the missing values will be filled in with default values. +* `pos::Int`: The character index at which parsing stopped. +* `num_parsed::Int`: The number of values which were parsed and stored within `values`. + Useful for distinguishing parsed values from default values. 
+""" +@generated function tryparsenext_core( + str::AbstractString, pos::Int, len::Int, df::DateFormat, raise::Bool=false, +) + directives = _directives(df) + letters = character_codes(directives) + + tokens = Type[CONVERSION_SPECIFIERS[letter] for letter in letters] + value_names = Symbol[genvar(t) for t in tokens] + value_defaults = Tuple(CONVERSION_DEFAULTS[t] for t in tokens) + R = typeof(value_defaults) + + # Pre-assign variables to defaults. Allows us to use `@goto done` without worrying about + # unassigned variables. + assign_defaults = Expr[ + quote + $name = $default + end + for (name, default) in zip(value_names, value_defaults) + ] + + vi = 1 + parsers = Expr[ + begin + if directives[i] <: DatePart + name = value_names[vi] + nullable = Symbol(:nullable_, name) + vi += 1 + quote + pos > len && @goto done + $nullable, next_pos = tryparsenext(directives[$i], str, pos, len, locale) + isnull($nullable) && @goto error + $name = unsafe_get($nullable) + pos = next_pos + num_parsed += 1 + directive_index += 1 + end + else + quote + pos > len && @goto done + nullable_delim, next_pos = tryparsenext(directives[$i], str, pos, len, locale) + isnull(nullable_delim) && @goto error + pos = next_pos + directive_index += 1 + end + end + end + for i in 1:length(directives) + ] quote - R = Nullable{$tuple_type} - t = df.tokens - l = df.locale - pos, len = start(str), endof(str) + directives = df.tokens + locale::DateLocale = df.locale - err_idx = 1 - Base.@nexprs $N i->val_i = 0 - Base.@nexprs $N i->(begin - pos > len && @goto done - nv, next_pos = tryparsenext(t[i], str, pos, len, l) - isnull(nv) && @goto error - val_i, pos = unsafe_get(nv), next_pos - err_idx += 1 - end) - pos <= len && @goto error + num_parsed = 0 + directive_index = 1 - @label done - parts = Base.@ntuple $N val - return R(reorder_args(parts, $field_order, $field_defaults, err_idx)::$tuple_type) + $(assign_defaults...) + $(parsers...) 
- @label error - # Note: Keeping exception generation in separate function helps with performance - raise && throw(gen_exception(t, err_idx, pos)) - return R() - end -end + pos > len || @goto error -function gen_exception(tokens, err_idx, pos) - if err_idx > length(tokens) - ArgumentError("Found extra characters at the end of date time string") - else - ArgumentError("Unable to parse date time. Expected token $(tokens[err_idx]) at char $pos") - end -end + @label done + return Nullable{$R}($(Expr(:tuple, value_names...))), pos, num_parsed -# reorder_args(val, idx, default, default_from) -# -# reorder elements of `val` tuple according to `idx` tuple. Use `default[i]` -# when `idx[i] == 0` or i >= default_from -# -# returns a tuple `xs` of the same length as `idx` where `xs[i]` is -# `val[idx[i]]` if `idx[i]` is non zero, `default[i]` if `idx[i]` is zero. -# -# `xs[i]` is `default[i]` for all i >= `default_from`. -# -# -function reorder_args{N}(val::Tuple, idx::NTuple{N}, default::Tuple, default_from::Integer) - ntuple(Val{N}) do i - if idx[i] == 0 || idx[i] >= default_from - default[i] - else - val[idx[i]] + @label error + if raise + if directive_index > length(directives) + throw(ArgumentError("Found extra characters at the end of date time string")) + else + d = directives[directive_index] + throw(ArgumentError("Unable to parse date time. Expected directive $d at char $pos")) + end end + return Nullable{$R}(), pos, 0 end end -function Base.tryparse{T<:TimeType}(::Type{T}, str::AbstractString, df::DateFormat) - nt = tryparse_internal(T, str, df, false) - if isnull(nt) - return Nullable{T}() - else - return Nullable{T}(T(unsafe_get(nt)...)) - end -end +""" + tryparsenext_internal(::Type{<:TimeType}, str, pos, len, df::DateFormat, raise=false) + +Parses the string according to the directives within the DateFormat. The specified TimeType +type determines the type of and order of tokens returned. 
If the given DateFormat or string +does not provide a required token a default value will be used. When the string cannot be +parsed the returned value tuple will be null if `raise` is false otherwise an exception will +be thrown. + +Returns a 2-element tuple `(values, pos)`: +* `values::Nullable{Tuple}`: A tuple which contains a value for each token as specified by + the passed in type. +* `pos::Int`: The character index at which parsing stopped. +""" +@generated function tryparsenext_internal{T<:TimeType}( + ::Type{T}, str::AbstractString, pos::Int, len::Int, df::DateFormat, raise::Bool=false, +) + letters = character_codes(df) + + tokens = Type[CONVERSION_SPECIFIERS[letter] for letter in letters] + value_names = Symbol[genvar(t) for t in tokens] + + output_tokens = CONVERSION_TRANSLATIONS[T] + output_names = Symbol[genvar(t) for t in output_tokens] + output_defaults = Tuple(CONVERSION_DEFAULTS[t] for t in output_tokens) + R = typeof(output_defaults) + + # Pre-assign output variables to defaults. Ensures that all output variables are + # assigned as the value tuple returned from `tryparsenext_core` may not include all + # of the required variables. + assign_defaults = Expr[ + quote + $name = $default + end + for (name, default) in zip(output_names, output_defaults) + ] -default_format(::Type{Date}) = ISODateFormat -default_format(::Type{DateTime}) = ISODateTimeFormat + # Unpacks the value tuple returned by `tryparsenext_core` into separate variables. + value_tuple = Expr(:tuple, value_names...) -function Base.parse{T<:TimeType}(::Type{T}, - str::AbstractString, - df::DateFormat=default_format(T)) - nt = tryparse_internal(T, str, df, true) - T(unsafe_get(nt)...) + quote + values, pos, num_parsed = tryparsenext_core(str, pos, len, df, raise) + isnull(values) && return Nullable{$R}(), pos + $(assign_defaults...) 
+ $value_tuple = unsafe_get(values) + return Nullable{$R}($(Expr(:tuple, output_names...))), pos + end end @inline function tryparsenext_base10(str::AbstractString, i::Int, len::Int, min_width::Int=1, max_width::Int=0) @@ -200,3 +264,49 @@ function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeF @label error throw(ArgumentError("Invalid DateTime string")) end + +function Base.parse{T<:TimeType}( + ::Type{T}, str::AbstractString, df::DateFormat=default_format(T), +) + pos, len = start(str), endof(str) + values, pos = tryparsenext_internal(T, str, pos, len, df, true) + T(unsafe_get(values)...) +end + +function Base.tryparse{T<:TimeType}( + ::Type{T}, str::AbstractString, df::DateFormat=default_format(T), +) + pos, len = start(str), endof(str) + values, pos = tryparsenext_internal(T, str, pos, len, df, false) + if isnull(values) + Nullable{T}() + else + Nullable{T}(T(unsafe_get(values)...)) + end +end + +""" + parse_components(str::AbstractString, df::DateFormat) -> Array{Any} + +Parse the string into its components according to the directives in the DateFormat. +Each component will be a distinct type, typically a subtype of Period. The order of the +components will match the order of the `DatePart` directives within the DateFormat. The +number of components may be less than the total number of `DatePart`. 
+""" +@generated function parse_components(str::AbstractString, df::DateFormat) + letters = character_codes(df) + tokens = Type[CONVERSION_SPECIFIERS[letter] for letter in letters] + + quote + pos, len = start(str), endof(str) + values, pos, num_parsed = tryparsenext_core(str, pos, len, df, true) + t = unsafe_get(values) + types = $(Expr(:tuple, tokens...)) + result = Vector{Any}(num_parsed) + for (i, typ) in enumerate(types) + i > num_parsed && break + result[i] = typ(t[i]) # Constructing types takes most of the time + end + return result + end +end diff --git a/base/deprecated.jl b/base/deprecated.jl index 9ecfd1843f90f..c53ae967e6006 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -1278,6 +1278,17 @@ end @deprecate_binding LinearSlow IndexCartesian false @deprecate_binding linearindexing IndexStyle false +# #20876: `Dates.parse(x, df)` is deprecated in favour of `Dates.parse_components(x, df)` +@eval Base.Dates begin + function Base.Dates.parse(x::AbstractString, df::DateFormat) + Base.depwarn(string( + "`Dates.parse(x::AbstractString, df::DateFormat)` is deprecated, use ", + "`sort!(filter!(el -> isa(el, Dates.Period), Dates.parse_components(x, df)), rev=true, lt=Dates.periodisless)` ", + "instead."), :parse) + sort!(filter!(el -> isa(el, Period), parse_components(x, df)), rev=true, lt=periodisless) + end +end + # END 0.6 deprecations # BEGIN 1.0 deprecations diff --git a/test/dates/io.jl b/test/dates/io.jl index 7a3790478c2fc..8564265d880cf 100644 --- a/test/dates/io.jl +++ b/test/dates/io.jl @@ -29,10 +29,13 @@ # DateTime parsing # Useful reference for different locales: http://library.princeton.edu/departments/tsd/katmandu/reference/months.html -let str = "1996/02/15 24:00", format = "yyyy/mm/dd HH:MM" - expected = (1996, 2, 15, 24, 0, 0, 0) - @test get(Dates.tryparse_internal(DateTime, str, Dates.DateFormat(format))) == expected - @test_throws ArgumentError Dates.DateTime(str, Dates.DateFormat(format)) +# Allow parsing of strings which are not representable as a TimeType +let str = "02/15/1996 24:00", df = 
Dates.DateFormat("mm/dd/yyyy HH:MM") + parsed = Any[ + Dates.Month(2), Dates.Day(15), Dates.Year(1996), Dates.Hour(24), Dates.Minute(0) + ] + @test Dates.parse_components(str, df) == parsed + @test_throws ArgumentError Dates.parse(DateTime, str, df) end # DateFormat printing @@ -76,18 +79,18 @@ b2 = "96/Feb/1" b3 = "96/2/15" @test_throws ArgumentError Dates.DateTime(b3, f) try - Dates.tryparse_internal(DateTime, "2012/02/20T09:09:31.25i90", dateformat"yyyy/mm/ddTHH:MM:SS.s", true) + Dates.parse(DateTime, "2012/2/20T9:9:31.25i90", dateformat"yyyy/mm/ddTHH:MM:SS.s") @test false catch err @test isa(err, ArgumentError) @test err.msg == "Found extra characters at the end of date time string" end try - Dates.tryparse_internal(DateTime, "2012/02/20T09:09:3i90", dateformat"yyyy/mm/ddTHH:MM:SS.s", true) + Dates.parse(DateTime, "2012/2/20T9:9:3i90", dateformat"yyyy/mm/ddTHH:MM:SS.s") @test false catch err @test isa(err, ArgumentError) - @test err.msg == "Unable to parse date time. Expected token Delim(.) at char 19" + @test err.msg == "Unable to parse date time. Expected directive Delim(.) 
at char 16" end f = "yy:dd:mm" @@ -375,29 +378,42 @@ let f = "YY" end # Issue: https://github.com/quinnj/TimeZones.jl/issues/19 -let ds = "2015-07-24T05:38:19.591Z", - dt = Dates.DateTime(2015, 7, 24, 5, 38, 19, 591), +let + const Zulu = String - format = "yyyy-mm-ddTHH:MM:SS.sssZ", + function Dates.tryparsenext(d::Dates.DatePart{'Z'}, str, i, len) + Dates.tryparsenext_word(str, i, len, Dates.min_width(d), Dates.max_width(d)) + end + + str = "2015-07-24T05:38:19.591Z" + dt = Dates.DateTime(2015, 7, 24, 5, 38, 19, 591) + parsed = Any[ + Dates.Year(2015), Dates.Month(7), Dates.Day(24), + Dates.Hour(5), Dates.Minute(38), Dates.Second(19), Dates.Millisecond(591) + ] + + format = "yyyy-mm-ddTHH:MM:SS.sssZ" escaped_format = "yyyy-mm-dd\\THH:MM:SS.sss\\Z" - # Typically 'Z' isn't treated as a slot so it doesn't have to be escaped - @test DateTime(ds, format) == dt - @test DateTime(ds, escaped_format) == dt + # Typically 'Z' isn't treated as a specifier so it doesn't have to be escaped + @test Dates.parse_components(str, Dates.DateFormat(format)) == parsed + @test Dates.parse_components(str, Dates.DateFormat(escaped_format)) == parsed try - # Make 'Z' into a slot - Dates.SLOT_RULE['Z'] = Dates.TimeZone + # Make 'Z' into a specifier + Dates.CONVERSION_SPECIFIERS['Z'] = Zulu + Dates.CONVERSION_DEFAULTS[Zulu] = "" - @test_throws MethodError DateTime(ds, format) - @test DateTime(ds, escaped_format) == dt + @test Dates.parse_components(str, Dates.DateFormat(format)) == [parsed; Zulu("Z")] + @test Dates.parse_components(str, Dates.DateFormat(escaped_format)) == parsed finally - delete!(Dates.SLOT_RULE, 'Z') + delete!(Dates.CONVERSION_SPECIFIERS, 'Z') + delete!(Dates.CONVERSION_DEFAULTS, Zulu) end # Ensure that the default behaviour has been restored - @test DateTime(ds, format) == dt - @test DateTime(ds, escaped_format) == dt + @test Dates.parse_components(str, Dates.DateFormat(format)) == parsed + @test Dates.parse_components(str, Dates.DateFormat(escaped_format)) == parsed end 
# Issue 10817