@@ -367,6 +367,78 @@ function rpad(
367
367
r == 0 ? string (s, p^ q) : string (s, p^ q, first (p, r))
368
368
end
369
369
"""
    eachsplit(str::AbstractString, dlm; limit::Integer=0)
    eachsplit(str::AbstractString; limit::Integer=0)

Split `str` on occurrences of the delimiter(s) `dlm` and return an iterator over the
substrings.  `dlm` can be any of the formats allowed by [`findnext`](@ref)'s first argument
(i.e. as a string, regular expression or a function), or as a single character or collection
of characters.

If `dlm` is omitted, it defaults to [`isspace`](@ref).

The iterator will return a maximum of `limit` results if the keyword argument is supplied.
The default of `limit=0` implies no maximum.

See also [`split`](@ref).

# Examples
```jldoctest
julia> a = "Ma.rch"
"Ma.rch"

julia> collect(eachsplit(a, "."))
2-element Vector{SubString{String}}:
 "Ma"
 "rch"
```
"""
function eachsplit end

# Lazy iterator over the substrings of `str` delimited by matches of `splitter`.
#   str:      the string being split
#   splitter: the delimiter, used as the first argument of `findnext`
#   limit:    the `limit` requested by the caller (`0` means no maximum)
#
# Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
# and prevents a major invalidation risk (1550 MethodInstances)
struct SplitIterator{S<:AbstractString,F}
    str::S
    splitter::F
    limit::Int
end

# Report a concrete element type so that `collect` and other consumers get a
# `Vector{SubString{S}}` instead of a `Vector{SubString}` with abstract elements.
# Taking a substring of a `SubString{T}` yields a `SubString{T}` again (not a nested
# `SubString{SubString{T}}`), hence the second, more specific method.
eltype(::Type{<:SplitIterator{T}}) where {T} = SubString{T}
eltype(::Type{<:SplitIterator{<:SubString{T}}}) where {T} = SubString{T}

# The number of substrings is only known after scanning the whole string.
IteratorSize(::Type{<:SplitIterator}) = SizeUnknown()
410
+
411
+ # i: the starting index of the substring to be extracted
412
+ # k: the starting index of the next substring to be extracted
413
+ # n: the number of splits returned so far; always less than iter.limit - 1 (1 for the rest)
414
+ function iterate (iter:: SplitIterator , (i, k, n)= (firstindex (iter. str), firstindex (iter. str), 0 ))
415
+ i - 1 > ncodeunits (iter. str):: Int && return nothing
416
+ r = findnext (iter. splitter, iter. str, k):: Union{Nothing,Int,UnitRange{Int}}
417
+ while r != = nothing && n != iter. limit - 1 && first (r) <= lastindex (iter. str)
418
+ j, k = first (r), nextind (iter. str, last (r)):: Int
419
+ k_ = ifelse (k <= j, nextind (iter. str, j), k)
420
+ if i < k
421
+ substr = @inbounds SubString (iter. str, i, prevind (iter. str, j):: Int )
422
+ return (substr, (max (i, k), k_, n + Int (i < j)))
423
+ end
424
+ k = k_
425
+ r = findnext (iter. splitter, iter. str, k):: Union{Nothing,Int,UnitRange{Int}}
426
+ end
427
+ @inbounds SubString (iter. str, i), (ncodeunits (iter. str) + 2 , k, n + 1 )
428
+ end
429
+
430
+ eachsplit (str:: T , splitter; limit:: Integer = 0 ) where {T<: AbstractString } =
431
+ SplitIterator (str, splitter, limit)
432
+
433
+ eachsplit (str:: T , splitter:: Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}} ; limit:: Integer = 0 ) where {T<: AbstractString } =
434
+ eachsplit (str, in (splitter); limit)
435
+
436
+ eachsplit (str:: T , splitter:: AbstractChar ; limit:: Integer = 0 ) where {T<: AbstractString } =
437
+ eachsplit (str, isequal (splitter); limit)
438
+
439
+ # a bit oddball, but standard behavior in Perl, Ruby & Python:
440
+ eachsplit (str:: AbstractString ; limit:: Integer = 0 ) = eachsplit (str, isspace; limit)
441
+
370
442
"""
371
443
split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
372
444
split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -396,46 +468,11 @@ julia> split(a, ".")
396
468
"rch"
397
469
```
398
470
"""
399
- function split end
400
-
function split(str::T, splitter;
               limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
    pieces = eachsplit(str, splitter; limit=limit)
    if !keepempty
        # Empty substrings are dropped lazily, after the iterator has produced them.
        pieces = Iterators.filter(!isempty, pieces)
    end
    # When `T` is already a `SubString` type the elements are of type `T` itself;
    # otherwise they are `SubString{T}`.
    R = T <: SubString ? T : SubString{T}
    return collect(R, pieces)
end
440
477
441
478
# a bit oddball, but standard behavior in Perl, Ruby & Python:
0 commit comments