Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: eachsplit for iterative splitting #39245

merged 2 commits into from
Sep 8, 2021
Show file tree
Hide file tree
Changes from all commits
File filter

Filter by extension

Filter by extension

Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ New language features
* `@inline` and `@noinline` annotations can now be applied to a function callsite or block
to enforce the involved function calls to be (or not to be) inlined. ([#41312])
* The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551])
* New function `eachsplit(str)` for iteratively performing `split(str)`. ([#39245])

Language changes
Expand Down
2 changes: 1 addition & 1 deletion base/binaryplatforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::
libstdcxx_version = get_field(m, libstdcxx_version_mapping)
cxxstring_abi = get_field(m, cxxstring_abi_mapping)
function split_tags(tagstr)
tag_fields = filter(!isempty, split(tagstr, "-"))
tag_fields = split(tagstr, "-"; keepempty=false)
if isempty(tag_fields)
return Pair{String,String}[]
Expand Down
4 changes: 2 additions & 2 deletions base/cmd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ function addenv(cmd::Cmd, env::Dict; inherit::Bool = true)
merge!(new_env, ENV)
for (k, v) in split.(cmd.env, "=")
for (k, v) in eachsplit.(cmd.env, "=")
new_env[string(k)::String] = string(v)::String
Expand All @@ -284,7 +284,7 @@ function addenv(cmd::Cmd, pairs::Pair{<:AbstractString}...; inherit::Bool = true

function addenv(cmd::Cmd, env::Vector{<:AbstractString}; inherit::Bool = true)
return addenv(cmd, Dict(k => v for (k, v) in split.(env, "=")); inherit)
return addenv(cmd, Dict(k => v for (k, v) in eachsplit.(env, "=")); inherit)

(&)(left::AbstractCmd, right::AbstractCmd) = AndCmds(left, right)
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ export
Expand Down
4 changes: 2 additions & 2 deletions base/initdefs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ function init_depot_path()
if haskey(ENV, "JULIA_DEPOT_PATH")
isempty(str) && return
for path in split(str, Sys.iswindows() ? ';' : ':')
for path in eachsplit(str, Sys.iswindows() ? ';' : ':')
if isempty(path)
Expand Down Expand Up @@ -198,7 +198,7 @@ end
function parse_load_path(str::String)
envs = String[]
isempty(str) && return envs
for env in split(str, Sys.iswindows() ? ';' : ':')
for env in eachsplit(str, Sys.iswindows() ? ';' : ':')
if isempty(env)
env′ in envs || push!(envs, env′)
Expand Down
9 changes: 5 additions & 4 deletions base/logging.jl
Original file line number Diff line number Diff line change
Expand Up @@ -674,10 +674,11 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module,
iob = IOContext(buf, stream)
levelstr = level == Warn ? "Warning" : string(level)
msglines = split(chomp(string(message)::String), '\n')
println(iob, "┌ ", levelstr, ": ", msglines[1])
for i in 2:length(msglines)
println(iob, "│ ", msglines[i])
msglines = eachsplit(chomp(string(message)::String), '\n')
msg1, rest = Iterators.peel(msglines)
println(iob, "┌ ", levelstr, ": ", msg1)
for msg in rest
println(iob, "│ ", msg)
for (key, val) in kwargs
key === :maxlog && continue
Expand Down
4 changes: 2 additions & 2 deletions base/mpfr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,7 @@ function string_mpfr(x::BigFloat, fmt::String)

function _prettify_bigfloat(s::String)::String
mantissa, exponent = split(s, 'e')
mantissa, exponent = eachsplit(s, 'e')
if !occursin('.', mantissa)
mantissa = string(mantissa, '.')
Expand All @@ -973,7 +973,7 @@ function _prettify_bigfloat(s::String)::String
expo = parse(Int, exponent)
if -5 < expo < 6
expo == 0 && return mantissa
int, frac = split(mantissa, '.')
int, frac = eachsplit(mantissa, '.')
if expo > 0
expo < length(frac) ?
string(int, frac[1:expo], '.', frac[expo+1:end]) :
Expand Down
4 changes: 2 additions & 2 deletions base/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -368,8 +368,8 @@ function normpath(path::String)
isabs = isabspath(path)
isdir = isdirpath(path)
drive, path = splitdrive(path)
parts = split(path, path_separator_re)
filter!(x->!isempty(x) && x!=".", parts)
parts = split(path, path_separator_re; keepempty=false)
filter!(!=("."), parts)
while true
clean = true
for j = 1:length(parts)-1
Expand Down
119 changes: 80 additions & 39 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,83 @@ function rpad(
r == 0 ? string(s, p^q) : string(s, p^q, first(p, r))

eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)

Split `str` on occurrences of the delimiter(s) `dlm` and return an iterator over the
substrings. `dlm` can be any of the formats allowed by [`findnext`](@ref)'s first argument
(i.e. as a string, regular expression or a function), or as a single character or collection
of characters.

If `dlm` is omitted, it defaults to [`isspace`](@ref).

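The iterator will return a maximum of `limit` results if the keyword argument is supplied.
The default of `limit=0` implies no maximum. If the keyword argument `keepempty` is `false`,
empty substrings are omitted from the results; it defaults to `true` when `dlm` is given and
to `false` when `dlm` is omitted.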

See also [`split`](@ref).

# Examples
julia> a = "Ma.rch"

julia> collect(eachsplit(a, "."))
2-element Vector{SubString}:
function eachsplit end

# Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
# and prevents a major invalidation risk (1550 MethodInstances)
struct SplitIterator{S<:AbstractString,F}

eltype(::Type{<:SplitIterator}) = SubString

IteratorSize(::Type{<:SplitIterator}) = SizeUnknown()

# i: the starting index of the substring to be extracted
# k: the starting index of the next substring to be extracted
# n: the number of substrings returned so far; splitting stops once n reaches iter.limit - 1,
#    so that the last slot holds the rest of the string (never happens when limit is 0)
function iterate(iter::SplitIterator, (i, k, n)=(firstindex(iter.str), firstindex(iter.str), 0))
i - 1 > ncodeunits(iter.str)::Int && return nothing
r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
while r !== nothing && n != iter.limit - 1 && first(r) <= ncodeunits(iter.str)
j, k = first(r), nextind(iter.str, last(r))::Int
k_ = k <= j ? nextind(iter.str, j) : k
if i < k
substr = @inbounds SubString(iter.str, i, prevind(iter.str, j)::Int)
(iter.keepempty || i < j) && return (substr, (k, k_, n + 1))
i = k
k = k_
r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
iter.keepempty || i <= ncodeunits(iter.str) || return nothing
@inbounds SubString(iter.str, i), (ncodeunits(iter.str) + 2, k, n + 1)

eachsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
SplitIterator(str, splitter, limit, keepempty)

eachsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
limit::Integer=0, keepempty=true) where {T<:AbstractString} =
eachsplit(str, in(splitter); limit, keepempty)

eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) where {T<:AbstractString} =
eachsplit(str, isequal(splitter); limit, keepempty)

# a bit oddball, but standard behavior in Perl, Ruby & Python:
eachsplit(str::AbstractString; limit::Integer=0, keepempty=false) =
eachsplit(str, isspace; limit, keepempty)

split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
Expand Down Expand Up @@ -412,52 +489,16 @@ julia> split(a, ".")
function split end

function split(str::T, splitter;
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_split(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
function split(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_split(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
function split(str::T, splitter::AbstractChar;
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_split(str, isequal(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])

function _split(str::AbstractString, splitter::F, limit::Integer, keepempty::Bool, strs::Vector) where F
# Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
# and prevents a major invalidation risk (1550 MethodInstances)
i = 1 # firstindex(str)
n = lastindex(str)::Int
r = findfirst(splitter,str)::Union{Nothing,Int,UnitRange{Int}}
if r !== nothing
j, k = first(r), nextind(str,last(r))::Int
while 0 < j <= n && length(strs) != limit-1
if i < k
if keepempty || i < j
push!(strs, @inbounds SubString(str,i,prevind(str,j)::Int))
i = k
(k <= j) && (k = nextind(str,j)::Int)
r = findnext(splitter,str,k)::Union{Nothing,Int,UnitRange{Int}}
r === nothing && break
j, k = first(r), nextind(str,last(r))::Int
if keepempty || i <= ncodeunits(str)::Int
push!(strs, @inbounds SubString(str,i))
return strs
itr = eachsplit(str, splitter; limit, keepempty)
collect(T <: SubString ? T : SubString{T}, itr)

# a bit oddball, but standard behavior in Perl, Ruby & Python:
limit::Integer=0, keepempty::Bool=false) =
split(str, isspace; limit=limit, keepempty=keepempty)
split(str, isspace; limit, keepempty)

rsplit(s::AbstractString; limit::Integer=0, keepempty::Bool=false)
Expand Down
2 changes: 1 addition & 1 deletion base/sysinfo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ function which(program_name::String)
# If we have been given just a program name (not a relative or absolute
# path) then we should search `PATH` for it here:
pathsep = iswindows() ? ';' : ':'
path_dirs = abspath.(split(get(ENV, "PATH", ""), pathsep))
path_dirs = map(abspath, eachsplit(get(ENV, "PATH", ""), pathsep))

# On windows we always check the current directory as well
if iswindows()
Expand Down
2 changes: 1 addition & 1 deletion base/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
(bold ? disable_text_style[:bold] : "") *
get(disable_text_style, color, text_colors[:default])
first = true
for line in split(str, '\n')
for line in eachsplit(str, '\n')
first || print(buf, '\n')
first = false
isempty(line) && continue
Expand Down
2 changes: 1 addition & 1 deletion base/version.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ const VERSION_REGEX = r"^

function split_idents(s::AbstractString)
idents = split(s, '.')
idents = eachsplit(s, '.')
pidents = Union{UInt64,String}[occursin(r"^\d+$", ident) ? parse(UInt64, ident) : String(ident) for ident in idents]
return tuple(pidents...)::VerTuple
Expand Down