Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: eachsplit for iterative splitting #39245

Merged
merged 2 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ New language features
* `@inline` and `@noinline` annotations can now be applied to a function callsite or block
to enforce the involved function calls to be (or not to be) inlined. ([#41312])
* The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551])
* New function `eachsplit(str)` for iteratively performing `split(str)`. ([#39245])

Language changes
----------------
Expand Down
2 changes: 1 addition & 1 deletion base/binaryplatforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::
libstdcxx_version = get_field(m, libstdcxx_version_mapping)
cxxstring_abi = get_field(m, cxxstring_abi_mapping)
function split_tags(tagstr)
tag_fields = filter(!isempty, split(tagstr, "-"))
tag_fields = split(tagstr, "-"; keepempty=false)
if isempty(tag_fields)
return Pair{String,String}[]
end
Expand Down
4 changes: 2 additions & 2 deletions base/cmd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ function addenv(cmd::Cmd, env::Dict; inherit::Bool = true)
merge!(new_env, ENV)
end
else
for (k, v) in split.(cmd.env, "=")
for (k, v) in eachsplit.(cmd.env, "=")
new_env[string(k)::String] = string(v)::String
end
end
Expand All @@ -284,7 +284,7 @@ function addenv(cmd::Cmd, pairs::Pair{<:AbstractString}...; inherit::Bool = true
end

function addenv(cmd::Cmd, env::Vector{<:AbstractString}; inherit::Bool = true)
return addenv(cmd, Dict(k => v for (k, v) in split.(env, "=")); inherit)
return addenv(cmd, Dict(k => v for (k, v) in eachsplit.(env, "=")); inherit)
end

(&)(left::AbstractCmd, right::AbstractCmd) = AndCmds(left, right)
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ export
codeunits,
digits,
digits!,
eachsplit,
escape_string,
hex2bytes,
hex2bytes!,
Expand Down
4 changes: 2 additions & 2 deletions base/initdefs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ function init_depot_path()
if haskey(ENV, "JULIA_DEPOT_PATH")
str = ENV["JULIA_DEPOT_PATH"]
isempty(str) && return
for path in split(str, Sys.iswindows() ? ';' : ':')
for path in eachsplit(str, Sys.iswindows() ? ';' : ':')
if isempty(path)
append_default_depot_path!(DEPOT_PATH)
else
Expand Down Expand Up @@ -198,7 +198,7 @@ end
function parse_load_path(str::String)
envs = String[]
isempty(str) && return envs
for env in split(str, Sys.iswindows() ? ';' : ':')
for env in eachsplit(str, Sys.iswindows() ? ';' : ':')
if isempty(env)
for env′ in DEFAULT_LOAD_PATH
env′ in envs || push!(envs, env′)
Expand Down
9 changes: 5 additions & 4 deletions base/logging.jl
Original file line number Diff line number Diff line change
Expand Up @@ -674,10 +674,11 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module,
end
iob = IOContext(buf, stream)
levelstr = level == Warn ? "Warning" : string(level)
msglines = split(chomp(string(message)::String), '\n')
println(iob, "┌ ", levelstr, ": ", msglines[1])
for i in 2:length(msglines)
println(iob, "│ ", msglines[i])
msglines = eachsplit(chomp(string(message)::String), '\n')
msg1, rest = Iterators.peel(msglines)
println(iob, "┌ ", levelstr, ": ", msg1)
for msg in rest
println(iob, "│ ", msg)
end
for (key, val) in kwargs
key === :maxlog && continue
Expand Down
4 changes: 2 additions & 2 deletions base/mpfr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,7 @@ function string_mpfr(x::BigFloat, fmt::String)
end

function _prettify_bigfloat(s::String)::String
mantissa, exponent = split(s, 'e')
mantissa, exponent = eachsplit(s, 'e')
if !occursin('.', mantissa)
mantissa = string(mantissa, '.')
end
Expand All @@ -973,7 +973,7 @@ function _prettify_bigfloat(s::String)::String
expo = parse(Int, exponent)
if -5 < expo < 6
expo == 0 && return mantissa
int, frac = split(mantissa, '.')
int, frac = eachsplit(mantissa, '.')
if expo > 0
expo < length(frac) ?
string(int, frac[1:expo], '.', frac[expo+1:end]) :
Expand Down
4 changes: 2 additions & 2 deletions base/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -368,8 +368,8 @@ function normpath(path::String)
isabs = isabspath(path)
isdir = isdirpath(path)
drive, path = splitdrive(path)
parts = split(path, path_separator_re)
filter!(x->!isempty(x) && x!=".", parts)
parts = split(path, path_separator_re; keepempty=false)
filter!(!=("."), parts)
while true
clean = true
for j = 1:length(parts)-1
Expand Down
119 changes: 80 additions & 39 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,83 @@ function rpad(
r == 0 ? string(s, p^q) : string(s, p^q, first(p, r))
end

"""
eachsplit(str::AbstractString, dlm; limit::Integer=0)
eachsplit(str::AbstractString; limit::Integer=0)

Split `str` on occurrences of the delimiter(s) `dlm` and return an iterator over the
substrings. `dlm` can be any of the formats allowed by [`findnext`](@ref)'s first argument
(i.e. as a string, regular expression or a function), or as a single character or collection
of characters.

If `dlm` is omitted, it defaults to [`isspace`](@ref).

The iterator will return a maximum of `limit` results if the keyword argument is supplied.
The default of `limit=0` implies no maximum.

See also [`split`](@ref).

# Examples
```jldoctest
julia> a = "Ma.rch"
"Ma.rch"

julia> collect(eachsplit(a, "."))
2-element Vector{SubString}:
"Ma"
"rch"
```
"""
function eachsplit end

# Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
# and prevents a major invalidation risk (1550 MethodInstances)
struct SplitIterator{S<:AbstractString,F}
str::S
splitter::F
limit::Int
keepempty::Bool
end

eltype(::Type{<:SplitIterator}) = SubString

IteratorSize(::Type{<:SplitIterator}) = SizeUnknown()

# Produce the next substring of a `SplitIterator`, or `nothing` when iteration is finished.
#
# The iteration state is the tuple `(i, k, n)`:
# i: the starting index of the substring to be extracted
# k: the index from which the search for the next delimiter resumes
# n: the number of substrings returned so far; the delimiter loop stops once
#    n == iter.limit - 1, so the last result is the unsplit remainder of the string
#    (with limit == 0 this never triggers, i.e. there is no maximum)
function iterate(iter::SplitIterator, (i, k, n)=(firstindex(iter.str), firstindex(iter.str), 0))
    # The final result (bottom of this function) stores `ncodeunits + 2` as the next `i`;
    # seeing such a value here means the remainder has already been returned.
    i - 1 > ncodeunits(iter.str)::Int && return nothing
    r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
    # Keep going while a delimiter was found, the limit has not been reached and the
    # match starts inside the string.
    while r !== nothing && n != iter.limit - 1 && first(r) <= ncodeunits(iter.str)
        # j: index where the delimiter starts; k: index just past the delimiter
        j, k = first(r), nextind(iter.str, last(r))::Int
        # k_: where the next search starts. For an empty match (k <= j) step one character
        # past j instead -- presumably so the same empty match is not found again.
        k_ = k <= j ? nextind(iter.str, j) : k
        if i < k
            substr = @inbounds SubString(iter.str, i, prevind(iter.str, j)::Int)
            # `i < j` means the piece before the delimiter is non-empty. The next state
            # starts the following piece at k and resumes searching at k_.
            (iter.keepempty || i < j) && return (substr, (k, k_, n + 1))
            # Empty piece with keepempty == false: skip it and continue after the delimiter.
            i = k
        end
        k = k_
        r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
    end
    # No further split: if the remainder is empty and empty pieces are not wanted, stop.
    iter.keepempty || i <= ncodeunits(iter.str) || return nothing
    # Return everything from i to the end; `ncodeunits + 2` marks the iterator as exhausted
    # for the check at the top of this function.
    @inbounds SubString(iter.str, i), (ncodeunits(iter.str) + 2, k, n + 1)
end

# Generic entry point: `splitter` is stored as given and handed to `findnext` while iterating.
function eachsplit(str::T, splitter;
                   limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
    return SplitIterator(str, splitter, limit, keepempty)
end

# A collection of characters splits on any character contained in it.
function eachsplit(str::T,
                   splitter::Union{Tuple{Vararg{AbstractChar}},
                                   AbstractVector{<:AbstractChar},
                                   Set{<:AbstractChar}};
                   limit::Integer=0, keepempty=true) where {T<:AbstractString}
    return eachsplit(str, in(splitter); limit=limit, keepempty=keepempty)
end

# A single character splits on characters that are `isequal` to it.
function eachsplit(str::T, splitter::AbstractChar;
                   limit::Integer=0, keepempty=true) where {T<:AbstractString}
    return eachsplit(str, isequal(splitter); limit=limit, keepempty=keepempty)
end

# a bit oddball, but standard behavior in Perl, Ruby & Python:
function eachsplit(str::AbstractString; limit::Integer=0, keepempty=false)
    return eachsplit(str, isspace; limit=limit, keepempty=keepempty)
end

"""
split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
Expand Down Expand Up @@ -412,52 +489,16 @@ julia> split(a, ".")
"rch"
```
"""
function split end

function split(str::T, splitter;
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_split(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
end
function split(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_split(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
end
function split(str::T, splitter::AbstractChar;
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_split(str, isequal(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
end

function _split(str::AbstractString, splitter::F, limit::Integer, keepempty::Bool, strs::Vector) where F
# Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
# and prevents a major invalidation risk (1550 MethodInstances)
i = 1 # firstindex(str)
n = lastindex(str)::Int
r = findfirst(splitter,str)::Union{Nothing,Int,UnitRange{Int}}
if r !== nothing
j, k = first(r), nextind(str,last(r))::Int
while 0 < j <= n && length(strs) != limit-1
if i < k
if keepempty || i < j
push!(strs, @inbounds SubString(str,i,prevind(str,j)::Int))
end
i = k
end
(k <= j) && (k = nextind(str,j)::Int)
r = findnext(splitter,str,k)::Union{Nothing,Int,UnitRange{Int}}
r === nothing && break
j, k = first(r), nextind(str,last(r))::Int
end
end
if keepempty || i <= ncodeunits(str)::Int
push!(strs, @inbounds SubString(str,i))
end
return strs
itr = eachsplit(str, splitter; limit, keepempty)
collect(T <: SubString ? T : SubString{T}, itr)
end

# a bit oddball, but standard behavior in Perl, Ruby & Python:
split(str::AbstractString;
limit::Integer=0, keepempty::Bool=false) =
split(str, isspace; limit=limit, keepempty=keepempty)
split(str, isspace; limit, keepempty)

"""
rsplit(s::AbstractString; limit::Integer=0, keepempty::Bool=false)
Expand Down
2 changes: 1 addition & 1 deletion base/sysinfo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ function which(program_name::String)
# If we have been given just a program name (not a relative or absolute
# path) then we should search `PATH` for it here:
pathsep = iswindows() ? ';' : ':'
path_dirs = abspath.(split(get(ENV, "PATH", ""), pathsep))
path_dirs = map(abspath, eachsplit(get(ENV, "PATH", ""), pathsep))

# On windows we always check the current directory as well
if iswindows()
Expand Down
2 changes: 1 addition & 1 deletion base/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
(bold ? disable_text_style[:bold] : "") *
get(disable_text_style, color, text_colors[:default])
first = true
for line in split(str, '\n')
for line in eachsplit(str, '\n')
first || print(buf, '\n')
first = false
isempty(line) && continue
Expand Down
2 changes: 1 addition & 1 deletion base/version.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ const VERSION_REGEX = r"^
$"ix

function split_idents(s::AbstractString)
idents = split(s, '.')
idents = eachsplit(s, '.')
pidents = Union{UInt64,String}[occursin(r"^\d+$", ident) ? parse(UInt64, ident) : String(ident) for ident in idents]
return tuple(pidents...)::VerTuple
end
Expand Down