From 9425f9fb3beb5039be2897d0063520591f534f13 Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:05:23 +0530 Subject: [PATCH 1/7] Update util.jl --- test/strings/util.jl | 71 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/test/strings/util.jl b/test/strings/util.jl index bb87881bbaa1d..ad663753b1e8d 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -791,3 +791,74 @@ end @test endswith(A, split(B, ' ')[end]) @test endswith(A, 'g') end +@testset "String Iterator Tests" begin + # Test with various types of iterators (String, SubStr, and GenericString) + for S in (String, SubStr, Test.GenericString) + # Valid iterators (strings, substrings, and characters) + @test String(Iterators.map(c -> c+1, "abc")) == "bcd" + @test String(Iterators.take("hello world", 5)) == "hello" + @test String(Iterators.filter(c -> c != ' ', "hello world")) == "helloworld" + @test String(Iterators.drop("hello world", 6)) == "world" + + # Single character iterators + @test String(Iterators.map(c -> 'a', "hello")) == "aaaaa" + + # Mixed characters and Unicode + @test String(Iterators.map(c -> c == ' ' ? ' ' : 'A', "hello world")) == "AAAAA AAAAA" + @test String(Iterators.map(c -> 'šŸ˜Š', "hello")) == "šŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜Š" + + # Edge cases: empty string or iterator with no elements + @test String(Iterators.take("", 3)) == "" + @test String(Iterators.drop("", 3)) == "" + + # Invalid iterators (non-Char elements in the iterator) + @test_throws MethodError String(Iterators.map(c -> 1, "hello")) + @test_throws MethodError String(Iterators.filter(c -> c > 128, "hello")) + + # Infinite iterators (should raise MethodError) + @test_throws MethodError String(Iterators.cycle("abc")) + + # Ensure valid behavior for non-iterable inputs + @test_throws MethodError String(19) + @test_throws MethodError String(3.14) + + # Mixed types, ensure correct character handling + @test String(Iterators.map(c -> Char(c), "abc")) == "abc" + + # Nested iterators (iterators within iterators) + @test String(Iterators.flatten(Iterators.map(c -> c, ["hello", "world"]))) == "helloworld" + @test String(Iterators.flatten(Iterators.map(c -> "hello", 1:3))) == "hellohellohello" + + # Using an empty generator that produces no values + @test String(Iterators.filter(c -> false, "hello")) == "" + + # Iterators with edge cases (multiple empty spaces, mixed Unicode chars) + @test String(Iterators.map(c -> ' ', "hello world")) == " " + @test String(Iterators.map(c -> 'šŸ˜Š', "hi there")) == "šŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜Š" + + # Checking for correct error handling with invalid iterators + @test_throws MethodError String(Iterators.map(c -> [1,2], "hello")) + end + + # Additional tests for infinite iterators + @test_throws MethodError String(Iterators.cycle("abc")) + + # Ensure that String is only created from valid iterators (chars, strings) + @test String("valid string") == "valid string" + @test String("test") == "test" + + # Edge case with non-character data in the iterator + @test_throws MethodError String(Iterators.map(c -> 42, "hello")) + + # Nested iterator tests (flattening iterators) + @test String(Iterators.flatten(Iterators.map(c -> "abc", 1:3))) == "abcabcabc" + @test String(Iterators.flatten(Iterators.map(c -> "šŸ˜Š", 1:2))) == "šŸ˜ŠšŸ˜Š" + + # Edge case with no characters in the iterator + @test String(Iterators.filter(c -> false, "hello")) == "" + @test String(Iterators.take("hello", 0)) == "" + + # Performance testing for large input + @test String(Iterators.take("a"^1000, 500)) == "a"^500 + @test String(Iterators.map(c -> 'a', "a"^100000)) == "a"^100000 +end From ba5ce0eac715f0763e3359c071fea141fe319dea Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:06:27 +0530 Subject: [PATCH 2/7] Update util.jl --- base/strings/util.jl | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/base/strings/util.jl b/base/strings/util.jl index 87c2abab5344c..bf413fdc217d3 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -100,6 +100,7 @@ Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix)) function endswith(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) + cub = ncodeunits(b) astart = ncodeunits(a) - ncodeunits(b) + 1 if astart < 1 false @@ -1274,3 +1275,29 @@ function Base.rest(s::AbstractString, st...) end return String(take!(io)) end +""" +The `String` constructor is enhanced to accept iterators/generator objects. + +### Method Details: +- **String(x::AbstractIterator)** + - Converts an iterator into a string. + - Throws a `MethodError` if the iterator contains invalid data types (non-Char types) or if it is an infinite iterator. + - Ensures that the result is a valid string representation composed solely of characters (`Char`). + +### Examples +```jldoctest +julia> String(Iterators.map(c -> c+1, "Hello, world")) +"Ifmmp-!xpsme" # Generates a string by incrementing ASCII values of each character. + +julia> String(Iterators.take("Hello, world", 5)) +"Hello" # Takes the first 5 characters of the string and converts it to a string. +""" +String(x) = String_iterator(x, IteratorSize(x)) +String_iterator(x, ::IsInfinite) = throw(MethodError(String, (x,))) +String_iterator(x, ::IteratorSize) = begin + collected = collect(x) + if !(isa(collected, AbstractVector) && all(x -> isa(x, Char), collected)) + throw(MethodError(String, (x,))) + end + return String(collected::AbstractVector{<:AbstractChar}) +end From ec60545633b8301b450d765815af750b40a50733 Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:10:22 +0530 Subject: [PATCH 3/7] Update util.jl --- base/strings/util.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index bf413fdc217d3..67e6cc1744d5d 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -100,7 +100,6 @@ Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix)) function endswith(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) - cub = ncodeunits(b) astart = ncodeunits(a) - ncodeunits(b) + 1 if astart < 1 false From 3570a4f8f03d9ec14ab124f5e690893399983483 Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Thu, 30 Jan 2025 18:06:30 +0530 Subject: [PATCH 4/7] Update util.jl new implementation without parsing twice --- base/strings/util.jl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 67e6cc1744d5d..7ce0b00cfb6b8 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -100,6 +100,7 @@ Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix)) function endswith(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) + cub = ncodeunits(b) astart = ncodeunits(a) - ncodeunits(b) + 1 if astart < 1 false @@ -1291,12 +1292,16 @@ julia> String(Iterators.map(c -> c+1, "Hello, world")) julia> String(Iterators.take("Hello, world", 5)) "Hello" # Takes the first 5 characters of the string and converts it to a string. """ -String(x) = String_iterator(x, IteratorSize(x)) -String_iterator(x, ::IsInfinite) = throw(MethodError(String, (x,))) -String_iterator(x, ::IteratorSize) = begin - collected = collect(x) - if !(isa(collected, AbstractVector) && all(x -> isa(x, Char), collected)) +String(x) = _string_iterator(x, IteratorSize(x)) +_string_iterator(x, ::IsInfinite) = throw(MethodError(String, (x,))) +_string_iterator(x, ::IteratorSize) = begin + try + collected = collect(Char, x) + if ndims(collected) != 1 + throw(MethodError(String, (x,))) + end + return String(collected) + catch e throw(MethodError(String, (x,))) end - return String(collected::AbstractVector{<:AbstractChar}) end From c351bdbd8c9cc48e10c3566a6bef77836327bf8b Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Thu, 30 Jan 2025 18:07:15 +0530 Subject: [PATCH 5/7] Update util.jl adding new tests for non-ascii characters --- test/strings/util.jl | 43 +++++-------------------------------------- 1 file changed, 5 insertions(+), 38 deletions(-) diff --git a/test/strings/util.jl b/test/strings/util.jl index ad663753b1e8d..cb089c3e3a615 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -792,73 +792,40 @@ end @test endswith(A, 'g') end @testset "String Iterator Tests" begin - # Test with various types of iterators (String, SubStr, and GenericString) for S in (String, SubStr, Test.GenericString) - # Valid iterators (strings, substrings, and characters) @test String(Iterators.map(c -> c+1, "abc")) == "bcd" @test String(Iterators.take("hello world", 5)) == "hello" @test String(Iterators.filter(c -> c != ' ', "hello world")) == "helloworld" @test String(Iterators.drop("hello world", 6)) == "world" - - # Single character iterators @test String(Iterators.map(c -> 'a', "hello")) == "aaaaa" - - # Mixed characters and Unicode @test String(Iterators.map(c -> c == ' ' ? ' ' : 'A', "hello world")) == "AAAAA AAAAA" @test String(Iterators.map(c -> 'šŸ˜Š', "hello")) == "šŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜Š" - - # Edge cases: empty string or iterator with no elements @test String(Iterators.take("", 3)) == "" @test String(Iterators.drop("", 3)) == "" - - # Invalid iterators (non-Char elements in the iterator) @test_throws MethodError String(Iterators.map(c -> 1, "hello")) @test_throws MethodError String(Iterators.filter(c -> c > 128, "hello")) - - # Infinite iterators (should raise MethodError) @test_throws MethodError String(Iterators.cycle("abc")) - - # Ensure valid behavior for non-iterable inputs @test_throws MethodError String(19) @test_throws MethodError String(3.14) - - # Mixed types, ensure correct character handling @test String(Iterators.map(c -> Char(c), "abc")) == "abc" - - # Nested iterators (iterators within iterators) @test String(Iterators.flatten(Iterators.map(c -> c, ["hello", "world"]))) == "helloworld" @test String(Iterators.flatten(Iterators.map(c -> "hello", 1:3))) == "hellohellohello" - - # Using an empty generator that produces no values @test String(Iterators.filter(c -> false, "hello")) == "" - - # Iterators with edge cases (multiple empty spaces, mixed Unicode chars) @test String(Iterators.map(c -> ' ', "hello world")) == " " @test String(Iterators.map(c -> 'šŸ˜Š', "hi there")) == "šŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜Š" - - # Checking for correct error handling with invalid iterators - @test_throws MethodError String(Iterators.map(c -> [1,2], "hello")) + @test String("ƄƖƜā»Ø") == "ƄƖƜā»Ø" + @test String(Iterators.map(c -> 'A', "ƄƖƜā»Ø")) == "AAAA" + @test String(Iterators.map(c -> c == 'Ɩ' ? 'O' : c, "ƄƖƜā»Ø")) == "ƄOƜā»Ø" + @test String(Iterators.take("ƄƖƜā»Ø", 2)) == "ƄƖ" + @test String(Iterators.drop("ƄƖƜā»Ø", 1)) == "ƖƜā»Ø" end - - # Additional tests for infinite iterators @test_throws MethodError String(Iterators.cycle("abc")) - - # Ensure that String is only created from valid iterators (chars, strings) @test String("valid string") == "valid string" @test String("test") == "test" - - # Edge case with non-character data in the iterator - @test_throws MethodError String(Iterators.map(c -> 42, "hello")) - - # Nested iterator tests (flattening iterators) @test String(Iterators.flatten(Iterators.map(c -> "abc", 1:3))) == "abcabcabc" @test String(Iterators.flatten(Iterators.map(c -> "šŸ˜Š", 1:2))) == "šŸ˜ŠšŸ˜Š" - - # Edge case with no characters in the iterator @test String(Iterators.filter(c -> false, "hello")) == "" @test String(Iterators.take("hello", 0)) == "" - - # Performance testing for large input @test String(Iterators.take("a"^1000, 500)) == "a"^500 @test String(Iterators.map(c -> 'a', "a"^100000)) == "a"^100000 end From db79544ceeb615d6faa7ef4d6e1171c1a1e6c358 Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Thu, 30 Jan 2025 22:25:53 +0530 Subject: [PATCH 6/7] Update util.jl --- test/strings/util.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/strings/util.jl b/test/strings/util.jl index cb089c3e3a615..4e1da39dd586a 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -802,7 +802,6 @@ end @test String(Iterators.map(c -> 'šŸ˜Š', "hello")) == "šŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜ŠšŸ˜Š" @test String(Iterators.take("", 3)) == "" @test String(Iterators.drop("", 3)) == "" - @test_throws MethodError String(Iterators.map(c -> 1, "hello")) @test_throws MethodError String(Iterators.filter(c -> c > 128, "hello")) @test_throws MethodError String(Iterators.cycle("abc")) @test_throws MethodError String(19) From 6fa694d21e8d748aa7c3cfc441b5b72d845da04c Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Sun, 9 Feb 2025 15:33:13 +0530 Subject: [PATCH 7/7] Update base/strings/util.jl Co-authored-by: Jameson Nash --- base/strings/util.jl | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 7ce0b00cfb6b8..a23dfbf033a60 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1292,16 +1292,4 @@ julia> String(Iterators.map(c -> c+1, "Hello, world")) julia> String(Iterators.take("Hello, world", 5)) "Hello" # Takes the first 5 characters of the string and converts it to a string. """ -String(x) = _string_iterator(x, IteratorSize(x)) -_string_iterator(x, ::IsInfinite) = throw(MethodError(String, (x,))) -_string_iterator(x, ::IteratorSize) = begin - try - collected = collect(Char, x) - if ndims(collected) != 1 - throw(MethodError(String, (x,))) - end - return String(collected) - catch e - throw(MethodError(String, (x,))) - end -end +String(x) = sprint(io -> foreach(c -> write(io, Char(c)::Char), x))