Skip to content

Commit 2945c84

Browse files
committed
switch to utf8proc's portable, up-to-date, upper/lowercase functions (fixes #11471)
1 parent 7d8178c commit 2945c84

File tree

9 files changed

+16
-10
lines changed

9 files changed

+16
-10
lines changed

base/string.jl

-4
Original file line numberDiff line numberDiff line change
@@ -793,10 +793,6 @@ write(io::IO, s::RopeString) = (write(io, s.head); write(io, s.tail))
793793
sizeof(s::RopeString) = sizeof(s.head) + sizeof(s.tail)
794794

795795
## uppercase and lowercase transformations ##
796-
797-
# Upper-case a single character by passing it to the C library's `towupper`
# as a wide character and converting the returned value back to `Char`.
function uppercase(c::Char)
    wide = ccall(:towupper, Cwchar_t, (Cwchar_t,), c)
    return convert(Char, wide)
end
798-
# Lower-case a single character by passing it to the C library's `towlower`
# as a wide character and converting the returned value back to `Char`.
function lowercase(c::Char)
    wide = ccall(:towlower, Cwchar_t, (Cwchar_t,), c)
    return convert(Char, wide)
end
799-
800796
# Upper-case a string by mapping the per-character `uppercase` over it.
function uppercase(s::AbstractString)
    return map(uppercase, s)
end
801797
# Lower-case a string by mapping the per-character `lowercase` over it.
function lowercase(s::AbstractString)
    return map(lowercase, s)
end
802798

base/utf8proc.jl

+7-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Various Unicode functionality from the utf8proc library
44
module UTF8proc
55

6-
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid
6+
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase
77

88
export isgraphemebreak
99

@@ -121,6 +121,12 @@ end
121121

122122
# Width of `c` as reported by utf8proc's `utf8proc_charwidth`; the C `int`
# result is converted to a Julia `Int` before being returned.
function charwidth(c::Char)
    width = ccall(:utf8proc_charwidth, Cint, (UInt32,), c)
    return Int(width)
end
123123

124+
# faster x+y that does no overflow checking
125+
# Add the raw 32-bit offset `y` to the bit pattern of `x` and reinterpret the
# sum as a `Char`. The addition is plain UInt32 arithmetic, so it wraps around
# instead of raising an error on overflow.
function fastplus(x::Char, y::UInt32)
    bits = reinterpret(UInt32, x)
    return reinterpret(Char, bits + y)
end
126+
127+
# Lower-case a single character. ASCII input is handled inline: 'A'..'Z' are
# shifted by 0x20 with `fastplus`, and any other ASCII character is returned
# unchanged. Non-ASCII input is passed to utf8proc's `utf8proc_tolower`.
function lowercase(c::Char)
    if !isascii(c)
        return ccall(:utf8proc_tolower, Char, (UInt32,), c)
    end
    return 'A' <= c <= 'Z' ? fastplus(c, 0x00000020) : c
end
128+
# Upper-case a single character. ASCII input is handled inline: 'a'..'z' are
# shifted with `fastplus` by 0xffffffe0, which wraps around in UInt32
# arithmetic and so subtracts 0x20; any other ASCII character is returned
# unchanged. Non-ASCII input is passed to utf8proc's `utf8proc_toupper`.
function uppercase(c::Char)
    if !isascii(c)
        return ccall(:utf8proc_toupper, Char, (UInt32,), c)
    end
    return 'a' <= c <= 'z' ? fastplus(c, 0xffffffe0) : c
end
129+
124130
############################################################################
125131

126132
# returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category

deps/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1436,7 +1436,7 @@ UTF8PROC_OBJ_HEADER = $(build_includedir)/utf8proc.h
14361436
UTF8PROC_OBJ_TARGET = $(UTF8PROC_OBJ_LIB) $(UTF8PROC_OBJ_HEADER)
14371437

14381438
$(UTF8PROC_SRC_TARGET): $(UTF8PROC_SRC_DIR)/Makefile
1439-
$(MAKE) -C $(UTF8PROC_SRC_DIR) cc="$(CC) -O2 -std=c99 $(fPIC) -DUTF8PROC_EXPORTS $(DEPS_CFLAGS)" AR="$(AR)" libutf8proc.a
1439+
$(MAKE) -C $(UTF8PROC_SRC_DIR) CC="$(CC)" CFLAGS="-O2 -std=c99 $(fPIC) -DUTF8PROC_EXPORTS $(DEPS_CFLAGS)" AR="$(AR)" libutf8proc.a
14401440
touch -c $@
14411441
$(UTF8PROC_SRC_DIR)/checked: $(UTF8PROC_SRC_TARGET)
14421442
ifeq ($(OS),$(BUILD_OS))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
e61de478d2a858586671dfd56477281e
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
167d3a7779335d7ac0103431c4e7b1d9dc4f31836a7704524ea6c1c29d447012ec49f122247f9bf5d445500d7ddd7af5403ca260da05ffac9dba114e32afac07

deps/checksums/utf8proc-v1.2.tar.gz/md5

-1
This file was deleted.

deps/checksums/utf8proc-v1.2.tar.gz/sha512

-1
This file was deleted.

deps/utf8proc.version

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
UTF8PROC_BRANCH=v1.2
2-
UTF8PROC_SHA1=e1fdad0ca9dc518b429439b6f4eac546a1bdd0de
1+
UTF8PROC_BRANCH=1.3-dev1
2+
UTF8PROC_SHA1=f7219d516efe7496737d041f22b0ba567174fb16

test/strings.jl

+4
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,10 @@ end
673673
@test lowercase("AbC") == "abc"
674674
@test lowercase('A') == 'a'
675675
@test lowercase('a') == 'a'
676+
@test uppercase('α') == '\u0391'
677+
@test lowercase('Δ') == 'δ'
678+
@test lowercase('\U118bf') == '\U118df'
679+
@test uppercase('\U1044d') == '\U10425'
676680
@test ucfirst("Abc") == "Abc"
677681
@test ucfirst("abc") == "Abc"
678682
@test lcfirst("ABC") == "aBC"

0 commit comments

Comments
 (0)