Skip to content

Commit 40d5719

Browse files
committed
replace wcwidth by utf8proc_charwidth (fixes JuliaLang#3721, closes JuliaLang#6939)
1 parent 39b9caa commit 40d5719

File tree

7 files changed

+14
-336
lines changed

7 files changed

+14
-336
lines changed

Make.inc

+2
Original file line number | Diff line number | Diff line change
@@ -596,8 +596,10 @@ endif
596596

597597
ifeq ($(USE_SYSTEM_UTF8PROC), 1)
598598
LIBUTF8PROC = -lutf8proc
599+
UTF8PROC_INC = /usr/include
599600
else
600601
LIBUTF8PROC = $(build_libdir)/libutf8proc.a
602+
UTF8PROC_INC = $(JULIAHOME)/deps/utf8proc
601603
endif
602604

603605
# OS specific stuff

base/string.jl

-1
Original file line number | Diff line number | Diff line change
@@ -541,7 +541,6 @@ startswith(a::Array{UInt8,1}, b::Array{UInt8,1}) =
541541

542542
## character column width function ##
543543

544-
charwidth(c::Char) = max(0,Int(ccall(:wcwidth, Int32, (UInt32,), c)))
545544
strwidth(s::AbstractString) = (w=0; for c in s; w += charwidth(c); end; w)
546545
strwidth(s::ByteString) = Int(ccall(:u8_strwidth, Csize_t, (Ptr{UInt8},), s.data))
547546
# TODO: implement and use u8_strnwidth that takes a length argument

base/utf8proc.jl

+5-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype
66
export isgraphemebreak
77

88
# also exported by Base:
9-
export normalize_string, graphemes, is_valid_char, is_assigned_char,
9+
export normalize_string, graphemes, is_valid_char, is_assigned_char, charwidth,
1010
islower, isupper, isalpha, isdigit, isnumber, isalnum,
1111
iscntrl, ispunct, isspace, isprint, isgraph, isblank
1212

@@ -116,6 +116,10 @@ end
116116

117117
############################################################################
118118

119+
charwidth(c::Char) = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c))
120+
121+
############################################################################
122+
119123
# returns UTF8PROC_CATEGORY code in 1:30 giving Unicode category
120124
function category_code(c)
121125
UInt32(c) > 0x10FFFF && return 0x0000 # see utf8proc_get_property docs

src/support/Makefile

+2-2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ SRCS = hashing timefuncs ptrhash operators \
1111
utf8 ios htable bitvector \
1212
int2str libsupportinit arraylist strtod
1313
ifeq ($(OS),WINNT)
14-
SRCS += asprintf wcwidth
14+
SRCS += asprintf
1515
ifeq ($(ARCH),i686)
1616
SRCS += _setjmp.win32 _longjmp.win32
1717
else ifeq ($(ARCH),i386)
@@ -26,7 +26,7 @@ HEADERS = $(wildcard *.h) $(LIBUV_INC)/uv.h
2626
OBJS = $(SRCS:%=$(BUILDDIR)/%.o)
2727
DOBJS = $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
2828

29-
FLAGS = $(CFLAGS) $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -DLIBRARY_EXPORTS
29+
FLAGS = $(CFLAGS) $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DLIBRARY_EXPORTS
3030
ifneq ($(USEMSVC), 1)
3131
FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden
3232
endif

src/support/utf8.c

+2-9
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
#include <assert.h>
3434

3535
#include "utf8.h"
36+
#include "utf8proc.h"
3637

3738
#ifdef __cplusplus
3839
extern "C" {
@@ -261,17 +262,10 @@ size_t u8_strlen(const char *s)
261262
return count;
262263
}
263264

264-
#if defined(_OS_WINDOWS_)
265-
extern int wcwidth(uint32_t ch);
266-
#elif defined(_OS_LINUX_)
267-
extern int wcwidth(wchar_t ch);
268-
#endif
269-
270265
size_t u8_strwidth(const char *s)
271266
{
272267
uint32_t ch;
273268
size_t nb, tot=0;
274-
int w;
275269
signed char sc;
276270

277271
while ((sc = (signed char)*s) != 0) {
@@ -293,8 +287,7 @@ size_t u8_strwidth(const char *s)
293287
case 0: ch += (unsigned char)*s++;
294288
}
295289
ch -= offsetsFromUTF8[nb];
296-
w = wcwidth(ch); // might return -1
297-
if (w > 0) tot += w;
290+
tot += utf8proc_charwidth(ch);
298291
}
299292
}
300293
return tot;

0 commit comments

Comments
 (0)