Skip to content

Commit c82c084

Browse files
addaleax authored and BethGriggs committed
util,readline: NFC-normalize strings before getStringWidth
The assumption here is that decomposed characters render like their composed character equivalents, and that working with the former comes with a risk of over-estimating string widths, given that we compute them on a per-code-point basis.

The regression test added here (한글 in decomposed NFD form vs 한글 in composed NFC form) is an example of that happening.

PR-URL: #33052
Reviewed-By: Gus Caplan <[email protected]>
Reviewed-By: Michaël Zasso <[email protected]>
Reviewed-By: Anto Aravinth <[email protected]>
Reviewed-By: Colin Ihrig <[email protected]>
Reviewed-By: James M Snell <[email protected]>
1 parent 4abc45a commit c82c084

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

lib/internal/util/inspect.js

+10-5
Original file line number | Diff line number | Diff line change
@@ -1922,6 +1922,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
19221922
return str;
19231923
}
19241924

1925+
function prepareStringForGetStringWidth(str, removeControlChars) {
1926+
str = str.normalize('NFC');
1927+
if (removeControlChars)
1928+
str = stripVTControlCharacters(str);
1929+
return str;
1930+
}
1931+
19251932
if (internalBinding('config').hasIntl) {
19261933
const icu = internalBinding('icu');
19271934
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@@ -1931,8 +1938,8 @@ if (internalBinding('config').hasIntl) {
19311938
// the receiving end supports.
19321939
getStringWidth = function getStringWidth(str, removeControlChars = true) {
19331940
let width = 0;
1934-
if (removeControlChars)
1935-
str = stripVTControlCharacters(str);
1941+
1942+
str = prepareStringForGetStringWidth(str, removeControlChars);
19361943
for (let i = 0; i < str.length; i++) {
19371944
// Try to avoid calling into C++ by first handling the ASCII portion of
19381945
// the string. If it is fully ASCII, we skip the C++ part.
@@ -1952,9 +1959,7 @@ if (internalBinding('config').hasIntl) {
19521959
getStringWidth = function getStringWidth(str, removeControlChars = true) {
19531960
let width = 0;
19541961

1955-
if (removeControlChars)
1956-
str = stripVTControlCharacters(str);
1957-
1962+
str = prepareStringForGetStringWidth(str, removeControlChars);
19581963
for (const char of str) {
19591964
const code = char.codePointAt(0);
19601965
if (isFullWidthCodePoint(code)) {

test/parallel/test-icu-stringwidth.js

+9
Original file line number | Diff line number | Diff line change
@@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
8787
assert.strictEqual(getStringWidth(char), 1);
8888
}
8989
}
90+
91+
{
92+
const a = '한글'.normalize('NFD'); // 한글
93+
const b = '한글'.normalize('NFC'); // 한글
94+
assert.strictEqual(a.length, 6);
95+
assert.strictEqual(b.length, 2);
96+
assert.strictEqual(getStringWidth(a), 4);
97+
assert.strictEqual(getStringWidth(b), 4);
98+
}

0 commit comments

Comments
 (0)