Skip to content

Commit a0326f0

Browse files
BridgeAR authored and danielleadams committed
util: escape lone surrogate code points using .inspect()
Unpaired surrogate code points have no representation in UTF-8, so printing them as-is produces unreadable, effectively "random" output. Instead, escape such code points in the same way as C0 and C1 control characters.

Refs: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs
Signed-off-by: Ruben Bridgewater <[email protected]>
PR-URL: #41001
Reviewed-By: James M Snell <[email protected]>
1 parent 668284b commit a0326f0

File tree

2 files changed

+62
-8
lines changed

2 files changed

+62
-8
lines changed

lib/internal/util/inspect.js

+20-8
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,10 @@ const kArrayType = 1;
179179
const kArrayExtrasType = 2;
180180

181181
/* eslint-disable no-control-regex */
182-
const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]/;
183-
const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
184-
const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
185-
const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
182+
const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
183+
const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
184+
const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
185+
const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
186186
/* eslint-enable no-control-regex */
187187

188188
const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
@@ -463,7 +463,10 @@ function addQuotes(str, quotes) {
463463
return `'${str}'`;
464464
}
465465

466-
const escapeFn = (str) => meta[StringPrototypeCharCodeAt(str)];
466+
/**
 * Maps the first UTF-16 code unit of `str` to its escaped representation.
 *
 * Code units that have an entry in the `meta` lookup table use that entry;
 * any code unit beyond the table is rendered as a `\u` escape followed by its
 * lowercase hexadecimal value.
 * NOTE(review): presumably used as the replacer callback for the
 * `strEscapeSequencesReplacer*` regular expressions - confirm at call sites.
 *
 * @param {string} str Matched text; only its first code unit is inspected.
 * @returns {string} The escape sequence for that code unit.
 */
function escapeFn(str) {
  const code = StringPrototypeCharCodeAt(str);
  if (code < meta.length) {
    return meta[code];
  }
  // Not covered by the lookup table: emit a hexadecimal `\u` escape.
  return `\\u${code.toString(16)}`;
}
467470

468471
// Escape control characters, single quotes and the backslash.
469472
// This is similar to JSON stringify escaping.
@@ -501,8 +504,7 @@ function strEscape(str) {
501504

502505
let result = '';
503506
let last = 0;
504-
const lastIndex = str.length;
505-
for (let i = 0; i < lastIndex; i++) {
507+
for (let i = 0; i < str.length; i++) {
506508
const point = StringPrototypeCharCodeAt(str, i);
507509
if (point === singleQuote ||
508510
point === 92 ||
@@ -514,10 +516,20 @@ function strEscape(str) {
514516
result += `${StringPrototypeSlice(str, last, i)}${meta[point]}`;
515517
}
516518
last = i + 1;
519+
} else if (point >= 0xd800 && point <= 0xdfff) {
520+
if (point <= 0xdbff && i + 1 < str.length) {
521+
const point = StringPrototypeCharCodeAt(str, i + 1);
522+
if (point >= 0xdc00 && point <= 0xdfff) {
523+
i++;
524+
continue;
525+
}
526+
}
527+
result += `${StringPrototypeSlice(str, last, i)}${`\\u${point.toString(16)}`}`;
528+
last = i + 1;
517529
}
518530
}
519531

520-
if (last !== lastIndex) {
532+
if (last !== str.length) {
521533
result += StringPrototypeSlice(str, last);
522534
}
523535
return addQuotes(result, singleQuote);

test/parallel/test-util-inspect.js

+42
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,48 @@ assert.strictEqual(util.inspect(Object.create(Date.prototype)), 'Date {}');
837837
);
838838
}
839839

840+
// Escape unpaired (lone) surrogate code points.
841+
{
  // A BMP code unit below the surrogate range (0xD800-0xDFFF). It is placed
  // next to surrogates to verify that ordinary neighbors are left untouched.
  const edgeChar = String.fromCharCode(0xd799);

  // Cover every surrogate code unit. The bound is inclusive so that the last
  // low surrogate (0xDFFF) is exercised as well.
  for (let charCode = 0xD800; charCode <= 0xDFFF; charCode++) {
    const surrogate = String.fromCharCode(charCode);

    // A lone surrogate is escaped on its own ...
    assert.strictEqual(
      util.inspect(surrogate),
      `'\\u${charCode.toString(16)}'`
    );
    // ... at the end of a long string ...
    assert.strictEqual(
      util.inspect(`${'a'.repeat(200)}${surrogate}`),
      `'${'a'.repeat(200)}\\u${charCode.toString(16)}'`
    );
    // ... and at the start of a long string.
    assert.strictEqual(
      util.inspect(`${surrogate}${'a'.repeat(200)}`),
      `'\\u${charCode.toString(16)}${'a'.repeat(200)}'`
    );
    if (charCode < 0xdc00) {
      // High surrogate: adding 1024 (0x400) yields a low surrogate, so the
      // two together form a valid pair.
      const highSurrogate = surrogate;
      const lowSurrogate = String.fromCharCode(charCode + 1024);
      // A valid pair must not be escaped.
      assert(
        !util.inspect(
          `${edgeChar}${highSurrogate}${lowSurrogate}${edgeChar}`
        ).includes('\\u')
      );
      // Only the leading, unpaired high surrogate is escaped; the pair that
      // follows it is kept as is.
      assert.strictEqual(
        (util.inspect(
          `${highSurrogate}${highSurrogate}${lowSurrogate}`
        ).match(/\\u/g) ?? []).length,
        1
      );
    } else {
      // A low surrogate without a preceding high surrogate is escaped, while
      // the surrounding characters are printed unchanged.
      assert.strictEqual(
        util.inspect(`${edgeChar}${surrogate}${edgeChar}`),
        `'${edgeChar}\\u${charCode.toString(16)}${edgeChar}'`
      );
    }
  }
}
881+
840882
// Test util.inspect.styles and util.inspect.colors.
841883
{
842884
function testColorStyle(style, input, implicit) {

0 commit comments

Comments
 (0)