Skip to content

Commit c228807

Browse files
Mert Can Altintargos
Mert Can Altin
authored and committed
node-api: add support for UTF-8 and Latin-1 property keys
PR-URL: #52984 Reviewed-By: James M Snell <[email protected]> Reviewed-By: Chengzhong Wu <[email protected]> Reviewed-By: Vladimir Morozov <[email protected]>
1 parent 84d740f commit c228807

File tree

5 files changed

+224
-129
lines changed

5 files changed

+224
-129
lines changed

doc/api/n-api.md

+77-11
Original file line numberDiff line numberDiff line change
@@ -3081,6 +3081,54 @@ The native string is copied.
30813081
The JavaScript `string` type is described in
30823082
[Section 6.1.4][] of the ECMAScript Language Specification.
30833083

3084+
### Functions to create optimized property keys
3085+
3086+
Many JavaScript engines including V8 use internalized strings as keys
3087+
to set and get property values. They typically use a hash table to create
3088+
and look up such strings. While it adds some cost per key creation, it improves
3089+
the performance after that by enabling comparison of string pointers instead
3090+
of the whole strings.
3091+
3092+
If a new JavaScript string is intended to be used as a property key, then for
3093+
some JavaScript engines it will be more efficient to use the functions in this
3094+
section. Otherwise, use the `napi_create_string_utf8` or
3095+
`node_api_create_external_string_utf8` series of functions as there may be
3096+
additional overhead in creating/storing strings with the property key
3097+
creation methods.
3098+
3099+
#### `node_api_create_property_key_latin1`
3100+
3101+
<!-- YAML
3102+
added: REPLACEME
3103+
-->
3104+
3105+
> Stability: 1 - Experimental
3106+
3107+
```c
3108+
napi_status NAPI_CDECL node_api_create_property_key_latin1(napi_env env,
3109+
const char* str,
3110+
size_t length,
3111+
napi_value* result);
3112+
```
3113+
3114+
* `[in] env`: The environment that the API is invoked under.
3115+
* `[in] str`: Character buffer representing an ISO-8859-1-encoded string.
3116+
* `[in] length`: The length of the string in bytes, or `NAPI_AUTO_LENGTH` if it
3117+
is null-terminated.
3118+
* `[out] result`: A `napi_value` representing an optimized JavaScript `string`
3119+
to be used as a property key for objects.
3120+
3121+
Returns `napi_ok` if the API succeeded.
3122+
3123+
This API creates an optimized JavaScript `string` value from
3124+
an ISO-8859-1-encoded C string to be used as a property key for objects.
3125+
The native string is copied. In contrast with `napi_create_string_latin1`,
3126+
subsequent calls to this function with the same `str` pointer may benefit from a speedup
3127+
in the creation of the requested `napi_value`, depending on the engine.
3128+
3129+
The JavaScript `string` type is described in
3130+
[Section 6.1.4][] of the ECMAScript Language Specification.
3131+
30843132
#### `node_api_create_property_key_utf16`
30853133

30863134
<!-- YAML
@@ -3109,18 +3157,36 @@ This API creates an optimized JavaScript `string` value from
31093157
a UTF16-LE-encoded C string to be used as a property key for objects.
31103158
The native string is copied.
31113159

3112-
Many JavaScript engines including V8 use internalized strings as keys
3113-
to set and get property values. They typically use a hash table to create
3114-
and lookup such strings. While it adds some cost per key creation, it improves
3115-
the performance after that by enabling comparison of string pointers instead
3116-
of the whole strings.
3160+
The JavaScript `string` type is described in
3161+
[Section 6.1.4][] of the ECMAScript Language Specification.
31173162

3118-
If a new JavaScript string is intended to be used as a property key, then for
3119-
some JavaScript engines it will be more efficient to use
3120-
the `node_api_create_property_key_utf16` function.
3121-
Otherwise, use the `napi_create_string_utf16` or
3122-
`node_api_create_external_string_utf16` functions as there may be additional
3123-
overhead in creating/storing strings with this method.
3163+
#### `node_api_create_property_key_utf8`
3164+
3165+
<!-- YAML
3166+
added: REPLACEME
3167+
-->
3168+
3169+
> Stability: 1 - Experimental
3170+
3171+
```c
3172+
napi_status NAPI_CDECL node_api_create_property_key_utf8(napi_env env,
3173+
const char* str,
3174+
size_t length,
3175+
napi_value* result);
3176+
```
3177+
3178+
* `[in] env`: The environment that the API is invoked under.
3179+
* `[in] str`: Character buffer representing a UTF8-encoded string.
3180+
* `[in] length`: The length of the string in bytes, or
3181+
`NAPI_AUTO_LENGTH` if it is null-terminated.
3182+
* `[out] result`: A `napi_value` representing an optimized JavaScript `string`
3183+
to be used as a property key for objects.
3184+
3185+
Returns `napi_ok` if the API succeeded.
3186+
3187+
This API creates an optimized JavaScript `string` value from
3188+
a UTF8-encoded C string to be used as a property key for objects.
3189+
The native string is copied.
31243190

31253191
The JavaScript `string` type is described in
31263192
[Section 6.1.4][] of the ECMAScript Language Specification.

src/js_native_api.h

+4
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ node_api_create_external_string_utf16(napi_env env,
114114

115115
#ifdef NAPI_EXPERIMENTAL
116116
#define NODE_API_EXPERIMENTAL_HAS_PROPERTY_KEYS
117+
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_latin1(
118+
napi_env env, const char* str, size_t length, napi_value* result);
119+
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf8(
120+
napi_env env, const char* str, size_t length, napi_value* result);
117121
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf16(
118122
napi_env env, const char16_t* str, size_t length, napi_value* result);
119123
#endif // NAPI_EXPERIMENTAL

src/js_native_api_v8.cc

+24
Original file line numberDiff line numberDiff line change
@@ -1707,6 +1707,30 @@ napi_status NAPI_CDECL node_api_create_external_string_utf16(
17071707
});
17081708
}
17091709

1710+
napi_status node_api_create_property_key_latin1(napi_env env,
1711+
const char* str,
1712+
size_t length,
1713+
napi_value* result) {
1714+
return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) {
1715+
return v8::String::NewFromOneByte(isolate,
1716+
reinterpret_cast<const uint8_t*>(str),
1717+
v8::NewStringType::kInternalized,
1718+
length);
1719+
});
1720+
}
1721+
1722+
napi_status node_api_create_property_key_utf8(napi_env env,
1723+
const char* str,
1724+
size_t length,
1725+
napi_value* result) {
1726+
return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) {
1727+
return v8::String::NewFromUtf8(isolate,
1728+
str,
1729+
v8::NewStringType::kInternalized,
1730+
static_cast<int>(length));
1731+
});
1732+
}
1733+
17101734
napi_status NAPI_CDECL node_api_create_property_key_utf16(napi_env env,
17111735
const char16_t* str,
17121736
size_t length,

test/js-native-api/test_string/test.js

+64-118
Original file line numberDiff line numberDiff line change
@@ -4,131 +4,77 @@ const assert = require('assert');
44

55
// Testing api calls for string
66
const test_string = require(`./build/${common.buildType}/test_string`);
7+
// The insufficient buffer test case allocates a buffer of size 4, including
8+
// the null terminator.
9+
const kInsufficientIdx = 3;
710

8-
const empty = '';
9-
assert.strictEqual(test_string.TestLatin1(empty), empty);
10-
assert.strictEqual(test_string.TestUtf8(empty), empty);
11-
assert.strictEqual(test_string.TestUtf16(empty), empty);
12-
assert.strictEqual(test_string.TestLatin1AutoLength(empty), empty);
13-
assert.strictEqual(test_string.TestUtf8AutoLength(empty), empty);
14-
assert.strictEqual(test_string.TestUtf16AutoLength(empty), empty);
15-
assert.strictEqual(test_string.TestLatin1External(empty), empty);
16-
assert.strictEqual(test_string.TestUtf16External(empty), empty);
17-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(empty), empty);
18-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(empty), empty);
19-
assert.strictEqual(test_string.TestPropertyKeyUtf16(empty), empty);
20-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(empty), empty);
21-
assert.strictEqual(test_string.Utf16Length(empty), 0);
22-
assert.strictEqual(test_string.Utf8Length(empty), 0);
11+
const asciiCases = [
12+
'',
13+
'hello world',
14+
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
15+
'?!@#$%^&*()_+-=[]{}/.,<>\'"\\',
16+
];
2317

24-
const str1 = 'hello world';
25-
assert.strictEqual(test_string.TestLatin1(str1), str1);
26-
assert.strictEqual(test_string.TestUtf8(str1), str1);
27-
assert.strictEqual(test_string.TestUtf16(str1), str1);
28-
assert.strictEqual(test_string.TestLatin1AutoLength(str1), str1);
29-
assert.strictEqual(test_string.TestUtf8AutoLength(str1), str1);
30-
assert.strictEqual(test_string.TestUtf16AutoLength(str1), str1);
31-
assert.strictEqual(test_string.TestLatin1External(str1), str1);
32-
assert.strictEqual(test_string.TestUtf16External(str1), str1);
33-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str1), str1);
34-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str1), str1);
35-
assert.strictEqual(test_string.TestLatin1Insufficient(str1), str1.slice(0, 3));
36-
assert.strictEqual(test_string.TestUtf8Insufficient(str1), str1.slice(0, 3));
37-
assert.strictEqual(test_string.TestUtf16Insufficient(str1), str1.slice(0, 3));
38-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str1), str1);
39-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str1), str1);
40-
assert.strictEqual(test_string.Utf16Length(str1), 11);
41-
assert.strictEqual(test_string.Utf8Length(str1), 11);
18+
const latin1Cases = [
19+
{
20+
str: '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿',
21+
utf8Length: 62,
22+
utf8InsufficientIdx: 1,
23+
},
24+
{
25+
str: 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ',
26+
utf8Length: 126,
27+
utf8InsufficientIdx: 1,
28+
},
29+
];
4230

43-
const str2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
44-
assert.strictEqual(test_string.TestLatin1(str2), str2);
45-
assert.strictEqual(test_string.TestUtf8(str2), str2);
46-
assert.strictEqual(test_string.TestUtf16(str2), str2);
47-
assert.strictEqual(test_string.TestLatin1AutoLength(str2), str2);
48-
assert.strictEqual(test_string.TestUtf8AutoLength(str2), str2);
49-
assert.strictEqual(test_string.TestUtf16AutoLength(str2), str2);
50-
assert.strictEqual(test_string.TestLatin1External(str2), str2);
51-
assert.strictEqual(test_string.TestUtf16External(str2), str2);
52-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str2), str2);
53-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str2), str2);
54-
assert.strictEqual(test_string.TestLatin1Insufficient(str2), str2.slice(0, 3));
55-
assert.strictEqual(test_string.TestUtf8Insufficient(str2), str2.slice(0, 3));
56-
assert.strictEqual(test_string.TestUtf16Insufficient(str2), str2.slice(0, 3));
57-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str2), str2);
58-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str2), str2);
59-
assert.strictEqual(test_string.Utf16Length(str2), 62);
60-
assert.strictEqual(test_string.Utf8Length(str2), 62);
31+
const unicodeCases = [
32+
{
33+
str: '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}',
34+
utf8Length: 14,
35+
utf8InsufficientIdx: 1,
36+
},
37+
];
6138

62-
const str3 = '?!@#$%^&*()_+-=[]{}/.,<>\'"\\';
63-
assert.strictEqual(test_string.TestLatin1(str3), str3);
64-
assert.strictEqual(test_string.TestUtf8(str3), str3);
65-
assert.strictEqual(test_string.TestUtf16(str3), str3);
66-
assert.strictEqual(test_string.TestLatin1AutoLength(str3), str3);
67-
assert.strictEqual(test_string.TestUtf8AutoLength(str3), str3);
68-
assert.strictEqual(test_string.TestUtf16AutoLength(str3), str3);
69-
assert.strictEqual(test_string.TestLatin1External(str3), str3);
70-
assert.strictEqual(test_string.TestUtf16External(str3), str3);
71-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str3), str3);
72-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str3), str3);
73-
assert.strictEqual(test_string.TestLatin1Insufficient(str3), str3.slice(0, 3));
74-
assert.strictEqual(test_string.TestUtf8Insufficient(str3), str3.slice(0, 3));
75-
assert.strictEqual(test_string.TestUtf16Insufficient(str3), str3.slice(0, 3));
76-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str3), str3);
77-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str3), str3);
78-
assert.strictEqual(test_string.Utf16Length(str3), 27);
79-
assert.strictEqual(test_string.Utf8Length(str3), 27);
39+
function testLatin1Cases(str) {
40+
assert.strictEqual(test_string.TestLatin1(str), str);
41+
assert.strictEqual(test_string.TestLatin1AutoLength(str), str);
42+
assert.strictEqual(test_string.TestLatin1External(str), str);
43+
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str), str);
44+
assert.strictEqual(test_string.TestPropertyKeyLatin1(str), str);
45+
assert.strictEqual(test_string.TestPropertyKeyLatin1AutoLength(str), str);
46+
assert.strictEqual(test_string.Latin1Length(str), str.length);
8047

81-
const str4 = '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿';
82-
assert.strictEqual(test_string.TestLatin1(str4), str4);
83-
assert.strictEqual(test_string.TestUtf8(str4), str4);
84-
assert.strictEqual(test_string.TestUtf16(str4), str4);
85-
assert.strictEqual(test_string.TestLatin1AutoLength(str4), str4);
86-
assert.strictEqual(test_string.TestUtf8AutoLength(str4), str4);
87-
assert.strictEqual(test_string.TestUtf16AutoLength(str4), str4);
88-
assert.strictEqual(test_string.TestLatin1External(str4), str4);
89-
assert.strictEqual(test_string.TestUtf16External(str4), str4);
90-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str4), str4);
91-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str4), str4);
92-
assert.strictEqual(test_string.TestLatin1Insufficient(str4), str4.slice(0, 3));
93-
assert.strictEqual(test_string.TestUtf8Insufficient(str4), str4.slice(0, 1));
94-
assert.strictEqual(test_string.TestUtf16Insufficient(str4), str4.slice(0, 3));
95-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str4), str4);
96-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str4), str4);
97-
assert.strictEqual(test_string.Utf16Length(str4), 31);
98-
assert.strictEqual(test_string.Utf8Length(str4), 62);
48+
if (str !== '') {
49+
assert.strictEqual(test_string.TestLatin1Insufficient(str), str.slice(0, kInsufficientIdx));
50+
}
51+
}
9952

100-
const str5 = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ';
101-
assert.strictEqual(test_string.TestLatin1(str5), str5);
102-
assert.strictEqual(test_string.TestUtf8(str5), str5);
103-
assert.strictEqual(test_string.TestUtf16(str5), str5);
104-
assert.strictEqual(test_string.TestLatin1AutoLength(str5), str5);
105-
assert.strictEqual(test_string.TestUtf8AutoLength(str5), str5);
106-
assert.strictEqual(test_string.TestUtf16AutoLength(str5), str5);
107-
assert.strictEqual(test_string.TestLatin1External(str5), str5);
108-
assert.strictEqual(test_string.TestUtf16External(str5), str5);
109-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str5), str5);
110-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str5), str5);
111-
assert.strictEqual(test_string.TestLatin1Insufficient(str5), str5.slice(0, 3));
112-
assert.strictEqual(test_string.TestUtf8Insufficient(str5), str5.slice(0, 1));
113-
assert.strictEqual(test_string.TestUtf16Insufficient(str5), str5.slice(0, 3));
114-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str5), str5);
115-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str5), str5);
116-
assert.strictEqual(test_string.Utf16Length(str5), 63);
117-
assert.strictEqual(test_string.Utf8Length(str5), 126);
53+
function testUnicodeCases(str, utf8Length, utf8InsufficientIdx) {
54+
assert.strictEqual(test_string.TestUtf8(str), str);
55+
assert.strictEqual(test_string.TestUtf16(str), str);
56+
assert.strictEqual(test_string.TestUtf8AutoLength(str), str);
57+
assert.strictEqual(test_string.TestUtf16AutoLength(str), str);
58+
assert.strictEqual(test_string.TestUtf16External(str), str);
59+
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str), str);
60+
assert.strictEqual(test_string.TestPropertyKeyUtf8(str), str);
61+
assert.strictEqual(test_string.TestPropertyKeyUtf8AutoLength(str), str);
62+
assert.strictEqual(test_string.TestPropertyKeyUtf16(str), str);
63+
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str), str);
64+
assert.strictEqual(test_string.Utf8Length(str), utf8Length);
65+
assert.strictEqual(test_string.Utf16Length(str), str.length);
11866

119-
const str6 = '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}';
120-
assert.strictEqual(test_string.TestUtf8(str6), str6);
121-
assert.strictEqual(test_string.TestUtf16(str6), str6);
122-
assert.strictEqual(test_string.TestUtf8AutoLength(str6), str6);
123-
assert.strictEqual(test_string.TestUtf16AutoLength(str6), str6);
124-
assert.strictEqual(test_string.TestUtf16External(str6), str6);
125-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str6), str6);
126-
assert.strictEqual(test_string.TestUtf8Insufficient(str6), str6.slice(0, 1));
127-
assert.strictEqual(test_string.TestUtf16Insufficient(str6), str6.slice(0, 3));
128-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str6), str6);
129-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str6), str6);
130-
assert.strictEqual(test_string.Utf16Length(str6), 5);
131-
assert.strictEqual(test_string.Utf8Length(str6), 14);
67+
if (str !== '') {
68+
assert.strictEqual(test_string.TestUtf8Insufficient(str), str.slice(0, utf8InsufficientIdx));
69+
assert.strictEqual(test_string.TestUtf16Insufficient(str), str.slice(0, kInsufficientIdx));
70+
}
71+
}
72+
73+
asciiCases.forEach(testLatin1Cases);
74+
asciiCases.forEach((str) => testUnicodeCases(str, str.length, kInsufficientIdx));
75+
latin1Cases.forEach((it) => testLatin1Cases(it.str));
76+
latin1Cases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx));
77+
unicodeCases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx));
13278

13379
assert.throws(() => {
13480
test_string.TestLargeUtf8();

0 commit comments

Comments
 (0)