Skip to content

Commit 34198a2

Browse files
bnoordhuisMayaLekova
authored andcommitted
src: fix fs.write() externalized string handling
* Respect `encoding` argument when the string is externalized. * Copy the string when the write request can outlive the externalized string. This commit removes `StringBytes::GetExternalParts()` because it is fundamentally broken. Fixes: nodejs#18146 PR-URL: nodejs#18216 Reviewed-By: Colin Ihrig <[email protected]> Reviewed-By: James M Snell <[email protected]> Reviewed-By: Joyee Cheung <[email protected]>
1 parent 2a92d0f commit 34198a2

File tree

4 files changed

+105
-108
lines changed

4 files changed

+105
-108
lines changed

src/node_file.cc

+32-18
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
# include <io.h>
4040
#endif
4141

42+
#include <memory>
4243
#include <vector>
4344

4445
namespace node {
@@ -1019,40 +1020,53 @@ static void WriteString(const FunctionCallbackInfo<Value>& args) {
10191020

10201021
CHECK(args[0]->IsInt32());
10211022

1023+
std::unique_ptr<char[]> delete_on_return;
10221024
Local<Value> req;
1023-
Local<Value> string = args[1];
1025+
Local<Value> value = args[1];
10241026
int fd = args[0]->Int32Value();
10251027
char* buf = nullptr;
1026-
int64_t pos;
10271028
size_t len;
1029+
const int64_t pos = GET_OFFSET(args[2]);
1030+
const auto enc = ParseEncoding(env->isolate(), args[3], UTF8);
1031+
const auto is_async = args[4]->IsObject();
1032+
1033+
// Avoid copying the string when it is externalized but only when:
1034+
// 1. The target encoding is compatible with the string's encoding, and
1035+
// 2. The write is synchronous, otherwise the string might get neutered
1036+
// while the request is in flight, and
1037+
// 3. For UCS2, when the host system is little-endian. Big-endian systems
1038+
// need to call StringBytes::Write() to ensure proper byte swapping.
1039+
// The const_casts are conceptually sound: memory is read but not written.
1040+
if (!is_async && value->IsString()) {
1041+
auto string = value.As<String>();
1042+
if ((enc == ASCII || enc == LATIN1) && string->IsExternalOneByte()) {
1043+
auto ext = string->GetExternalOneByteStringResource();
1044+
buf = const_cast<char*>(ext->data());
1045+
len = ext->length();
1046+
} else if (enc == UCS2 && IsLittleEndian() && string->IsExternal()) {
1047+
auto ext = string->GetExternalStringResource();
1048+
buf = reinterpret_cast<char*>(const_cast<uint16_t*>(ext->data()));
1049+
len = ext->length() * sizeof(*ext->data());
1050+
}
1051+
}
10281052

1029-
// will assign buf and len if string was external
1030-
if (!StringBytes::GetExternalParts(string,
1031-
const_cast<const char**>(&buf),
1032-
&len)) {
1033-
enum encoding enc = ParseEncoding(env->isolate(), args[3], UTF8);
1034-
len = StringBytes::StorageSize(env->isolate(), string, enc);
1053+
if (buf == nullptr) {
1054+
len = StringBytes::StorageSize(env->isolate(), value, enc);
10351055
buf = new char[len];
1056+
// SYNC_CALL returns on error. Make sure to always free the memory.
1057+
if (!is_async) delete_on_return.reset(buf);
10361058
// StorageSize may return too large a char, so correct the actual length
10371059
// by the write size
10381060
len = StringBytes::Write(env->isolate(), buf, len, args[1], enc);
10391061
}
1040-
pos = GET_OFFSET(args[2]);
10411062

1042-
uv_buf_t uvbuf = uv_buf_init(const_cast<char*>(buf), len);
1063+
uv_buf_t uvbuf = uv_buf_init(buf, len);
10431064

1044-
if (args[4]->IsObject()) {
1065+
if (is_async) {
10451066
CHECK_EQ(args.Length(), 5);
10461067
AsyncCall(env, args, "write", UTF8, AfterInteger,
10471068
uv_fs_write, fd, &uvbuf, 1, pos);
10481069
} else {
1049-
// SYNC_CALL returns on error. Make sure to always free the memory.
1050-
struct Delete {
1051-
inline explicit Delete(char* pointer) : pointer_(pointer) {}
1052-
inline ~Delete() { delete[] pointer_; }
1053-
char* const pointer_;
1054-
};
1055-
Delete delete_on_return(buf);
10561070
SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos)
10571071
return args.GetReturnValue().Set(SYNC_RESULT);
10581072
}

src/string_bytes.cc

+29-84
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828
#include <limits.h>
2929
#include <string.h> // memcpy
30+
31+
#include <algorithm>
3032
#include <vector>
3133

3234
// When creating strings >= this length v8's gc spins up and consumes
@@ -269,39 +271,6 @@ static size_t hex_decode(char* buf,
269271
}
270272

271273

272-
bool StringBytes::GetExternalParts(Local<Value> val,
273-
const char** data,
274-
size_t* len) {
275-
if (Buffer::HasInstance(val)) {
276-
*data = Buffer::Data(val);
277-
*len = Buffer::Length(val);
278-
return true;
279-
}
280-
281-
if (!val->IsString())
282-
return false;
283-
284-
Local<String> str = val.As<String>();
285-
286-
if (str->IsExternalOneByte()) {
287-
const String::ExternalOneByteStringResource* ext;
288-
ext = str->GetExternalOneByteStringResource();
289-
*data = ext->data();
290-
*len = ext->length();
291-
return true;
292-
293-
} else if (str->IsExternal()) {
294-
const String::ExternalStringResource* ext;
295-
ext = str->GetExternalStringResource();
296-
*data = reinterpret_cast<const char*>(ext->data());
297-
*len = ext->length() * sizeof(*ext->data());
298-
return true;
299-
}
300-
301-
return false;
302-
}
303-
304-
305274
size_t StringBytes::WriteUCS2(char* buf,
306275
size_t buflen,
307276
Local<String> str,
@@ -347,32 +316,31 @@ size_t StringBytes::Write(Isolate* isolate,
347316
enum encoding encoding,
348317
int* chars_written) {
349318
HandleScope scope(isolate);
350-
const char* data = nullptr;
351-
size_t nbytes = 0;
352-
const bool is_extern = GetExternalParts(val, &data, &nbytes);
353-
const size_t external_nbytes = nbytes;
319+
size_t nbytes;
320+
int nchars;
321+
322+
if (chars_written == nullptr)
323+
chars_written = &nchars;
354324

355325
CHECK(val->IsString() == true);
356326
Local<String> str = val.As<String>();
357327

358-
if (nbytes > buflen)
359-
nbytes = buflen;
360-
361328
int flags = String::HINT_MANY_WRITES_EXPECTED |
362329
String::NO_NULL_TERMINATION |
363330
String::REPLACE_INVALID_UTF8;
364331

365332
switch (encoding) {
366333
case ASCII:
367334
case LATIN1:
368-
if (is_extern && str->IsOneByte()) {
369-
memcpy(buf, data, nbytes);
335+
if (str->IsExternalOneByte()) {
336+
auto ext = str->GetExternalOneByteStringResource();
337+
nbytes = std::min(buflen, ext->length());
338+
memcpy(buf, ext->data(), nbytes);
370339
} else {
371340
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
372341
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
373342
}
374-
if (chars_written != nullptr)
375-
*chars_written = nbytes;
343+
*chars_written = nbytes;
376344
break;
377345

378346
case BUFFER:
@@ -383,14 +351,8 @@ size_t StringBytes::Write(Isolate* isolate,
383351
case UCS2: {
384352
size_t nchars;
385353

386-
if (is_extern && !str->IsOneByte()) {
387-
memcpy(buf, data, nbytes);
388-
nchars = nbytes / sizeof(uint16_t);
389-
} else {
390-
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
391-
}
392-
if (chars_written != nullptr)
393-
*chars_written = nchars;
354+
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
355+
*chars_written = static_cast<int>(nchars);
394356

395357
// Node's "ucs2" encoding wants LE character data stored in
396358
// the Buffer, so we need to reorder on BE platforms. See
@@ -403,27 +365,25 @@ size_t StringBytes::Write(Isolate* isolate,
403365
}
404366

405367
case BASE64:
406-
if (is_extern) {
407-
nbytes = base64_decode(buf, buflen, data, external_nbytes);
368+
if (str->IsExternalOneByte()) {
369+
auto ext = str->GetExternalOneByteStringResource();
370+
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
408371
} else {
409372
String::Value value(str);
410373
nbytes = base64_decode(buf, buflen, *value, value.length());
411374
}
412-
if (chars_written != nullptr) {
413-
*chars_written = nbytes;
414-
}
375+
*chars_written = nbytes;
415376
break;
416377

417378
case HEX:
418-
if (is_extern) {
419-
nbytes = hex_decode(buf, buflen, data, external_nbytes);
379+
if (str->IsExternalOneByte()) {
380+
auto ext = str->GetExternalOneByteStringResource();
381+
nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
420382
} else {
421383
String::Value value(str);
422384
nbytes = hex_decode(buf, buflen, *value, value.length());
423385
}
424-
if (chars_written != nullptr) {
425-
*chars_written = nbytes;
426-
}
386+
*chars_written = nbytes;
427387
break;
428388

429389
default:
@@ -500,49 +460,34 @@ size_t StringBytes::Size(Isolate* isolate,
500460
Local<Value> val,
501461
enum encoding encoding) {
502462
HandleScope scope(isolate);
503-
size_t data_size = 0;
504-
bool is_buffer = Buffer::HasInstance(val);
505463

506-
if (is_buffer && (encoding == BUFFER || encoding == LATIN1))
464+
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
507465
return Buffer::Length(val);
508466

509-
const char* data;
510-
if (GetExternalParts(val, &data, &data_size))
511-
return data_size;
512-
513467
Local<String> str = val->ToString(isolate);
514468

515469
switch (encoding) {
516470
case ASCII:
517471
case LATIN1:
518-
data_size = str->Length();
519-
break;
472+
return str->Length();
520473

521474
case BUFFER:
522475
case UTF8:
523-
data_size = str->Utf8Length();
524-
break;
476+
return str->Utf8Length();
525477

526478
case UCS2:
527-
data_size = str->Length() * sizeof(uint16_t);
528-
break;
479+
return str->Length() * sizeof(uint16_t);
529480

530481
case BASE64: {
531482
String::Value value(str);
532-
data_size = base64_decoded_size(*value, value.length());
533-
break;
483+
return base64_decoded_size(*value, value.length());
534484
}
535485

536486
case HEX:
537-
data_size = str->Length() / 2;
538-
break;
539-
540-
default:
541-
CHECK(0 && "unknown encoding");
542-
break;
487+
return str->Length() / 2;
543488
}
544489

545-
return data_size;
490+
UNREACHABLE();
546491
}
547492

548493

src/string_bytes.h

-6
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,6 @@ class StringBytes {
8181
v8::Local<v8::Value> val,
8282
enum encoding enc);
8383

84-
// If the string is external then assign external properties to data and len,
85-
// then return true. If not return false.
86-
static bool GetExternalParts(v8::Local<v8::Value> val,
87-
const char** data,
88-
size_t* len);
89-
9084
// Write the bytes from the string or buffer into the char*
9185
// returns the number of bytes written, which will always be
9286
// <= buflen. Use StorageSize/Size first to know how much

test/parallel/test-fs-write.js

+44
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2020
// USE OR OTHER DEALINGS IN THE SOFTWARE.
2121

22+
// Flags: --expose_externalize_string
2223
'use strict';
2324
const common = require('../common');
2425
const assert = require('assert');
@@ -30,6 +31,49 @@ const fn3 = path.join(common.tmpDir, 'write3.txt');
3031
const expected = 'ümlaut.';
3132
const constants = fs.constants;
3233

34+
/* eslint-disable no-undef */
35+
common.allowGlobals(externalizeString, isOneByteString, x);
36+
37+
{
38+
const expected = 'ümlaut eins'; // Must be a unique string.
39+
externalizeString(expected);
40+
assert.strictEqual(true, isOneByteString(expected));
41+
const fd = fs.openSync(fn, 'w');
42+
fs.writeSync(fd, expected, 0, 'latin1');
43+
fs.closeSync(fd);
44+
assert.strictEqual(expected, fs.readFileSync(fn, 'latin1'));
45+
}
46+
47+
{
48+
const expected = 'ümlaut zwei'; // Must be a unique string.
49+
externalizeString(expected);
50+
assert.strictEqual(true, isOneByteString(expected));
51+
const fd = fs.openSync(fn, 'w');
52+
fs.writeSync(fd, expected, 0, 'utf8');
53+
fs.closeSync(fd);
54+
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
55+
}
56+
57+
{
58+
const expected = '中文 1'; // Must be a unique string.
59+
externalizeString(expected);
60+
assert.strictEqual(false, isOneByteString(expected));
61+
const fd = fs.openSync(fn, 'w');
62+
fs.writeSync(fd, expected, 0, 'ucs2');
63+
fs.closeSync(fd);
64+
assert.strictEqual(expected, fs.readFileSync(fn, 'ucs2'));
65+
}
66+
67+
{
68+
const expected = '中文 2'; // Must be a unique string.
69+
externalizeString(expected);
70+
assert.strictEqual(false, isOneByteString(expected));
71+
const fd = fs.openSync(fn, 'w');
72+
fs.writeSync(fd, expected, 0, 'utf8');
73+
fs.closeSync(fd);
74+
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
75+
}
76+
3377
common.refreshTmpDir();
3478

3579
fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) {

0 commit comments

Comments
 (0)