Skip to content

Commit b67bf38

Browse files
bnoordhuisBethGriggs
authored andcommitted
src: fix fs.write() externalized string handling
* Respect `encoding` argument when the string is externalized. * Copy the string when the write request can outlive the externalized string. This commit removes `StringBytes::GetExternalParts()` because it is fundamentally broken. Fixes: #18146 Fixes: #22728 Backport-PR-URL: #22731 PR-URL: #18216 Reviewed-By: James M Snell <[email protected]>
1 parent ca8d4e3 commit b67bf38

File tree

4 files changed

+106
-107
lines changed

4 files changed

+106
-107
lines changed

src/node_file.cc

+33-17
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
# include <io.h>
3838
#endif
3939

40+
#include <memory>
4041
#include <vector>
4142

4243
namespace node {
@@ -1127,39 +1128,54 @@ static void WriteString(const FunctionCallbackInfo<Value>& args) {
11271128
if (!args[0]->IsInt32())
11281129
return env->ThrowTypeError("First argument must be file descriptor");
11291130

1131+
std::unique_ptr<char[]> delete_on_return;
11301132
Local<Value> req;
1131-
Local<Value> string = args[1];
1133+
Local<Value> value = args[1];
11321134
int fd = args[0]->Int32Value();
11331135
char* buf = nullptr;
1134-
int64_t pos;
11351136
size_t len;
11361137
FSReqWrap::Ownership ownership = FSReqWrap::COPY;
11371138

1138-
// will assign buf and len if string was external
1139-
if (!StringBytes::GetExternalParts(string,
1140-
const_cast<const char**>(&buf),
1141-
&len)) {
1142-
enum encoding enc = ParseEncoding(env->isolate(), args[3], UTF8);
1143-
len = StringBytes::StorageSize(env->isolate(), string, enc);
1139+
const int64_t pos = GET_OFFSET(args[2]);
1140+
const auto enc = ParseEncoding(env->isolate(), args[3], UTF8);
1141+
const auto is_async = args[4]->IsObject();
1142+
1143+
// Avoid copying the string when it is externalized but only when:
1144+
// 1. The target encoding is compatible with the string's encoding, and
1145+
// 2. The write is synchronous, otherwise the string might get neutered
1146+
// while the request is in flight, and
1147+
// 3. For UCS2, when the host system is little-endian. Big-endian systems
1148+
// need to call StringBytes::Write() to ensure proper byte swapping.
1149+
// The const_casts are conceptually sound: memory is read but not written.
1150+
if (!is_async && value->IsString()) {
1151+
auto string = value.As<String>();
1152+
if ((enc == ASCII || enc == LATIN1) && string->IsExternalOneByte()) {
1153+
auto ext = string->GetExternalOneByteStringResource();
1154+
buf = const_cast<char*>(ext->data());
1155+
len = ext->length();
1156+
} else if (enc == UCS2 && IsLittleEndian() && string->IsExternal()) {
1157+
auto ext = string->GetExternalStringResource();
1158+
buf = reinterpret_cast<char*>(const_cast<uint16_t*>(ext->data()));
1159+
len = ext->length() * sizeof(*ext->data());
1160+
}
1161+
}
1162+
1163+
if (buf == nullptr) {
1164+
len = StringBytes::StorageSize(env->isolate(), value, enc);
11441165
buf = new char[len];
1166+
// SYNC_CALL returns on error. Make sure to always free the memory.
1167+
if (!is_async) delete_on_return.reset(buf);
11451168
// StorageSize may return too large a char, so correct the actual length
11461169
// by the write size
11471170
len = StringBytes::Write(env->isolate(), buf, len, args[1], enc);
11481171
ownership = FSReqWrap::MOVE;
11491172
}
1150-
pos = GET_OFFSET(args[2]);
1173+
11511174
req = args[4];
11521175

1153-
uv_buf_t uvbuf = uv_buf_init(const_cast<char*>(buf), len);
1176+
uv_buf_t uvbuf = uv_buf_init(buf, len);
11541177

11551178
if (!req->IsObject()) {
1156-
// SYNC_CALL returns on error. Make sure to always free the memory.
1157-
struct Delete {
1158-
inline explicit Delete(char* pointer) : pointer_(pointer) {}
1159-
inline ~Delete() { delete[] pointer_; }
1160-
char* const pointer_;
1161-
};
1162-
Delete delete_on_return(ownership == FSReqWrap::MOVE ? buf : nullptr);
11631179
SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos)
11641180
return args.GetReturnValue().Set(SYNC_RESULT);
11651181
}

src/string_bytes.cc

+29-84
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828
#include <limits.h>
2929
#include <string.h> // memcpy
30+
31+
#include <algorithm>
3032
#include <vector>
3133

3234
// When creating strings >= this length v8's gc spins up and consumes
@@ -269,39 +271,6 @@ static size_t hex_decode(char* buf,
269271
}
270272

271273

272-
bool StringBytes::GetExternalParts(Local<Value> val,
273-
const char** data,
274-
size_t* len) {
275-
if (Buffer::HasInstance(val)) {
276-
*data = Buffer::Data(val);
277-
*len = Buffer::Length(val);
278-
return true;
279-
}
280-
281-
if (!val->IsString())
282-
return false;
283-
284-
Local<String> str = val.As<String>();
285-
286-
if (str->IsExternalOneByte()) {
287-
const String::ExternalOneByteStringResource* ext;
288-
ext = str->GetExternalOneByteStringResource();
289-
*data = ext->data();
290-
*len = ext->length();
291-
return true;
292-
293-
} else if (str->IsExternal()) {
294-
const String::ExternalStringResource* ext;
295-
ext = str->GetExternalStringResource();
296-
*data = reinterpret_cast<const char*>(ext->data());
297-
*len = ext->length() * sizeof(*ext->data());
298-
return true;
299-
}
300-
301-
return false;
302-
}
303-
304-
305274
size_t StringBytes::WriteUCS2(char* buf,
306275
size_t buflen,
307276
Local<String> str,
@@ -351,32 +320,31 @@ size_t StringBytes::Write(Isolate* isolate,
351320
enum encoding encoding,
352321
int* chars_written) {
353322
HandleScope scope(isolate);
354-
const char* data = nullptr;
355-
size_t nbytes = 0;
356-
const bool is_extern = GetExternalParts(val, &data, &nbytes);
357-
const size_t external_nbytes = nbytes;
323+
size_t nbytes;
324+
int nchars;
325+
326+
if (chars_written == nullptr)
327+
chars_written = &nchars;
358328

359329
CHECK(val->IsString() == true);
360330
Local<String> str = val.As<String>();
361331

362-
if (nbytes > buflen)
363-
nbytes = buflen;
364-
365332
int flags = String::HINT_MANY_WRITES_EXPECTED |
366333
String::NO_NULL_TERMINATION |
367334
String::REPLACE_INVALID_UTF8;
368335

369336
switch (encoding) {
370337
case ASCII:
371338
case LATIN1:
372-
if (is_extern && str->IsOneByte()) {
373-
memcpy(buf, data, nbytes);
339+
if (str->IsExternalOneByte()) {
340+
auto ext = str->GetExternalOneByteStringResource();
341+
nbytes = std::min(buflen, ext->length());
342+
memcpy(buf, ext->data(), nbytes);
374343
} else {
375344
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
376345
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
377346
}
378-
if (chars_written != nullptr)
379-
*chars_written = nbytes;
347+
*chars_written = nbytes;
380348
break;
381349

382350
case BUFFER:
@@ -387,14 +355,8 @@ size_t StringBytes::Write(Isolate* isolate,
387355
case UCS2: {
388356
size_t nchars;
389357

390-
if (is_extern && !str->IsOneByte()) {
391-
memcpy(buf, data, nbytes);
392-
nchars = nbytes / sizeof(uint16_t);
393-
} else {
394-
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
395-
}
396-
if (chars_written != nullptr)
397-
*chars_written = nchars;
358+
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
359+
*chars_written = static_cast<int>(nchars);
398360

399361
// Node's "ucs2" encoding wants LE character data stored in
400362
// the Buffer, so we need to reorder on BE platforms. See
@@ -407,27 +369,25 @@ size_t StringBytes::Write(Isolate* isolate,
407369
}
408370

409371
case BASE64:
410-
if (is_extern) {
411-
nbytes = base64_decode(buf, buflen, data, external_nbytes);
372+
if (str->IsExternalOneByte()) {
373+
auto ext = str->GetExternalOneByteStringResource();
374+
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
412375
} else {
413376
String::Value value(str);
414377
nbytes = base64_decode(buf, buflen, *value, value.length());
415378
}
416-
if (chars_written != nullptr) {
417-
*chars_written = nbytes;
418-
}
379+
*chars_written = nbytes;
419380
break;
420381

421382
case HEX:
422-
if (is_extern) {
423-
nbytes = hex_decode(buf, buflen, data, external_nbytes);
383+
if (str->IsExternalOneByte()) {
384+
auto ext = str->GetExternalOneByteStringResource();
385+
nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
424386
} else {
425387
String::Value value(str);
426388
nbytes = hex_decode(buf, buflen, *value, value.length());
427389
}
428-
if (chars_written != nullptr) {
429-
*chars_written = nbytes;
430-
}
390+
*chars_written = nbytes;
431391
break;
432392

433393
default:
@@ -504,49 +464,34 @@ size_t StringBytes::Size(Isolate* isolate,
504464
Local<Value> val,
505465
enum encoding encoding) {
506466
HandleScope scope(isolate);
507-
size_t data_size = 0;
508-
bool is_buffer = Buffer::HasInstance(val);
509467

510-
if (is_buffer && (encoding == BUFFER || encoding == LATIN1))
468+
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
511469
return Buffer::Length(val);
512470

513-
const char* data;
514-
if (GetExternalParts(val, &data, &data_size))
515-
return data_size;
516-
517471
Local<String> str = val->ToString(isolate);
518472

519473
switch (encoding) {
520474
case ASCII:
521475
case LATIN1:
522-
data_size = str->Length();
523-
break;
476+
return str->Length();
524477

525478
case BUFFER:
526479
case UTF8:
527-
data_size = str->Utf8Length();
528-
break;
480+
return str->Utf8Length();
529481

530482
case UCS2:
531-
data_size = str->Length() * sizeof(uint16_t);
532-
break;
483+
return str->Length() * sizeof(uint16_t);
533484

534485
case BASE64: {
535486
String::Value value(str);
536-
data_size = base64_decoded_size(*value, value.length());
537-
break;
487+
return base64_decoded_size(*value, value.length());
538488
}
539489

540490
case HEX:
541-
data_size = str->Length() / 2;
542-
break;
543-
544-
default:
545-
CHECK(0 && "unknown encoding");
546-
break;
491+
return str->Length() / 2;
547492
}
548493

549-
return data_size;
494+
UNREACHABLE();
550495
}
551496

552497

src/string_bytes.h

-6
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,6 @@ class StringBytes {
8181
v8::Local<v8::Value> val,
8282
enum encoding enc);
8383

84-
// If the string is external then assign external properties to data and len,
85-
// then return true. If not return false.
86-
static bool GetExternalParts(v8::Local<v8::Value> val,
87-
const char** data,
88-
size_t* len);
89-
9084
// Write the bytes from the string or buffer into the char*
9185
// returns the number of bytes written, which will always be
9286
// <= buflen. Use StorageSize/Size first to know how much

test/parallel/test-fs-write.js

+44
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2020
// USE OR OTHER DEALINGS IN THE SOFTWARE.
2121

22+
// Flags: --expose_externalize_string
2223
'use strict';
2324
const common = require('../common');
2425
const assert = require('assert');
@@ -34,6 +35,49 @@ const fn3 = path.join(tmpdir.path, 'write3.txt');
3435
const expected = 'ümlaut.';
3536
const constants = fs.constants;
3637

38+
/* eslint-disable no-undef */
39+
common.allowGlobals(externalizeString, isOneByteString, x);
40+
41+
{
42+
const expected = 'ümlaut eins'; // Must be a unique string.
43+
externalizeString(expected);
44+
assert.strictEqual(true, isOneByteString(expected));
45+
const fd = fs.openSync(fn, 'w');
46+
fs.writeSync(fd, expected, 0, 'latin1');
47+
fs.closeSync(fd);
48+
assert.strictEqual(expected, fs.readFileSync(fn, 'latin1'));
49+
}
50+
51+
{
52+
const expected = 'ümlaut zwei'; // Must be a unique string.
53+
externalizeString(expected);
54+
assert.strictEqual(true, isOneByteString(expected));
55+
const fd = fs.openSync(fn, 'w');
56+
fs.writeSync(fd, expected, 0, 'utf8');
57+
fs.closeSync(fd);
58+
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
59+
}
60+
61+
{
62+
const expected = '中文 1'; // Must be a unique string.
63+
externalizeString(expected);
64+
assert.strictEqual(false, isOneByteString(expected));
65+
const fd = fs.openSync(fn, 'w');
66+
fs.writeSync(fd, expected, 0, 'ucs2');
67+
fs.closeSync(fd);
68+
assert.strictEqual(expected, fs.readFileSync(fn, 'ucs2'));
69+
}
70+
71+
{
72+
const expected = '中文 2'; // Must be a unique string.
73+
externalizeString(expected);
74+
assert.strictEqual(false, isOneByteString(expected));
75+
const fd = fs.openSync(fn, 'w');
76+
fs.writeSync(fd, expected, 0, 'utf8');
77+
fs.closeSync(fd);
78+
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
79+
}
80+
3781
fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) {
3882
assert.ifError(err);
3983

0 commit comments

Comments
 (0)