Skip to content

Commit 1640ded

Browse files
committed
src: fix ucs-2 buffer encoding regression
StringBytes::Write() did a plain memcpy() when is_extern is true but that's wrong when the source is a two-byte string and the destination a one-byte or UTF-8 string. The impact is limited to strings > 1,031,913 bytes because those are normally the only strings that are externalized, although the use of the 'externalize strings' extension (--expose_externalize_string) can also trigger it. This commit also cleans up the bytes versus characters confusion in StringBytes::Write() because that was closely intertwined with the UCS-2 encoding regression. One wasn't fixable without the other. Fixes: #1024 Fixes: nodejs/node-v0.x-archive#8683 PR-URL: #1042 Reviewed-By: Trevor Norris <[email protected]>
1 parent 2eda2d6 commit 1640ded

File tree

3 files changed

+49
-39
lines changed

3 files changed

+49
-39
lines changed

src/node_buffer.cc

-3
Original file line numberDiff line numberDiff line change
@@ -415,9 +415,6 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
415415
if (max_length == 0)
416416
return args.GetReturnValue().Set(0);
417417

418-
if (encoding == UCS2)
419-
max_length = max_length / 2;
420-
421418
if (offset >= obj_length)
422419
return env->ThrowRangeError("Offset is out of bounds");
423420

src/string_bytes.cc

+36-36
Original file line numberDiff line numberDiff line change
@@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate,
279279
int* chars_written) {
280280
HandleScope scope(isolate);
281281
const char* data = nullptr;
282-
size_t len = 0;
283-
bool is_extern = GetExternalParts(isolate, val, &data, &len);
284-
size_t extlen = len;
282+
size_t nbytes = 0;
283+
const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes);
284+
const size_t external_nbytes = nbytes;
285285

286286
CHECK(val->IsString() == true);
287287
Local<String> str = val.As<String>();
288-
len = len < buflen ? len : buflen;
288+
289+
if (nbytes > buflen)
290+
nbytes = buflen;
289291

290292
int flags = String::HINT_MANY_WRITES_EXPECTED |
291293
String::NO_NULL_TERMINATION |
@@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate,
295297
case ASCII:
296298
case BINARY:
297299
case BUFFER:
298-
if (is_extern)
299-
memcpy(buf, data, len);
300-
else
301-
len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf),
302-
0,
303-
buflen,
304-
flags);
300+
if (is_extern && str->IsOneByte()) {
301+
memcpy(buf, data, nbytes);
302+
} else {
303+
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
304+
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
305+
}
305306
if (chars_written != nullptr)
306-
*chars_written = len;
307+
*chars_written = nbytes;
307308
break;
308309

309310
case UTF8:
310-
if (is_extern)
311-
// TODO(tjfontaine) should this validate invalid surrogate pairs as
312-
// well?
313-
memcpy(buf, data, len);
314-
else
315-
len = str->WriteUtf8(buf, buflen, chars_written, flags);
311+
nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
316312
break;
317313

318-
case UCS2:
319-
if (is_extern)
320-
memcpy(buf, data, len);
321-
else
322-
len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags);
314+
case UCS2: {
315+
uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
316+
size_t nchars;
317+
if (is_extern && !str->IsOneByte()) {
318+
memcpy(buf, data, nbytes);
319+
nchars = nbytes / sizeof(*dst);
320+
} else {
321+
nchars = buflen / sizeof(*dst);
322+
nchars = str->Write(dst, 0, nchars, flags);
323+
nbytes = nchars * sizeof(*dst);
324+
}
323325
if (IsBigEndian()) {
324326
// Node's "ucs2" encoding wants LE character data stored in
325327
// the Buffer, so we need to reorder on BE platforms. See
326328
// http://nodejs.org/api/buffer.html regarding Node's "ucs2"
327329
// encoding specification
328-
uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf);
329-
for (size_t i = 0; i < len; i++) {
330-
buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8);
331-
}
330+
for (size_t i = 0; i < nchars; i++)
331+
dst[i] = dst[i] << 8 | dst[i] >> 8;
332332
}
333333
if (chars_written != nullptr)
334-
*chars_written = len;
335-
len = len * sizeof(uint16_t);
334+
*chars_written = nchars;
336335
break;
336+
}
337337

338338
case BASE64:
339339
if (is_extern) {
340-
len = base64_decode(buf, buflen, data, extlen);
340+
nbytes = base64_decode(buf, buflen, data, external_nbytes);
341341
} else {
342342
String::Value value(str);
343-
len = base64_decode(buf, buflen, *value, value.length());
343+
nbytes = base64_decode(buf, buflen, *value, value.length());
344344
}
345345
if (chars_written != nullptr) {
346-
*chars_written = len;
346+
*chars_written = nbytes;
347347
}
348348
break;
349349

350350
case HEX:
351351
if (is_extern) {
352-
len = hex_decode(buf, buflen, data, extlen);
352+
nbytes = hex_decode(buf, buflen, data, external_nbytes);
353353
} else {
354354
String::Value value(str);
355-
len = hex_decode(buf, buflen, *value, value.length());
355+
nbytes = hex_decode(buf, buflen, *value, value.length());
356356
}
357357
if (chars_written != nullptr) {
358-
*chars_written = len * 2;
358+
*chars_written = nbytes;
359359
}
360360
break;
361361

@@ -364,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate,
364364
break;
365365
}
366366

367-
return len;
367+
return nbytes;
368368
}
369369

370370

test/parallel/test-stringbytes-external.js

+13
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,16 @@ var PRE_3OF4_APEX = Math.ceil((EXTERN_APEX / 4) * 3) - RADIOS;
106106
}
107107
}
108108
})();
109+
110+
// https://github.com/iojs/io.js/issues/1024
111+
(function() {
112+
var a = Array(1 << 20).join('x');
113+
var b = Buffer(a, 'ucs2').toString('ucs2');
114+
var c = Buffer(b, 'utf8').toString('utf8');
115+
116+
assert.equal(a.length, b.length);
117+
assert.equal(b.length, c.length);
118+
119+
assert.equal(a, b);
120+
assert.equal(b, c);
121+
})();

0 commit comments

Comments
 (0)