Skip to content

Commit e087042

Browse files
W-A-James and durran authored
fix(NODE-5363): defer byte slicing to utf8 decoding API in nodejs (#585)
Co-authored-by: Durran Jordan <[email protected]>
1 parent 2ea58cf commit e087042

8 files changed

+73
-20
lines changed

etc/benchmarks/bson_versions.json

-4
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,6 @@
11
{
22
"versions": [
3-
"1.1.6",
43
"4.6",
5-
"5.0",
6-
"5.1",
7-
"5.2",
84
"5.3"
95
]
106
}

etc/benchmarks/install_bson_versions.sh

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#!/bin/bash
2-
versions=$(jq '.versions' < bson_versions.json | sed -E 's/(\[|\]|,|")//g')
2+
# To be run from repo root
3+
versions=$(jq '.versions' < etc/benchmarks/bson_versions.json | sed -E 's/(\[|\]|,|")//g')
34
installVersions=''
45
for bson in $versions; do
56
versionNoDot=$(echo $bson | tr -d '.')

etc/benchmarks/main.mjs

+55
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,61 @@ await runner({
129129
}
130130
});
131131

132+
await runner({
133+
skip: true,
134+
name: 'deserialize a large batch of documents each with an array of many Int32s',
135+
iterations,
136+
setup(libs) {
137+
const bson = libs[0].lib;
138+
return bson.serialize({
139+
nextBatch: Array.from({ length: 1000 }, () => ({
140+
_id: new bson.ObjectId(),
141+
arrayField: Array.from({ length: 100 }, (_, i) => i)
142+
}))
143+
});
144+
},
145+
async run(i, bson, document) {
146+
await Promise.all(
147+
Array.from(
148+
{ length: 100 },
149+
(_, i) =>
150+
new Promise(resolve => {
151+
setTimeout(() => {
152+
resolve(bson.lib.deserialize(document, { validation: { utf8: false } }));
153+
}, 20);
154+
})
155+
)
156+
);
157+
}
158+
});
159+
160+
await runner({
161+
skip: true,
162+
name: 'deserialize a large batch of documents each with an array of many Int64s',
163+
iterations,
164+
setup(libs) {
165+
const bson = libs[0].lib;
166+
return bson.serialize({
167+
nextBatch: Array.from({ length: 1000 }, () => ({
168+
_id: new bson.ObjectId(),
169+
arrayField: Array.from({ length: 100 }, (_, i) => bson.Long.fromInt(i))
170+
}))
171+
});
172+
},
173+
async run(i, bson, document) {
174+
await Promise.all(
175+
Array.from(
176+
{ length: 100 },
177+
(_, i) =>
178+
new Promise(resolve => {
179+
setTimeout(() => {
180+
resolve(bson.lib.deserialize(document, { validation: { utf8: false } }));
181+
}, 20);
182+
})
183+
)
184+
);
185+
}
186+
});
132187
// End
133188
console.log(
134189
'Total time taken to benchmark:',

src/binary.ts

+3-2
Original file line number | Diff line number | Diff line change
@@ -223,8 +223,9 @@ export class Binary extends BSONValue {
223223
toString(encoding?: 'hex' | 'base64' | 'utf8' | 'utf-8'): string {
224224
if (encoding === 'hex') return ByteUtils.toHex(this.buffer);
225225
if (encoding === 'base64') return ByteUtils.toBase64(this.buffer);
226-
if (encoding === 'utf8' || encoding === 'utf-8') return ByteUtils.toUTF8(this.buffer);
227-
return ByteUtils.toUTF8(this.buffer);
226+
if (encoding === 'utf8' || encoding === 'utf-8')
227+
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
228+
return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength);
228229
}
229230

230231
/** @internal */

src/parser/deserializer.ts

+7-7
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,7 @@ function deserializeObject(
236236
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');
237237

238238
// Represents the key
239-
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer.subarray(index, i));
239+
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i);
240240

241241
// shouldValidateKey is true if the key should be validated, false otherwise
242242
let shouldValidateKey = true;
@@ -476,7 +476,7 @@ function deserializeObject(
476476
// If are at the end of the buffer there is a problem with the document
477477
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
478478
// Return the C string
479-
const source = ByteUtils.toUTF8(buffer.subarray(index, i));
479+
const source = ByteUtils.toUTF8(buffer, index, i);
480480
// Create the regexp
481481
index = i + 1;
482482

@@ -489,7 +489,7 @@ function deserializeObject(
489489
// If are at the end of the buffer there is a problem with the document
490490
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
491491
// Return the C string
492-
const regExpOptions = ByteUtils.toUTF8(buffer.subarray(index, i));
492+
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
493493
index = i + 1;
494494

495495
// For each option add the corresponding one for javascript
@@ -521,7 +521,7 @@ function deserializeObject(
521521
// If are at the end of the buffer there is a problem with the document
522522
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
523523
// Return the C string
524-
const source = ByteUtils.toUTF8(buffer.subarray(index, i));
524+
const source = ByteUtils.toUTF8(buffer, index, i);
525525
index = i + 1;
526526

527527
// Get the start search index
@@ -533,7 +533,7 @@ function deserializeObject(
533533
// If are at the end of the buffer there is a problem with the document
534534
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
535535
// Return the C string
536-
const regExpOptions = ByteUtils.toUTF8(buffer.subarray(index, i));
536+
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
537537
index = i + 1;
538538

539539
// Set the object
@@ -678,7 +678,7 @@ function deserializeObject(
678678
throw new BSONError('Invalid UTF-8 string in BSON document');
679679
}
680680
}
681-
const namespace = ByteUtils.toUTF8(buffer.subarray(index, index + stringSize - 1));
681+
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1);
682682
// Update parse index position
683683
index = index + stringSize;
684684

@@ -735,7 +735,7 @@ function getValidatedString(
735735
end: number,
736736
shouldValidateUtf8: boolean
737737
) {
738-
const value = ByteUtils.toUTF8(buffer.subarray(start, end));
738+
const value = ByteUtils.toUTF8(buffer, start, end);
739739
// if utf8 validation is on, do the check
740740
if (shouldValidateUtf8) {
741741
for (let i = 0; i < value.length; i++) {

src/utils/byte_utils.ts

+1-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ export type ByteUtils = {
2626
/** Create a Uint8Array containing utf8 code units from a string */
2727
fromUTF8: (text: string) => Uint8Array;
2828
/** Create a string from utf8 code units */
29-
toUTF8: (buffer: Uint8Array) => string;
29+
toUTF8: (buffer: Uint8Array, start: number, end: number) => string;
3030
/** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
3131
utf8ByteLength: (input: string) => number;
3232
/** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */

src/utils/node_byte_utils.ts

+3-3
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ type NodeJsBuffer = ArrayBufferView &
55
Uint8Array & {
66
write(string: string, offset: number, length: undefined, encoding: 'utf8'): number;
77
copy(target: Uint8Array, targetStart: number, sourceStart: number, sourceEnd: number): number;
8-
toString: (this: Uint8Array, encoding: NodeJsEncoding) => string;
8+
toString: (this: Uint8Array, encoding: NodeJsEncoding, start?: number, end?: number) => string;
99
equals: (this: Uint8Array, other: Uint8Array) => boolean;
1010
};
1111
type NodeJsBufferConstructor = Omit<Uint8ArrayConstructor, 'from'> & {
@@ -125,8 +125,8 @@ export const nodeJsByteUtils = {
125125
return Buffer.from(text, 'utf8');
126126
},
127127

128-
toUTF8(buffer: Uint8Array): string {
129-
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8');
128+
toUTF8(buffer: Uint8Array, start: number, end: number): string {
129+
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
130130
},
131131

132132
utf8ByteLength(input: string): number {

src/utils/web_byte_utils.ts

+2-2
Original file line number | Diff line number | Diff line change
@@ -172,8 +172,8 @@ export const webByteUtils = {
172172
return new TextEncoder().encode(text);
173173
},
174174

175-
toUTF8(uint8array: Uint8Array): string {
176-
return new TextDecoder('utf8', { fatal: false }).decode(uint8array);
175+
toUTF8(uint8array: Uint8Array, start: number, end: number): string {
176+
return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
177177
},
178178

179179
utf8ByteLength(input: string): number {

0 commit comments

Comments
 (0)