
Commit 3f028d6

fs: improve promise based readFile performance for big files

This significantly reduces the peak memory for the promise based readFile
operation by reusing a single memory chunk after each read and stringifying
that chunk immediately.

Signed-off-by: Ruben Bridgewater <[email protected]>

1 parent f594cc8 commit 3f028d6
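Context for the change: the old promise based readFile read a file of known size into one full-size buffer (or, for unknown sizes, into a fresh buffer per chunk) and only stringified once everything had been read, so peak memory was roughly the whole file plus the resulting string. The rewrite in lib/internal/fs/promises.js below instead reads into a single reused chunk and, when an encoding is given, feeds each chunk straight into a StringDecoder. Here is a minimal userland sketch of that pattern, using the public fs/promises API rather than the internal binding; the names readFileChunked and CHUNK_SIZE, the 512 KiB chunk size, and the hard-coded 'utf8' encoding are illustrative, not taken from the commit:

'use strict';
const { open } = require('fs/promises');
const { StringDecoder } = require('string_decoder');

const CHUNK_SIZE = 512 * 1024; // illustrative chunk size

async function readFileChunked(path) {
  const decoder = new StringDecoder('utf8');
  // One non-pooled buffer, reused for every read instead of one per chunk.
  const buffer = Buffer.allocUnsafeSlow(CHUNK_SIZE);
  const handle = await open(path, 'r');
  try {
    let result = '';
    while (true) {
      const { bytesRead } = await handle.read(buffer, 0, CHUNK_SIZE, null);
      if (bytesRead === 0)
        return result + decoder.end(); // flush a trailing partial character
      // Stringify immediately; only `result` and one chunk stay in memory.
      result += decoder.write(buffer.slice(0, bytesRead));
    }
  } finally {
    await handle.close();
  }
}

Decoding per chunk keeps at most one chunk of raw bytes alive at a time; StringDecoder takes care of multi-byte characters that straddle chunk boundaries (see the demonstration after the lib/internal/fs/promises.js diff below).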

File tree

3 files changed: +69 −34 lines changed

benchmark/fs/readfile-promises.js

+8 −1

@@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
 const bench = common.createBenchmark(main, {
   duration: [5],
   encoding: ['', 'utf-8'],
-  len: [1024, 16 * 1024 * 1024],
+  len: [
+    1024,
+    512 * 1024,
+    4 * 1024 ** 2,
+    8 * 1024 ** 2,
+    16 * 1024 ** 2,
+    32 * 1024 ** 2,
+  ],
   concurrent: [1, 10]
 });
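The single 16 MiB case becomes a spread from 1 KiB to 32 MiB, so both the single-read path and the chunked path are measured at several sizes. As a usage sketch, assuming the harness's usual key=value parameter overrides (the values here are illustrative), a benchmark file like this can be run directly:

node benchmark/fs/readfile-promises.js len=4194304 concurrent=1 encoding=utf-8 duration=5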

lib/fs.js

+5
@@ -331,6 +331,9 @@ function readFileAfterStat(err, stats) {
   if (err)
     return context.close(err);
 
+  // TODO(BridgeAR): Check if allocating a smaller chunk is better performance
+  // wise, similar to the promise based version (less peak memory and chunked
+  // stringify operations vs multiple C++/JS boundary crossings).
   const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;
 
   if (size > kIoMaxLength) {
@@ -340,6 +343,8 @@ function readFileAfterStat(err, stats) {
 
   try {
     if (size === 0) {
+      // TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
+      // the result and reuse the buffer instead of allocating a new one.
       context.buffers = [];
     } else {
       context.buffer = Buffer.allocUnsafeSlow(size);

lib/internal/fs/promises.js

+56 −33

@@ -87,6 +87,7 @@ const {
   promisify,
 } = require('internal/util');
 const { EventEmitterMixin } = require('internal/event_target');
+const { StringDecoder } = require('string_decoder');
 const { watch } = require('internal/fs/watchers');
 const { isIterable } = require('internal/streams/utils');
 const assert = require('internal/assert');
@@ -419,63 +420,85 @@ async function writeFileHandle(filehandle, data, signal, encoding) {
 
 async function readFileHandle(filehandle, options) {
   const signal = options?.signal;
+  const encoding = options?.encoding;
+  const decoder = encoding && new StringDecoder(encoding);
 
   checkAborted(signal);
 
   const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);
 
   checkAborted(signal);
 
-  let size;
+  let size = 0;
+  let length = 0;
   if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
     size = statFields[8/* size */];
-  } else {
-    size = 0;
+    length = encoding ? MathMin(size, kReadFileBufferLength) : size;
+  }
+  if (length === 0) {
+    length = kReadFileUnknownBufferLength;
   }
 
   if (size > kIoMaxLength)
     throw new ERR_FS_FILE_TOO_LARGE(size);
 
-  let endOfFile = false;
   let totalRead = 0;
-  const noSize = size === 0;
-  const buffers = [];
-  const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
-  do {
+  let buffer = Buffer.allocUnsafeSlow(length);
+  let result = '';
+  let offset = 0;
+  let buffers;
+  const chunkedRead = length > kReadFileBufferLength;
+
+  while (true) {
     checkAborted(signal);
-    let buffer;
-    let offset;
-    let length;
-    if (noSize) {
-      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
-      offset = 0;
-      length = kReadFileUnknownBufferLength;
-    } else {
-      buffer = fullBuffer;
-      offset = totalRead;
+
+    if (chunkedRead) {
       length = MathMin(size - totalRead, kReadFileBufferLength);
     }
 
     const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
-                                          length, -1, kUsePromises)) || 0;
+                                          length, -1, kUsePromises)) ?? 0;
     totalRead += bytesRead;
-    endOfFile = bytesRead === 0 || totalRead === size;
-    if (noSize && bytesRead > 0) {
-      const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
-      const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
-      ArrayPrototypePush(buffers, chunkBuffer);
+
+    if (bytesRead === 0 ||
+        totalRead === size ||
+        (bytesRead !== buffer.length && !chunkedRead)) {
+      const singleRead = bytesRead === totalRead;
+
+      const bytesToCheck = chunkedRead ? totalRead : bytesRead;
+
+      if (bytesToCheck !== buffer.length) {
+        buffer = buffer.slice(0, bytesToCheck);
+      }
+
+      if (!encoding) {
+        if (size === 0 && !singleRead) {
+          ArrayPrototypePush(buffers, buffer);
+          return Buffer.concat(buffers, totalRead);
+        }
+        return buffer;
+      }
+
+      if (singleRead) {
+        return buffer.toString(encoding);
+      }
+      result += decoder.end(buffer);
+      return result;
     }
-  } while (!endOfFile);
 
-  let result;
-  if (size > 0) {
-    result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
-  } else {
-    result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
-                                                               totalRead);
+    if (encoding) {
+      result += decoder.write(buffer);
+    } else if (size !== 0) {
+      // TODO(BridgeAR): This condition needs a test. A file should be read
+      // that is chunked without encoding.
+      offset = totalRead;
+    } else {
+      buffers ??= [];
+      // Unknown file size requires chunks.
+      ArrayPrototypePush(buffers, buffer);
+      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
+    }
   }
-
-  return options.encoding ? result.toString(options.encoding) : result;
 }
 
 // All of the functions are defined as async in order to ensure that errors
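The decoder.write()/decoder.end() pair is what makes per-chunk stringification safe: a fixed-size read can end in the middle of a multi-byte character, and calling toString() on each chunk would yield replacement characters at the seam. A self-contained demonstration (splitting the three-byte '€' by hand stands in for a chunk boundary):

'use strict';
const { StringDecoder } = require('string_decoder');

const euro = Buffer.from('€'); // three UTF-8 bytes: e2 82 ac
const first = euro.slice(0, 2); // a chunk boundary splits the character
const second = euro.slice(2);

// Naive per-chunk toString() mangles the split sequence:
console.log(first.toString('utf8') + second.toString('utf8')); // replacement chars, not '€'

// StringDecoder buffers the incomplete sequence until it can be completed:
const decoder = new StringDecoder('utf8');
console.log(decoder.write(first) + decoder.write(second) + decoder.end()); // '€'

This is also why the singleRead fast path above can still use buffer.toString(encoding): a file read in one piece never splits a character.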
