Skip to content

Commit 3a5dd02

Browse files
addaleax authored and MylesBorins committed
zlib: detect gzip files when using unzip*
Detect whether a gzip file is being passed to `unzip*` by testing the first bytes for the gzip magic bytes, and setting the decompression mode to `GUNZIP` or `INFLATE` according to the result.

This enables gzip-only features like multi-member support to be used together with the `unzip*` autodetection support and thereby makes `gunzip*` and `unzip*` return identical results for gzip input again.

Add a simple test for checking that features specific to `zlib.gunzip`, notably support for multiple members, also work when using `zlib.unzip`.

PR-URL: #5884
Reviewed-By: Ben Noordhuis <[email protected]>
Reviewed-By: James M Snell <[email protected]>
1 parent 5c9dddf commit 3a5dd02

File tree

3 files changed: 91 additions (+91) and 1 deletion (-1)

src/node_zlib.cc

+49-1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ class ZCtx : public AsyncWrap {
6969
write_in_progress_(false),
7070
pending_close_(false),
7171
refs_(0),
72-
first_member_ended_(false) {
72+
first_member_ended_(false),
73+
gzip_id_bytes_read_(0) {
7374
MakeWeak<ZCtx>(this);
7475
}
7576

@@ -226,6 +227,8 @@ class ZCtx : public AsyncWrap {
226227
static void Process(uv_work_t* work_req) {
227228
ZCtx *ctx = ContainerOf(&ZCtx::work_req_, work_req);
228229

230+
const Bytef* next_expected_header_byte = nullptr;
231+
229232
// If the avail_out is left at 0, then it means that it ran out
230233
// of room. If there was avail_out left over, then it means
231234
// that all of the input was consumed.
@@ -236,6 +239,50 @@ class ZCtx : public AsyncWrap {
236239
ctx->err_ = deflate(&ctx->strm_, ctx->flush_);
237240
break;
238241
case UNZIP:
242+
if (ctx->strm_.avail_in > 0) {
243+
next_expected_header_byte = ctx->strm_.next_in;
244+
}
245+
246+
switch (ctx->gzip_id_bytes_read_) {
247+
case 0:
248+
if (next_expected_header_byte == nullptr) {
249+
break;
250+
}
251+
252+
if (*next_expected_header_byte == GZIP_HEADER_ID1) {
253+
ctx->gzip_id_bytes_read_ = 1;
254+
next_expected_header_byte++;
255+
256+
if (ctx->strm_.avail_in == 1) {
257+
// The only available byte was already read.
258+
break;
259+
}
260+
} else {
261+
ctx->mode_ = INFLATE;
262+
break;
263+
}
264+
265+
// fallthrough
266+
case 1:
267+
if (next_expected_header_byte == nullptr) {
268+
break;
269+
}
270+
271+
if (*next_expected_header_byte == GZIP_HEADER_ID2) {
272+
ctx->gzip_id_bytes_read_ = 2;
273+
ctx->mode_ = GUNZIP;
274+
} else {
275+
// There is no actual difference between INFLATE and INFLATERAW
276+
// (after initialization).
277+
ctx->mode_ = INFLATE;
278+
}
279+
280+
break;
281+
default:
282+
CHECK(0 && "invalid number of gzip magic number bytes read");
283+
}
284+
285+
// fallthrough
239286
case INFLATE:
240287
case GUNZIP:
241288
case INFLATERAW:
@@ -602,6 +649,7 @@ class ZCtx : public AsyncWrap {
602649
bool pending_close_;
603650
unsigned int refs_;
604651
bool first_member_ended_;
652+
unsigned int gzip_id_bytes_read_;
605653
};
606654

607655

test/parallel/test-zlib-from-concatenated-gzip.js

+14
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,20 @@ zlib.gunzip(data, common.mustCall((err, result) => {
2222
assert.equal(result, 'abcdef', 'result should match original string');
2323
}));
2424

25+
zlib.unzip(data, common.mustCall((err, result) => {
26+
assert.ifError(err);
27+
assert.equal(result, 'abcdef', 'result should match original string');
28+
}));
29+
30+
// Multi-member support does not apply to zlib inflate/deflate.
31+
zlib.unzip(Buffer.concat([
32+
zlib.deflateSync('abc'),
33+
zlib.deflateSync('def')
34+
]), common.mustCall((err, result) => {
35+
assert.ifError(err);
36+
assert.equal(result, 'abc', 'result should match contents of first "member"');
37+
}));
38+
2539
// files that have the "right" magic bytes for starting a new gzip member
2640
// in the middle of themselves, even if they are part of a single
2741
// regularly compressed member
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
'use strict';
2+
const common = require('../common');
3+
const assert = require('assert');
4+
const zlib = require('zlib');
5+
6+
const data = Buffer.concat([
7+
zlib.gzipSync('abc'),
8+
zlib.gzipSync('def')
9+
]);
10+
11+
const resultBuffers = [];
12+
13+
const unzip = zlib.createUnzip()
14+
.on('error', (err) => {
15+
assert.ifError(err);
16+
})
17+
.on('data', (data) => resultBuffers.push(data))
18+
.on('finish', common.mustCall(() => {
19+
assert.deepStrictEqual(Buffer.concat(resultBuffers).toString(), 'abcdef',
20+
'result should match original string');
21+
}));
22+
23+
for (let i = 0; i < data.length; i++) {
24+
// Write each single byte individually.
25+
unzip.write(Buffer.from([data[i]]));
26+
}
27+
28+
unzip.end();

0 commit comments

Comments
 (0)