zlib: fix gzip member head/buffer boundary issue

addaleax · jasnell · commit 61167c3e236a · 2016-04-15T00:40:38.000-07:00
Make sure that, even if an `inflate()` call only sees the first few bytes of a following gzip member, all members are decompressed and included in the full output.

Adds tests for the special case that the first `inflate()` call receives only the first few bytes of a second gzip member but not the whole header (or even just the magic bytes).

This is a backport of #5883 and contains additional changes to make sure that the behaviour on encountering trailing garbage remains the same (namely to silently discard it if one full member has already been decompressed).

PR-URL: #5973
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: James M Snell <jasnell@gmail.com>
diff --git a/src/node_zlib.cc b/src/node_zlib.cc
@@ -43,7 +43,6 @@ enum node_zlib_mode {
 
 #define GZIP_HEADER_ID1 0x1f
 #define GZIP_HEADER_ID2 0x8b
-#define GZIP_MIN_HEADER_SIZE 10
 
 void InitZlib(v8::Local<v8::Object> target);
 
@@ -69,7 +68,8 @@ class ZCtx : public AsyncWrap {
         windowBits_(0),
         write_in_progress_(false),
         pending_close_(false),
-        refs_(0) {
+        refs_(0),
+        first_member_ended_(false) {
     MakeWeak<ZCtx>(this);
   }
 
@@ -257,17 +257,20 @@ class ZCtx : public AsyncWrap {
             ctx->err_ = Z_NEED_DICT;
           }
         }
-        while (ctx->strm_.avail_in >= GZIP_MIN_HEADER_SIZE &&
+
+        if (ctx->err_ == Z_STREAM_END) {
+          ctx->first_member_ended_ = true;
+        }
+
+        while (ctx->strm_.avail_in > 0 &&
                ctx->mode_ == GUNZIP &&
-               ctx->err_ == Z_STREAM_END) {
+               ctx->err_ == Z_STREAM_END &&
+               ctx->strm_.next_in[0] != 0x00) {
           // Bytes remain in input buffer. Perhaps this is another compressed
           // member in the same archive, or just trailing garbage.
-          // Check the header to find out.
-          if (ctx->strm_.next_in[0] != GZIP_HEADER_ID1 ||
-              ctx->strm_.next_in[1] != GZIP_HEADER_ID2) {
-            // Not a valid gzip member
-            break;
-          }
+          // Trailing zero bytes are okay, though, since they are frequently
+          // used for padding.
+
           Reset(ctx);
           ctx->err_ = inflate(&ctx->strm_, ctx->flush_);
         }
@@ -302,6 +305,11 @@ class ZCtx : public AsyncWrap {
       else
         ZCtx::Error(ctx, "Bad dictionary");
       return false;
+    case Z_DATA_ERROR:
+      if (ctx->first_member_ended_) {
+        // Silently discard trailing garbage after fully decompressed member.
+        break;
+      }
     default:
       // something else.
       ZCtx::Error(ctx, "Zlib error");
@@ -593,6 +601,7 @@ class ZCtx : public AsyncWrap {
   bool write_in_progress_;
   bool pending_close_;
   unsigned int refs_;
+  bool first_member_ended_;
 };
 
 
diff --git a/test/parallel/test-zlib-from-concatenated-gzip.js b/test/parallel/test-zlib-from-concatenated-gzip.js
@@ -7,9 +7,12 @@ const zlib = require('zlib');
 const path = require('path');
 const fs = require('fs');
 
+const abcEncoded = zlib.gzipSync('abc');
+const defEncoded = zlib.gzipSync('def');
+
 const data = Buffer.concat([
-  zlib.gzipSync('abc'),
-  zlib.gzipSync('def')
+  abcEncoded,
+  defEncoded
 ]);
 
 assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
@@ -38,3 +41,26 @@ fs.createReadStream(pmmFileGz)
     assert.deepStrictEqual(Buffer.concat(pmmResultBuffers), pmmExpected,
       'result should match original random garbage');
   }));
+
+// test that the next gzip member can wrap around the input buffer boundary
+[0, 1, 2, 3, 4, defEncoded.length].forEach((offset) => {
+  const resultBuffers = [];
+
+  const unzip = zlib.createGunzip()
+   .on('error', (err) => {
+     assert.ifError(err);
+   })
+   .on('data', (data) => resultBuffers.push(data))
+   .on('finish', common.mustCall(() => {
+     assert.strictEqual(Buffer.concat(resultBuffers).toString(), 'abcdef',
+      `result should match original input (offset = ${offset})`);
+   }));
+
+  // first write: write "abc" + the first bytes of "def"
+  unzip.write(Buffer.concat([
+    abcEncoded, defEncoded.slice(0, offset)
+  ]));
+
+  // write remaining bytes of "def"
+  unzip.end(defEncoded.slice(offset));
+});
diff --git a/test/parallel/test-zlib-from-gzip-with-trailing-garbage.js b/test/parallel/test-zlib-from-gzip-with-trailing-garbage.js
@@ -28,10 +28,11 @@ data = Buffer.concat([
   Buffer(10).fill(0)
 ]);
 
-assert.throws(() => zlib.gunzipSync(data));
+assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
 
 zlib.gunzip(data, common.mustCall((err, result) => {
-  assert(err);
+  assert.ifError(err);
+  assert.equal(result, 'abcdef', 'result should match original string');
 }));
 
 // In this case the trailing junk is too short to be a gzip segment