Improve block codecs (#492)

elshize · web-flow · commit 0f13c79435cb · 2022-11-28T18:11:27.000-05:00
* Remove unnecessary variable
* Replace uint32_t(-1) with numeric_limits
* Use array instead of vector for buffer in codecs
diff --git a/include/pisa/block_freq_index.hpp b/include/pisa/block_freq_index.hpp
@@ -257,6 +257,7 @@ class block_freq_index {
         void build(std::string const& index_path)
         {
             std::ofstream os(index_path.c_str());
+            std::cout << index_path.c_str() << "\n";
             os.exceptions(std::ios::badbit | std::ios::failbit);
             mapper::detail::freeze_visitor freezer(os, 0);
             freezer(m_params, "m_params");
diff --git a/include/pisa/codec/block_codecs.hpp b/include/pisa/codec/block_codecs.hpp
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <array>
+
 #include "FastPFor/headers/optpfor.h"
 #include "FastPFor/headers/variablebyte.h"
 
@@ -121,13 +123,14 @@ class TightVariableByte {
 };
 
 struct interpolative_block {
-    static const uint64_t block_size = 128;
+    static constexpr std::uint64_t block_size = 128;
 
     static void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out)
     {
         assert(n <= block_size);
-        thread_local std::vector<uint32_t> inbuf(block_size);
-        thread_local std::vector<uint32_t> outbuf;
+        thread_local std::array<std::uint32_t, block_size> inbuf{};
+        thread_local std::vector<uint32_t> outbuf;  // TODO: Can we use array? How long does it need
+                                                    // to be?
         inbuf[0] = *in;
         for (size_t i = 1; i < n; ++i) {
             inbuf[i] = inbuf[i - 1] + in[i];
@@ -148,23 +151,22 @@ struct interpolative_block {
     decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n)
     {
         assert(n <= block_size);
-        uint8_t const* inbuf = in;
-        if (sum_of_values == uint32_t(-1)) {
-            inbuf = TightVariableByte::decode(inbuf, &sum_of_values, 1);
+        if (sum_of_values == std::numeric_limits<std::uint32_t>::max()) {
+            in = TightVariableByte::decode(in, &sum_of_values, 1);
         }
 
         out[n - 1] = sum_of_values;
         size_t read_interpolative = 0;
         if (n > 1) {
-            bit_reader br(inbuf);
+            bit_reader br(in);
             br.read_interpolative(out, n - 1, 0, sum_of_values);
             for (size_t i = n - 1; i > 0; --i) {
                 out[i] -= out[i - 1];
             }
             read_interpolative = ceil_div(br.position(), 8);
         }
 
-        return inbuf + read_interpolative;
+        return in + read_interpolative;
     }
 };
 
@@ -214,7 +216,7 @@ struct optpfor_block {
         uint8_t const* b = nullptr)  // if non-null forces b
     {
         thread_local codec_type optpfor_codec;
-        thread_local std::vector<uint8_t> buf(2 * 4 * block_size);
+        thread_local std::array<std::uint8_t, 2 * 4 * block_size> buf{};
         assert(n <= block_size);
 
         if (n < block_size) {
@@ -284,7 +286,7 @@ struct varint_G8IU_block {
     static void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out)
     {
         thread_local codec_type varint_codec;
-        thread_local std::vector<uint8_t> buf(2 * 4 * block_size);
+        thread_local std::array<std::uint8_t, 2 * 4 * block_size> buf{};
         assert(n <= block_size);
 
         if (n < block_size) {
diff --git a/include/pisa/codec/maskedvbyte.hpp b/include/pisa/codec/maskedvbyte.hpp
@@ -9,7 +9,7 @@
 
 namespace pisa {
 struct maskedvbyte_block {
-    static const uint64_t block_size = 128;
+    static constexpr std::uint64_t block_size = 128;
     static void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out)
     {
         assert(n <= block_size);
@@ -18,7 +18,7 @@ struct maskedvbyte_block {
             interpolative_block::encode(src, sum_of_values, n, out);
             return;
         }
-        thread_local std::vector<uint8_t> buf(2 * block_size * sizeof(uint32_t));
+        thread_local std::array<std::uint8_t, 2 * block_size * sizeof(std::uint32_t)> buf{};
         size_t out_len = vbyte_encode(src, n, buf.data());
         out.insert(out.end(), buf.data(), buf.data() + out_len);
     }
diff --git a/include/pisa/codec/qmx.hpp b/include/pisa/codec/qmx.hpp
@@ -5,8 +5,8 @@
 
 namespace pisa {
 struct qmx_block {
-    static const uint64_t block_size = 128;
-    static const uint64_t overflow = 512;
+    static constexpr std::uint64_t block_size = 128;
+    static constexpr std::uint64_t overflow = 512;
 
     static void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out)
     {
@@ -17,7 +17,7 @@ struct qmx_block {
             return;
         }
         thread_local QMX::compress_integer_qmx_improved qmx_codec;
-        thread_local std::vector<uint8_t> buf(2 * n * sizeof(uint32_t) + overflow);
+        thread_local std::vector<std::uint8_t> buf(2 * n * sizeof(uint32_t) + overflow);
 
         size_t out_len = qmx_codec.encode(buf.data(), buf.size(), in, n);
         TightVariableByte::encode_single(out_len, out);
diff --git a/include/pisa/codec/simple16.hpp b/include/pisa/codec/simple16.hpp
@@ -1,17 +1,19 @@
 #pragma once
 #include "FastPFor/headers/simple16.h"
 
+#include <array>
+
 namespace pisa {
 
 struct simple16_block {
-    static const uint64_t block_size = 128;
+    static constexpr std::uint64_t block_size = 128;
 
     static void
     encode(uint32_t const* in, uint32_t /* sum_of_values */, size_t n, std::vector<uint8_t>& out)
     {
         assert(n <= block_size);
         thread_local FastPForLib::Simple16<false> codec;
-        thread_local std::vector<uint8_t> buf(2 * 8 * block_size);
+        thread_local std::array<std::uint8_t, 2 * 8 * block_size> buf{};
         size_t out_len = buf.size();
         codec.encodeArray(in, n, reinterpret_cast<uint32_t*>(buf.data()), out_len);
         out_len *= 4;
@@ -23,14 +25,14 @@ struct simple16_block {
     {
         assert(n <= block_size);
         FastPForLib::Simple16<false> codec;
-        std::vector<uint32_t> buf(2 * block_size);
+        std::array<std::uint32_t, 2 * block_size> buf{};
 
         auto const* ret = reinterpret_cast<uint8_t const*>(
             codec.decodeArray(reinterpret_cast<uint32_t const*>(in), 8 * n, buf.data(), n));
-        for (size_t i = 0; i < n; ++i) {
-            *out++ = buf[i];
-        }
+
+        std::copy(buf.begin(), std::next(buf.begin(), n), out);
         return ret;
     }
 };
+
 }  // namespace pisa
diff --git a/include/pisa/codec/simple8b.hpp b/include/pisa/codec/simple8b.hpp
@@ -1,17 +1,19 @@
 #pragma once
 #include "FastPFor/headers/simple8b.h"
 
+#include <array>
+
 namespace pisa {
 
 struct simple8b_block {
-    static const uint64_t block_size = 128;
+    static constexpr std::uint64_t block_size = 128;
 
     static void
     encode(uint32_t const* in, uint32_t /* sum_of_values */, size_t n, std::vector<uint8_t>& out)
     {
         assert(n <= block_size);
         thread_local FastPForLib::Simple8b<false> codec;
-        thread_local std::vector<uint8_t> buf(2 * 8 * block_size);
+        thread_local std::array<std::uint8_t, 2 * 8 * block_size> buf{};
         size_t out_len = buf.size();
         codec.encodeArray(in, n, reinterpret_cast<uint32_t*>(buf.data()), out_len);
         out_len *= 4;
diff --git a/include/pisa/codec/streamvbyte.hpp b/include/pisa/codec/streamvbyte.hpp
@@ -1,20 +1,32 @@
 #pragma once
 
+#include <array>
 #include <cassert>
+#include <cstdint>
 #include <vector>
 
 #include "streamvbyte/include/streamvbyte.h"
 
 namespace pisa {
 
+// Constexpr version of the same-named streamvbyte library function (upper bound on encoded size).
+constexpr std::size_t streamvbyte_max_compressedbytes(std::uint32_t length)
+{
+    // number of control bytes (one per group of up to four values, rounded up):
+    size_t cb = (length + 3) / 4;
+    // maximum number of data bytes (sizeof(uint32_t) bytes per value):
+    size_t db = (size_t)length * sizeof(uint32_t);
+    return cb + db;
+}
+
 struct streamvbyte_block {
     static const uint64_t block_size = 128;
     static void
     encode(uint32_t const* in, uint32_t /* sum_of_values */, size_t n, std::vector<uint8_t>& out)
     {
         assert(n <= block_size);
         auto* src = const_cast<uint32_t*>(in);
-        thread_local std::vector<uint8_t> buf(streamvbyte_max_compressedbytes(block_size));
+        thread_local std::array<std::uint8_t, pisa::streamvbyte_max_compressedbytes(block_size)> buf{};
         size_t out_len = streamvbyte_encode(src, n, buf.data());
         out.insert(out.end(), buf.data(), buf.data() + out_len);
     }
diff --git a/include/pisa/codec/varintgb.hpp b/include/pisa/codec/varintgb.hpp
@@ -1,9 +1,12 @@
 #pragma once
-#include "codec/block_codecs.hpp"
+
+#include <array>
 #include <vector>
 
 #include "FastPFor/headers/common.h"
 
+#include "codec/block_codecs.hpp"
+
 using namespace std;
 
 namespace pisa {
@@ -252,7 +255,7 @@ struct varintgb_block {
             interpolative_block::encode(in, sum_of_values, n, out);
             return;
         }
-        thread_local std::vector<uint8_t> buf(2 * block_size * sizeof(uint32_t));
+        thread_local std::array<std::uint8_t, 2 * block_size * sizeof(uint32_t)> buf{};
         size_t out_len = varintgb_codec.encodeArray(in, n, buf.data());
         out.insert(out.end(), buf.data(), buf.data() + out_len);
     }

Original file line number	Diff line number	Diff line change
`@@ -257,6 +257,7 @@ class block_freq_index {`
`257`	`257`	`void build(std::string const& index_path)`
`258`	`258`	`{`
`259`	`259`	`std::ofstream os(index_path.c_str());`
	`260`	`+ std::cout << index_path.c_str() << "\n";`
`260`	`261`	`os.exceptions(std::ios::badbit \| std::ios::failbit);`
`261`	`262`	`mapper::detail::freeze_visitor freezer(os, 0);`
`262`	`263`	`freezer(m_params, "m_params");`
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@`
`9`	`9`
`10`	`10`	`namespace pisa {`
`11`	`11`	`struct maskedvbyte_block {`
`12`		`- static const uint64_t block_size = 128;`
	`12`	`+ static constexpr std::uint64_t block_size = 128;`
`13`	`13`	`static void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out)`
`14`	`14`	`{`
`15`	`15`	`assert(n <= block_size);`
`@@ -18,7 +18,7 @@ struct maskedvbyte_block {`
`18`	`18`	`interpolative_block::encode(src, sum_of_values, n, out);`
`19`	`19`	`return;`
`20`	`20`	`}`
`21`		`- thread_local std::vector<uint8_t> buf(2 * block_size * sizeof(uint32_t));`
	`21`	`+ thread_local std::array<std::uint8_t, 2 * block_size * sizeof(std::uint32_t)> buf{};`
`22`	`22`	`size_t out_len = vbyte_encode(src, n, buf.data());`
`23`	`23`	`out.insert(out.end(), buf.data(), buf.data() + out_len);`
`24`	`24`	`}`