Skip to content

Commit d0d1eb7

Browse files
committed
buffer: add SIMD Neon optimization for byteLength
1 parent c9ec72d commit d0d1eb7

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

src/node_buffer.cc

+49
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@
3838
#include <cstring>
3939
#include <climits>
4040

41+
#if defined(__aarch64__) || defined(_M_ARM64)
42+
#define NODE_HAS_SIMD_NEON 1
43+
#endif
44+
45+
#if NODE_HAS_SIMD_NEON
46+
#include <arm_neon.h>
47+
#endif
48+
4149
#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \
4250
THROW_AND_RETURN_IF_NOT_BUFFER(env, obj, "argument") \
4351

@@ -741,6 +749,46 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
741749
args.GetReturnValue().Set(args[0].As<String>()->Utf8Length(env->isolate()));
742750
}
743751

752+
#if NODE_HAS_SIMD_NEON
753+
// NEON fast path for the UTF-8 byte length of a one-byte string.
//
// The value returned is `source.length` plus the number of input bytes that
// are >= 0x80, i.e. every byte with its high bit set is counted as two output
// bytes and every other byte as one.
//
// `receiver` is unused; it is part of the fast-call signature.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  // vld1q_u8 accepts a pointer-to-const, so constness is preserved here.
  const auto* data = reinterpret_cast<const uint8_t*>(source.data);
  const auto length = source.length;

  constexpr uint32_t kLanes = sizeof(uint8x16_t);  // 16 bytes per vector
  const uint8_t* const simd_end = data + (length / kLanes) * kLanes;
  const uint8_t remaining = length % kLanes;
  const uint8x16_t threshold = vdupq_n_u8(0x80);

  // Holds 4x the number of high-bit bytes seen by the vector loop. Kept in
  // 64 bits so the factor of four can never wrap, whatever `length` is.
  uint64_t quad_count = 0;

  for (; data < simd_end; data += kLanes) {
    // Load 16 bytes.
    const uint8x16_t input = vld1q_u8(data);

    // Each lane becomes 0xFF if the byte is >= 0x80, otherwise 0x00.
    const uint8x16_t with_highbit = vcgeq_u8(input, threshold);

    // View the mask as 8 x u16, shift right by 4 and narrow to 8 x u8: every
    // original byte now contributes exactly one nibble (0x0 or 0xF).
    const uint8x8_t highbits =
        vshrn_n_u16(vreinterpretq_u16_u8(with_highbit), 4);

    // Population count per byte: 0, 4 or 8 set bits.
    const uint8x8_t bitsperbyte = vcnt_u8(highbits);

    // Horizontal add of the 8 lanes (at most 64 per iteration).
    quad_count += vaddlv_u8(bitsperbyte);
  }

  // Each high-bit byte was counted as 4 set bits, so undo the factor of 4.
  uint64_t high_bytes = quad_count / 4;

  // Scalar tail: fewer than 16 bytes left after the last full vector.
  for (uint8_t j = 0; j < remaining; j++) {
    high_bytes += (simd_end[j] >> 7);
  }

  return static_cast<uint32_t>(high_bytes + length);
}
791+
#else
744792
uint32_t FastByteLengthUtf8(Local<Value> receiver,
745793
const v8::FastOneByteString& source) {
746794
uint32_t result = 0;
@@ -752,6 +800,7 @@ uint32_t FastByteLengthUtf8(Local<Value> receiver,
752800
result += length;
753801
return result;
754802
}
803+
#endif
755804

756805
static v8::CFunction fast_byte_length_utf8(
757806
v8::CFunction::Make(FastByteLengthUtf8));

0 commit comments

Comments
 (0)