Skip to content

Commit cc25de4

Browse files
committed
buffer: add SIMD Neon optimization for byteLength
1 parent c9ec72d commit cc25de4

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed

src/node_buffer.cc

+41
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@
3838
#include <cstring>
3939
#include <climits>
4040

41+
#if defined(__aarch64__) || defined(_M_ARM64)
42+
#define NODE_HAS_SIMD_NEON 1
43+
#endif
44+
45+
#if NODE_HAS_SIMD_NEON
46+
#include <arm_neon.h>
47+
#endif
48+
4149
#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \
4250
THROW_AND_RETURN_IF_NOT_BUFFER(env, obj, "argument") \
4351

@@ -741,6 +749,38 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
741749
args.GetReturnValue().Set(args[0].As<String>()->Utf8Length(env->isolate()));
742750
}
743751

752+
#if NODE_HAS_SIMD_NEON
753+
// NEON implementation of the fast byteLength path for one-byte strings.
//
// Returns `source.length` plus the number of input bytes whose high bit is
// set (value >= 0x80), i.e. every byte counts once and every high-bit byte
// counts one extra time.
//
// `receiver` is unused. `source.data` must point to at least `source.length`
// readable bytes; this function never reads beyond that range.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  constexpr size_t kLanes = sizeof(uint8x16_t);  // 16 bytes per NEON vector
  const auto* data = reinterpret_cast<const uint8_t*>(source.data);
  const size_t length = source.length;

  // Only whole 16-byte groups go through the vector loop. Loading a partial
  // group with vld1q_u8 would read past the end of the string.
  const size_t simd_end = length - (length % kLanes);

  uint32_t answer{0};
  size_t i = 0;

  for (; i < simd_end; i += kLanes) {
    // Load 16 bytes from data.
    const uint8x16_t values = vld1q_u8(data + i);

    // Shift every lane right by 7 so that each lane holds exactly 1 when the
    // byte's high bit is set and 0 otherwise. (A comparison such as vcgeq_u8
    // would produce 0xFF per matching lane, which cannot be summed as a
    // count.)
    const uint8x16_t high_bits = vshrq_n_u8(values, 7);

    // Widening horizontal add across the 16 lanes: at most 16 per vector, so
    // nothing can overflow or saturate before it reaches `answer`.
    answer += vaddlvq_u8(high_bits);
  }

  // Scalar tail for the remaining (length % 16) bytes.
  for (; i < length; ++i) {
    answer += (data[i] >> 7);
  }

  return answer + source.length;
}
783+
#else
744784
uint32_t FastByteLengthUtf8(Local<Value> receiver,
745785
const v8::FastOneByteString& source) {
746786
uint32_t result = 0;
@@ -752,6 +792,7 @@ uint32_t FastByteLengthUtf8(Local<Value> receiver,
752792
result += length;
753793
return result;
754794
}
795+
#endif
755796

756797
// File-local v8::CFunction built from FastByteLengthUtf8 with
// v8::CFunction::Make. Whichever FastByteLengthUtf8 definition the
// preprocessor selected above is the one wrapped here.
// NOTE(review): presumably this is registered elsewhere in the file as the
// V8 fast-call counterpart of the slow byteLength callback; the registration
// is not visible in this excerpt, so confirm.
static v8::CFunction fast_byte_length_utf8(
757798
v8::CFunction::Make(FastByteLengthUtf8));

0 commit comments

Comments
 (0)