Skip to content

Commit a52d532

Browse files
committed
buffer: add SIMD Neon optimization for byteLength
1 parent c9ec72d commit a52d532

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

src/node_buffer.cc

+39
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@
3838
#include <cstring>
3939
#include <climits>
4040

41+
#if defined(__aarch64__) || defined(_M_ARM64)
42+
#define NODE_HAS_SIMD_NEON 1
43+
#endif
44+
45+
#if NODE_HAS_SIMD_NEON
46+
#include <arm_neon.h>
47+
#endif
48+
4149
#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \
4250
THROW_AND_RETURN_IF_NOT_BUFFER(env, obj, "argument") \
4351

@@ -741,6 +749,36 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
741749
args.GetReturnValue().Set(args[0].As<String>()->Utf8Length(env->isolate()));
742750
}
743751

752+
#if NODE_HAS_SIMD_NEON
753+
// Computes the UTF-8 byte length of a one-byte string using AArch64 NEON.
//
// The result is source.length plus the number of bytes in source.data whose
// most significant bit is set (each such byte contributes one extra output
// byte).
//
// receiver: unused.
// source:   the one-byte string; only source.data[0 .. source.length) is read.
// Returns the computed length as a uint32_t.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  const auto data = reinterpret_cast<const uint8_t*>(source.data);
  const size_t length = source.length;

  // Number of bytes consumed by one 128-bit vector load.
  constexpr size_t kChunkSize = 16;
  // Each 8-bit lane grows by at most 1 per chunk, so it can absorb 255 chunks
  // before it would wrap. The counters are flushed at least that often.
  constexpr size_t kMaxChunksPerFlush = 255;

  uint32_t answer = 0;
  size_t i = 0;

  // Only whole 16-byte chunks are loaded with NEON; a partial trailing chunk
  // must not be loaded because that would read past the end of the string.
  while (length - i >= kChunkSize) {
    size_t chunks = (length - i) / kChunkSize;
    if (chunks > kMaxChunksPerFlush) chunks = kMaxChunksPerFlush;

    uint8x16_t lane_counts = vdupq_n_u8(0);
    for (size_t c = 0; c < chunks; ++c, i += kChunkSize) {
      // load 16 bytes from data
      const uint8x16_t values = vld1q_u8(data + i);

      // shift the most significant bit of every byte down to bit 0 (0 or 1)
      const uint8x16_t high_bits = vshrq_n_u8(values, 7);

      // accumulate per lane; cannot overflow within kMaxChunksPerFlush chunks
      lane_counts = vaddq_u8(lane_counts, high_bits);
    }

    // widen and sum the 16 lane counters, then fold into the running total
    const uint64x2_t sum64 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(lane_counts)));
    answer += static_cast<uint32_t>(vgetq_lane_u64(sum64, 0) +
                                    vgetq_lane_u64(sum64, 1));
  }

  // scalar tail: the remaining (length % 16) bytes
  for (; i < length; ++i) {
    answer += (data[i] >> 7);
  }

  return answer + source.length;
}
781+
#else
744782
uint32_t FastByteLengthUtf8(Local<Value> receiver,
745783
const v8::FastOneByteString& source) {
746784
uint32_t result = 0;
@@ -752,6 +790,7 @@ uint32_t FastByteLengthUtf8(Local<Value> receiver,
752790
result += length;
753791
return result;
754792
}
793+
#endif
755794

756795
// File-local v8::CFunction built from FastByteLengthUtf8 via
// v8::CFunction::Make. Whichever FastByteLengthUtf8 definition the
// preprocessor selected above (NEON or the #else variant) is the one wrapped.
static v8::CFunction fast_byte_length_utf8(
757796
v8::CFunction::Make(FastByteLengthUtf8));

0 commit comments

Comments
 (0)