38
38
#include < cstring>
39
39
#include < climits>
40
40
41
+ #if defined(__aarch64__) || defined(_M_ARM64)
42
+ #define NODE_HAS_SIMD_NEON 1
43
+ #endif
44
+
45
+ #if NODE_HAS_SIMD_NEON
46
+ #include < arm_neon.h>
47
+ #endif
48
+
41
49
#define THROW_AND_RETURN_UNLESS_BUFFER (env, obj ) \
42
50
THROW_AND_RETURN_IF_NOT_BUFFER (env, obj, " argument" ) \
43
51
@@ -741,6 +749,46 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
741
749
args.GetReturnValue ().Set (args[0 ].As <String>()->Utf8Length (env->isolate ()));
742
750
}
743
751
752
+ #if NODE_HAS_SIMD_NEON
753
+ uint32_t FastByteLengthUtf8 (Local<Value> receiver,
754
+ const v8::FastOneByteString& source) {
755
+ uint8_t * data =
756
+ const_cast <uint8_t *>(reinterpret_cast <const uint8_t *>(source.data ));
757
+ auto length = source.length ;
758
+
759
+ uint32_t result{0 };
760
+ const int lanes = sizeof (uint8x16_t );
761
+ uint8_t remaining = length % lanes;
762
+ const auto * simd_end = data + (length / lanes) * lanes;
763
+ const auto threshold = vdupq_n_u8 (0x80 );
764
+
765
+ for (; data < simd_end; data += lanes) {
766
+ // load 16 bits
767
+ uint8x16_t input = vld1q_u8 (data);
768
+
769
+ // compare to threshold (0x80)
770
+ uint8x16_t with_highbit = vcgeq_u8 (input, threshold);
771
+
772
+ // shift and narrow
773
+ uint8x8_t highbits = vshrn_n_u16 (vreinterpretq_u16_u8 (with_highbit), 4 );
774
+
775
+ // we have 0, 4 or 8 bits per byte
776
+ uint8x8_t bitsperbyte = vcnt_u8 (highbits);
777
+
778
+ // sum the bytes vertically to uint32_t
779
+ result += vaddlv_u8 (bitsperbyte);
780
+ }
781
+
782
+ // we overcounted by a factor of 4
783
+ result /= 4 ;
784
+
785
+ for (uint8_t j = 0 ; j < remaining; j++) {
786
+ result += (simd_end[j] >> 7 );
787
+ }
788
+
789
+ return result + length;
790
+ }
791
+ #else
744
792
uint32_t FastByteLengthUtf8 (Local<Value> receiver,
745
793
const v8::FastOneByteString& source) {
746
794
uint32_t result = 0 ;
@@ -752,6 +800,7 @@ uint32_t FastByteLengthUtf8(Local<Value> receiver,
752
800
result += length;
753
801
return result;
754
802
}
803
+ #endif
755
804
756
805
// File-local v8::CFunction built from FastByteLengthUtf8 via
// v8::CFunction::Make.
// NOTE(review): presumably registered as the V8 fast-API counterpart of
// SlowByteLengthUtf8 elsewhere in this file — confirm at the registration site.
static v8::CFunction fast_byte_length_utf8 (
757
806
v8::CFunction::Make (FastByteLengthUtf8));
0 commit comments