Skip to content

Commit 35bf93b

Browse files
addaleax authored and ruyadorno committed
src: replace custom ASCII validation with simdutf one
PR-URL: #46271
Reviewed-By: Tobias Nießen <[email protected]>
Reviewed-By: Richard Lau <[email protected]>
Reviewed-By: Colin Ihrig <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Ben Noordhuis <[email protected]>
Reviewed-By: Darshan Sen <[email protected]>
1 parent f39fb8c commit 35bf93b

File tree

1 file changed

+3
-55
lines changed

1 file changed

+3
-55
lines changed

src/string_bytes.cc

+3 -55
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "env-inl.h"
2626
#include "node_buffer.h"
2727
#include "node_errors.h"
28+
#include "simdutf.h"
2829
#include "util.h"
2930

3031
#include <climits>
@@ -467,60 +468,6 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
467468
UNREACHABLE();
468469
}
469470

470-
471-
472-
473-
static bool contains_non_ascii_slow(const char* buf, size_t len) {
474-
for (size_t i = 0; i < len; ++i) {
475-
if (buf[i] & 0x80)
476-
return true;
477-
}
478-
return false;
479-
}
480-
481-
482-
static bool contains_non_ascii(const char* src, size_t len) {
483-
if (len < 16) {
484-
return contains_non_ascii_slow(src, len);
485-
}
486-
487-
const unsigned bytes_per_word = sizeof(uintptr_t);
488-
const unsigned align_mask = bytes_per_word - 1;
489-
const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;
490-
491-
if (unaligned > 0) {
492-
const unsigned n = bytes_per_word - unaligned;
493-
if (contains_non_ascii_slow(src, n))
494-
return true;
495-
src += n;
496-
len -= n;
497-
}
498-
499-
500-
#if defined(_WIN64) || defined(_LP64)
501-
const uintptr_t mask = 0x8080808080808080ll;
502-
#else
503-
const uintptr_t mask = 0x80808080l;
504-
#endif
505-
506-
const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
507-
508-
for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
509-
if (srcw[i] & mask)
510-
return true;
511-
}
512-
513-
const unsigned remainder = len & align_mask;
514-
if (remainder > 0) {
515-
const size_t offset = len - remainder;
516-
if (contains_non_ascii_slow(src + offset, remainder))
517-
return true;
518-
}
519-
520-
return false;
521-
}
522-
523-
524471
static void force_ascii_slow(const char* src, char* dst, size_t len) {
525472
for (size_t i = 0; i < len; ++i) {
526473
dst[i] = src[i] & 0x7f;
@@ -634,7 +581,8 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
634581
}
635582

636583
case ASCII:
637-
if (contains_non_ascii(buf, buflen)) {
584+
if (simdutf::validate_ascii_with_errors(buf, buflen).error) {
585+
// The input contains non-ASCII bytes.
638586
char* out = node::UncheckedMalloc(buflen);
639587
if (out == nullptr) {
640588
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);

0 commit comments

Comments
 (0)