From b12ba5832c311a8d7e3268be73a67424148aabc9 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 12:39:21 +0300 Subject: [PATCH 01/15] util: add fast path for Latin1 decoding --- lib/internal/encoding.js | 5 ++ src/encoding_binding.cc | 40 +++++++++++++++ src/encoding_binding.h | 1 + test/cctest/test_encoding_binding.cc | 75 ++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+) create mode 100644 test/cctest/test_encoding_binding.cc diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 252eaa75fac22b..92329040ff167e 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -55,6 +55,7 @@ const { encodeIntoResults, encodeUtf8String, decodeUTF8, + decodeLatin1, } = binding; const { Buffer } = require('buffer'); @@ -443,6 +444,10 @@ function makeTextDecoderICU() { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); } + if (this[kEncoding] === 'windows-1252') { + return decodeLatin1(input); + } + this.#prepareConverter(); validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 97ddd59fb661c8..ced4fbaf103141 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -1,6 +1,7 @@ #include "encoding_binding.h" #include "ada.h" #include "env-inl.h" +#include "node_buffer.h" #include "node_errors.h" #include "node_external_reference.h" #include "simdutf.h" @@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data, SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8); SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII); SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode); + SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1); } void BindingData::CreatePerContextProperties(Local target, @@ -243,6 +245,44 @@ void BindingData::RegisterTimerExternalReferences( registry->Register(DecodeUTF8); registry->Register(ToASCII); registry->Register(ToUnicode); + registry->Register(DecodeLatin1); +} + +void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + CHECK_GE(args.Length(), 1); + if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || + args[0]->IsArrayBufferView())) { + return node::THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"input\" argument must be an instance of ArrayBuffer, " + "SharedArrayBuffer, or ArrayBufferView."); + } + + ArrayBufferViewContents buffer(args[0]); + const uint8_t* data = buffer.data(); + size_t length = buffer.length(); + + if (length == 0) { + return args.GetReturnValue().SetEmptyString(); + } + + std::string result(length * 2, '\0'); + + size_t written = simdutf::convert_latin1_to_utf8( + reinterpret_cast(data), length, &result[0]); + + if (written == 0) { + return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( + env->isolate(), "The encoded data was not valid for encoding latin1"); + } + + result.resize(written); + + Local buffer_result = + node::Buffer::Copy(env, result.c_str(), result.length()).ToLocalChecked(); + args.GetReturnValue().Set(buffer_result); } } // namespace encoding_binding diff --git a/src/encoding_binding.h b/src/encoding_binding.h index 2690cb74f8a05b..97f55394d27641 100644 --- a/src/encoding_binding.h +++ b/src/encoding_binding.h @@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject { static void EncodeInto(const v8::FunctionCallbackInfo& args); static void EncodeUtf8String(const v8::FunctionCallbackInfo& args); static void DecodeUTF8(const v8::FunctionCallbackInfo& args); + static void DecodeLatin1(const v8::FunctionCallbackInfo& args); static void ToASCII(const v8::FunctionCallbackInfo& args); static void ToUnicode(const v8::FunctionCallbackInfo& args); diff --git a/test/cctest/test_encoding_binding.cc b/test/cctest/test_encoding_binding.cc new file mode 100644 index 00000000000000..daf26d98ad8684 --- /dev/null +++ b/test/cctest/test_encoding_binding.cc @@ -0,0 +1,75 @@ +#include "encoding_binding.h" +#include "env-inl.h" +#include "gtest/gtest.h" +#include "node_test_fixture.h" +#include "v8.h" + +namespace node { +namespace encoding_binding { + +bool RunDecodeLatin1(Environment* env, + Local args[], + Local* result) { + Isolate* isolate = env->isolate(); + TryCatch try_catch(isolate); + + BindingData::DecodeLatin1(FunctionCallbackInfo(args)); + + if (try_catch.HasCaught()) { + return false; + } + + *result = try_catch.Exception(); + return true; +} + +class EncodingBindingTest : public NodeTestFixture {}; + +TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + Local ab = ArrayBuffer::New(isolate, 0); + Local array = Uint8Array::New(ab, 0, 0); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, ""); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + Local args[] = {String::NewFromUtf8Literal(isolate, "Invalid input")}; + + Local result; + EXPECT_FALSE(RunDecodeLatin1(env, args, &result)); +} + +} // namespace encoding_binding +} // namespace node From 3c239bd6a9afd989443033d774efd3a767e9cf84 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 17:12:56 +0300 Subject: [PATCH 02/15] handle input correctly in decode functions --- lib/internal/encoding.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 92329040ff167e..479dea3b8156bd 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder'); const kEncoder = Symbol('encoder'); const kFatal = Symbol('kFatal'); const kUTF8FastPath = Symbol('kUTF8FastPath'); +const kLatin1FastPath = Symbol('kLatin1FastPath'); const kIgnoreBOM = Symbol('kIgnoreBOM'); const { @@ -420,10 +421,13 @@ function makeTextDecoderICU() { this[kFatal] = Boolean(options?.fatal); // Only support fast path for UTF-8. this[kUTF8FastPath] = enc === 'utf-8'; + this[kLatin1FastPath] = enc === 'windows-1252'; this[kHandle] = undefined; - if (!this[kUTF8FastPath]) { - this.#prepareConverter(); + if (this[kUTF8FastPath]) { + decodeUTF8(this.input, this[kIgnoreBOM], this[kFatal]); + } else if (this[kLatin1FastPath]) { + decodeLatin1(this.input); } } @@ -444,7 +448,7 @@ function makeTextDecoderICU() { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); } - if (this[kEncoding] === 'windows-1252') { + if (this[kLatin1FastPath]) { return decodeLatin1(input); } From 17f317e7959cbe6977575e1f52a984feda77c219 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 18:35:46 +0300 Subject: [PATCH 03/15] repair --- lib/internal/encoding.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 479dea3b8156bd..3c00cc05727680 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -424,10 +424,8 @@ function makeTextDecoderICU() { this[kLatin1FastPath] = enc === 'windows-1252'; this[kHandle] = undefined; - if (this[kUTF8FastPath]) { - decodeUTF8(this.input, this[kIgnoreBOM], this[kFatal]); - } else if (this[kLatin1FastPath]) { - decodeLatin1(this.input); + if (!this[kUTF8FastPath]) { + this.#prepareConverter(); } } From e0a40c6b9bdf335097b00a7c007949cd660557b3 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 19:01:59 +0300 Subject: [PATCH 04/15] Update lib/internal/encoding.js Co-authored-by: Yagiz Nizipli --- lib/internal/encoding.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 3c00cc05727680..691df7b5eb3127 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -424,7 +424,7 @@ function makeTextDecoderICU() { this[kLatin1FastPath] = enc === 'windows-1252'; this[kHandle] = undefined; - if (!this[kUTF8FastPath]) { + if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) { this.#prepareConverter(); } } From b7420b55210a745f68bef130fc707d28ff8e3eee Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 19:14:46 +0300 Subject: [PATCH 05/15] add ignoreBOM and fatal support to decodeLatin1 --- lib/internal/encoding.js | 3 ++- src/encoding_binding.cc | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 691df7b5eb3127..f03f6b5b9b49ca 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -441,13 +441,14 @@ function makeTextDecoderICU() { validateDecoder(this); this[kUTF8FastPath] &&= !(options?.stream); + this[kLatin1FastPath] &&= !(options?.stream); if (this[kUTF8FastPath]) { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); } if (this[kLatin1FastPath]) { - return decodeLatin1(input); + return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]); } this.#prepareConverter(); diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index ced4fbaf103141..29947d20606b1d 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -260,10 +260,18 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { "SharedArrayBuffer, or ArrayBufferView."); } + bool ignore_bom = args[1]->IsTrue(); + bool has_fatal = args[2]->IsTrue(); + ArrayBufferViewContents buffer(args[0]); const uint8_t* data = buffer.data(); size_t length = buffer.length(); + if (ignore_bom && length > 0 && data[0] == 0xFEFF) { + data++; + length--; + } + if (length == 0) { return args.GetReturnValue().SetEmptyString(); } @@ -273,7 +281,7 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { size_t written = simdutf::convert_latin1_to_utf8( reinterpret_cast(data), length, &result[0]); - if (written == 0) { + if (has_fatal && written == 0) { return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( env->isolate(), "The encoded data was not valid for encoding latin1"); } From 8bc747ec4002a6e99025f865447aa0db00d7c912 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 19:18:27 +0300 Subject: [PATCH 06/15] lint --- lib/internal/encoding.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index f03f6b5b9b49ca..b2ca3c612bf6ef 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -441,7 +441,7 @@ function makeTextDecoderICU() { validateDecoder(this); this[kUTF8FastPath] &&= !(options?.stream); - this[kLatin1FastPath] &&= !(options?.stream); + this[kLatin1FastPath] &&= !(options?.stream); if (this[kUTF8FastPath]) { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); From fced029095e9f9ff81e3a7d1b45a7e42cf1ced99 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 19:28:10 +0300 Subject: [PATCH 07/15] update tests to support ignoreBOM and fatal for decodeLatin1 --- test/cctest/test_encoding_binding.cc | 69 ++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/test/cctest/test_encoding_binding.cc b/test/cctest/test_encoding_binding.cc index daf26d98ad8684..2cc62028f7b486 100644 --- a/test/cctest/test_encoding_binding.cc +++ b/test/cctest/test_encoding_binding.cc @@ -9,11 +9,18 @@ namespace encoding_binding { bool RunDecodeLatin1(Environment* env, Local args[], + bool ignore_bom, + bool has_fatal, Local* result) { Isolate* isolate = env->isolate(); TryCatch try_catch(isolate); - BindingData::DecodeLatin1(FunctionCallbackInfo(args)); + Local ignoreBOMValue = Boolean::New(isolate, ignore_bom); + Local fatalValue = Boolean::New(isolate, has_fatal); + + Local updatedArgs[] = {args[0], ignoreBOMValue, fatalValue}; + + BindingData::DecodeLatin1(FunctionCallbackInfo(updatedArgs)); if (try_catch.HasCaught()) { return false; @@ -38,7 +45,7 @@ TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) { Local args[] = {array}; Local result; - EXPECT_TRUE(RunDecodeLatin1(env, args, &result)); + EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); String::Utf8Value utf8_result(isolate, result); EXPECT_STREQ(*utf8_result, "Áéó"); @@ -54,7 +61,7 @@ TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) { Local args[] = {array}; Local result; - EXPECT_TRUE(RunDecodeLatin1(env, args, &result)); + EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); String::Utf8Value utf8_result(isolate, result); EXPECT_STREQ(*utf8_result, ""); @@ -68,7 +75,61 @@ TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) { Local args[] = {String::NewFromUtf8Literal(isolate, "Invalid input")}; Local result; - EXPECT_FALSE(RunDecodeLatin1(env, args, &result)); + EXPECT_FALSE(RunDecodeLatin1(env, args, false, false, &result)); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOM) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_FatalInvalidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t invalid_data[] = {0xFF, 0xFF, 0xFF}; + Local ab = ArrayBuffer::New(isolate, sizeof(invalid_data)); + memcpy(ab->GetBackingStore()->Data(), invalid_data, sizeof(invalid_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(invalid_data)); + Local args[] = {array}; + + Local result; + EXPECT_FALSE(RunDecodeLatin1(env, args, false, true, &result)); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, true, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); } } // namespace encoding_binding From af7cca5b76d74f75cb55c4b6677181492925f242 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 19:36:17 +0300 Subject: [PATCH 08/15] lint --- src/encoding_binding.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 29947d20606b1d..1a6e15716c9112 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -267,7 +267,7 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { const uint8_t* data = buffer.data(); size_t length = buffer.length(); - if (ignore_bom && length > 0 && data[0] == 0xFEFF) { + if (ignore_bom && length > 0 && data[0] == 0xFF) { data++; length--; } From 659f1b69e5debb21f1d9d92b76545ab71d1d77f9 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 19:36:56 +0300 Subject: [PATCH 09/15] correct BOM check in decodeLatin1 for uint8_t comparison --- src/encoding_binding.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 1a6e15716c9112..29947d20606b1d 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -267,7 +267,7 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { const uint8_t* data = buffer.data(); size_t length = buffer.length(); - if (ignore_bom && length > 0 && data[0] == 0xFF) { + if (ignore_bom && length > 0 && data[0] == 0xFEFF) { data++; length--; } From b92e571f04983a7cf029649e9b2fef5db312bb75 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 5 Oct 2024 19:45:21 +0300 Subject: [PATCH 10/15] add BOM test case to cover 0xFF in decodeLatin1 --- src/encoding_binding.cc | 2 +- test/cctest/test_encoding_binding.cc | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 29947d20606b1d..1a6e15716c9112 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -267,7 +267,7 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { const uint8_t* data = buffer.data(); size_t length = buffer.length(); - if (ignore_bom && length > 0 && data[0] == 0xFEFF) { + if (ignore_bom && length > 0 && data[0] == 0xFF) { data++; length--; } diff --git a/test/cctest/test_encoding_binding.cc b/test/cctest/test_encoding_binding.cc index 2cc62028f7b486..06cc36d8f6ae34 100644 --- a/test/cctest/test_encoding_binding.cc +++ b/test/cctest/test_encoding_binding.cc @@ -132,5 +132,24 @@ TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) { EXPECT_STREQ(*utf8_result, "Áéó"); } +TEST_F(EncodingBindingTest, DecodeLatin1_BOMPresent) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + } // namespace encoding_binding } // namespace node From 94109bf0d72562c086174c7630bbe9f7fa04395c Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sun, 6 Oct 2024 07:53:53 +0300 Subject: [PATCH 11/15] Update encoding_binding.cc Co-authored-by: Yagiz Nizipli --- src/encoding_binding.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 1a6e15716c9112..2304314078c210 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -289,7 +289,7 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { result.resize(written); Local buffer_result = - node::Buffer::Copy(env, result.c_str(), result.length()).ToLocalChecked(); + node::Buffer::Copy(env, result.c_str(), written).ToLocalChecked(); args.GetReturnValue().Set(buffer_result); } From 6dec37f5230e33b46ca12b256a147efd118d9815 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sun, 6 Oct 2024 07:59:00 +0300 Subject: [PATCH 12/15] delete result.resize --- src/encoding_binding.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 2304314078c210..cfac8a58604f66 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -286,8 +286,6 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { env->isolate(), "The encoded data was not valid for encoding latin1"); } - result.resize(written); - Local buffer_result = node::Buffer::Copy(env, result.c_str(), written).ToLocalChecked(); args.GetReturnValue().Set(buffer_result); From f62f4c46aa7e2f26a23995b48702b2366da0bf85 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Mon, 7 Oct 2024 22:31:19 +0300 Subject: [PATCH 13/15] Update encoding_binding.cc Co-authored-by: Yagiz Nizipli --- src/encoding_binding.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index cfac8a58604f66..b7b88488efa75f 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -279,7 +279,7 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { std::string result(length * 2, '\0'); size_t written = simdutf::convert_latin1_to_utf8( - reinterpret_cast(data), length, &result[0]); + reinterpret_cast(data), length, &result.begin()); if (has_fatal && written == 0) { return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( From f96e2ef15559370f3902b1d225370feef96b7e7c Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Mon, 7 Oct 2024 23:00:45 +0300 Subject: [PATCH 14/15] Update encoding_binding.cc Co-authored-by: Daniel Lemire --- src/encoding_binding.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index b7b88488efa75f..a132eeb62306c6 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -279,7 +279,7 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { std::string result(length * 2, '\0'); size_t written = simdutf::convert_latin1_to_utf8( - reinterpret_cast(data), length, &result.begin()); + reinterpret_cast(data), length, result.data()); if (has_fatal && written == 0) { return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( From 77bc5eeec288eb305a08269ef9b0f6a68f42e3b8 Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Tue, 8 Oct 2024 15:50:14 +0300 Subject: [PATCH 15/15] update text-decoder to use windows-1252 encoding --- benchmark/util/text-decoder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js index dd4f02016df077..1aa60f2dd0bcd6 100644 --- a/benchmark/util/text-decoder.js +++ b/benchmark/util/text-decoder.js @@ -3,7 +3,7 @@ const common = require('../common.js'); const bench = common.createBenchmark(main, { - encoding: ['utf-8', 'latin1', 'iso-8859-3'], + encoding: ['utf-8', 'windows-1252', 'iso-8859-3'], ignoreBOM: [0, 1], fatal: [0, 1], len: [256, 1024 * 16, 1024 * 128],