nodejs · nodejs-github-bot · Dec 3, 2024 · Oct 5, 2024 · Oct 5, 2024 · Oct 5, 2024
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
@@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
 const kEncoder = Symbol('encoder');
 const kFatal = Symbol('kFatal');
 const kUTF8FastPath = Symbol('kUTF8FastPath');
+const kLatin1FastPath = Symbol('kLatin1FastPath');
 const kIgnoreBOM = Symbol('kIgnoreBOM');
 
 const {
@@ -55,6 +56,7 @@ const {
   encodeIntoResults,
   encodeUtf8String,
   decodeUTF8,
+  decodeLatin1,
 } = binding;
 
 const { Buffer } = require('buffer');
@@ -419,10 +421,13 @@ function makeTextDecoderICU() {
       this[kFatal] = Boolean(options?.fatal);
       // Only support fast path for UTF-8.
       this[kUTF8FastPath] = enc === 'utf-8';
+      this[kLatin1FastPath] = enc === 'windows-1252';
 Modern Web browsers follow the [WHATWG Encoding Standard][], which aliases
 both `'latin1'` and `'ISO-8859-1'` to `'win-1252'`. This means that when doing
 something like `http.get()`, if the returned charset is one of those listed in
 the WHATWG specification, the server may actually have returned
 `'win-1252'`-encoded data, and decoding it with `'latin1'` may produce
 incorrect characters.
 Modern Web browsers follow the [WHATWG Encoding Standard][], which aliases
 both `'latin1'` and `'ISO-8859-1'` to `'win-1252'`. This means that when doing
 something like `http.get()`, if the returned charset is one of those listed in
 the WHATWG specification, the server may actually have returned
 `'win-1252'`-encoded data, and decoding it with `'latin1'` may produce
 incorrect characters.
       this[kHandle] = undefined;
 
-      if (!this[kUTF8FastPath]) {
-        this.#prepareConverter();
+      if (this[kUTF8FastPath]) {
+        decodeUTF8(this.input, this[kIgnoreBOM], this[kFatal]);
+      } else if (this[kLatin1FastPath]) {
+        decodeLatin1(this.input);
       }
     }
 
@@ -443,6 +448,10 @@ function makeTextDecoderICU() {
         return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
       }
 
+      if (this[kLatin1FastPath]) {
+        return decodeLatin1(input);
+      }
+
       this.#prepareConverter();
 
       validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc
@@ -1,6 +1,7 @@
 #include "encoding_binding.h"
 #include "ada.h"
 #include "env-inl.h"
+#include "node_buffer.h"
 #include "node_errors.h"
 #include "node_external_reference.h"
 #include "simdutf.h"
@@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
   SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
   SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
   SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
+  SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
 }
 
 void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -243,6 +245,44 @@ void BindingData::RegisterTimerExternalReferences(
   registry->Register(DecodeUTF8);
   registry->Register(ToASCII);
   registry->Register(ToUnicode);
+  registry->Register(DecodeLatin1);
+}
+
+// Decodes the Latin-1 (ISO-8859-1) bytes in args[0] to a JS string; throws
+// ERR_INVALID_ARG_TYPE unless args[0] is an (Shared)ArrayBuffer or a view.
+void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+  CHECK_GE(args.Length(), 1);
+  if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
+        args[0]->IsArrayBufferView())) {
+    return node::THROW_ERR_INVALID_ARG_TYPE(
+        env->isolate(),
+        "The \"input\" argument must be an instance of ArrayBuffer, "
+        "SharedArrayBuffer, or ArrayBufferView.");
+  }
+
+  ArrayBufferViewContents<uint8_t> buffer(args[0]);
+  const uint8_t* data = buffer.data();
+  size_t length = buffer.length();
+  if (length == 0) {
+    return args.GetReturnValue().SetEmptyString();
+  }
+
+  // Each Latin-1 byte expands to at most two UTF-8 bytes.
+  std::string result(length * 2, '\0');
+  size_t written = simdutf::convert_latin1_to_utf8(
+      reinterpret_cast<const char*>(data), length, result.data());
+  if (written == 0) {
+    return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
+        env->isolate(), "The encoded data was not valid for encoding latin1");
+  }
+  result.resize(written);
+
+  // Return a string, not a Buffer: the JS decode() hands this value to users.
+  Local<Value> ret;
+  if (ToV8Value(env->context(), result, env->isolate()).ToLocal(&ret)) {
+    args.GetReturnValue().Set(ret);
+  }
 }
 
 }  // namespace encoding_binding

diff --git a/src/encoding_binding.h b/src/encoding_binding.h
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
   static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
+  static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
 
   static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

diff --git a/test/cctest/test_encoding_binding.cc b/test/cctest/test_encoding_binding.cc
@@ -0,0 +1,75 @@
+#include "encoding_binding.h"
+#include "env-inl.h"
+#include "gtest/gtest.h"
+#include "node_test_fixture.h"
+#include "v8.h"
+
+namespace node {
+namespace encoding_binding {
+
+// Calls BindingData::DecodeLatin1 with args[0]. On success stores the return
+// value in *result and returns true; returns false if the call threw.
+bool RunDecodeLatin1(Environment* env,
+                     v8::Local<v8::Value> args[],
+                     v8::Local<v8::Value>* result) {
+  v8::Isolate* isolate = env->isolate();
+  v8::Local<v8::Context> context = env->context();
+  v8::TryCatch try_catch(isolate);
+  // FunctionCallbackInfo cannot be built by hand; invoke through a Function.
+  v8::Local<v8::Function> fn;
+  if (!v8::Function::New(context, BindingData::DecodeLatin1).ToLocal(&fn)) {
+    return false;
+  }
+  return fn->Call(context, v8::Undefined(isolate), 1, args).ToLocal(result);
+}
+
+class EncodingBindingTest : public NodeTestFixture {};  // gtest fixture
+
+// Decoding the Latin-1 bytes C1 E9 F3 must produce the string "Áéó".
+TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) {
+  Environment* env = CreateEnvironment();
+  v8::Isolate* isolate = env->isolate();
+  v8::HandleScope handle_scope(isolate);
+
+  const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3};
+  v8::Local<v8::ArrayBuffer> ab =
+      v8::ArrayBuffer::New(isolate, sizeof(latin1_data));
+  memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));
+  v8::Local<v8::Value> args[] = {
+      v8::Uint8Array::New(ab, 0, sizeof(latin1_data))};
+  v8::Local<v8::Value> result;
+  // ASSERT, not EXPECT: `result` is only meaningful when the call succeeded.
+  ASSERT_TRUE(RunDecodeLatin1(env, args, &result));
+  v8::String::Utf8Value utf8_result(isolate, result);
+  EXPECT_STREQ(*utf8_result, "Áéó");
+}
+
+// A zero-length view must decode to the empty string.
+TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) {
+  Environment* env = CreateEnvironment();
+  v8::Isolate* isolate = env->isolate();
+  v8::HandleScope handle_scope(isolate);
+
+  v8::Local<v8::ArrayBuffer> ab = v8::ArrayBuffer::New(isolate, 0);
+  v8::Local<v8::Value> args[] = {v8::Uint8Array::New(ab, 0, 0)};
+
+  v8::Local<v8::Value> result;
+  // ASSERT, not EXPECT: `result` is only meaningful when the call succeeded.
+  ASSERT_TRUE(RunDecodeLatin1(env, args, &result));
+  v8::String::Utf8Value utf8_result(isolate, result);
+  EXPECT_STREQ(*utf8_result, "");
+}
+
+// A string argument is not a buffer type, so the call must report failure.
+TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) {
+  Environment* env = CreateEnvironment();
+  Isolate* isolate = env->isolate();
+  HandleScope handle_scope(isolate);
+  Local<Value> args[] = {String::NewFromUtf8Literal(isolate, "Invalid input")};
+  Local<Value> result;
+  const bool decoded = RunDecodeLatin1(env, args, &result);
+  EXPECT_FALSE(decoded);
+}
+
+}  // namespace encoding_binding
+}  // namespace node