Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 84 additions & 137 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const {
ERR_INVALID_THIS,
ERR_NO_ICU,
} = require('internal/errors').codes;
const kMethod = Symbol('method');
const kSingleByte = Symbol('single-byte');
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
Expand Down Expand Up @@ -52,6 +52,8 @@ const {
validateObject,
kValidateObjectAllowObjectsAndNull,
} = require('internal/validators');

const { hasIntl } = internalBinding('config');
const binding = internalBinding('encoding_binding');
const {
encodeInto,
Expand Down Expand Up @@ -405,166 +407,111 @@ function parseInput(input) {
}
}

// Choose the TextDecoder implementation once, at module load: the class built
// by makeTextDecoderICU() when the `hasIntl` config flag is truthy, otherwise
// the class built by makeTextDecoderJS().
const TextDecoder =
internalBinding('config').hasIntl ?
makeTextDecoderICU() :
makeTextDecoderJS();

function makeTextDecoderICU() {
const {
decode: _decode,
getConverter,
} = internalBinding('icu');

class TextDecoder {
constructor(encoding = 'utf-8', options = kEmptyObject) {
encoding = `${encoding}`;
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

const enc = getEncodingFromLabel(encoding);
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

let flags = 0;
if (options !== null) {
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kHandle] = undefined;
this[kMethod] = undefined;

if (isSinglebyteEncoding(this.encoding)) {
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
} else if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}

// Creates the ICU converter handle for this decoder on first use and caches
// it in this[kHandle]; later calls return immediately.
// Throws ERR_ENCODING_NOT_SUPPORTED when getConverter() yields no handle.
#prepareConverter() {
if (this[kHandle] !== undefined) return;
let icuEncoding = this[kEncoding];
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
const handle = getConverter(icuEncoding, this[kFlags]);
if (handle === undefined)
// Report the encoding stored on the decoder, not the substituted ICU name.
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
this[kHandle] = handle;
}
// Entry points of the 'icu' internal binding. They are only assigned when
// `hasIntl` is truthy; otherwise both remain undefined.
let icuDecode, icuGetConverter;
if (hasIntl) {
// Destructuring assignment into the existing bindings declared above.
;({
decode: icuDecode,
getConverter: icuGetConverter,
} = internalBinding('icu'));
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
const kBOMSeen = Symbol('BOM seen');

if (this[kMethod]) return this[kMethod](parseInput(input));
// Cached `StringDecoder` constructor; stays undefined until first requested.
let StringDecoder;

/**
 * Returns the `StringDecoder` class, requiring the `string_decoder` module
 * only on the first call and reusing the cached constructor afterwards.
 * @returns {Function} The `StringDecoder` constructor.
 */
function lazyStringDecoder() {
  if (StringDecoder !== undefined) return StringDecoder;
  StringDecoder = require('string_decoder').StringDecoder;
  return StringDecoder;
}
Comment on lines +420 to +425
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a lazy utility in the internal utils.

Copy link
Member Author

@ChALkeR ChALkeR Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@avivkeller this is not new code; it has just been moved to an outer scope.
Ideally, all of that should go away with follow-up fixes, since the string_decoder path is invalid anyway.
I don't think it's worth refactoring it further.


this[kUTF8FastPath] &&= !(options?.stream);
class TextDecoder {
constructor(encoding = 'utf-8', options = kEmptyObject) {
encoding = `${encoding}`;
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}
const enc = getEncodingFromLabel(encoding);
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

this.#prepareConverter();

let flags = 0;
if (options !== null)
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
let flags = 0;
if (options !== null) {
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

return _decode(this[kHandle], input, flags, this.encoding);
this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
this[kUTF8FastPath] = false;
this[kHandle] = undefined;
this[kSingleByte] = undefined; // Does not care about streaming or BOM

if (enc === 'utf-8') {
this[kUTF8FastPath] = true;
} else if (isSinglebyteEncoding(enc)) {
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
} else {
this.#prepareConverter(); // Need to throw early if we don't support the encoding
}
}

return TextDecoder;
}

function makeTextDecoderJS() {
let StringDecoder;
function lazyStringDecoder() {
if (StringDecoder === undefined)
({ StringDecoder } = require('string_decoder'));
return StringDecoder;
// Lazily creates the decoding backend for this[kEncoding] and stores it in
// this[kHandle]; does nothing when a handle already exists.
// Throws ERR_ENCODING_NOT_SUPPORTED when no backend can handle the encoding,
// and ERR_NO_ICU when `fatal` was requested on the StringDecoder path.
#prepareConverter() {
if (this[kHandle] !== undefined) return;
if (hasIntl) {
// ICU path: obtain a converter handle from the 'icu' binding.
let icuEncoding = this[kEncoding];
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
const handle = icuGetConverter(icuEncoding, this[kFlags]);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
this[kHandle] = handle;
} else if (this[kEncoding] === 'utf-8' || this[kEncoding] === 'utf-16le') {
// Without ICU, only these two encodings get a StringDecoder-backed handle.
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
this[kHandle] = new (lazyStringDecoder())(this[kEncoding]);
// Reset BOM tracking together with the freshly created handle.
this[kBOMSeen] = false;
} else {
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
}
}

const kBOMSeen = Symbol('BOM seen');

/**
 * Reports whether `encoding` is one of the two encodings accepted here.
 * @param {string} encoding Encoding name to test.
 * @returns {boolean} `true` for exactly 'utf-8' or 'utf-16le', else `false`.
 */
function hasConverter(encoding) {
  switch (encoding) {
    case 'utf-8':
    case 'utf-16le':
      return true;
    default:
      return false;
  }
}
decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

class TextDecoder {
constructor(encoding = 'utf-8', options = kEmptyObject) {
encoding = `${encoding}`;
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
if (this[kSingleByte]) return this[kSingleByte](parseInput(input));

const enc = getEncodingFromLabel(encoding);
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
const stream = options?.stream;
if (this[kUTF8FastPath]) {
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
this[kUTF8FastPath] = false;
}

let flags = 0;
if (options !== null) {
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}
this.#prepareConverter();

this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
this[kBOMSeen] = false;
this[kMethod] = undefined;

if (isSinglebyteEncoding(enc)) {
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
} else {
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
this[kHandle] = new (lazyStringDecoder())(enc);
}
if (hasIntl) {
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
return icuDecode(this[kHandle], input, flags, this[kEncoding]);
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
input = parseInput(input);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
input = parseInput(input);

if (this[kMethod]) return this[kMethod](input);
let result = stream ? this[kHandle].write(input) : this[kHandle].end(input);

if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
this[kBOMSeen] = false;
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
// If the very first result in the stream is a BOM, and we are not
// explicitly told to ignore it, then we discard it.
if (result[0] === '\ufeff') {
result = StringPrototypeSlice(result, 1);
}
this[kBOMSeen] = true;
}

if (options !== null && options.stream) {
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
} else {
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
}
if (!stream) this[kBOMSeen] = false;

let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
this[kHandle].end(input) :
this[kHandle].write(input);

if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
// If the very first result in the stream is a BOM, and we are not
// explicitly told to ignore it, then we discard it.
if (result[0] === '\ufeff') {
result = StringPrototypeSlice(result, 1);
}
this[kBOMSeen] = true;
}
return result;

return result;
}
}

return TextDecoder;
}

// Mix in some shared properties.
Expand Down
Loading