From 2977ef01604bec1a1aaf00c5777edde50bbe4903 Mon Sep 17 00:00:00 2001 From: woustachemax Date: Thu, 8 Jan 2026 19:58:41 +0530 Subject: [PATCH] Fix encoding canonical names to match WHATWG spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes both implementation and tests to use canonical encoding names: - 'ibm-866' → 'ibm866' (remove hyphen) - 'shift-jis' → 'shift_jis' (use underscore) Per the WHATWG Encoding Standard, TextDecoder.encoding returns the encoding's canonical name in ASCII lowercase, regardless of which label was used to construct the decoder. Fixes #5663 --- src/workerd/api/encoding.h | 4 ++-- src/workerd/api/tests/encoding-test.js | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/workerd/api/encoding.h b/src/workerd/api/encoding.h index 732ee916cba..b1843a8228a 100644 --- a/src/workerd/api/encoding.h +++ b/src/workerd/api/encoding.h @@ -16,7 +16,7 @@ namespace workerd::api { // the second label is the public identifier. 
#define EW_ENCODINGS(V) \ V(Utf8, "utf-8") \ - V(Ibm866, "ibm-866") \ + V(Ibm866, "ibm866") \ V(Iso8859_2, "iso-8859-2") \ V(Iso8859_3, "iso-8859-3") \ V(Iso8859_4, "iso-8859-4") \ @@ -49,7 +49,7 @@ namespace workerd::api { V(Big5, "big5") \ V(Euc_Jp, "euc-jp") \ V(Iso2022_Jp, "iso-2022-jp") \ - V(Shift_Jis, "shift-jis") \ + V(Shift_Jis, "shift_jis") \ V(Euc_Kr, "euc-kr") \ V(Replacement, "replacement") \ V(Utf16be, "utf-16be") \ diff --git a/src/workerd/api/tests/encoding-test.js b/src/workerd/api/tests/encoding-test.js index fb06649f194..2e1c1afdf83 100644 --- a/src/workerd/api/tests/encoding-test.js +++ b/src/workerd/api/tests/encoding-test.js @@ -401,10 +401,10 @@ export const allTheDecoders = { ['utf-8', 'utf-8'], ['utf8', 'utf-8'], ['x-unicode20utf8', 'utf-8'], - ['866', 'ibm-866'], - ['cp866', 'ibm-866'], - ['csibm866', 'ibm-866'], - ['ibm866', 'ibm-866'], + ['866', 'ibm866'], + ['cp866', 'ibm866'], + ['csibm866', 'ibm866'], + ['ibm866', 'ibm866'], ['csisolatin2', 'iso-8859-2'], ['iso-8859-2', 'iso-8859-2'], ['iso-ir-101', 'iso-8859-2'], @@ -589,14 +589,14 @@ export const allTheDecoders = { ['x-euc-jp', 'euc-jp'], ['csiso2022jp', 'iso-2022-jp'], ['iso-2022-jp', 'iso-2022-jp'], - ['csshiftjis', 'shift-jis'], - ['ms932', 'shift-jis'], - ['ms_kanji', 'shift-jis'], - ['shift-jis', 'shift-jis'], - ['shift_jis', 'shift-jis'], - ['sjis', 'shift-jis'], - ['windows-31j', 'shift-jis'], - ['x-sjis', 'shift-jis'], + ['csshiftjis', 'shift_jis'], + ['ms932', 'shift_jis'], + ['ms_kanji', 'shift_jis'], + ['shift-jis', 'shift_jis'], + ['shift_jis', 'shift_jis'], + ['sjis', 'shift_jis'], + ['windows-31j', 'shift_jis'], + ['x-sjis', 'shift_jis'], ['cseuckr', 'euc-kr'], ['csksc56011987', 'euc-kr'], ['euc-kr', 'euc-kr'],