Skip to content

Commit d267ac2

Browse files
authored
bpo-36778: cp65001 encoding becomes an alias to utf_8 (GH-13230)
1 parent 137be34 commit d267ac2

File tree

5 files changed

+4
-134
lines changed

5 files changed

+4
-134
lines changed

Doc/library/codecs.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -1106,8 +1106,7 @@ particular, the following variants typically exist:
11061106
+-----------------+--------------------------------+--------------------------------+
11071107
| cp1258 | windows-1258 | Vietnamese |
11081108
+-----------------+--------------------------------+--------------------------------+
1109-
| cp65001 | | Windows only: Windows UTF-8 |
1110-
| | | (``CP_UTF8``) |
1109+
| cp65001 | | Alias to ``utf_8`` encoding |
11111110
| | | |
11121111
| | | .. versionadded:: 3.3 |
11131112
+-----------------+--------------------------------+--------------------------------+

Lib/encodings/aliases.py

+1
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@
534534
'utf8' : 'utf_8',
535535
'utf8_ucs2' : 'utf_8',
536536
'utf8_ucs4' : 'utf_8',
537+
'cp65001' : 'utf_8',
537538

538539
# uu_codec codec
539540
'uu' : 'uu_codec',

Lib/encodings/cp65001.py

-43
This file was deleted.

Lib/test/test_codecs.py

-89
Original file line numberDiff line numberDiff line change
@@ -875,95 +875,6 @@ def test_surrogatepass_handler(self):
875875
b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")
876876

877877

878-
@unittest.skipUnless(sys.platform == 'win32',
879-
'cp65001 is a Windows-only codec')
880-
class CP65001Test(ReadTest, unittest.TestCase):
881-
encoding = "cp65001"
882-
883-
def test_encode(self):
884-
tests = [
885-
('abc', 'strict', b'abc'),
886-
('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
887-
('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
888-
('\udc80', 'strict', None),
889-
('\udc80', 'ignore', b''),
890-
('\udc80', 'replace', b'?'),
891-
('\udc80', 'backslashreplace', b'\\udc80'),
892-
('\udc80', 'namereplace', b'\\udc80'),
893-
('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
894-
]
895-
for text, errors, expected in tests:
896-
if expected is not None:
897-
try:
898-
encoded = text.encode('cp65001', errors)
899-
except UnicodeEncodeError as err:
900-
self.fail('Unable to encode %a to cp65001 with '
901-
'errors=%r: %s' % (text, errors, err))
902-
self.assertEqual(encoded, expected,
903-
'%a.encode("cp65001", %r)=%a != %a'
904-
% (text, errors, encoded, expected))
905-
else:
906-
self.assertRaises(UnicodeEncodeError,
907-
text.encode, "cp65001", errors)
908-
909-
def test_decode(self):
910-
tests = [
911-
(b'abc', 'strict', 'abc'),
912-
(b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),
913-
(b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),
914-
(b'\xef\xbf\xbd', 'strict', '\ufffd'),
915-
(b'[\xc3\xa9]', 'strict', '[\xe9]'),
916-
# invalid bytes
917-
(b'[\xff]', 'strict', None),
918-
(b'[\xff]', 'ignore', '[]'),
919-
(b'[\xff]', 'replace', '[\ufffd]'),
920-
(b'[\xff]', 'surrogateescape', '[\udcff]'),
921-
(b'[\xed\xb2\x80]', 'strict', None),
922-
(b'[\xed\xb2\x80]', 'ignore', '[]'),
923-
(b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
924-
]
925-
for raw, errors, expected in tests:
926-
if expected is not None:
927-
try:
928-
decoded = raw.decode('cp65001', errors)
929-
except UnicodeDecodeError as err:
930-
self.fail('Unable to decode %a from cp65001 with '
931-
'errors=%r: %s' % (raw, errors, err))
932-
self.assertEqual(decoded, expected,
933-
'%a.decode("cp65001", %r)=%a != %a'
934-
% (raw, errors, decoded, expected))
935-
else:
936-
self.assertRaises(UnicodeDecodeError,
937-
raw.decode, 'cp65001', errors)
938-
939-
def test_lone_surrogates(self):
940-
self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
941-
self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
942-
self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"),
943-
b'[\\udc80]')
944-
self.assertEqual("[\uDC80]".encode("cp65001", "namereplace"),
945-
b'[\\udc80]')
946-
self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"),
947-
b'[&#56448;]')
948-
self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"),
949-
b'[\x80]')
950-
self.assertEqual("[\uDC80]".encode("cp65001", "ignore"),
951-
b'[]')
952-
self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
953-
b'[?]')
954-
955-
def test_surrogatepass_handler(self):
956-
self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
957-
b"abc\xed\xa0\x80def")
958-
self.assertEqual(b"abc\xed\xa0\x80def".decode("cp65001", "surrogatepass"),
959-
"abc\ud800def")
960-
self.assertEqual("\U00010fff\uD800".encode("cp65001", "surrogatepass"),
961-
b"\xf0\x90\xbf\xbf\xed\xa0\x80")
962-
self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("cp65001", "surrogatepass"),
963-
"\U00010fff\uD800")
964-
self.assertTrue(codecs.lookup_error("surrogatepass"))
965-
966-
967878
class UTF7Test(ReadTest, unittest.TestCase):
968879
encoding = "utf-7"
969880

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
``cp65001`` encoding (Windows code page 65001) becomes an alias to ``utf_8``
2+
encoding.

0 commit comments

Comments
 (0)