From 8ff7c23f07471f014db2f8508bebfca576c39b02 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 14 Jun 2022 07:25:33 +0300 Subject: [PATCH] gh-91810: Fix regression with writing an XML declaration with encoding='unicode' (GH-93426) Suppress writing an XML declaration in open files in ElementTree.write() with encoding='unicode' and xml_declaration=None. If file path is passed to ElementTree.write() with encoding='unicode', always open a new file in UTF-8. (cherry picked from commit d7db9dc3cc5b44d0b4ce000571fecf58089a01ec) Co-authored-by: Serhiy Storchaka --- Lib/test/test_xml_etree.py | 16 +++------------- Lib/xml/etree/ElementTree.py | 12 +++++------- ...2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst | 2 ++ 3 files changed, 10 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index aea77b192c1006..afa4641e6906b7 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -3739,13 +3739,7 @@ def test_write_to_filename_as_unicode(self): tree = ET.ElementTree(ET.XML('''\xf8''')) tree.write(TESTFN, encoding='unicode') with open(TESTFN, 'rb') as f: - data = f.read() - expected = "\xf8".encode(encoding, 'xmlcharrefreplace') - if encoding.lower() in ('utf-8', 'ascii'): - self.assertEqual(data, expected) - else: - self.assertIn(b"\xc3\xb8") def test_write_to_text_file(self): self.addCleanup(os_helper.unlink, TESTFN) @@ -3760,17 +3754,13 @@ def test_write_to_text_file(self): tree.write(f, encoding='unicode') self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), convlinesep( - b'''\n''' - b'''ø''')) + self.assertEqual(f.read(), b'''ø''') with open(TESTFN, 'w', encoding='ISO-8859-1') as f: tree.write(f, encoding='unicode') self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), convlinesep( - b'''\n''' - b'''\xf8''')) + self.assertEqual(f.read(), 
b'''\xf8''') def test_write_to_binary_file(self): self.addCleanup(os_helper.unlink, TESTFN) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index a5cc65e789c004..1dc80351bf7ddd 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -731,6 +731,7 @@ def write(self, file_or_filename, with _get_writer(file_or_filename, encoding) as (write, declared_encoding): if method == "xml" and (xml_declaration or (xml_declaration is None and + encoding.lower() != "unicode" and declared_encoding.lower() not in ("utf-8", "us-ascii"))): write("\n" % ( declared_encoding,)) @@ -757,13 +758,10 @@ def _get_writer(file_or_filename, encoding): except AttributeError: # file_or_filename is a file name if encoding.lower() == "unicode": - file = open(file_or_filename, "w", - errors="xmlcharrefreplace") - else: - file = open(file_or_filename, "w", encoding=encoding, - errors="xmlcharrefreplace") - with file: - yield file.write, file.encoding + encoding="utf-8" + with open(file_or_filename, "w", encoding=encoding, + errors="xmlcharrefreplace") as file: + yield file.write, encoding else: # file_or_filename is a file-like object # encoding determines if it is a text or binary writer diff --git a/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst b/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst new file mode 100644 index 00000000000000..e40005886afc3e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-06-02-08-40-58.gh-issue-91810.Gtk44w.rst @@ -0,0 +1,2 @@ +Suppress writing an XML declaration in open files in ``ElementTree.write()`` +with ``encoding='unicode'`` and ``xml_declaration=None``.