From 6795f586837d5141f368533bac6197919a9c7e94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cbhuvi27=E2=80=9D?= <b.chouksey27@gmail.com>
Date: Tue, 9 Jun 2026 08:19:09 +0530
Subject: [PATCH 1/3] gh-150771: Fix email serialization for shift_jis and
 euc-jp

Decode surrogate-escaped payloads using the input charset before
re-encoding them to iso-2022-jp, fixing the UnicodeEncodeError raised
when printing messages created with set_content().
---
 Lib/email/charset.py                          | 10 +++++-
 Lib/email/message.py                          |  4 ++-
 Lib/test/test_email/test_contentmanager.py    | 32 +++++++++++++++++++
 ...-06-09-12-00-00.gh-issue-150771.K7mNx2.rst |  3 ++
 4 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst

diff --git a/Lib/email/charset.py b/Lib/email/charset.py
index 5981791820e740c..e3ee13c3912241a 100644
--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
@@ -16,6 +16,7 @@
 import email.quoprimime
 
 from email import errors
+from email import utils
 from email.encoders import encode_7or8bit
 
 
@@ -438,5 +439,12 @@ def body_encode(self, string):
             return email.quoprimime.body_encode(string)
         else:
             if isinstance(string, str):
-                string = string.encode(self.output_charset).decode('ascii')
+                if utils._has_surrogates(string):
+                    string = string.encode('ascii', 'surrogateescape')
+                    if self.input_charset != self.output_charset:
+                        string = (string.decode(self.input_codec)
+                                  .encode(self.output_codec))
+                    string = string.decode('ascii', 'surrogateescape')
+                else:
+                    string = string.encode(self.output_charset).decode('ascii')
             return string
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 641fb2e944d4311..dff113ea407f07c 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -352,7 +352,9 @@ def set_payload(self, payload, charset=None):
                 return
             if not isinstance(charset, Charset):
                 charset = Charset(charset)
-            payload = payload.encode(charset.output_charset, 'surrogateescape')
+            if not utils._has_surrogates(payload):
+                payload = payload.encode(charset.output_charset,
+                                          'surrogateescape')
         if hasattr(payload, 'decode'):
             self._payload = payload.decode('ascii', 'surrogateescape')
         else:
diff --git a/Lib/test/test_email/test_contentmanager.py b/Lib/test/test_email/test_contentmanager.py
index bc0e5d356181591..a28f9c1402984b0 100644
--- a/Lib/test/test_email/test_contentmanager.py
+++ b/Lib/test/test_email/test_contentmanager.py
@@ -355,6 +355,38 @@ def test_set_text_charset_cp949(self):
         self.assertEqual(m.get_payload(decode=True).decode('ks_c_5601-1987'), content)
         self.assertEqual(m.get_content(), content)
 
+    def test_set_text_charset_shift_jis(self):
+        m = self._make_message()
+        content = "\u65e5\u672c\u8a9e\n"
+        raw_data_manager.set_content(m, content, charset='shift_jis')
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="shift_jis"')
+        self.assertEqual(m['Content-Transfer-Encoding'], '8bit')
+        self.assertEqual(m.get_payload(decode=True), content.encode('shift_jis'))
+        self.assertEqual(m.get_content(), content)
+        # Serialization converts the payload to iso-2022-jp for output.
+        self.assertEqual(str(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \x1b$BF|K\\8l\x1b(B
+            """))
+
+    def test_set_text_charset_euc_jp(self):
+        m = self._make_message()
+        content = "\u65e5\u672c\u8a9e\n"
+        raw_data_manager.set_content(m, content, charset='euc-jp')
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="euc-jp"')
+        self.assertEqual(m['Content-Transfer-Encoding'], '8bit')
+        self.assertEqual(m.get_payload(decode=True), content.encode('euc-jp'))
+        self.assertEqual(m.get_content(), content)
+        # Serialization converts the payload to iso-2022-jp for output.
+        self.assertEqual(str(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \x1b$BF|K\\8l\x1b(B
+            """))
+
     def test_set_text_plain_long_line_heuristics(self):
         m = self._make_message()
         content = ("Simple but long message that is over 78 characters"
diff --git a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
new file mode 100644
index 000000000000000..79d724f354f237d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
@@ -0,0 +1,3 @@
+Fix serialization of :mod:`email` messages using ``shift_jis`` or ``euc-jp``
+charsets.  Converting surrogate-escaped payloads to the required
+``iso-2022-jp`` output charset no longer raises :exc:`UnicodeEncodeError`.

From f6c53cb11c9617db20f6bd2f235b7b9fc1b79fa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cbhuvi27=E2=80=9D?= <b.chouksey27@gmail.com>
Date: Sun, 14 Jun 2026 08:15:17 +0530
Subject: [PATCH 2/3] gh-150771: Use output charset in set_text_content for
 shift_jis/euc-jp

Encode the payload with the charset's output charset (iso-2022-jp) when
set_content() is called with shift_jis or euc-jp, instead of patching
serialization in body_encode() and set_payload().  This reverts the
body_encode() and set_payload() changes made in the previous commit.
---
 Lib/email/charset.py                                 | 10 +---------
 Lib/email/contentmanager.py                          |  3 ++-
 Lib/email/message.py                                 |  4 +---
 Lib/test/test_email/test_contentmanager.py           | 12 ++++--------
 .../2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst   |  7 ++++---
 5 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/Lib/email/charset.py b/Lib/email/charset.py
index e3ee13c3912241a..5981791820e740c 100644
--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
@@ -16,7 +16,6 @@
 import email.quoprimime
 
 from email import errors
-from email import utils
 from email.encoders import encode_7or8bit
 
 
@@ -439,12 +438,5 @@ def body_encode(self, string):
             return email.quoprimime.body_encode(string)
         else:
             if isinstance(string, str):
-                if utils._has_surrogates(string):
-                    string = string.encode('ascii', 'surrogateescape')
-                    if self.input_charset != self.output_charset:
-                        string = (string.decode(self.input_codec)
-                                  .encode(self.output_codec))
-                    string = string.decode('ascii', 'surrogateescape')
-                else:
-                    string = string.encode(self.output_charset).decode('ascii')
+                string = string.encode(self.output_charset).decode('ascii')
             return string
diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
index faf2626bccce651..c0090af716575d7 100644
--- a/Lib/email/contentmanager.py
+++ b/Lib/email/contentmanager.py
@@ -174,7 +174,8 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
                      params=None, headers=None):
     _prepare_set(msg, 'text', subtype, headers)
 
-    charset = email.charset.Charset(charset).input_charset
+    cs = email.charset.Charset(charset)
+    charset = cs.output_charset
     cte, payload = _encode_text(string, charset, cte, msg.policy)
     msg.set_payload(payload)
     msg.set_param('charset', charset, replace=True)
diff --git a/Lib/email/message.py b/Lib/email/message.py
index dff113ea407f07c..641fb2e944d4311 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -352,9 +352,7 @@ def set_payload(self, payload, charset=None):
                 return
             if not isinstance(charset, Charset):
                 charset = Charset(charset)
-            if not utils._has_surrogates(payload):
-                payload = payload.encode(charset.output_charset,
-                                          'surrogateescape')
+            payload = payload.encode(charset.output_charset, 'surrogateescape')
         if hasattr(payload, 'decode'):
             self._payload = payload.decode('ascii', 'surrogateescape')
         else:
diff --git a/Lib/test/test_email/test_contentmanager.py b/Lib/test/test_email/test_contentmanager.py
index e1c2db0e2a2b0a1..ec3915447107320 100644
--- a/Lib/test/test_email/test_contentmanager.py
+++ b/Lib/test/test_email/test_contentmanager.py
@@ -366,11 +366,9 @@ def test_set_text_charset_shift_jis(self):
         m = self._make_message()
         content = "\u65e5\u672c\u8a9e\n"
         raw_data_manager.set_content(m, content, charset='shift_jis')
-        self.assertEqual(m['Content-Type'], 'text/plain; charset="shift_jis"')
-        self.assertEqual(m['Content-Transfer-Encoding'], '8bit')
-        self.assertEqual(m.get_payload(decode=True), content.encode('shift_jis'))
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+        self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
         self.assertEqual(m.get_content(), content)
-        # Serialization converts the payload to iso-2022-jp for output.
         self.assertEqual(str(m), textwrap.dedent("""\
             Content-Type: text/plain; charset="iso-2022-jp"
             Content-Transfer-Encoding: 7bit
@@ -382,11 +380,9 @@ def test_set_text_charset_euc_jp(self):
         m = self._make_message()
         content = "\u65e5\u672c\u8a9e\n"
         raw_data_manager.set_content(m, content, charset='euc-jp')
-        self.assertEqual(m['Content-Type'], 'text/plain; charset="euc-jp"')
-        self.assertEqual(m['Content-Transfer-Encoding'], '8bit')
-        self.assertEqual(m.get_payload(decode=True), content.encode('euc-jp'))
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+        self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
         self.assertEqual(m.get_content(), content)
-        # Serialization converts the payload to iso-2022-jp for output.
         self.assertEqual(str(m), textwrap.dedent("""\
             Content-Type: text/plain; charset="iso-2022-jp"
             Content-Transfer-Encoding: 7bit
diff --git a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
index 79d724f354f237d..43d4c2006a6d892 100644
--- a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
+++ b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
@@ -1,3 +1,4 @@
-Fix serialization of :mod:`email` messages using ``shift_jis`` or ``euc-jp``
-charsets.  Converting surrogate-escaped payloads to the required
-``iso-2022-jp`` output charset no longer raises :exc:`UnicodeEncodeError`.
+Fix :mod:`email` messages created with ``shift_jis`` or ``euc-jp`` charsets.
+:func:`email.contentmanager.set_text_content` now stores the payload using
+the output charset (``iso-2022-jp``) so :func:`str` on the message no longer
+raises :exc:`UnicodeEncodeError`.

From e2f3984ac4fc8847b896c35edf575a024ec1c40d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cbhuvi27=E2=80=9D?= <b.chouksey27@gmail.com>
Date: Sun, 14 Jun 2026 08:52:29 +0530
Subject: [PATCH 3/3] gh-150771: Fix NEWS entry doc references for docs CI

Use plain backticks for set_content() instead of a broken :func: target.
---
 .../Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
index 43d4c2006a6d892..6535e5c48bf0360 100644
--- a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
+++ b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
@@ -1,4 +1,4 @@
 Fix :mod:`email` messages created with ``shift_jis`` or ``euc-jp`` charsets.
-:func:`email.contentmanager.set_text_content` now stores the payload using
-the output charset (``iso-2022-jp``) so :func:`str` on the message no longer
-raises :exc:`UnicodeEncodeError`.
+``set_content()`` now stores the payload using the output charset
+(``iso-2022-jp``) so printing the message no longer raises
+:exc:`UnicodeEncodeError`.