Skip to content

Commit f04aefa

Browse files
[3.14] gh-145234: Normalize decoded CR in string tokenizer (GH-145281) (#145310)
gh-145234: Normalize decoded CR in string tokenizer (GH-145281) (cherry picked from commit 98b1e51) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent 86c8467 commit f04aefa

File tree

3 files changed

+26
-0
lines changed

3 files changed

+26
-0
lines changed

Lib/test/test_py_compile.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,14 @@ def test_quiet(self):
207207
with self.assertRaises(py_compile.PyCompileError):
208208
py_compile.compile(bad_coding, doraise=True, quiet=1)
209209

210+
def test_utf7_decoded_cr_compiles(self):
211+
with open(self.source_path, 'wb') as file:
212+
file.write(b"#coding=U7+AA0''\n")
213+
214+
pyc_path = py_compile.compile(self.source_path, self.pyc_path, doraise=True)
215+
self.assertEqual(pyc_path, self.pyc_path)
216+
self.assertTrue(os.path.exists(self.pyc_path))
217+
210218

211219
class PyCompileTestsWithSourceEpoch(PyCompileTestsBase,
212220
unittest.TestCase,
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fixed a ``SystemError`` in the parser when source with an encoding cookie (for example,
2+
UTF-7) decodes to carriage returns (``\r``). Newlines are now normalized after
3+
decoding in the string tokenizer.
4+
5+
Patch by Pablo Galindo.

Parser/tokenizer/string_tokenizer.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,19 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr
108108
else if (!_PyTokenizer_ensure_utf8(str, tok, 1)) {
109109
return _PyTokenizer_error_ret(tok);
110110
}
111+
if (utf8 != NULL) {
112+
char *translated = _PyTokenizer_translate_newlines(
113+
str, single, preserve_crlf, tok);
114+
if (translated == NULL) {
115+
Py_DECREF(utf8);
116+
return _PyTokenizer_error_ret(tok);
117+
}
118+
PyMem_Free(tok->input);
119+
tok->input = translated;
120+
str = translated;
121+
Py_CLEAR(utf8);
122+
}
123+
tok->str = str;
111124
assert(tok->decoding_buffer == NULL);
112125
tok->decoding_buffer = utf8; /* CAUTION */
113126
return str;

0 commit comments

Comments
 (0)