Skip to content
Merged
17 changes: 15 additions & 2 deletions Lib/test/test_source_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,7 @@ def test_utf8_non_utf8_third_line_error(self):
b'#third\xa4\n'
b'raise RuntimeError\n')
self.check_script_error(src,
br"'utf-8' codec can't decode byte|"
br"encoding problem: utf8")
br"'utf-8' codec can't decode byte")

def test_crlf(self):
src = (b'print(ascii("""\r\n"""))\n')
Expand Down Expand Up @@ -540,6 +539,20 @@ def check_script_error(self, src, expected, lineno=...):
line = line.removeprefix('\ufeff')
self.assertIn(line.encode(), err)

def test_coding_spec_unknown_encoding(self):
    # The coding cookie names "c1252"; the script error is expected to
    # carry the codec lookup message for that name.
    script_lines = [
        b'# coding: c1252\n',
        b'print("Hi!")\n',
    ]
    self.check_script_error(b''.join(script_lines),
                            br"unknown encoding: c1252")

def test_coding_spec_decode_error(self):
    # The cookie declares shift-jis while the string literal holds the
    # bytes C4 85; the script error is expected to carry the shift_jis
    # codec's decode message.
    script_lines = [
        b'# coding: shift-jis\n',
        b'print("\xc4\x85")\n',
    ]
    self.check_script_error(b''.join(script_lines),
                            br"'shift_jis' codec can't decode byte")

def test_coding_spec_non_text_encoding(self):
    # The cookie names hex_codec; the script error is expected to say
    # that it is not a text encoding.
    script_lines = [
        b'# coding: hex_codec\n',
        b'print("eggs")\n',
    ]
    self.check_script_error(b''.join(script_lines),
                            br"'hex_codec' is not a text encoding")


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix direct execution of files with invalid source encodings to report the
underlying codec lookup or decoding error instead of the generic
``SyntaxError: encoding problem`` message. Patch by Bartosz Sławecki.
5 changes: 3 additions & 2 deletions Parser/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "lexer/lexer.h"
#include "tokenizer/tokenizer.h"
#include "tokenizer/helpers.h"
#include "pegen.h"

// Internal parser functions
Expand Down Expand Up @@ -993,7 +994,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
if (tok == NULL) {
if (PyErr_Occurred()) {
_PyPegen_raise_tokenizer_init_error(filename_ob);
_PyTokenizer_raise_init_error(filename_ob);
return NULL;
}
return NULL;
Expand Down Expand Up @@ -1051,7 +1052,7 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
}
if (tok == NULL) {
if (PyErr_Occurred()) {
_PyPegen_raise_tokenizer_init_error(filename_ob);
_PyTokenizer_raise_init_error(filename_ob);
}
return NULL;
}
Expand Down
1 change: 0 additions & 1 deletion Parser/pegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ typedef enum {
} TARGETS_TYPE;

int _Pypegen_raise_decode_error(Parser *p);
void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
int _Pypegen_tokenizer_error(Parser *p);
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...);
void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
Expand Down
47 changes: 0 additions & 47 deletions Parser/pegen_errors.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,53 +10,6 @@

// TOKENIZER ERRORS

/*
 * Rewrite the pending exception raised while setting up the tokenizer.
 *
 * Nothing happens unless the pending exception matches LookupError,
 * SyntaxError, ValueError or UnicodeDecodeError.  A SyntaxError gets its
 * `filename` attribute set and is raised again; any other matching
 * exception is replaced by SyntaxError(str(exc), (filename, 0, -1, None)).
 * If an intermediate call fails, that call's own exception is left set.
 */
void
_PyPegen_raise_tokenizer_init_error(PyObject *filename)
{
    int convertible = PyErr_ExceptionMatches(PyExc_LookupError)
                      || PyErr_ExceptionMatches(PyExc_SyntaxError)
                      || PyErr_ExceptionMatches(PyExc_ValueError)
                      || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError);
    if (!convertible) {
        return;
    }

    PyObject *exc_type;
    PyObject *exc_value;
    PyObject *exc_tb;
    PyObject *message = NULL;
    PyObject *location = NULL;
    PyObject *args = NULL;

    // Take ownership of the pending exception; the indicator is now clear.
    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);

    if (PyErr_GivenExceptionMatches(exc_value, PyExc_SyntaxError)) {
        if (PyObject_SetAttr(exc_value, &_Py_ID(filename), filename) == 0) {
            // PyErr_Restore() consumes the three references.
            PyErr_Restore(exc_type, exc_value, exc_tb);
            return;
        }
        goto done;
    }

    message = PyObject_Str(exc_value);
    if (message == NULL) {
        goto done;
    }

    location = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
    if (location == NULL) {
        goto done;
    }

    args = _PyTuple_FromPair(message, location);
    if (args != NULL) {
        PyErr_SetObject(PyExc_SyntaxError, args);
    }

done:
    // Drop everything still owned here, including the fetched exception.
    Py_XDECREF(exc_type);
    Py_XDECREF(exc_value);
    Py_XDECREF(exc_tb);
    Py_XDECREF(message);
    Py_XDECREF(location);
    Py_XDECREF(args);
}

static inline void
raise_unclosed_parentheses_error(Parser *p) {
int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
Expand Down
51 changes: 50 additions & 1 deletion Parser/tokenizer/helpers.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "Python.h"
#include "errcode.h"
#include "pycore_runtime.h" // _Py_ID()
#include "pycore_token.h"
#include "pycore_tuple.h" // _PyTuple_FromPair

#include "../lexer/state.h"

Expand Down Expand Up @@ -149,6 +151,53 @@ _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_inval
return 0;
}

/*
 * Rewrite the pending exception raised while setting up the tokenizer.
 *
 * Nothing happens unless the pending exception matches LookupError,
 * SyntaxError, ValueError or UnicodeDecodeError.  A SyntaxError gets its
 * `filename` attribute set and is raised again; any other matching
 * exception is replaced by SyntaxError(str(exc), (filename, 0, -1, None)).
 * If an intermediate call fails, that call's own exception is left set.
 *
 * `filename` is borrowed.  NULL is accepted and treated as None.
 */
void
_PyTokenizer_raise_init_error(PyObject *filename)
{
    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
          || PyErr_ExceptionMatches(PyExc_SyntaxError)
          || PyErr_ExceptionMatches(PyExc_ValueError)
          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
        return;
    }

    // With a NULL filename PyObject_SetAttr() would delete the attribute and
    // Py_BuildValue("O") would fail with SystemError, hiding the real error.
    // NOTE(review): confirm whether any caller can actually pass NULL here.
    if (filename == NULL) {
        filename = Py_None;
    }

    // PyErr_Fetch()/PyErr_Restore() are deprecated since 3.12; a single
    // owned reference to the raised exception is all that is needed.
    PyObject *exc = PyErr_GetRaisedException();

    if (PyErr_GivenExceptionMatches(exc, PyExc_SyntaxError)) {
        if (PyObject_SetAttr(exc, &_Py_ID(filename), filename) < 0) {
            // Keep the attribute-setting failure as the pending error.
            Py_DECREF(exc);
            return;
        }
        // PyErr_SetRaisedException() steals the reference.
        PyErr_SetRaisedException(exc);
        return;
    }

    PyObject *errstr = PyObject_Str(exc);
    Py_DECREF(exc);
    if (errstr == NULL) {
        return;
    }

    PyObject *location = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
    if (location == NULL) {
        Py_DECREF(errstr);
        return;
    }

    // _PyTuple_FromPair() takes its own references, so ours are released.
    PyObject *args = _PyTuple_FromPair(errstr, location);
    Py_DECREF(errstr);
    Py_DECREF(location);
    if (args == NULL) {
        return;
    }

    PyErr_SetObject(PyExc_SyntaxError, args);
    Py_DECREF(args);
}

int
_PyTokenizer_parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...)
{
Expand Down Expand Up @@ -418,8 +467,8 @@ _PyTokenizer_check_coding_spec(const char* line, Py_ssize_t size, struct tok_sta
if (tok->encoding == NULL) {
assert(tok->decoding_readline == NULL);
if (strcmp(cs, "utf-8") != 0 && !set_readline(tok, cs)) {
_PyTokenizer_raise_init_error(tok->filename);
_PyTokenizer_error_ret(tok);
PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
PyMem_Free(cs);
return 0;
}
Expand Down
1 change: 1 addition & 0 deletions Parser/tokenizer/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ int _PyTokenizer_indenterror(struct tok_state *tok);
int _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_char);
int _PyTokenizer_parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...);
char *_PyTokenizer_error_ret(struct tok_state *tok);
void _PyTokenizer_raise_init_error(PyObject *filename);

char *_PyTokenizer_new_string(const char *s, Py_ssize_t len, struct tok_state *tok);
char *_PyTokenizer_translate_newlines(const char *s, int exec_input, int preserve_crlf, struct tok_state *tok);
Expand Down
Loading