diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 5e7dda3a3715a1..cdac16fc94fec0 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -950,6 +950,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd2)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fdel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(feature_version)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fget)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fields)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(file)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 6908cbf78f349e..fab94bc20f4c94 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -441,6 +441,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(fd) STRUCT_FOR_ID(fd2) STRUCT_FOR_ID(fdel) + STRUCT_FOR_ID(feature_version) STRUCT_FOR_ID(fget) STRUCT_FOR_ID(fields) STRUCT_FOR_ID(file) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index da2ed7422c9deb..a21be9bb3123d1 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -948,6 +948,7 @@ extern "C" { INIT_ID(fd), \ INIT_ID(fd2), \ INIT_ID(fdel), \ + INIT_ID(feature_version), \ INIT_ID(fget), \ INIT_ID(fields), \ INIT_ID(file), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b1f411945e7856..7089e6a099d3c0 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1552,6 +1552,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(feature_version); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(fget); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index e0535d50396316..490d00c3ea9569 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -20,10 +20,12 @@ from __future__ import annotations import _colorize +import _symtable # type: ignore[import-not-found] from abc import ABC, abstractmethod import ast import code +import codeop import linecache from dataclasses import dataclass, field import os.path @@ -211,6 +213,24 @@ def runsource(self, source, filename="", symbol="single"): except (OverflowError, ValueError): self.showsyntaxerror(filename, source=source) return False + + # Validate stuff that cannot be validated with AST parsing only, + # such as assigning to a variable before a global declaration, + # + # While runsource("x = 1; global x") would fail, runsource("x = 1") + # followed by runsource("global x") would still work since preventing + # this requires the REPL to remember the global names whose number + # grows faster than in a regular program, which then becomes less + # efficient or relevant for the user. + flags = self.compile.compiler.flags # may contain active futures + flags &= ~codeop.PyCF_DONT_IMPLY_DEDENT + flags &= ~codeop.PyCF_ALLOW_INCOMPLETE_INPUT + try: + _symtable.symtable(source, filename, "exec", flags=flags) + except (SyntaxError, OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + if tree.body: *_, last_stmt = tree.body for stmt in tree.body: diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index 8c0eeab6dcae96..8e0db45e00198b 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -5,7 +5,7 @@ from unittest.mock import patch from textwrap import dedent -from test.support import force_not_colorized +from test.support import captured_stdout, force_not_colorized from _pyrepl.console import InteractiveColoredConsole from _pyrepl.simple_interact import _more_lines @@ -131,6 +131,11 @@ def test_runsource_shows_syntax_error_for_failed_compilation(self): console.runsource(source) mock_showsyntaxerror.assert_called_once() + def test_runsource_shows_syntax_error_for_failed_symtable_checks(self): + # Some checks cannot be performed by AST parsing only. + # See https://github.com/python/cpython/issues/137376. + self._test_runsource_error("x = 1; global x; x = 2") + def test_runsource_survives_null_bytes(self): console = InteractiveColoredConsole() source = "\x00\n" @@ -153,26 +158,46 @@ def test_no_active_future(self): self.assertEqual(f.getvalue(), "{'x': }\n") def test_future_annotations(self): - console = InteractiveColoredConsole() - source = dedent("""\ - from __future__ import annotations - def g(x: int): ... - print(g.__annotations__) - """) - f = io.StringIO() - with contextlib.redirect_stdout(f): - result = console.runsource(source) - self.assertFalse(result) - self.assertEqual(f.getvalue(), "{'x': 'int'}\n") + self._test_runsource_future( + "from __future__ import annotations", + ["def g(x: int): ...", "print(g.__annotations__)"], + "{'x': 'int'}\n", + ) def test_future_barry_as_flufl(self): + self._test_runsource_future( + "from __future__ import barry_as_FLUFL", + ["""print("black" <> 'blue')"""], + "True\n", + ) + + def _test_runsource_error(self, buggy_source): console = InteractiveColoredConsole() - f = io.StringIO() - with contextlib.redirect_stdout(f): - result = console.runsource("from __future__ import barry_as_FLUFL\n") - result = console.runsource("""print("black" <> 'blue')\n""") - self.assertFalse(result) - self.assertEqual(f.getvalue(), "True\n") + with patch.object(console, "showsyntaxerror") as handler: + result = console.runsource(buggy_source) + handler.assert_called_once() + + def _test_runsource_future(self, future_statement, statements, expected): + """Run future_statement + statements. + + This checks whether a standalone future statement remains active + for the entire session lifetime. + """ + with self.subTest("standalone source"): + console = InteractiveColoredConsole() + source = "\n".join([future_statement, *statements]) + with captured_stdout() as stdout: + result = console.runsource(source) + self.assertFalse(result) + self.assertEqual(stdout.getvalue(), expected) + + with self.subTest("__future__ executed separtely"): + console = InteractiveColoredConsole() + with captured_stdout() as stdout: + result = console.runsource(future_statement) + result = console.runsource("\n".join(statements)) + self.assertFalse(result) + self.assertEqual(stdout.getvalue(), expected) class TestMoreLines(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst b/Misc/NEWS.d/next/Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst new file mode 100644 index 00000000000000..4457436b75c524 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-04-14-38-28.gh-issue-137376.iHSKBL.rst @@ -0,0 +1,3 @@ +Ensure that :exc:`SyntaxError` is raised when given incorrect REPL inputs +that are only detected when processing symbol tables. Patch by Bénédikt +Tran. diff --git a/Modules/clinic/symtablemodule.c.h b/Modules/clinic/symtablemodule.c.h index 2ecd3afc00d2be..a7f6ed0fcf459b 100644 --- a/Modules/clinic/symtablemodule.c.h +++ b/Modules/clinic/symtablemodule.c.h @@ -2,30 +2,69 @@ preserve [clinic start generated code]*/ -#include "pycore_modsupport.h" // _PyArg_CheckPositional() +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_symtable_symtable__doc__, -"symtable($module, source, filename, startstr, /)\n" +"symtable($module, source, filename, startstr, /, *, flags=0,\n" +" feature_version=-1)\n" "--\n" "\n" "Return symbol and scope dictionaries used internally by compiler."); #define _SYMTABLE_SYMTABLE_METHODDEF \ - {"symtable", _PyCFunction_CAST(_symtable_symtable), METH_FASTCALL, _symtable_symtable__doc__}, + {"symtable", _PyCFunction_CAST(_symtable_symtable), METH_FASTCALL|METH_KEYWORDS, _symtable_symtable__doc__}, static PyObject * _symtable_symtable_impl(PyObject *module, PyObject *source, - PyObject *filename, const char *startstr); + PyObject *filename, const char *startstr, int flags, + int feature_version); static PyObject * -_symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(flags), &_Py_ID(feature_version), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "", "", "flags", "feature_version", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "symtable", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[5]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 3; PyObject *source; PyObject *filename; const char *startstr; + int flags = 0; + int feature_version = -1; - if (!_PyArg_CheckPositional("symtable", nargs, 3, 3)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 3, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { goto exit; } source = args[0]; @@ -45,9 +84,26 @@ _symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs) PyErr_SetString(PyExc_ValueError, "embedded null character"); goto exit; } - return_value = _symtable_symtable_impl(module, source, filename, startstr); + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[3]) { + flags = PyLong_AsInt(args[3]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + feature_version = PyLong_AsInt(args[4]); + if (feature_version == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_kwonly: + return_value = _symtable_symtable_impl(module, source, filename, startstr, flags, feature_version); exit: return return_value; } -/*[clinic end generated code: output=931964a76a72f850 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f30a99893fa9de05 input=a9049054013a1b77]*/ diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index d0d5223e5acea8..c4ff5a1a08fc97 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -16,14 +16,18 @@ _symtable.symtable filename: object(converter='PyUnicode_FSDecoder') startstr: str / + * + flags: int = 0 + feature_version: int = -1 Return symbol and scope dictionaries used internally by compiler. [clinic start generated code]*/ static PyObject * _symtable_symtable_impl(PyObject *module, PyObject *source, - PyObject *filename, const char *startstr) -/*[clinic end generated code: output=59eb0d5fc7285ac4 input=9dd8a50c0c36a4d7]*/ + PyObject *filename, const char *startstr, int flags, + int feature_version) +/*[clinic end generated code: output=1e766ac3387e156a input=03e9deda7ab5a9d7]*/ { struct symtable *st; PyObject *t; @@ -31,7 +35,14 @@ _symtable_symtable_impl(PyObject *module, PyObject *source, PyCompilerFlags cf = _PyCompilerFlags_INIT; PyObject *source_copy = NULL; - cf.cf_flags = PyCF_SOURCE_IS_UTF8; + cf.cf_flags = flags | PyCF_SOURCE_IS_UTF8; + if (feature_version >= 0 && (flags & PyCF_ONLY_AST)) { + cf.cf_feature_version = feature_version; + } + if (flags & ~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_COMPILE_MASK)) { + PyErr_SetString(PyExc_ValueError, "_symtable.symtable(): unrecognised flags"); + return NULL; + } const char *str = _Py_SourceAsString(source, "symtable", "string or bytes", &cf, &source_copy); if (str == NULL) {