Skip to content

Commit 75ab1f8

Browse files
committed
gh-150724: Optimize JIT keyword calls to exact args
1 parent 84630e2 commit 75ab1f8

3 files changed

Lines changed: 244 additions & 2 deletions

File tree

Lib/test/test_capi/test_opt.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,27 @@ def dummy(x):
715715
self.assertNotIn("_GUARD_CODE_VERSION__PUSH_FRAME", uops)
716716
self.assertNotIn("_GUARD_IP__PUSH_FRAME", uops)
717717

718+
def test_call_kw_py_exact_args(self):
# Checks the trace produced for a keyword call to a plain Python function
# whose keywords, together with the positional argument, cover exactly the
# callee's three parameters: the trace must not contain _PY_FRAME_KW and
# must contain a uop whose name starts with _INIT_CALL_PY_EXACT_ARGS.
719+
def callee(x, a, b):
720+
return x + a + b
721+
722+
def testfunc(n):
723+
total = 0
724+
for i in range(n):
# One positional argument plus two keywords, deliberately given in the
# opposite order to the parameter list (b before a).
725+
total += callee(i, b=2, a=1)
726+
return total
727+
728+
# NOTE(review): `ex` is presumably the tier-2 executor created for
# testfunc; _run_with_optimizer is defined outside this hunk.
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
729+
# Each iteration adds i + 1 + 2, so the total is sum(range(n)) + 3 * n.
self.assertEqual(res, TIER2_THRESHOLD * (TIER2_THRESHOLD - 1) // 2 + 3 * TIER2_THRESHOLD)
730+
self.assertIsNotNone(ex)
731+
uops = get_opnames(ex)
732+
# The generic keyword-call frame uop must be gone from the trace ...
self.assertNotIn("_PY_FRAME_KW", uops)
733+
# ... and an exact-args frame initialisation uop must be present; a prefix
# match is used, and `uops` is passed as the failure message.
self.assertTrue(
734+
any(opname.startswith("_INIT_CALL_PY_EXACT_ARGS") for opname in uops),
735+
uops,
736+
)
737+
# The integer-add uop is also expected somewhere in the trace.
self.assertIn("_BINARY_OP_ADD_INT", uops)
738+
718739
def test_int_type_propagate_through_range(self):
719740
def testfunc(n):
720741

Python/optimizer_bytecodes.c

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1286,7 +1286,118 @@ dummy_func(void) {
12861286
}
12871287

12881288
op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) {
// Optimizer case for _PY_FRAME_KW.
//
// If the callable and kwnames are known constants and the call supplies
// exactly one value for each of the callee's parameters, this emits
// _POP_TOP, a series of _SWAPs that put the arguments into parameter order,
// and then _CHECK_FUNCTION_EXACT_ARGS / _CHECK_STACK_SPACE_OPERAND /
// _INIT_CALL_PY_EXACT_ARGS. Otherwise it falls back to creating the frame
// symbol with no argument symbols.
//
// Diff view note: the line marked "1289-" below is the body removed by this
// commit; the "+" lines are its replacement.
1289-
new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
1289+
// Becomes true only once every callee parameter has been mapped to a source.
bool valid = false;
1290+
// NOTE(review): sym_get_const presumably yields NULL when the symbol is not
// a known constant; both results are NULL-checked before use below.
PyObject *func_o = sym_get_const(ctx, callable);
1291+
PyObject *kwnames_o = sym_get_const(ctx, kwnames);
1292+
// Used below as a 0/1 offset for a leading `self` slot.
bool has_self = sym_is_not_null(self_or_null);
1293+
PyCodeObject *co = NULL;
1294+
Py_ssize_t total_args = 0;
1295+
// desired[pos]: index into args[] of the value that must end up at stack
// argument position `pos`.
int desired[256];
1296+
// Argument symbols in callee-local order (slot 0 is self when has_self),
// handed to frame_new_from_symbol on the optimized path.
JitOptRef frame_args[257];
1297+
1298+
// Preconditions: self_or_null is known to be either NULL or non-NULL, the
// callable is a constant Python function, kwnames is a constant exact tuple,
// and oparg fits the fixed-size arrays (indices run up to oparg - 1 for
// desired/current and up to oparg for frame_args/source_for_local).
if ((has_self || sym_is_null(self_or_null)) &&
1299+
func_o != NULL && PyFunction_Check(func_o) &&
1300+
kwnames_o != NULL && PyTuple_CheckExact(kwnames_o) &&
1301+
oparg <= 256)
1302+
{
1303+
PyFunctionObject *func = (PyFunctionObject *)func_o;
1304+
co = (PyCodeObject *)func->func_code;
1305+
Py_ssize_t kwcount = PyTuple_GET_SIZE(kwnames_o);
1306+
// Values passed to the callee, counting self when present.
total_args = oparg + has_self;
1307+
// Values passed positionally (including self) ...
Py_ssize_t positional_args = total_args - kwcount;
1308+
// ... and the positional values that live in args[] (self excluded).
Py_ssize_t positional_stack_args = positional_args - has_self;
1309+
1310+
// Require CO_OPTIMIZED without CO_VARARGS / CO_VARKEYWORDS, no keyword-only
// parameters, and an argument count equal to the number of supplied values.
// positional_args >= has_self keeps positional_stack_args non-negative.
if ((co->co_flags & (CO_OPTIMIZED | CO_VARARGS | CO_VARKEYWORDS)) == CO_OPTIMIZED &&
1311+
co->co_kwonlyargcount == 0 &&
1312+
co->co_argcount == total_args &&
1313+
positional_args >= has_self)
1314+
{
1315+
// source_for_local[k]: where callee local k gets its value from.
//   -1  not assigned yet
//   -2  the self_or_null stack entry
//  >=0  index into args[]
int source_for_local[257];
1316+
for (int i = 0; i < total_args; i++) {
1317+
source_for_local[i] = -1;
1318+
}
1319+
if (has_self) {
1320+
source_for_local[0] = -2;
1321+
}
1322+
// Positional values fill the parameters immediately after self, in order.
for (int i = 0; i < positional_stack_args; i++) {
1323+
source_for_local[has_self + i] = i;
1324+
}
1325+
1326+
valid = true;
1327+
// Resolve each keyword name to a parameter slot.
for (Py_ssize_t i = 0; valid && i < kwcount; i++) {
1328+
PyObject *keyword = PyTuple_GET_ITEM(kwnames_o, i);
1329+
if (!PyUnicode_CheckExact(keyword)) {
1330+
valid = false;
1331+
break;
1332+
}
1333+
int target = -1;
1334+
// Positional-only parameters are skipped: the search starts at
// co_posonlyargcount. Identity is tried before string comparison.
for (int j = co->co_posonlyargcount; j < co->co_argcount; j++) {
1335+
PyObject *varname = PyTuple_GET_ITEM(co->co_localsplusnames, j);
1336+
if (keyword == varname || PyUnicode_Equal(keyword, varname)) {
1337+
target = j;
1338+
break;
1339+
}
1340+
}
1341+
// Give up if the name was not found, names the self slot, or names a
// parameter that already has a value (positional or earlier keyword).
if (target < has_self || target < 0 || source_for_local[target] != -1) {
1342+
valid = false;
1343+
break;
1344+
}
1345+
// The i-th keyword value is taken from args[positional_stack_args + i].
source_for_local[target] = (int)(positional_stack_args + i);
1346+
}
1347+
1348+
if (has_self) {
1349+
frame_args[0] = self_or_null;
1350+
}
1351+
// Build desired[] and frame_args[]; any parameter still unassigned makes
// the call ineligible.
for (int local = 0; valid && local < co->co_argcount; local++) {
1352+
if (source_for_local[local] == -1) {
1353+
valid = false;
1354+
break;
1355+
}
1356+
// Slot 0 is self when has_self and was already stored above.
if (local >= has_self) {
1357+
int source = source_for_local[local];
1358+
desired[local - has_self] = source;
1359+
frame_args[local] = args[source];
1360+
}
1361+
}
1362+
}
1363+
}
1364+
1365+
if (!valid) {
1366+
// Fallback: same call as the removed pre-commit body (no argument symbols).
new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, NULL, 0));
1367+
}
1368+
else {
1369+
// current[i]: original args[] index of the value currently sitting at
// stack argument position i; starts as the identity arrangement.
int current[256];
1370+
for (int i = 0; i < oparg; i++) {
1371+
current[i] = i;
1372+
}
1373+
1374+
// NOTE(review): presumably this drops kwnames, the last input listed in
// the stack effect, leaving callable, self_or_null and args — confirm.
ADD_OP(_POP_TOP, 0, 0);
1375+
// Place the wanted value at each position from left to right. The last
// position needs no step of its own: once all earlier positions are
// correct, the one remaining value is already in place.
for (int pos = 0; pos < oparg - 1; pos++) {
1376+
int source = desired[pos];
1377+
int source_pos = pos;
1378+
// Find where the wanted value currently is; positions before `pos` are
// already final, so the search starts at `pos`.
while (current[source_pos] != source) {
1379+
source_pos++;
1380+
}
1381+
if (source_pos != pos) {
1382+
int top = oparg - 1;
1383+
// Step 1 (skipped if the value is already in the top slot): emit a
// _SWAP with operand oparg - source_pos; current[] records it as
// exchanging position source_pos with the top slot.
if (source_pos != top) {
1384+
ADD_OP(_SWAP, oparg - source_pos, 0);
1385+
int temp = current[source_pos];
1386+
current[source_pos] = current[top];
1387+
current[top] = temp;
1388+
}
1389+
// Step 2: emit a _SWAP with operand oparg - pos; current[] records it
// as exchanging position pos with the top slot, which puts the wanted
// value at `pos`.
ADD_OP(_SWAP, oparg - pos, 0);
1390+
int temp = current[pos];
1391+
current[pos] = current[top];
1392+
current[top] = temp;
1393+
}
1394+
}
1395+
1396+
// With the arguments in parameter order, emit the exact-args sequence;
// the callee's co_framesize is the operand of the stack-space check.
ADD_OP(_CHECK_FUNCTION_EXACT_ARGS, oparg, 0);
1397+
ADD_OP(_CHECK_STACK_SPACE_OPERAND, 0, co->co_framesize);
1398+
ADD_OP(_INIT_CALL_PY_EXACT_ARGS, oparg, 0);
1399+
// The new frame symbol is created with the reordered argument symbols.
new_frame = PyJitRef_WrapInvalid(frame_new_from_symbol(ctx, callable, frame_args, (int)total_args));
1400+
}
12901401
}
12911402

12921403
op(_PY_FRAME_EX, (func_st, null, callargs_st, kwargs_st -- ex_frame)) {

Python/optimizer_cases.c.h

Lines changed: 111 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)