From 3bf238d426e39b8032ceae19d4559d766be6af07 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 22 Jan 2026 16:03:00 +0000 Subject: [PATCH 1/2] Optimize bytes.translate() by deferring change detection Move the equality check out of the hot loop to allow better compiler optimization. Instead of checking each byte during translation, perform a single memcmp at the end to determine if the input can be returned unchanged. This allows compilers to unroll and pipeline the loops, resulting in ~2x throughput improvement for medium-to-large inputs (tested on an AMD zen2). No change observed on small inputs. It will also be faster for bytes subclasses as those do not need change detection. --- Objects/bytesobject.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2b0925017f29e4..56de99bde11682 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2237,11 +2237,15 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table, /* If no deletions are required, use faster code */ for (i = inlen; --i >= 0; ) { c = Py_CHARMASK(*input++); - if (Py_CHARMASK((*output++ = table_chars[c])) != c) - changed = 1; - } - if (!changed && PyBytes_CheckExact(input_obj)) { - Py_SETREF(result, Py_NewRef(input_obj)); + *output++ = table_chars[c]; + } + /* Check if anything changed (for returning original object) */ + /* We save this check until the end so that the compiler will */ + /* unroll the loop above leading to MUCH faster code. */ + if (PyBytes_CheckExact(input_obj)) { + if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) { + Py_SETREF(result, Py_NewRef(input_obj)); + } } PyBuffer_Release(&del_table_view); PyBuffer_Release(&table_view); From 46c162377366bc3c27b7ebb3c919e055ad6049cf Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Thu, 22 Jan 2026 16:20:40 +0000 Subject: [PATCH 2/2] NEWS entry --- .../2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst new file mode 100644 index 00000000000000..ff62d739d7804c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-22-16-20-16.gh-issue-144157.dxyp7k.rst @@ -0,0 +1,2 @@ +:meth:`bytes.translate` now lets the compiler unroll its loop more effectively, +giving roughly a 2x speedup in the common case where no deletions are specified.