Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d69416d
gh-142831: Fix UAF in `_json` module
ashm-dev Dec 17, 2025
b153c2a
add news
ashm-dev Dec 17, 2025
64fcd75
add test
ashm-dev Dec 18, 2025
e385958
fix comments
ashm-dev Dec 18, 2025
211551a
Merge branch 'main' into json
ashm-dev Dec 18, 2025
5d6b2a8
fix test
ashm-dev Dec 18, 2025
a5a76d6
Merge branch 'main' into json
kumaraditya303 Dec 19, 2025
66c3af1
clean up test
kumaraditya303 Dec 19, 2025
aa92e34
Merge branch 'json' of https://github.com/ashm-dev/cpython into pr/as…
kumaraditya303 Dec 19, 2025
adfeee2
Revert "clean up test"
kumaraditya303 Dec 19, 2025
948ef90
Merge branch 'main' into json
ashm-dev Dec 19, 2025
a6734fe
Remove outdated comment about Py_INCREF in JSON encoder
ashm-dev Jan 19, 2026
facfd5d
Move test_mutate_items_during_encode to test_speedups.py
ashm-dev Jan 19, 2026
6fbb6d9
Merge branch 'main' into json
ashm-dev Jan 19, 2026
3c29d13
fix lint error
ashm-dev Jan 19, 2026
b4e50c9
Address review: remove redundant comments, use nonlocal, add list mut…
ashm-dev Mar 11, 2026
37b25e9
Merge branch 'main' into json
ashm-dev Mar 11, 2026
cced98b
Revert "Address review: remove redundant comments, use nonlocal, add …
ashm-dev Mar 11, 2026
cfee66f
Address review: use nonlocal, add gc_collect, add list mutation test
ashm-dev Mar 11, 2026
878a3ed
Remove redundant comments per review
ashm-dev Mar 11, 2026
951b206
Merge remote-tracking branch 'origin/main' into json
gpshead Apr 11, 2026
646b447
Move NEWS entry from Core_and_Builtins to Library
gpshead Apr 11, 2026
a412165
Add comments explaining why Py_INCREF is needed in iteration functions
gpshead Apr 11, 2026
7e3c835
Improve test assertions and robustness for UAF tests
gpshead Apr 11, 2026
727696d
Simplify borrowed-ref comments to focus on the why
gpshead Apr 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions Lib/test/test_json/test_speedups.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from test.test_json import CTest
from test.support import gc_collect


class BadBool:
Expand Down Expand Up @@ -111,3 +112,63 @@ def test_current_indent_level(self):
self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])

def test_mutate_dict_items_during_encode(self):
# gh-142831: Clearing the items list via a re-entrant key encoder
# must not cause a use-after-free. BadDict.items() returns a
# mutable list; encode_str clears it while iterating.
items = None

class BadDict(dict):
def items(self):
nonlocal items
items = [("boom", object())]
return items

cleared = False
def encode_str(obj):
nonlocal items, cleared
if items is not None:
items.clear()
items = None
cleared = True
gc_collect()
return '"x"'

encoder = self.json.encoder.c_make_encoder(
None, lambda o: "null",
encode_str, None,
": ", ", ", False,
False, True
)

# Must not crash (use-after-free under ASan before fix)
encoder(BadDict(real=1), 0)
self.assertTrue(cleared)

def test_mutate_list_during_encode(self):
# gh-142831: Clearing a list mid-iteration via the default
# callback must not cause a use-after-free.
call_count = 0
lst = [object() for _ in range(10)]

def default(obj):
nonlocal call_count
call_count += 1
if call_count == 3:
lst.clear()
gc_collect()
return None

encoder = self.json.encoder.c_make_encoder(
None, default,
self.json.encoder.c_encode_basestring, None,
": ", ", ", False,
False, True
)

# Must not crash (use-after-free under ASan before fix)
encoder(lst, 0)
# Verify the mutation path was actually hit and the loop
# stopped iterating after the list was cleared.
self.assertEqual(call_count, 3)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix a crash in the :mod:`json` module where a use-after-free could occur if
the object being encoded is modified during serialization.
28 changes: 6 additions & 22 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -1745,15 +1745,12 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *key, *value;
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
PyObject *item = PyList_GET_ITEM(items, i);
#ifdef Py_GIL_DISABLED
// gh-119438: in the free-threading build the critical section on items can get suspended
// gh-142831: encoder_encode_key_value() can invoke user code
// that mutates the items list, invalidating this borrowed ref.
Py_INCREF(item);
#endif
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
#ifdef Py_GIL_DISABLED
Py_DECREF(item);
#endif
return -1;
}

Expand All @@ -1762,14 +1759,10 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
if (encoder_encode_key_value(s, writer, first, dct, key, value,
indent_level, indent_cache,
separator) < 0) {
#ifdef Py_GIL_DISABLED
Py_DECREF(item);
#endif
return -1;
}
#ifdef Py_GIL_DISABLED
Py_DECREF(item);
#endif
}

return 0;
Expand All @@ -1784,10 +1777,8 @@ _encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
// gh-119438, gh-145244: key and value are borrowed refs from
// PyDict_Next(). encoder_encode_key_value() may invoke user
// Python code (the 'default' callback) that can mutate or
// clear the dict, so we must hold strong references.
// gh-145244: encoder_encode_key_value() can invoke user code
// that mutates the dict, invalidating these borrowed refs.
Py_INCREF(key);
Py_INCREF(value);
if (encoder_encode_key_value(s, writer, first, dct, key, value,
Expand Down Expand Up @@ -1902,28 +1893,21 @@ _encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
{
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
#ifdef Py_GIL_DISABLED
// gh-119438: in the free-threading build the critical section on s_fast can get suspended
// gh-142831: encoder_listencode_obj() can invoke user code
// that mutates the sequence, invalidating this borrowed ref.
Py_INCREF(obj);
#endif
if (i) {
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
#ifdef Py_GIL_DISABLED
Py_DECREF(obj);
#endif
return -1;
}
}
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
#ifdef Py_GIL_DISABLED
Py_DECREF(obj);
#endif
return -1;
}
#ifdef Py_GIL_DISABLED
Py_DECREF(obj);
#endif
}
return 0;
}
Expand Down
Loading