Skip to content

Commit da151fd

Browse files
[3.12] gh-105699: Use a _Py_hashtable_t for the PyModuleDef Cache (gh-106974) (gh-107412)
gh-105699: Use a _Py_hashtable_t for the PyModuleDef Cache (gh-106974) This fixes a crasher due to a race condition, triggered infrequently when two isolated (own GIL) subinterpreters simultaneously initialize their sys or builtins modules. The crash happened due the combination of the "detached" thread state we were using and the "last holder" logic we use for the GIL. It turns out it's tricky to use the same thread state for different threads. Who could have guessed? We solve the problem by eliminating the one object we were still sharing between interpreters. We replace it with a low-level hashtable, using the "raw" allocator to avoid tying it to the main interpreter. We also remove the accommodations for "detached" thread states, which were a dubious idea to start with. (cherry picked from commit 8ba4df9)
1 parent e5ca2aa commit da151fd

File tree

6 files changed

+26592
-26672
lines changed

6 files changed

+26592
-26672
lines changed

Doc/data/python3.12.abi

+26,464-26,484
Large diffs are not rendered by default.

Include/internal/pycore_import.h

+6-7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
extern "C" {
66
#endif
77

8+
#include "pycore_hashtable.h" // _Py_hashtable_t
9+
#include "pycore_time.h" // _PyTime_t
10+
811

912
struct _import_runtime_state {
1013
/* The builtin modules (defined in config.c). */
@@ -15,19 +18,15 @@ struct _import_runtime_state {
1518
See PyInterpreterState.modules_by_index for more info. */
1619
Py_ssize_t last_module_index;
1720
struct {
18-
/* A thread state tied to the main interpreter,
19-
used exclusively for when the extensions dict is access/modified
20-
from an arbitrary thread. */
21-
PyThreadState main_tstate;
22-
/* A lock to guard the dict. */
21+
/* A lock to guard the cache. */
2322
PyThread_type_lock mutex;
24-
/* A dict mapping (filename, name) to PyModuleDef for modules.
23+
/* The actual cache of (filename, name, PyModuleDef) for modules.
2524
Only legacy (single-phase init) extension modules are added
2625
and only if they support multiple initialization (m_size >- 0)
2726
or are imported in the main interpreter.
2827
This is initialized lazily in _PyImport_FixupExtensionObject().
2928
Modules are added there and looked up in _imp.find_extension(). */
30-
PyObject *dict;
29+
_Py_hashtable_t *hashtable;
3130
} extensions;
3231
/* Package context -- the full module name for package imports */
3332
const char * pkgcontext;

Include/internal/pycore_pystate.h

-5
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,6 @@ PyAPI_FUNC(void) _PyThreadState_Init(
128128
PyThreadState *tstate);
129129
PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate);
130130

131-
extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *);
132-
extern void _PyThreadState_ClearDetached(PyThreadState *);
133-
extern void _PyThreadState_BindDetached(PyThreadState *);
134-
extern void _PyThreadState_UnbindDetached(PyThreadState *);
135-
136131

137132
/* Other */
138133

Include/internal/pycore_runtime_init.h

-5
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,6 @@ extern PyTypeObject _PyExc_MemoryError;
4141
in accordance with the specification. */ \
4242
.autoTSSkey = Py_tss_NEEDS_INIT, \
4343
.parser = _parser_runtime_state_INIT, \
44-
.imports = { \
45-
.extensions = { \
46-
.main_tstate = _PyThreadState_INIT, \
47-
}, \
48-
}, \
4944
.ceval = { \
5045
.perf = _PyEval_RUNTIME_PERF_INIT, \
5146
}, \

Python/import.c

+122-102
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
#include "Python.h"
44

5+
#include "pycore_hashtable.h" // _Py_hashtable_new_full()
56
#include "pycore_import.h" // _PyImport_BootstrapImp()
67
#include "pycore_initconfig.h" // _PyStatus_OK()
78
#include "pycore_interp.h" // struct _import_runtime_state
89
#include "pycore_namespace.h" // _PyNamespace_Type
10+
#include "pycore_object.h" // _Py_SetImmortal()
911
#include "pycore_pyerrors.h" // _PyErr_SetString()
1012
#include "pycore_pyhash.h" // _Py_KeyedHash()
1113
#include "pycore_pylifecycle.h"
@@ -912,172 +914,192 @@ extensions_lock_release(void)
912914
dictionary, to avoid loading shared libraries twice.
913915
*/
914916

917+
static void *
918+
hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep)
919+
{
920+
Py_ssize_t str1_len, str2_len;
921+
const char *str1_data = PyUnicode_AsUTF8AndSize(str1, &str1_len);
922+
const char *str2_data = PyUnicode_AsUTF8AndSize(str2, &str2_len);
923+
if (str1_data == NULL || str2_data == NULL) {
924+
return NULL;
925+
}
926+
/* Make sure sep and the NULL byte won't cause an overflow. */
927+
assert(SIZE_MAX - str1_len - str2_len > 2);
928+
size_t size = str1_len + 1 + str2_len + 1;
929+
930+
char *key = PyMem_RawMalloc(size);
931+
if (key == NULL) {
932+
PyErr_NoMemory();
933+
return NULL;
934+
}
935+
936+
strncpy(key, str1_data, str1_len);
937+
key[str1_len] = sep;
938+
strncpy(key + str1_len + 1, str2_data, str2_len + 1);
939+
assert(strlen(key) == size - 1);
940+
return key;
941+
}
942+
943+
static Py_uhash_t
944+
hashtable_hash_str(const void *key)
945+
{
946+
return _Py_HashBytes(key, strlen((const char *)key));
947+
}
948+
949+
static int
950+
hashtable_compare_str(const void *key1, const void *key2)
951+
{
952+
return strcmp((const char *)key1, (const char *)key2) == 0;
953+
}
954+
915955
static void
916-
_extensions_cache_init(void)
956+
hashtable_destroy_str(void *ptr)
917957
{
918-
/* The runtime (i.e. main interpreter) must be initializing,
919-
so we don't need to worry about the lock. */
920-
_PyThreadState_InitDetached(&EXTENSIONS.main_tstate,
921-
_PyInterpreterState_Main());
958+
PyMem_RawFree(ptr);
922959
}
923960

961+
#define HTSEP ':'
962+
924963
static PyModuleDef *
925964
_extensions_cache_get(PyObject *filename, PyObject *name)
926965
{
927966
PyModuleDef *def = NULL;
967+
void *key = NULL;
928968
extensions_lock_acquire();
929969

930-
PyObject *key = PyTuple_Pack(2, filename, name);
931-
if (key == NULL) {
970+
if (EXTENSIONS.hashtable == NULL) {
932971
goto finally;
933972
}
934973

935-
PyObject *extensions = EXTENSIONS.dict;
936-
if (extensions == NULL) {
974+
key = hashtable_key_from_2_strings(filename, name, HTSEP);
975+
if (key == NULL) {
976+
goto finally;
977+
}
978+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(
979+
EXTENSIONS.hashtable, key);
980+
if (entry == NULL) {
937981
goto finally;
938982
}
939-
def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
983+
def = (PyModuleDef *)entry->value;
940984

941985
finally:
942-
Py_XDECREF(key);
943986
extensions_lock_release();
987+
if (key != NULL) {
988+
PyMem_RawFree(key);
989+
}
944990
return def;
945991
}
946992

947993
static int
948994
_extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def)
949995
{
950996
int res = -1;
951-
PyThreadState *oldts = NULL;
952997
extensions_lock_acquire();
953998

954-
/* Swap to the main interpreter, if necessary. This matters if
955-
the dict hasn't been created yet or if the item isn't in the
956-
dict yet. In both cases we must ensure the relevant objects
957-
are created using the main interpreter. */
958-
PyThreadState *main_tstate = &EXTENSIONS.main_tstate;
959-
PyInterpreterState *interp = _PyInterpreterState_GET();
960-
if (!_Py_IsMainInterpreter(interp)) {
961-
_PyThreadState_BindDetached(main_tstate);
962-
oldts = _PyThreadState_Swap(interp->runtime, main_tstate);
963-
assert(!_Py_IsMainInterpreter(oldts->interp));
964-
965-
/* Make sure the name and filename objects are owned
966-
by the main interpreter. */
967-
name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name));
968-
assert(name != NULL);
969-
filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename));
970-
assert(filename != NULL);
999+
if (EXTENSIONS.hashtable == NULL) {
1000+
_Py_hashtable_allocator_t alloc = {PyMem_RawMalloc, PyMem_RawFree};
1001+
EXTENSIONS.hashtable = _Py_hashtable_new_full(
1002+
hashtable_hash_str,
1003+
hashtable_compare_str,
1004+
hashtable_destroy_str, // key
1005+
/* There's no need to decref the def since it's immortal. */
1006+
NULL, // value
1007+
&alloc
1008+
);
1009+
if (EXTENSIONS.hashtable == NULL) {
1010+
PyErr_NoMemory();
1011+
goto finally;
1012+
}
9711013
}
9721014

973-
PyObject *key = PyTuple_Pack(2, filename, name);
1015+
void *key = hashtable_key_from_2_strings(filename, name, HTSEP);
9741016
if (key == NULL) {
9751017
goto finally;
9761018
}
9771019

978-
PyObject *extensions = EXTENSIONS.dict;
979-
if (extensions == NULL) {
980-
extensions = PyDict_New();
981-
if (extensions == NULL) {
1020+
int already_set = 0;
1021+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(
1022+
EXTENSIONS.hashtable, key);
1023+
if (entry == NULL) {
1024+
if (_Py_hashtable_set(EXTENSIONS.hashtable, key, def) < 0) {
1025+
PyMem_RawFree(key);
1026+
PyErr_NoMemory();
9821027
goto finally;
9831028
}
984-
EXTENSIONS.dict = extensions;
985-
}
986-
987-
PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
988-
if (PyErr_Occurred()) {
989-
goto finally;
9901029
}
991-
else if (actual != NULL) {
992-
/* We expect it to be static, so it must be the same pointer. */
993-
assert(def == actual);
994-
res = 0;
995-
goto finally;
1030+
else {
1031+
if (entry->value == NULL) {
1032+
entry->value = def;
1033+
}
1034+
else {
1035+
/* We expect it to be static, so it must be the same pointer. */
1036+
assert((PyModuleDef *)entry->value == def);
1037+
already_set = 1;
1038+
}
1039+
PyMem_RawFree(key);
9961040
}
997-
998-
/* This might trigger a resize, which is why we must switch
999-
to the main interpreter. */
1000-
res = PyDict_SetItem(extensions, key, (PyObject *)def);
1001-
if (res < 0) {
1002-
res = -1;
1003-
goto finally;
1041+
if (!already_set) {
1042+
/* We assume that all module defs are statically allocated
1043+
and will never be freed. Otherwise, we would incref here. */
1044+
_Py_SetImmortal(def);
10041045
}
10051046
res = 0;
10061047

10071048
finally:
1008-
Py_XDECREF(key);
1009-
if (oldts != NULL) {
1010-
_PyThreadState_Swap(interp->runtime, oldts);
1011-
_PyThreadState_UnbindDetached(main_tstate);
1012-
Py_DECREF(name);
1013-
Py_DECREF(filename);
1014-
}
10151049
extensions_lock_release();
10161050
return res;
10171051
}
10181052

1019-
static int
1053+
static void
10201054
_extensions_cache_delete(PyObject *filename, PyObject *name)
10211055
{
1022-
int res = -1;
1023-
PyThreadState *oldts = NULL;
1056+
void *key = NULL;
10241057
extensions_lock_acquire();
10251058

1026-
PyObject *key = PyTuple_Pack(2, filename, name);
1027-
if (key == NULL) {
1059+
if (EXTENSIONS.hashtable == NULL) {
1060+
/* It was never added. */
10281061
goto finally;
10291062
}
10301063

1031-
PyObject *extensions = EXTENSIONS.dict;
1032-
if (extensions == NULL) {
1033-
res = 0;
1064+
key = hashtable_key_from_2_strings(filename, name, HTSEP);
1065+
if (key == NULL) {
10341066
goto finally;
10351067
}
10361068

1037-
PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
1038-
if (PyErr_Occurred()) {
1069+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(
1070+
EXTENSIONS.hashtable, key);
1071+
if (entry == NULL) {
1072+
/* It was never added. */
10391073
goto finally;
10401074
}
1041-
else if (actual == NULL) {
1042-
/* It was already removed or never added. */
1043-
res = 0;
1075+
if (entry->value == NULL) {
1076+
/* It was already removed. */
10441077
goto finally;
10451078
}
1046-
1047-
/* Swap to the main interpreter, if necessary. */
1048-
PyThreadState *main_tstate = &EXTENSIONS.main_tstate;
1049-
PyInterpreterState *interp = _PyInterpreterState_GET();
1050-
if (!_Py_IsMainInterpreter(interp)) {
1051-
_PyThreadState_BindDetached(main_tstate);
1052-
oldts = _PyThreadState_Swap(interp->runtime, main_tstate);
1053-
assert(!_Py_IsMainInterpreter(oldts->interp));
1054-
}
1055-
1056-
if (PyDict_DelItem(extensions, key) < 0) {
1057-
goto finally;
1058-
}
1059-
res = 0;
1079+
/* If we hadn't made the stored defs immortal, we would decref here.
1080+
However, this decref would be problematic if the module def were
1081+
dynamically allocated, it were the last ref, and this function
1082+
were called with an interpreter other than the def's owner. */
1083+
entry->value = NULL;
10601084

10611085
finally:
1062-
if (oldts != NULL) {
1063-
_PyThreadState_Swap(interp->runtime, oldts);
1064-
_PyThreadState_UnbindDetached(main_tstate);
1065-
}
1066-
Py_XDECREF(key);
10671086
extensions_lock_release();
1068-
return res;
1087+
if (key != NULL) {
1088+
PyMem_RawFree(key);
1089+
}
10691090
}
10701091

10711092
static void
10721093
_extensions_cache_clear_all(void)
10731094
{
10741095
/* The runtime (i.e. main interpreter) must be finalizing,
10751096
so we don't need to worry about the lock. */
1076-
// XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET()));
1077-
Py_CLEAR(EXTENSIONS.dict);
1078-
_PyThreadState_ClearDetached(&EXTENSIONS.main_tstate);
1097+
_Py_hashtable_destroy(EXTENSIONS.hashtable);
1098+
EXTENSIONS.hashtable = NULL;
10791099
}
10801100

1101+
#undef HTSEP
1102+
10811103

10821104
static bool
10831105
check_multi_interp_extensions(PyInterpreterState *interp)
@@ -1238,6 +1260,8 @@ import_find_extension(PyThreadState *tstate, PyObject *name,
12381260
PyObject *m_copy = def->m_base.m_copy;
12391261
/* Module does not support repeated initialization */
12401262
if (m_copy == NULL) {
1263+
/* It might be a core module (e.g. sys & builtins),
1264+
for which we don't set m_copy. */
12411265
m_copy = get_core_module_dict(tstate->interp, name, filename);
12421266
if (m_copy == NULL) {
12431267
return NULL;
@@ -1307,9 +1331,7 @@ clear_singlephase_extension(PyInterpreterState *interp,
13071331
}
13081332

13091333
/* Clear the cached module def. */
1310-
if (_extensions_cache_delete(filename, name) < 0) {
1311-
return -1;
1312-
}
1334+
_extensions_cache_delete(filename, name);
13131335

13141336
return 0;
13151337
}
@@ -3059,6 +3081,8 @@ void
30593081
_PyImport_Fini(void)
30603082
{
30613083
/* Destroy the database used by _PyImport_{Fixup,Find}Extension */
3084+
// XXX Should we actually leave them (mostly) intact, since we don't
3085+
// ever dlclose() the module files?
30623086
_extensions_cache_clear_all();
30633087

30643088
/* Use the same memory allocator as _PyImport_Init(). */
@@ -3096,10 +3120,6 @@ _PyImport_Fini2(void)
30963120
PyStatus
30973121
_PyImport_InitCore(PyThreadState *tstate, PyObject *sysmod, int importlib)
30983122
{
3099-
if (_Py_IsMainInterpreter(tstate->interp)) {
3100-
_extensions_cache_init();
3101-
}
3102-
31033123
// XXX Initialize here: interp->modules and interp->import_func.
31043124
// XXX Initialize here: sys.modules and sys.meta_path.
31053125

0 commit comments

Comments
 (0)