-
-
Notifications
You must be signed in to change notification settings - Fork 33.9k
gh-140746: Fix Thread.start() that can hang indefinitely #140799
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9286aa9
4a75fe7
68628e0
d66016c
e28b16f
fe123a1
590c54c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -930,7 +930,6 @@ class is implemented. | |
| if _HAVE_THREAD_NATIVE_ID: | ||
| self._native_id = None | ||
| self._os_thread_handle = _ThreadHandle() | ||
| self._started = Event() | ||
| self._initialized = True | ||
| # Copy of sys.stderr used by self._invoke_excepthook() | ||
| self._stderr = _sys.stderr | ||
|
|
@@ -940,7 +939,6 @@ class is implemented. | |
|
|
||
| def _after_fork(self, new_ident=None): | ||
| # Private! Called by threading._after_fork(). | ||
| self._started._at_fork_reinit() | ||
| if new_ident is not None: | ||
| # This thread is alive. | ||
| self._ident = new_ident | ||
|
|
@@ -955,7 +953,7 @@ def _after_fork(self, new_ident=None): | |
| def __repr__(self): | ||
| assert self._initialized, "Thread.__init__() was not called" | ||
| status = "initial" | ||
| if self._started.is_set(): | ||
| if self._os_thread_handle.is_bootstraped(): | ||
| status = "started" | ||
| if self._os_thread_handle.is_done(): | ||
| status = "stopped" | ||
|
|
@@ -978,7 +976,7 @@ def start(self): | |
| if not self._initialized: | ||
| raise RuntimeError("thread.__init__() not called") | ||
|
|
||
| if self._started.is_set(): | ||
| if self._os_thread_handle.is_bootstraped(): | ||
| raise RuntimeError("threads can only be started once") | ||
|
|
||
| with _active_limbo_lock: | ||
|
|
@@ -1001,7 +999,15 @@ def start(self): | |
| with _active_limbo_lock: | ||
| del _limbo[self] | ||
| raise | ||
| self._started.wait() # Will set ident and native_id | ||
| self._os_thread_handle.wait_bootstraped() | ||
|
|
||
| # It's possible that the _bootstrap(_inner) fails in the new Thread (e.g. Memory Error); | ||
| # We have to clean `_limbo` and `_active` here to avoid inconsistent state as much as possible | ||
| if self._os_thread_handle.is_failed(): | ||
|
Comment on lines
+1002
to
+1006
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do we know that the thread hasn't started? Shouldn't we raise a RuntimeError exception ?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Because the new thread modifies the state of
AFAIK, that's not possible to raise Exception between Threads.
Then you cannot fix up the |
||
| with _active_limbo_lock: | ||
| _limbo.pop(self, None) | ||
| if self._ident: | ||
| _active.pop(self._ident, None) | ||
|
|
||
| def run(self): | ||
| """Method representing the thread's activity. | ||
|
|
@@ -1061,7 +1067,7 @@ def _bootstrap_inner(self): | |
| if _HAVE_THREAD_NATIVE_ID: | ||
| self._set_native_id() | ||
| self._set_os_name() | ||
| self._started.set() | ||
| self._os_thread_handle.set_bootstraped() | ||
| with _active_limbo_lock: | ||
| _active[self._ident] = self | ||
| del _limbo[self] | ||
|
|
@@ -1081,11 +1087,7 @@ def _bootstrap_inner(self): | |
| def _delete(self): | ||
| "Remove current thread from the dict of currently running threads." | ||
| with _active_limbo_lock: | ||
| del _active[get_ident()] | ||
| # There must not be any python code between the previous line | ||
| # and after the lock is released. Otherwise a tracing function | ||
| # could try to acquire the lock again in the same thread, (in | ||
| # current_thread()), and would block. | ||
| _active.pop(self._ident, None) | ||
|
|
||
| def join(self, timeout=None): | ||
| """Wait until the thread terminates. | ||
|
|
@@ -1113,7 +1115,7 @@ def join(self, timeout=None): | |
| """ | ||
| if not self._initialized: | ||
| raise RuntimeError("Thread.__init__() not called") | ||
| if not self._started.is_set(): | ||
| if not self._os_thread_handle.is_done() and not self._os_thread_handle.is_bootstraped(): | ||
| raise RuntimeError("cannot join thread before it is started") | ||
| if self is current_thread(): | ||
| raise RuntimeError("cannot join current thread") | ||
|
|
@@ -1176,7 +1178,7 @@ def is_alive(self): | |
|
|
||
| """ | ||
| assert self._initialized, "Thread.__init__() not called" | ||
| return self._started.is_set() and not self._os_thread_handle.is_done() | ||
| return self._os_thread_handle.is_bootstraped() and not self._os_thread_handle.is_done() | ||
|
|
||
| @property | ||
| def daemon(self): | ||
|
|
@@ -1199,7 +1201,7 @@ def daemon(self, daemonic): | |
| raise RuntimeError("Thread.__init__() not called") | ||
| if daemonic and not _daemon_threads_allowed(): | ||
| raise RuntimeError('daemon threads are disabled in this interpreter') | ||
| if self._started.is_set(): | ||
| if self._os_thread_handle.is_bootstraped() or self._os_thread_handle.is_done(): | ||
| raise RuntimeError("cannot set daemon status of active thread") | ||
| self._daemonic = daemonic | ||
|
|
||
|
|
@@ -1385,7 +1387,6 @@ class _MainThread(Thread): | |
|
|
||
| def __init__(self): | ||
| Thread.__init__(self, name="MainThread", daemon=False) | ||
| self._started.set() | ||
| self._ident = _get_main_thread_ident() | ||
| self._os_thread_handle = _make_thread_handle(self._ident) | ||
| if _HAVE_THREAD_NATIVE_ID: | ||
|
|
@@ -1433,7 +1434,6 @@ class _DummyThread(Thread): | |
| def __init__(self): | ||
| Thread.__init__(self, name=_newname("Dummy-%d"), | ||
| daemon=_daemon_threads_allowed()) | ||
| self._started.set() | ||
| self._set_ident() | ||
| self._os_thread_handle = _make_thread_handle(self._ident) | ||
| if _HAVE_THREAD_NATIVE_ID: | ||
|
|
@@ -1443,7 +1443,7 @@ def __init__(self): | |
| _DeleteDummyThreadOnDel(self) | ||
|
|
||
| def is_alive(self): | ||
| if not self._os_thread_handle.is_done() and self._started.is_set(): | ||
| if self._os_thread_handle.is_bootstraped() and not self._os_thread_handle.is_done(): | ||
| return True | ||
| raise RuntimeError("thread is not alive") | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Fix :func:`threading.Thread.start` that can hang indefinitely in case of heap memory exhaustion during initialization of the new thread. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is that worth testing a use case where the _
bootstrapmethod will be never called just to check that the thread is not running ?Is that worth testing a use case where the an exception is raised in the
_start_joinable_threadjust to check that the thread is not running ? This last one could simulate your initial issue.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My initial issue is when the new thread crashes inside the _bootstrap/_bootstrap_inner before signaling that it starts.
There is already
test_start_new_thread_failedand that sounds a bit out of scope IMO.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree, sorry for the misunderstood.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is this issue actually located ? Is your example with few memory (ulimit -v 1000000) the only one that often fails ?
It bothers me that we don't have a reproducible example, even though I understand that this is a complex issue.
If you agree, I will try to work on a reproductible example.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can be located at every place that allocates memory before
self._started.is_set()in the new Thread.PyObject_Callinthread_runis only one example; it can happen calling_bootstrap_inner,self._set_ident(),self._set_native_id(), and so on. In my test, I chose one of them to demonstrate the problem and test it.