Skip to content

Commit 3dac488

Browse files
authored
fix(studio): handle unexpected exceptions in updates thread (#864)
1 parent 6e29c5e commit 3dac488

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

Diff for: src/dvclive/live.py

+9 -2
Original file line number | Diff line number | Diff line change
@@ -929,10 +929,17 @@ def post_data_to_studio(self):
929929
self._studio_queue = queue.Queue()
930930

931931
def worker():
932+
error_occurred = False
932933
while True:
933934
item, data = self._studio_queue.get()
934-
post_to_studio(item, "data", data)
935-
self._studio_queue.task_done()
935+
try:
936+
if not error_occurred:
937+
post_to_studio(item, "data", data)
938+
except Exception:
939+
logger.exception("Failed to post data to studio")
940+
error_occurred = True
941+
finally:
942+
self._studio_queue.task_done()
936943

937944
threading.Thread(target=worker, daemon=True).start()
938945

Diff for: tests/test_post_to_studio.py

+30
Original file line number | Diff line number | Diff line change
@@ -274,6 +274,36 @@ def long_post(*args, **kwargs):
274274
assert metrics_file.read_text() == metrics_content
275275

276276

277+
def test_studio_update_raises_exception(tmp_path, mocked_dvc_repo, mocked_studio_post):
278+
# Test that if a studio update raises an exception, main process doesn't hang on
279+
# queue join in the Live main thread.
280+
# https://github.com/iterative/dvclive/pull/864
281+
mocked_post, valid_response = mocked_studio_post
282+
283+
def post_raises_exception(*args, **kwargs):
284+
if kwargs["json"]["type"] == "data":
285+
# We'll hit this sleep only once, other calls are ignored
286+
# after the exception is raised
287+
time.sleep(1)
288+
raise Exception("test exception") # noqa: TRY002, TRY003
289+
return valid_response
290+
291+
mocked_post.side_effect = post_raises_exception
292+
293+
with Live() as live:
294+
live.log_metric("foo", 1)
295+
live.log_metric("foo", 2)
296+
live.log_metric("foo", 3)
297+
298+
# Only 1 data call is made, other calls are ignored after the exception is raised
299+
assert mocked_post.call_count == 3
300+
assert [e.kwargs["json"]["type"] for e in mocked_post.call_args_list] == [
301+
"start",
302+
"data",
303+
"done",
304+
]
305+
306+
277307
@pytest.mark.studio
278308
def test_post_to_studio_skip_start_and_done_on_env_var(
279309
tmp_dir, mocked_dvc_repo, mocked_studio_post, monkeypatch

0 commit comments

Comments
 (0)