Skip to content

Commit fb4f981

Browse files
committedAug 7, 2024
Engine: Ignore failing process state change for core.sqlite_dos
For each process state change, the engine calls the utility function `aiida.engine.utils.set_process_state_change_timestamp`. This calls `set_global_variable` on the storage plugin to update the `process|state_change|.*` key in the settings table. This value is used in `verdi process list` to show when a process last changed its state, which serves as a proxy for daemon activity. When multiple processes were running, this call would throw an exception for the `core.sqlite_dos` storage plugin, because SQLite does not support concurrent writes that touch the same page, which is the case when multiple writes update the same row. Since updating the timestamp is not crucial for AiiDA to function properly — especially as the failure only means another process was performing the same update — it is safe to ignore the failed update and simply log it as a warning. Cherry-pick: 1b8c58b
1 parent 923fd9f commit fb4f981

File tree

2 files changed

+65
-1
lines changed

2 files changed

+65
-1
lines changed
 

‎src/aiida/engine/utils.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ def set_process_state_change_timestamp(node: 'ProcessNode') -> None:
268268
269269
:param process: the Process instance that changed its state
270270
"""
271+
from sqlalchemy.exc import OperationalError
272+
271273
from aiida.common import timezone
272274
from aiida.manage import get_manager
273275
from aiida.orm import CalculationNode, ProcessNode, WorkflowNode
@@ -287,7 +289,17 @@ def set_process_state_change_timestamp(node: 'ProcessNode') -> None:
287289
value = timezone.now().isoformat()
288290

289291
backend = get_manager().get_profile_storage()
290-
backend.set_global_variable(key, value, description)
292+
293+
try:
294+
backend.set_global_variable(key, value, description)
295+
except OperationalError:
296+
# This typically happens for SQLite-based storage plugins like ``core.sqlite_dos``. Since this is merely an
297+
# update of a timestamp that is likely to be updated soon again, just ignoring the failed update is not a
298+
# problem.
299+
LOGGER.warning(
300+
f'Failed to write global variable `{key}` to `{value}` because the database was locked. If the storage '
301+
'plugin being used is `core.sqlite_dos` this is to be expected and can be safely ignored.'
302+
)
291303

292304

293305
def get_process_state_change_timestamp(process_type: Optional[str] = None) -> Optional[datetime]:

‎tests/engine/test_utils.py

+52
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,19 @@
99
"""Test engine utilities such as the exponential backoff mechanism."""
1010

1111
import asyncio
12+
import contextlib
1213

1314
import pytest
1415
from aiida import orm
1516
from aiida.engine import calcfunction, workfunction
1617
from aiida.engine.utils import (
1718
InterruptableFuture,
1819
exponential_backoff_retry,
20+
get_process_state_change_timestamp,
1921
instantiate_process,
2022
interruptable_task,
2123
is_process_function,
24+
set_process_state_change_timestamp,
2225
)
2326

2427
ITERATION = 0
@@ -225,3 +228,52 @@ async def coro():
225228

226229
result = await task_fut
227230
assert result == 'NOT ME!!!'
231+
232+
233+
@pytest.mark.parametrize('with_transaction', (True, False))
@pytest.mark.parametrize('monkeypatch_process_state_change', (True, False))
def test_set_process_state_change_timestamp(manager, with_transaction, monkeypatch_process_state_change, monkeypatch):
    """Test :func:`aiida.engine.utils.set_process_state_change_timestamp`.

    With the ``core.sqlite_dos`` storage plugin, concurrent processes updating their state all write to the same row
    of the settings table, and SQLite's page-level locking makes one of those writes raise because the database is
    locked. ``set_process_state_change_timestamp`` catches that exception and ignores it. This test verifies that when
    the write fails, other changes made in the same scope — such as an extra set on a node — are not accidentally
    rolled back, both inside an explicit transaction and outside of one.
    """
    profile_storage = manager.get_profile_storage()

    node = orm.CalculationNode().store()
    key = 'some_key'
    value = 'some value'

    # Record an initial process state change timestamp so the end of the test can detect whether a later call
    # actually modified it.
    set_process_state_change_timestamp(node)
    timestamp_before = get_process_state_change_timestamp()
    assert timestamp_before is not None

    if monkeypatch_process_state_change:
        # Simulate the locked-database failure mode of ``core.sqlite_dos``.
        def _raise_operational_error(*_, **__):
            from sqlalchemy.exc import OperationalError

            raise OperationalError('monkey failure', None, '', '')

        monkeypatch.setattr(profile_storage, 'set_global_variable', _raise_operational_error)

    if with_transaction:
        context_factory = profile_storage.transaction
    else:
        context_factory = contextlib.nullcontext

    with context_factory():
        node.base.extras.set(key, value)
        set_process_state_change_timestamp(node)

    # The node extra should always have been set, regardless if the process state change excepted
    assert node.base.extras.get(key) == value

    # The process state change should have changed if the storage plugin was not monkeypatched to fail
    if monkeypatch_process_state_change:
        assert get_process_state_change_timestamp() == timestamp_before
    else:
        assert get_process_state_change_timestamp() != timestamp_before

0 commit comments

Comments
 (0)
Please sign in to comment.