Skip to content

Commit 28ef3a5

Browse files
authored
Disregard old test repository quirks for migration (#4954)
An old test created a `.gitignore` file in the `path` subfolder of a node's repository folder, while also creating files under the `raw_input` subfolder. This adds a check to the migration logic for this specific edge case: when `path` contains only a `.gitignore`, the `raw_input` folder is migrated and the `path` folder is ignored for that node. Fixes #4910
1 parent b5b5051 commit 28ef3a5

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

aiida/backends/general/migrations/utils.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -252,12 +252,16 @@ def get_node_repository_dirpaths(basepath, shard=None):
252252
path = None
253253

254254
if 'path' in subdirs and 'raw_input' in subdirs:
255-
# If the `path` is empty, we simply ignore and set `raw_input` to be migrated, otherwise we add
256-
# the entry to `contains_both` which will cause the migration to fail.
257-
if os.listdir(dirpath / 'path'):
258-
contains_both.append(str(dirpath))
259-
else:
255+
# If the `path` folder is empty OR it contains *only* a `.gitignore`, we simply ignore and set
256+
# `raw_input` to be migrated, otherwise we add the entry to `contains_both` which will cause the
257+
# migration to fail.
258+
# See issue #4910 (https://github.com/aiidateam/aiida-core/issues/4910) for more information on the
259+
# `.gitignore` case.
260+
path_contents = os.listdir(dirpath / 'path')
261+
if not path_contents or path_contents == ['.gitignore']:
260262
path = dirpath / 'raw_input'
263+
else:
264+
contains_both.append(str(dirpath))
261265
elif 'path' in subdirs:
262266
path = dirpath / 'path'
263267
elif 'raw_input' in subdirs:

tests/backends/aiida_django/migrations/test_migrations_0047_migrate_repository.py

+21
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# pylint: disable=import-error,no-name-in-module,invalid-name
1111
"""Test migration of the old file repository to the disk object store."""
1212
import hashlib
13+
import os
1314

1415
from aiida.backends.general.migrations import utils
1516
from .test_migrations_common import TestMigrations
@@ -33,14 +34,25 @@ def setUpBeforeMigration(self):
3334
dbnode_02.save()
3435
dbnode_03 = DbNode(user_id=self.default_user.id)
3536
dbnode_03.save()
37+
dbnode_04 = DbNode(user_id=self.default_user.id)
38+
dbnode_04.save()
3639

3740
self.node_01_pk = dbnode_01.pk
3841
self.node_02_pk = dbnode_02.pk
3942
self.node_03_pk = dbnode_03.pk
43+
self.node_04_pk = dbnode_04.pk
4044

4145
utils.put_object_from_string(dbnode_01.uuid, 'sub/path/file_b.txt', 'b')
4246
utils.put_object_from_string(dbnode_01.uuid, 'sub/file_a.txt', 'a')
4347
utils.put_object_from_string(dbnode_02.uuid, 'output.txt', 'output')
48+
utils.put_object_from_string(dbnode_04.uuid, '.gitignore', 'test')
49+
50+
# If both `path` and `raw_input` subfolders are present and `path` contains only a `.gitignore`, `path` should be ignored.
51+
# Cannot use `put_object_from_string` here as it statically writes under the `path` folder.
52+
raw_input_sub_folder = utils.get_node_repository_sub_folder(dbnode_04.uuid, subfolder='raw_input')
53+
os.makedirs(raw_input_sub_folder, exist_ok=True)
54+
with open(os.path.join(raw_input_sub_folder, 'input.txt'), 'w', encoding='utf-8') as handle:
55+
handle.write('input')
4456

4557
# When multiple migrations are run, it is possible that migration 0047 is run at a point where the repository
4658
# container does not have a UUID (at that point in the migration) and so the setting gets set to `None`. This
@@ -56,6 +68,7 @@ def test_migration(self):
5668
node_01 = DbNode.objects.get(pk=self.node_01_pk)
5769
node_02 = DbNode.objects.get(pk=self.node_02_pk)
5870
node_03 = DbNode.objects.get(pk=self.node_03_pk)
71+
node_04 = DbNode.objects.get(pk=self.node_04_pk)
5972

6073
assert node_01.repository_metadata == {
6174
'o': {
@@ -83,11 +96,19 @@ def test_migration(self):
8396
}
8497
}
8598
assert node_03.repository_metadata == {}
99+
assert node_04.repository_metadata == {
100+
'o': {
101+
'input.txt': {
102+
'k': hashlib.sha256('input'.encode('utf-8')).hexdigest()
103+
}
104+
}
105+
}
86106

87107
for hashkey, content in (
88108
(node_01.repository_metadata['o']['sub']['o']['path']['o']['file_b.txt']['k'], b'b'),
89109
(node_01.repository_metadata['o']['sub']['o']['file_a.txt']['k'], b'a'),
90110
(node_02.repository_metadata['o']['output.txt']['k'], b'output'),
111+
(node_04.repository_metadata['o']['input.txt']['k'], b'input'),
91112
):
92113
assert utils.get_repository_object(hashkey) == content
93114

tests/backends/aiida_sqlalchemy/test_migrations.py

+24
Original file line numberDiff line numberDiff line change
@@ -1816,29 +1816,44 @@ def setUpBeforeMigration(self):
18161816
node_02 = DbNode(user_id=default_user.id, uuid=get_new_uuid())
18171817
node_03 = DbNode(user_id=default_user.id, uuid=get_new_uuid())
18181818
node_04 = DbNode(user_id=default_user.id, uuid=get_new_uuid())
1819+
node_05 = DbNode(user_id=default_user.id, uuid=get_new_uuid())
18191820

18201821
session.add(node_01)
18211822
session.add(node_02)
18221823
session.add(node_03) # Empty repository folder
18231824
session.add(node_04) # Both `path` and `raw_input` subfolder
1825+
session.add(node_05) # Both `path` and `raw_input` subfolder & `.gitignore` in `path`
18241826
session.commit()
18251827

18261828
assert node_01.uuid is not None
18271829
assert node_02.uuid is not None
18281830
assert node_03.uuid is not None
18291831
assert node_04.uuid is not None
1832+
assert node_05.uuid is not None
18301833

18311834
self.node_01_pk = node_01.id
18321835
self.node_02_pk = node_02.id
18331836
self.node_03_pk = node_03.id
18341837
self.node_04_pk = node_04.id
1838+
self.node_05_pk = node_05.id
18351839

18361840
utils.put_object_from_string(node_01.uuid, 'sub/path/file_b.txt', 'b')
18371841
utils.put_object_from_string(node_01.uuid, 'sub/file_a.txt', 'a')
18381842
utils.put_object_from_string(node_02.uuid, 'output.txt', 'output')
18391843

18401844
os.makedirs(utils.get_node_repository_sub_folder(node_04.uuid, 'path'), exist_ok=True)
18411845
os.makedirs(utils.get_node_repository_sub_folder(node_04.uuid, 'raw_input'), exist_ok=True)
1846+
os.makedirs(utils.get_node_repository_sub_folder(node_05.uuid, 'path'), exist_ok=True)
1847+
os.makedirs(utils.get_node_repository_sub_folder(node_05.uuid, 'raw_input'), exist_ok=True)
1848+
1849+
utils.put_object_from_string(node_05.uuid, '.gitignore', 'test')
1850+
with open(
1851+
os.path.join(
1852+
utils.get_node_repository_sub_folder(node_05.uuid, 'raw_input'), 'input.txt'),
1853+
'w',
1854+
encoding='utf-8',
1855+
) as handle:
1856+
handle.write('input')
18421857

18431858
# Add a repository folder for a node that no longer exists - i.e. it may have been deleted.
18441859
utils.put_object_from_string(get_new_uuid(), 'file_of_deleted_node', 'output')
@@ -1859,6 +1874,7 @@ def test_migration(self):
18591874
node_01 = session.query(DbNode).filter(DbNode.id == self.node_01_pk).one()
18601875
node_02 = session.query(DbNode).filter(DbNode.id == self.node_02_pk).one()
18611876
node_03 = session.query(DbNode).filter(DbNode.id == self.node_03_pk).one()
1877+
node_05 = session.query(DbNode).filter(DbNode.id == self.node_05_pk).one()
18621878

18631879
assert node_01.repository_metadata == {
18641880
'o': {
@@ -1886,11 +1902,19 @@ def test_migration(self):
18861902
}
18871903
}
18881904
assert node_03.repository_metadata == {}
1905+
assert node_05.repository_metadata == {
1906+
'o': {
1907+
'input.txt': {
1908+
'k': hashlib.sha256('input'.encode('utf-8')).hexdigest()
1909+
}
1910+
}
1911+
}
18891912

18901913
for hashkey, content in (
18911914
(node_01.repository_metadata['o']['sub']['o']['path']['o']['file_b.txt']['k'], b'b'),
18921915
(node_01.repository_metadata['o']['sub']['o']['file_a.txt']['k'], b'a'),
18931916
(node_02.repository_metadata['o']['output.txt']['k'], b'output'),
1917+
(node_05.repository_metadata['o']['input.txt']['k'], b'input'),
18941918
):
18951919
assert utils.get_repository_object(hashkey) == content
18961920

0 commit comments

Comments
 (0)