Skip to content

Commit 4bf1dc6

Browse files
committed
feat(cachi2): add support for git-submodules
For the git-submodules pkg manager, OSBS must handle all the work; cachi2 doesn't manipulate git. Submodules have to be cloned and their metadata exported into the SBOM and request.json. Signed-off-by: Martin Basti <[email protected]>
1 parent 970e682 commit 4bf1dc6

9 files changed

+541
-5
lines changed

atomic_reactor/constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@
143143
HTTP_CLIENT_STATUS_RETRY = (408, 429, 500, 502, 503, 504)
144144
# requests timeout in seconds
145145
HTTP_REQUEST_TIMEOUT = 600
146+
# git cmd timeout in seconds
147+
GIT_CMD_TIMEOUT = 600
146148
# max retries for git clone
147149
GIT_MAX_RETRIES = 3
148150
# how many seconds should wait before another try of git clone

atomic_reactor/plugins/cachi2_init.py

+13
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Any, Optional, List, Dict
1111
from pathlib import Path
1212

13+
import git
1314
from osbs.utils import clone_git_repo
1415

1516
from atomic_reactor.constants import (
@@ -28,6 +29,8 @@
2829
from atomic_reactor.utils.cachi2 import (
2930
remote_source_to_cachi2, clone_only, validate_paths,
3031
normalize_gomod_pkg_manager, enforce_sandbox,
32+
has_git_submodule_manager, update_submodules,
33+
get_submodules_sbom_components, get_submodules_request_json_deps,
3134
)
3235

3336

@@ -135,6 +138,15 @@ def process_remote_sources(self) -> List[Dict[str, Any]]:
135138
remote_source_data["ref"]
136139
)
137140

141+
if has_git_submodule_manager(remote_source_data):
142+
update_submodules(source_path_app)
143+
repo = git.Repo(str(source_path_app))
144+
git_submodules = {
145+
"sbom_components": get_submodules_sbom_components(repo),
146+
"request_json_dependencies": get_submodules_request_json_deps(repo)
147+
}
148+
remote_source["git_submodules"] = git_submodules
149+
138150
remove_unsafe_symlinks = False
139151
flags = remote_source_data.get("flags", [])
140152
if "remove-unsafe-symlinks" in flags:
@@ -144,6 +156,7 @@ def process_remote_sources(self) -> List[Dict[str, Any]]:
144156
source_path_app,
145157
remove_unsafe_symlinks,
146158
)
159+
147160
validate_paths(source_path_app, remote_source_data.get("packages", {}))
148161

149162
if clone_only(remote_source_data):

atomic_reactor/plugins/cachi2_postprocess.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from atomic_reactor.constants import (
2121
CACHITO_ENV_ARG_ALIAS,
2222
CACHITO_ENV_FILENAME,
23+
CACHI2_BUILD_DIR,
2324
PLUGIN_CACHI2_INIT,
2425
PLUGIN_CACHI2_POSTPROCESS,
2526
REMOTE_SOURCE_DIR,
@@ -110,13 +111,36 @@ def run(self) -> Optional[List[Dict[str, Any]]]:
110111
return None
111112

112113
processed_remote_sources = self.postprocess_remote_sources()
114+
self.postprocess_git_submodules_global_sbom()
113115
self.inject_remote_sources(processed_remote_sources)
114116

115117
return [
116118
self.remote_source_to_output(remote_source)
117119
for remote_source in processed_remote_sources
118120
]
119121

122+
def postprocess_git_submodules_global_sbom(self):
    """Merge git-submodule components into the global SBOM.

    atomic-reactor is responsible for handling git-submodules itself
    (cachi2 does not touch git), so any submodule components collected by
    the init plugin must be appended to the global bom.json here.
    """
    extra_components = []
    for remote_source in self.init_plugin_data:
        submodules_meta = remote_source.get('git_submodules')
        if submodules_meta:
            extra_components.extend(submodules_meta['sbom_components'])

    # Nothing to merge: leave the global SBOM untouched.
    if not extra_components:
        return

    global_sbom_path = self.workflow.build_dir.path / CACHI2_BUILD_DIR / "bom.json"

    with open(global_sbom_path, 'r') as global_sbom_f:
        global_sbom_data = json.load(global_sbom_f)

    global_sbom_data['components'].extend(extra_components)

    with open(global_sbom_path, 'w') as global_sbom_f:
        json.dump(global_sbom_data, global_sbom_f)
        global_sbom_f.flush()
143+
120144
def postprocess_remote_sources(self) -> List[Cachi2RemoteSource]:
121145
"""Process remote source requests and return information about the processed sources."""
122146

@@ -132,12 +156,26 @@ def postprocess_remote_sources(self) -> List[Cachi2RemoteSource]:
132156
with open(sbom_path, 'r') as sbom_f:
133157
sbom_data = json.load(sbom_f)
134158

159+
# request_json must be generated before modifications to sboms are done
160+
request_json = generate_request_json(
161+
remote_source['remote_source'], sbom_data, json_env_data)
162+
163+
# update metadata with submodules info
164+
git_submodules = remote_source.get('git_submodules')
165+
if git_submodules:
166+
sbom_data['components'].extend(git_submodules['sbom_components'])
167+
168+
with open(sbom_path, 'w') as sbom_f:
169+
json.dump(sbom_data, sbom_f)
170+
sbom_f.flush()
171+
172+
request_json['dependencies'].extend(git_submodules['request_json_dependencies'])
173+
135174
remote_source_obj = Cachi2RemoteSource(
136175
name=remote_source['name'],
137176
tarball_path=Path(remote_source['source_path'], 'remote-source.tar.gz'),
138177
sources_path=Path(remote_source['source_path']),
139-
json_data=generate_request_json(
140-
remote_source['remote_source'], sbom_data, json_env_data),
178+
json_data=request_json,
141179
json_env_data=json_env_data,
142180
)
143181
processed_remote_sources.append(remote_source_obj)

atomic_reactor/utils/cachi2.py

+90
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,14 @@
1313
from typing import Any, Callable, Dict, Optional, Tuple, List
1414
from pathlib import Path
1515
import os.path
16+
import urllib
1617

18+
import git
1719
from packageurl import PackageURL
1820

21+
from atomic_reactor import constants
22+
from atomic_reactor.utils import retries
23+
1924
logger = logging.getLogger(__name__)
2025

2126

@@ -287,4 +292,89 @@ def clone_only(remote_source: Dict[str, Any]) -> bool:
287292
if pkg_managers is not None and len(pkg_managers) == 0:
288293
return True
289294

295+
# only git-submodule
296+
if pkg_managers is not None and pkg_managers == ['git-submodule']:
297+
return True
298+
290299
return False
300+
301+
302+
def has_git_submodule_manager(remote_source: Dict[str, Any]) -> bool:
    """Tell whether this remote source requests the git-submodule package manager."""
    managers = remote_source.get("pkg_managers")
    if not managers:
        # Missing key, None, or an empty list all mean "not requested".
        return False
    return 'git-submodule' in managers
306+
307+
308+
def update_submodules(repopath: Path):
    """Initialize and update all git submodules inside the given repository.

    Uses a blobless partial-clone filter to keep the submodule checkouts
    small; the command is retried on failure by the retries helper and is
    bounded by the configured git command timeout.
    """
    retries.run_cmd(
        ["git", "submodule", "update", "--init", "--filter=blob:none"],
        cwd=str(repopath),
        timeout=constants.GIT_CMD_TIMEOUT,
    )
316+
317+
318+
def get_submodules_sbom_components(repo: git.Repo) -> List[Dict]:
    """Build one SBOM "library" component entry per submodule of *repo*."""

    # Hostnames for which a dedicated purl type exists; anything else
    # falls back to pkg:generic with a ?vcs_url qualifier.
    hostname_to_purl_type = {
        "github.com": "github",
        "bitbucket.org": "bitbucket",
    }

    def to_vcs_purl(pkg_name, repo_url, ref):
        """Return the most specific vcs purl for the package.

        pkg:github / pkg:bitbucket when the hostname is recognized,
        otherwise pkg:generic with the full repo URL (and ref) quoted into
        a ?vcs_url qualifier.

        :param str pkg_name: name of package
        :param str repo_url: url of git repository for package
        :param str ref: git ref of package
        :return: the PURL string of the Package object
        :rtype: str
        """
        repo_url = repo_url.rstrip("/")
        parsed = urllib.parse.urlparse(repo_url)
        purl_type = hostname_to_purl_type.get(parsed.hostname, "generic")

        if purl_type != "generic":
            # pkg:github and pkg:bitbucket share the same layout:
            # lowercase namespace/name, optional trailing ".git" stripped.
            namespace, repo_name = parsed.path.lstrip("/").rsplit("/", 1)
            if repo_name.endswith(".git"):
                repo_name = repo_name[:-len(".git")]
            return f"pkg:{purl_type}/{namespace.lower()}/{repo_name.lower()}@{ref}"

        quoted = urllib.parse.quote(f"{repo_url}@{ref}", safe="")
        return f"pkg:generic/{pkg_name}?vcs_url={quoted}"

    components = []
    for submodule in repo.submodules:
        components.append(
            {
                "type": "library",
                "name": submodule.name,
                "version": f"{submodule.url}#{submodule.hexsha}",
                "purl": to_vcs_purl(submodule.name, submodule.url, submodule.hexsha),
            }
        )

    return components
366+
367+
368+
def get_submodules_request_json_deps(repo: git.Repo) -> List[Dict]:
    """Describe every submodule of *repo* as a request.json dependency entry."""
    dependencies = []
    for submodule in repo.submodules:
        dependencies.append(
            {
                "type": "git-submodule",
                "name": submodule.name,
                # submodule name doubles as its path within the repo
                "path": submodule.name,
                "version": f"{submodule.url}#{submodule.hexsha}",
            }
        )

    return dependencies

atomic_reactor/utils/retries.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def get_retrying_requests_session(client_statuses=HTTP_CLIENT_STATUS_RETRY,
8989
max_tries=SUBPROCESS_MAX_RETRIES + 1, # total tries is N retries + 1 initial attempt
9090
jitter=None, # use deterministic backoff, do not apply random jitter
9191
)
92-
def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes:
92+
def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None, **params) -> bytes:
9393
"""Run a subprocess command, retry on any non-zero exit status.
9494
9595
Whenever an attempt fails, the stdout and stderr of the failed command will be logged.
@@ -98,12 +98,14 @@ def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes:
9898
9999
If a cleanup command is specified it'll be run on exception before retry.
100100
101+
:param params: optional params to be passed to subprocess.run function
102+
101103
:return: bytes, the combined stdout and stderr (if any) of the command
102104
"""
103105
logger.debug("Running %s", " ".join(cmd))
104106

105107
try:
106-
process = subprocess.run(cmd, check=True, capture_output=True)
108+
process = subprocess.run(cmd, check=True, capture_output=True, **params)
107109
except subprocess.CalledProcessError as e:
108110
logger.warning(
109111
"%s failed:\nSTDOUT:\n%s\nSTDERR:\n%s",
@@ -114,7 +116,7 @@ def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes:
114116
if cleanup_cmd:
115117
try:
116118
logger.debug("Running %s", " ".join(cleanup_cmd))
117-
subprocess.run(cleanup_cmd, check=True, capture_output=True)
119+
subprocess.run(cleanup_cmd, check=True, capture_output=True, **params)
118120
except subprocess.CalledProcessError as c_e:
119121
logger.warning(
120122
"Cleanup command: %s failed:\nSTDOUT:\n%s\nSTDERR:\n%s",

requirements.in

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
backoff
33
dockerfile-parse>=0.0.13
44
flatpak-module-tools>=0.14
5+
gitpython
56
jsonschema
67
paramiko>=3.4.0
78
PyYAML

requirements.txt

+6
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ editables==0.5
4747
# via hatchling
4848
flatpak-module-tools==0.14
4949
# via -r requirements.in
50+
gitdb==4.0.12
51+
# via gitpython
52+
gitpython==3.1.44
53+
# via -r requirements.in
5054
googleapis-common-protos==1.60.0
5155
# via
5256
# opentelemetry-exporter-otlp-proto-grpc
@@ -176,6 +180,8 @@ six==1.16.0
176180
# koji
177181
# osbs-client
178182
# python-dateutil
183+
smmap==5.0.2
184+
# via gitdb
179185
tomli==2.0.1
180186
# via hatchling
181187
trove-classifiers==2023.8.7

0 commit comments

Comments
 (0)