Skip to content

Commit 1de6289

Browse files
committed
feat: Cache build environments by content hash
Write an archive consisting of the build environment provided by pip, and then extract it at a path containing the SHA256 hash of its contents. This prevents unnecessary rebuilds triggered by CMake thinking that packages provided by pip in the build environment have been changed, when it's only the temporary path that changed. Because the path contains the hash of its contents, this is perfectly safe since a changed build environment will change the hash. Signed-off-by: Tobias Markus <[email protected]>
1 parent 26ef703 commit 1de6289

File tree

2 files changed

+124
-2
lines changed

2 files changed

+124
-2
lines changed

src/scikit_build_core/builder/builder.py

+121-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
from __future__ import annotations
22

33
import dataclasses
4+
import hashlib
5+
import os
46
import re
57
import sys
68
import sysconfig
9+
import tarfile
10+
import tempfile
711
from collections.abc import Iterable, Mapping, Sequence
812
from pathlib import Path
13+
from typing import BinaryIO
914

1015
from packaging.version import Version
1116

@@ -22,6 +27,7 @@
2227
get_python_library,
2328
get_soabi,
2429
)
30+
from .wheel_tag import WheelTag
2531

2632
__all__: list[str] = ["Builder", "get_archs", "archs_to_tags"]
2733

@@ -64,6 +70,83 @@ def archs_to_tags(archs: list[str]) -> list[str]:
6470
return archs
6571

6672

73+
@dataclasses.dataclass(init=False)
class BuildEnvArchive:
    """
    Content-addressed tar snapshot of a build environment directory.

    The directory is packed into an anonymous temporary tar file. Member names
    are stored relative to the directory, modification times are zeroed and
    ``__pycache__`` directories are left out, so the SHA256 digest of the
    archive depends on the directory contents and not on where the directory
    happens to live.
    """

    # Open temporary file holding the uncompressed tar archive.
    _archive_file: BinaryIO
    # SHA256 hash object computed over the complete archive.
    hash: hashlib._Hash

    def __init__(self, env_dir: Path) -> None:
        """
        Archive ``env_dir`` and compute the SHA256 digest of the archive.

        The temporary file is closed again if archiving or hashing fails.
        """
        self._archive_file = tempfile.TemporaryFile(  # type: ignore[assignment]
            prefix="build-env-archive-", suffix=".tar"
        )

        try:
            self._write_archive(Path(env_dir))
            self._archive_file.flush()
            archive_len = self._archive_file.tell()
            self.hash = self._digest()
        except BaseException:
            # Do not leak the open temporary file when construction fails.
            self._archive_file.close()
            raise

        logger.debug(
            "created build env archive len={} sha256={}",
            archive_len,
            self.hash.hexdigest(),
        )

    def _write_archive(self, env_dir: Path) -> None:
        """Write the normalized contents of ``env_dir`` into the archive file."""

        def ext_filter(ti: tarfile.TarInfo) -> tarfile.TarInfo | None:
            # The tree is added with arcname=".", so members are named "." or
            # "./pkg/mod.py". Dropping the leading "./" yields names relative
            # to the environment on every platform, without having to strip a
            # root or drive from an absolute path.
            name = ti.name[2:] if ti.name.startswith("./") else ti.name

            # Member types are bytes constants; compare by value, not identity.
            if ti.type == tarfile.LNKTYPE:
                logger.warning(
                    "Unexpected link inside build environment archive (path={})",
                    env_dir / name,
                )
            elif ti.type not in (tarfile.REGTYPE, tarfile.AREGTYPE, tarfile.DIRTYPE):
                logger.warning(
                    "Unexpected file type inside build environment archive (path={})",
                    env_dir / name,
                )

            # Bytecode caches are excluded: by default .pyc files embed the
            # modification time of their source file, so they differ between
            # otherwise identical environments. Returning None for the
            # directory also skips everything below it.
            if "__pycache__" in name.split("/"):
                return None

            ti.name = name

            # Reset mtime to zero. This is safe (regarding build tool
            # out-of-date detection) since the resulting archive is
            # content-addressed through its hash.
            ti.mtime = 0

            return ti

        with tarfile.open(
            fileobj=self._archive_file, mode="w", dereference=True
        ) as dir_tar:
            dir_tar.add(env_dir, arcname=".", filter=ext_filter)

    def _digest(self) -> hashlib._Hash:
        """Return the SHA256 hash object of the archive and rewind the file."""
        self._archive_file.seek(0)
        try:
            # hashlib.file_digest only exists on Python 3.11+.
            file_digest = getattr(hashlib, "file_digest", None)
            if file_digest is not None:
                return file_digest(self._archive_file, hashlib.sha256)  # type: ignore[no-any-return]

            digest = hashlib.sha256()
            for chunk in iter(lambda: self._archive_file.read(1 << 20), b""):
                digest.update(chunk)
            return digest
        finally:
            self._archive_file.seek(0)

    def extract(self, destination: Path) -> None:
        """
        Unpack the archive into ``destination``.

        Missing directories are created by the extraction itself. The archive
        can be extracted more than once.
        """
        self._archive_file.seek(0)

        # Interpreters with extraction filters warn (3.12/3.13) when no filter
        # is given; request the "data" filter wherever it is available.
        extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

        with tarfile.open(fileobj=self._archive_file, mode="r") as dir_tar:
            dir_tar.extractall(path=destination, **extract_kwargs)

        # NOTE: resetting atime/mtime of the destination directory itself was
        # considered so CMake would not see it as out of date; so far this
        # does not appear to be necessary.

    def close(self) -> None:
        """Close the temporary archive file; the archive is unusable afterwards."""
        self._archive_file.close()
148+
149+
67150
@dataclasses.dataclass
68151
class Builder:
69152
settings: ScikitBuildSettings
@@ -79,6 +162,31 @@ def get_cmake_args(self) -> list[str]:
79162

80163
return [*self.settings.cmake.args, *env_cmake_args]
81164

165+
# FIXME: Proper setting for build env dir
166+
def _build_dir(self) -> Path:
    """
    Return the resolved build directory derived from ``settings.build_dir``.

    The configured string is expanded with ``str.format``, receiving
    ``cache_tag`` (the interpreter's cache tag) and ``wheel_tag`` (the best
    wheel tag for the current architectures and ``wheel.py_api``). The
    result is logged and returned as a resolved path.

    NOTE(review): no temporary-directory fallback happens here; an empty
    ``build_dir`` string would resolve to the current working directory.
    Confirm callers only use this with an explicit build directory.
    """
    best_tag = WheelTag.compute_best(
        archs_to_tags(get_archs(os.environ)),
        self.settings.wheel.py_api,
        expand_macos=self.settings.wheel.expand_macos_universal_tags,
    )

    assert self.settings.build_dir is not None
    expanded = self.settings.build_dir.format(
        cache_tag=sys.implementation.cache_tag,
        wheel_tag=str(best_tag),
    )

    # Resolve once and reuse the result for both the log line and the caller.
    resolved_dir = Path(expanded).resolve()
    logger.info("Build directory: {}", resolved_dir)

    return resolved_dir
184+
185+
def _build_env_cache_dir(self, hash: hashlib._Hash) -> Path:
    """
    Return the cache directory for a build environment with the given hash.

    The cache root is a sibling of the build directory, named after it with
    a ``-build-env-cache`` suffix; the returned path is the subdirectory of
    that root named by the hex digest of ``hash``.
    """
    build_dir = self._build_dir()
    cache_root_name = build_dir.name + "-build-env-cache"
    return build_dir.with_name(cache_root_name).joinpath(hash.hexdigest())
189+
82190
def configure(
83191
self,
84192
*,
@@ -103,9 +211,20 @@ def configure(
103211
site_packages = Path(sysconfig.get_path("purelib"))
104212
self.config.prefix_dirs.append(site_packages)
105213
logger.debug("SITE_PACKAGES: {}", site_packages)
106-
if site_packages != DIR.parent.parent:
214+
215+
if self.settings.cache_build_env:
216+
if not self.settings.experimental:
217+
msg = "Experimental features must be enabled to use build environment caching"
218+
raise AssertionError(msg)
219+
220+
archive = BuildEnvArchive(DIR.parent.parent)
221+
targettree = self._build_env_cache_dir(archive.hash)
222+
archive.extract(targettree)
223+
self.config.prefix_dirs.append(targettree)
224+
225+
elif site_packages != DIR.parent.parent:
107226
self.config.prefix_dirs.append(DIR.parent.parent)
108-
logger.debug("Extra SITE_PACKAGES: {}", site_packages)
227+
logger.debug("Extra SITE_PACKAGES: {}", DIR.parent.parent)
109228

110229
# Add the FindPython backport if needed
111230
fp_backport = self.settings.backport.find_python

src/scikit_build_core/settings/skbuild_model.py

+3
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,6 @@ class ScikitBuildSettings:
148148

149149
#: The build directory. Defaults to a temporary directory, but can be set.
150150
build_dir: str = ""
151+
152+
#: Whether to cache build environments. Experimental feature.
153+
cache_build_env: bool = True

0 commit comments

Comments
 (0)