Skip to content

Commit c87af4f

Browse files
authored
Merge pull request #241 from rnc/ISSUE236
Handle Docker 25 OCI archive format
2 parents 166b7c5 + 09a333c commit c87af4f

File tree

8 files changed

+196
-107
lines changed

8 files changed

+196
-107
lines changed

.github/workflows/squash.yml

+35-6
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,52 @@ on:
99
- main
1010

1111
jobs:
12-
build:
12+
build-docker-24:
1313
runs-on: ubuntu-20.04
1414
strategy:
1515
fail-fast: false
1616
matrix:
17-
python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11']
17+
python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
1818
steps:
19-
- uses: actions/checkout@v3
19+
- uses: actions/checkout@v4
2020
- name: Set up Python ${{ matrix.python-version }}
21-
uses: actions/setup-python@v4
21+
uses: actions/setup-python@v5
2222
with:
2323
python-version: ${{ matrix.python-version }}
2424
- name: Setup
2525
run: |
2626
sudo apt-get update
27-
pip install -U pip
28-
pip install "tox<4.0.0"
27+
pip install "tox<4.0.0" setuptools
28+
- name: Info
29+
run: |
30+
docker version
31+
docker info
32+
- name: Run tests
33+
run: |
34+
PV=${{ matrix.python-version }}
35+
echo "Running tests for Python version $PV ( ${PV/./} )"
36+
make test-py"${PV/./}"
37+
build-docker-25:
38+
runs-on: ubuntu-20.04
39+
strategy:
40+
fail-fast: false
41+
matrix:
42+
python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
43+
steps:
44+
- uses: actions/checkout@v4
45+
- name: Set up Python ${{ matrix.python-version }}
46+
uses: actions/setup-python@v5
47+
with:
48+
python-version: ${{ matrix.python-version }}
49+
- name: Setup
50+
run: |
51+
for pkg in containerd runc; do sudo apt-get remove $pkg; done
52+
sudo apt-get update
53+
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
54+
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu focal stable"
55+
apt-cache policy docker-ce
56+
sudo apt-get install docker-ce docker-ce-cli containerd.io
57+
pip install "tox<4.0.0" setuptools
2958
- name: Info
3059
run: |
3160
docker version

Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ test-py310: prepare
2121
test-py311: prepare
2222
tox -e py311 -- tests
2323

24+
test-py312: prepare
25+
tox -e py312 -- tests
26+
2427
test-unit: prepare
2528
tox -- tests/test_unit*
2629

docker_squash/image.py

+53-48
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@
1010
import tarfile
1111
import tempfile
1212
import threading
13-
from typing import List
13+
from typing import List, Optional, Union
1414

15-
import docker
15+
import docker as docker_library
1616

1717
from docker_squash.errors import SquashError, SquashUnnecessaryError
1818

1919

2020
class Chdir(object):
2121
"""Context manager for changing the current working directory"""
2222

23-
def __init__(self, newPath):
24-
self.newPath = os.path.expanduser(newPath)
23+
def __init__(self, new_path):
24+
self.newPath = os.path.expanduser(new_path)
2525

2626
def __enter__(self):
2727
self.savedPath = os.getcwd()
@@ -43,18 +43,26 @@ class Image(object):
4343
""" Image format version """
4444

4545
def __init__(
46-
self, log, docker, image, from_layer, tmp_dir=None, tag=None, comment=""
46+
self,
47+
log,
48+
docker,
49+
image,
50+
from_layer,
51+
tmp_dir: Optional[str] = None,
52+
tag: Optional[str] = None,
53+
comment: Optional[str] = "",
4754
):
48-
self.log = log
55+
self.log: logging.Logger = log
4956
self.debug = self.log.isEnabledFor(logging.DEBUG)
5057
self.docker = docker
51-
self.image = image
52-
self.from_layer = from_layer
53-
self.tag = tag
54-
self.comment = comment
58+
self.image: str = image
59+
self.from_layer: str = from_layer
60+
self.tag: str = tag
61+
self.comment: str = comment
5562
self.image_name = None
5663
self.image_tag = None
5764
self.squash_id = None
65+
self.oci_format = False
5866

5967
# Workaround for https://play.golang.org/p/sCsWMXYxqy
6068
#
@@ -68,7 +76,7 @@ def __init__(
6876
)
6977
""" Date used in metadata, already formatted using the `%Y-%m-%dT%H:%M:%S.%fZ` format """
7078

71-
self.tmp_dir = tmp_dir
79+
self.tmp_dir: str = tmp_dir
7280
""" Main temporary directory to save all working files. This is the root directory for all other temporary files. """
7381

7482
def squash(self):
@@ -95,11 +103,11 @@ def _initialize_directories(self):
95103
raise SquashError("Preparing temporary directory failed")
96104

97105
# Temporary location on the disk of the old, unpacked *image*
98-
self.old_image_dir = os.path.join(self.tmp_dir, "old")
106+
self.old_image_dir: str = os.path.join(self.tmp_dir, "old")
99107
# Temporary location on the disk of the new, unpacked, squashed *image*
100-
self.new_image_dir = os.path.join(self.tmp_dir, "new")
108+
self.new_image_dir: str = os.path.join(self.tmp_dir, "new")
101109
# Temporary location on the disk of the squashed *layer*
102-
self.squashed_dir = os.path.join(self.new_image_dir, "squashed")
110+
self.squashed_dir: str = os.path.join(self.new_image_dir, "squashed")
103111

104112
for d in self.old_image_dir, self.new_image_dir:
105113
os.makedirs(d)
@@ -115,14 +123,12 @@ def _squash_id(self, layer):
115123
squash_id = self.docker.inspect_image(layer)["Id"]
116124
except Exception:
117125
raise SquashError(
118-
"Could not get the layer ID to squash, please check provided 'layer' argument: %s"
119-
% layer
126+
f"Could not get the layer ID to squash, please check provided 'layer' argument: {layer}"
120127
)
121128

122129
if squash_id not in self.old_image_layers:
123130
raise SquashError(
124-
"Couldn't find the provided layer (%s) in the %s image"
125-
% (layer, self.image)
131+
f"Couldn't find the provided layer ({layer}) in the {self.image} image"
126132
)
127133

128134
self.log.debug("Layer ID to squash from: %s" % squash_id)
@@ -138,16 +144,14 @@ def _validate_number_of_layers(self, number_of_layers):
138144
# Only positive numbers are correct
139145
if number_of_layers <= 0:
140146
raise SquashError(
141-
"Number of layers to squash cannot be less or equal 0, provided: %s"
142-
% number_of_layers
147+
f"Number of layers to squash cannot be less or equal 0, provided: {number_of_layers}"
143148
)
144149

145150
# Do not squash if the provided number of layers to squash is bigger
146151
# than number of actual layers in the image
147152
if number_of_layers > len(self.old_image_layers):
148153
raise SquashError(
149-
"Cannot squash %s layers, the %s image contains only %s layers"
150-
% (number_of_layers, self.image, len(self.old_image_layers))
154+
f"Cannot squash {number_of_layers} layers, the {self.image} image contains only {len(self.old_image_layers)} layers"
151155
)
152156

153157
def _before_squashing(self):
@@ -164,17 +168,14 @@ def _before_squashing(self):
164168
self.old_image_id = self.docker.inspect_image(self.image)["Id"]
165169
except SquashError:
166170
raise SquashError(
167-
"Could not get the image ID to squash, please check provided 'image' argument: %s"
168-
% self.image
171+
f"Could not get the image ID to squash, please check provided 'image' argument: {self.image}"
169172
)
170173

171174
self.old_image_layers = []
172175

173176
# Read all layers in the image
174177
self._read_layers(self.old_image_layers, self.old_image_id)
175-
176178
self.old_image_layers.reverse()
177-
178179
self.log.info("Old image has %s layers", len(self.old_image_layers))
179180
self.log.debug("Old layers: %s", self.old_image_layers)
180181

@@ -193,8 +194,7 @@ def _before_squashing(self):
193194

194195
if not squash_id:
195196
raise SquashError(
196-
"The %s layer could not be found in the %s image"
197-
% (self.from_layer, self.image)
197+
f"The {self.from_layer} layer could not be found in the {self.image} image"
198198
)
199199

200200
number_of_layers = (
@@ -212,7 +212,7 @@ def _before_squashing(self):
212212

213213
if len(self.layers_to_squash) < 1:
214214
raise SquashError(
215-
"Invalid number of layers to squash: %s" % len(self.layers_to_squash)
215+
f"Invalid number of layers to squash: {len(self.layers_to_squash)}"
216216
)
217217

218218
if len(self.layers_to_squash) == 1:
@@ -233,6 +233,7 @@ def _before_squashing(self):
233233

234234
def _after_squashing(self):
235235
self.log.debug("Removing from disk already squashed layers...")
236+
self.log.debug("Cleaning up %s temporary directory" % self.old_image_dir)
236237
shutil.rmtree(self.old_image_dir, ignore_errors=True)
237238

238239
self.size_after = self._dir_size(self.new_image_dir)
@@ -281,29 +282,28 @@ def load_squashed_image(self):
281282
% (self.image_name, self.image_tag)
282283
)
283284

284-
def _files_in_layers(self, layers, directory):
285+
def _files_in_layers(self, layers):
285286
"""
286287
Prepare a list of files in all layers
287288
"""
288289
files = {}
289290

290291
for layer in layers:
291292
self.log.debug("Generating list of files in layer '%s'..." % layer)
292-
tar_file = os.path.join(directory, layer, "layer.tar")
293+
tar_file = self._extract_tar_name(layer)
293294
with tarfile.open(tar_file, "r", format=tarfile.PAX_FORMAT) as tar:
294295
files[layer] = [self._normalize_path(x) for x in tar.getnames()]
295296
self.log.debug("Done, found %s files" % len(files[layer]))
296297

297298
return files
298299

299-
def _prepare_tmp_directory(self, tmp_dir):
300+
def _prepare_tmp_directory(self, tmp_dir: str) -> str:
300301
"""Creates temporary directory that is used to work on layers"""
301302

302303
if tmp_dir:
303304
if os.path.exists(tmp_dir):
304305
raise SquashError(
305-
"The '%s' directory already exists, please remove it before you proceed"
306-
% tmp_dir
306+
f"The '{tmp_dir}' directory already exists, please remove it before you proceed"
307307
)
308308
os.makedirs(tmp_dir)
309309
else:
@@ -374,9 +374,9 @@ def _save_image(self, image_id, directory):
374374
try:
375375
image = self.docker.get_image(image_id)
376376

377-
if int(docker.__version__.split(".")[0]) < 3:
377+
if int(docker_library.__version__.split(".")[0]) < 3:
378378
# Docker library prior to 3.0.0 returned the requests
379-
# object directly which cold be used to read from
379+
# object directly which could be used to read from
380380
self.log.debug(
381381
"Extracting image using HTTPResponse object directly"
382382
)
@@ -408,10 +408,10 @@ def _save_image(self, image_id, directory):
408408
except Exception as e:
409409
self.log.exception(e)
410410
self.log.warning(
411-
"An error occured while saving the %s image, retrying..." % image_id
411+
f"An error occurred while saving the {image_id} image, retrying..."
412412
)
413413

414-
raise SquashError("Couldn't save %s image!" % image_id)
414+
raise SquashError(f"Couldn't save {image_id} image!")
415415

416416
def _unpack(self, tar_file, directory):
417417
"""Unpacks tar archive to selected directory"""
@@ -500,7 +500,7 @@ def _read_old_metadata(self, old_json_file):
500500

501501
return metadata
502502

503-
def _move_layers(self, layers, src, dest):
503+
def _move_layers(self, layers, src: str, dest: str):
504504
"""
505505
This moves all the layers that should be copied as-is.
506506
In other words - all layers that are not meant to be squashed will be
@@ -530,7 +530,7 @@ def _marker_files(self, tar, members):
530530
"""
531531
Searches for marker files in the specified archive.
532532
533-
Docker marker files are files taht have the .wh. prefix in the name.
533+
Docker marker files are files that have the .wh. prefix in the name.
534534
These files mark the corresponding file to be removed (hidden) when
535535
we start a container from the image.
536536
"""
@@ -609,7 +609,9 @@ def _add_markers(self, markers, tar, files_in_layers, added_symlinks):
609609
else:
610610
self.log.debug("Skipping '%s' marker file..." % marker.name)
611611

612-
def _normalize_path(self, path):
612+
def _normalize_path(
613+
self, path: Union[str, pathlib.Path]
614+
) -> Union[str, pathlib.Path]:
613615
return os.path.normpath(os.path.join("/", path))
614616

615617
def _add_hardlinks(self, squashed_tar, squashed_files, to_skip, skipped_hard_links):
@@ -743,17 +745,15 @@ def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links
743745

744746
return added_symlinks
745747

746-
def _squash_layers(self, layers_to_squash, layers_to_move):
747-
self.log.info("Starting squashing...")
748+
def _squash_layers(self, layers_to_squash: List[str], layers_to_move: List[str]):
749+
self.log.info(f"Starting squashing for {self.squashed_tar}...")
748750

749751
# Reverse the layers to squash - we begin with the newest one
750752
# to make the tar lighter
751753
layers_to_squash.reverse()
752754

753755
# Find all files in layers that we don't squash
754-
files_in_layers_to_move = self._files_in_layers(
755-
layers_to_move, self.old_image_dir
756-
)
756+
files_in_layers_to_move = self._files_in_layers(layers_to_move)
757757

758758
with tarfile.open(
759759
self.squashed_tar, "w", format=tarfile.PAX_FORMAT
@@ -770,8 +770,7 @@ def _squash_layers(self, layers_to_squash, layers_to_move):
770770
reading_layers: List[tarfile.TarFile] = []
771771

772772
for layer_id in layers_to_squash:
773-
layer_tar_file = os.path.join(self.old_image_dir, layer_id, "layer.tar")
774-
773+
layer_tar_file = self._extract_tar_name(layer_id)
775774
self.log.info("Squashing file '%s'..." % layer_tar_file)
776775

777776
# Open the existing layer to squash
@@ -1028,3 +1027,9 @@ def _path_hierarchy(self, path):
10281027
return itertools.accumulate(
10291028
path.parts[:-1], func=lambda head, tail: str(path.__class__(head, tail))
10301029
)
1030+
1031+
def _extract_tar_name(self, path: str) -> str:
1032+
if self.oci_format:
1033+
return os.path.join(self.old_image_dir, path)
1034+
else:
1035+
return os.path.join(self.old_image_dir, path, "layer.tar")

0 commit comments

Comments
 (0)