Skip to content

Commit 6716254

Browse files
authored
GH-101362: Optimise PurePath(PurePath(...)) (GH-101667)
The previous `_parse_args()` method pulled the `_parts` out of any supplied `PurePath` objects; these were subsequently joined in `_from_parts()` using `os.path.join()`. This is actually a slower form of joining than calling `fspath()` on the path object, because it doesn't take advantage of the fact that the contents of `_parts` are normalized! This reduces the time taken to run `PurePath("foo", "bar")` by ~20%, and the time taken to run `PurePath(p, "cheese")`, where `p = PurePath("/foo", "bar", "baz")`, by ~40%. Automerge-Triggered-By: GH:AlexWaygood
1 parent 3e60e02 commit 6716254

File tree

4 files changed

+45
-27
lines changed

4 files changed

+45
-27
lines changed

Doc/library/pathlib.rst

+3-2
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,9 @@ we also call *flavours*:
105105
PurePosixPath('setup.py')
106106

107107
Each element of *pathsegments* can be either a string representing a
108-
path segment, an object implementing the :class:`os.PathLike` interface
109-
which returns a string, or another path object::
108+
path segment, or an object implementing the :class:`os.PathLike` interface
109+
where the :meth:`~os.PathLike.__fspath__` method returns a string,
110+
such as another path object::
110111

111112
>>> PurePath('foo', 'some/path', 'bar')
112113
PurePosixPath('foo/some/path/bar')

Lib/pathlib.py

+11-25
Original file line number | Diff line number | Diff line change
@@ -281,6 +281,14 @@ def _parse_parts(cls, parts):
281281
path = cls._flavour.join(*parts)
282282
sep = cls._flavour.sep
283283
altsep = cls._flavour.altsep
284+
if isinstance(path, str):
285+
# Force-cast str subclasses to str (issue #21127)
286+
path = str(path)
287+
else:
288+
raise TypeError(
289+
"argument should be a str or an os.PathLike "
290+
"object where __fspath__ returns a str, "
291+
f"not {type(path).__name__!r}")
284292
if altsep:
285293
path = path.replace(altsep, sep)
286294
drv, root, rel = cls._flavour.splitroot(path)
@@ -291,32 +299,10 @@ def _parse_parts(cls, parts):
291299
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
292300
return drv, root, parsed
293301

294-
@classmethod
295-
def _parse_args(cls, args):
296-
# This is useful when you don't want to create an instance, just
297-
# canonicalize some constructor arguments.
298-
parts = []
299-
for a in args:
300-
if isinstance(a, PurePath):
301-
parts += a._parts
302-
else:
303-
a = os.fspath(a)
304-
if isinstance(a, str):
305-
# Force-cast str subclasses to str (issue #21127)
306-
parts.append(str(a))
307-
else:
308-
raise TypeError(
309-
"argument should be a str object or an os.PathLike "
310-
"object returning str, not %r"
311-
% type(a))
312-
return cls._parse_parts(parts)
313-
314302
@classmethod
315303
def _from_parts(cls, args):
316-
# We need to call _parse_args on the instance, so as to get the
317-
# right flavour.
318304
self = object.__new__(cls)
319-
drv, root, parts = self._parse_args(args)
305+
drv, root, parts = self._parse_parts(args)
320306
self._drv = drv
321307
self._root = root
322308
self._parts = parts
@@ -575,7 +561,7 @@ def joinpath(self, *args):
575561
anchored).
576562
"""
577563
drv1, root1, parts1 = self._drv, self._root, self._parts
578-
drv2, root2, parts2 = self._parse_args(args)
564+
drv2, root2, parts2 = self._parse_parts(args)
579565
if root2:
580566
if not drv2 and drv1:
581567
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
@@ -662,7 +648,7 @@ def match(self, path_pattern):
662648
return True
663649

664650
# Can't subclass os.PathLike from PurePath and keep the constructor
665-
# optimizations in PurePath._parse_args().
651+
# optimizations in PurePath.__slots__.
666652
os.PathLike.register(PurePath)
667653

668654

Lib/test/test_pathlib.py

+27
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,33 @@ def test_constructor_common(self):
166166
self.assertEqual(P(P('a'), P('b')), P('a/b'))
167167
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
168168

169+
def test_bytes(self):
170+
P = self.cls
171+
message = (r"argument should be a str or an os\.PathLike object "
172+
r"where __fspath__ returns a str, not 'bytes'")
173+
with self.assertRaisesRegex(TypeError, message):
174+
P(b'a')
175+
with self.assertRaises(TypeError):
176+
P(b'a', 'b')
177+
with self.assertRaises(TypeError):
178+
P('a', b'b')
179+
with self.assertRaises(TypeError):
180+
P('a').joinpath(b'b')
181+
with self.assertRaises(TypeError):
182+
P('a') / b'b'
183+
with self.assertRaises(TypeError):
184+
b'a' / P('b')
185+
with self.assertRaises(TypeError):
186+
P('a').match(b'b')
187+
with self.assertRaises(TypeError):
188+
P('a').relative_to(b'b')
189+
with self.assertRaises(TypeError):
190+
P('a').with_name(b'b')
191+
with self.assertRaises(TypeError):
192+
P('a').with_stem(b'b')
193+
with self.assertRaises(TypeError):
194+
P('a').with_suffix(b'b')
195+
169196
def _check_str_subclass(self, *args):
170197
# Issue #21127: it should be possible to construct a PurePath object
171198
# from a str subclass instance, and it then gets converted to
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
Speed up :class:`pathlib.PurePath` construction by handling arguments more
2+
uniformly. When a :class:`pathlib.Path` argument is supplied,
3+
we use its string representation rather than joining its parts
4+
with :func:`os.path.join`.

0 commit comments

Comments
 (0)