Skip to content

Commit b78dad7

Browse files
chfwwillmcgugan
authored andcommitted
Implement TarFS.geturl and ZipFS.geturl and Fix #329, #333, #340 (#330)
* ✨ provide geturl for ReadZipFS. As a user of zipsf, I need geturl to provide FSURL string. * 🐛 on windows and python 3, fs.open_fs(osfs(~/).geturl('myfolder/subfolder')) triggers CreateFailed 🐛 osfs.geturl() cannot be opened by itself * 🔬 all test cases are in and ✨ support geturl for read tar file system * 🔥 remove unwanted comment in code * 📖 update change log and contributor md * :short: update code with black * 📖 update change log * 👕 provide type info * 💚 update unit tests * 🔥 remove dead code * 💚 update tarfs unit test * 🔥 remove unwanted change * :short: run black over osfs.py * 🐛 fix hidden exception at fs.close() when opening an absent zip/tar file URL. fix #333 * 📝 update the behavior of geturl of zipfs and tarfs * 👕 address review feedback ✨ url quote the files for proper url string * 💚 fix broken tests * ♿ add helpful exception info to help developers, who create pypifs, gitfs, fs.datalake et al. fix #340 * 🐛 fix windows path test * ✨ uniformly support fs purpose * 🔨 quote around the root path. #340 * 🚜 alternative file uri implementation * 🔬 try windows path test case where unicode characters stays as they are * 🐛 fix unit test expectation because of the difference between windows and linux file uri * 🚜 avoid Windows File URI for fs purpose * 🐛 before quote, utf8 string needs to be encoded. https://stackoverflow.com/questions/15115588/urllib-quote-throws-keyerror * 🚜 respect rfc 3986, where unicode will be quoted * 💚 🔨 code refactor and fix broken unit tests * 👕 address review feedback from @lurch * 💚 fix typo in code and 👕 update assertions * 🔥 remove unused variable * 👕 address further comments from @lurch * 💚 update windows test case. fix the typo * 🐛 colon:tmp is bad path under windows * 🐛 forward slash on Windows is a valid path separator * 💚 fix unit tests on travis-ci * 👕 address review comments * 👕 mypy compliance * 👕 dot the i and cross the t
1 parent 667d477 commit b78dad7

11 files changed

+239
-33
lines changed

CHANGELOG.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,26 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
77

88
## [2.4.11] - Unreleased
99

10+
### Added
11+
12+
- Added `geturl` for TarFS and ZipFS for the 'fs' purpose. `NoURL` is raised for the 'download' purpose.
13+
- Added helpful root path in CreateFailed exception [#340](https://github.com/PyFilesystem/pyfilesystem2/issues/340)
14+
1015
### Fixed
1116

1217
- Fixed tests leaving tmp files
1318
- Fixed typing issues
1419
- Fixed link namespace returning bytes
20+
- Fixed broken FSURL on Windows [#329](https://github.com/PyFilesystem/pyfilesystem2/issues/329)
21+
- Fixed hidden exception at fs.close() when opening an absent zip/tar file URL [#333](https://github.com/PyFilesystem/pyfilesystem2/issues/333)
1522
- Fixed abstract class import from `collections` which would break on Python 3.8
1623
- Fixed incorrect imports of `mock` on Python 3
1724
- Removed some unused imports and unused `requirements.txt` file
1825
- Added mypy checks to Travis
1926

2027
### Changed
2128

22-
Entire test suite has been migrated to [pytest](https://docs.pytest.org/en/latest/).
23-
Closes [#327](https://github.com/PyFilesystem/pyfilesystem2/issues/327).
29+
- Entire test suite has been migrated to [pytest](https://docs.pytest.org/en/latest/). Closes [#327](https://github.com/PyFilesystem/pyfilesystem2/issues/327).
2430

2531
## [2.4.10] - 2019-07-29
2632

CONTRIBUTORS.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
Many thanks to the following developers for contributing to this project:
44

5+
- [C. W.](https://github.com/chfw)
56
- [Diego Argueta](https://github.com/dargueta)
67
- [Geoff Jukes](https://github.com/geoffjukes)
78
- [Giampaolo](https://github.com/gpcimino)

fs/_url_tools.py

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import re
2+
import six
3+
import platform
4+
5+
if False: # typing.TYPE_CHECKING
6+
from typing import Text, Union, BinaryIO
7+
8+
_WINDOWS_PLATFORM = platform.system() == "Windows"
9+
10+
11+
def url_quote(path_snippet):
    # type: (Text) -> Text
    """Quote a file path so it can be embedded in an FS URL.

    On Windows, a path that starts with a drive letter has the drive
    split off and kept verbatim; only the remainder is passed through
    `pathname2url`. Every other path (Unix-like paths, relative paths)
    is passed through `pathname2url` as a whole.

    Arguments:
        path_snippet: a file path, relative or absolute.

    Returns:
        The quoted path.
    """
    pathname2url = six.moves.urllib.request.pathname2url

    def _quote(piece):
        # On Python 2 the text is encoded to UTF-8 before it is quoted.
        if six.PY2:
            piece = piece.encode("utf-8")
        return pathname2url(piece)

    if not (_WINDOWS_PLATFORM and _has_drive_letter(path_snippet)):
        return _quote(path_snippet)

    # Keep "C" out of the quoting so the drive's colon is not escaped.
    drive_letter, remainder = path_snippet.split(":", 1)
    return "{}:{}".format(drive_letter, _quote(remainder))
32+
33+
34+
def _has_drive_letter(path_snippet):
35+
# type: (Text) -> bool
36+
"""
37+
The following path will get True
38+
D:/Data
39+
C:\\My Dcouments\\ test
40+
41+
And will get False
42+
43+
/tmp/abc:test
44+
45+
Arguments:
46+
path_snippet: a file path, relative or absolute.
47+
"""
48+
windows_drive_pattern = ".:[/\\\\].*$"
49+
return re.match(windows_drive_pattern, path_snippet) is not None

fs/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1633,7 +1633,7 @@ def hash(self, path, name):
16331633
fs.errors.UnsupportedHash: If the requested hash is not supported.
16341634
16351635
"""
1636-
_path = self.validatepath(path)
1636+
self.validatepath(path)
16371637
try:
16381638
hash_object = hashlib.new(name)
16391639
except ValueError:

fs/osfs.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
sendfile = None # type: ignore # pragma: no cover
4040

4141
from . import errors
42-
from .errors import FileExists
4342
from .base import FS
4443
from .enums import ResourceType
4544
from ._fscompat import fsencode, fsdecode, fspath
@@ -49,6 +48,7 @@
4948
from .error_tools import convert_os_errors
5049
from .mode import Mode, validate_open_mode
5150
from .errors import FileExpected, NoURL
51+
from ._url_tools import url_quote
5252

5353
if False: # typing.TYPE_CHECKING
5454
from typing import (
@@ -137,7 +137,8 @@ def __init__(
137137
)
138138
else:
139139
if not os.path.isdir(_root_path):
140-
raise errors.CreateFailed("root path does not exist")
140+
message = "root path '{}' does not exist".format(_root_path)
141+
raise errors.CreateFailed(message)
141142

142143
_meta = self._meta = {
143144
"network": False,
@@ -526,7 +527,6 @@ def _scandir(self, path, namespaces=None):
526527
namespaces = namespaces or ()
527528
_path = self.validatepath(path)
528529
sys_path = self.getsyspath(_path)
529-
_sys_path = fsencode(sys_path)
530530
with convert_os_errors("scandir", path, directory=True):
531531
for entry_name in os.listdir(sys_path):
532532
_entry_name = fsdecode(entry_name)
@@ -584,9 +584,14 @@ def getsyspath(self, path):
584584

585585
def geturl(self, path, purpose="download"):
586586
# type: (Text, Text) -> Text
587-
if purpose != "download":
587+
sys_path = self.getsyspath(path)
588+
if purpose == "download":
589+
return "file://" + sys_path
590+
elif purpose == "fs":
591+
url_path = url_quote(sys_path)
592+
return "osfs://" + url_path
593+
else:
588594
raise NoURL(path, purpose)
589-
return "file://" + self.getsyspath(path)
590595

591596
def gettype(self, path):
592597
# type: (Text) -> ResourceType

fs/tarfs.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from __future__ import print_function
55
from __future__ import unicode_literals
66

7-
import copy
87
import os
98
import tarfile
109
import typing
@@ -17,14 +16,14 @@
1716
from .base import FS
1817
from .compress import write_tar
1918
from .enums import ResourceType
20-
from .errors import IllegalBackReference
19+
from .errors import IllegalBackReference, NoURL
2120
from .info import Info
2221
from .iotools import RawWrapper
2322
from .opener import open_fs
24-
from .path import dirname, relpath, basename, isbase, normpath, parts, frombase
23+
from .path import relpath, basename, isbase, normpath, parts, frombase
2524
from .wrapfs import WrapFS
2625
from .permissions import Permissions
27-
26+
from ._url_tools import url_quote
2827

2928
if False: # typing.TYPE_CHECKING
3029
from tarfile import TarInfo
@@ -461,16 +460,25 @@ def removedir(self, path):
461460
def close(self):
462461
# type: () -> None
463462
super(ReadTarFS, self).close()
464-
self._tar.close()
463+
if hasattr(self, "_tar"):
464+
self._tar.close()
465465

466466
def isclosed(self):
467467
# type: () -> bool
468468
return self._tar.closed # type: ignore
469469

470+
def geturl(self, path, purpose="download"):
    # type: (Text, Text) -> Text
    """Build a ``tar://`` FS URL for a resource inside this archive.

    Arguments:
        path: path of the resource inside the tar file.
        purpose: what the URL is for; only ``"fs"`` is supported.

    Returns:
        ``tar://<quoted archive path>!/<quoted path>``.

    Raises:
        NoURL: if ``purpose`` is not ``"fs"``, or if ``self._file`` is
            not a string (e.g. the archive was given as a file object).
    """
    # Guard first: anything other than an "fs" URL for a path-backed
    # archive cannot be expressed as a URL.
    if purpose != "fs" or not isinstance(self._file, six.string_types):
        raise NoURL(path, purpose)
    return "tar://{}!/{}".format(url_quote(self._file), url_quote(path))
478+
470479

471480
if __name__ == "__main__": # pragma: no cover
472481
from fs.tree import render
473-
from fs.opener import open_fs
474482

475483
with TarFS("tests.tar") as tar_fs:
476484
print(tar_fs.listdir("/"))

fs/zipfs.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from .path import dirname, forcedir, normpath, relpath
2323
from .time import datetime_to_epoch
2424
from .wrapfs import WrapFS
25+
from ._url_tools import url_quote
2526

2627
if False: # typing.TYPE_CHECKING
2728
from typing import (
@@ -434,7 +435,8 @@ def removedir(self, path):
434435
def close(self):
435436
# type: () -> None
436437
super(ReadZipFS, self).close()
437-
self._zip.close()
438+
if hasattr(self, "_zip"):
439+
self._zip.close()
438440

439441
def readbytes(self, path):
440442
# type: (Text) -> bytes
@@ -444,3 +446,12 @@ def readbytes(self, path):
444446
zip_name = self._path_to_zip_name(path)
445447
zip_bytes = self._zip.read(zip_name)
446448
return zip_bytes
449+
450+
def geturl(self, path, purpose="download"):
    # type: (Text, Text) -> Text
    """Build a ``zip://`` FS URL for a resource inside this archive.

    Arguments:
        path: path of the resource inside the zip file.
        purpose: what the URL is for; only ``"fs"`` is supported.

    Returns:
        ``zip://<quoted archive path>!/<quoted path>``.

    Raises:
        errors.NoURL: if ``purpose`` is not ``"fs"``, or if ``self._file``
            is not a string (e.g. the archive was given as a file object).
    """
    # Guard first: anything other than an "fs" URL for a path-backed
    # archive cannot be expressed as a URL.
    if purpose != "fs" or not isinstance(self._file, six.string_types):
        raise errors.NoURL(path, purpose)
    return "zip://{}!/{}".format(url_quote(self._file), url_quote(path))

tests/test_osfs.py

+40-5
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,11 @@
77
import shutil
88
import tempfile
99
import unittest
10-
1110
import pytest
1211

13-
from fs import osfs
14-
from fs.path import relpath
12+
from fs import osfs, open_fs
13+
from fs.path import relpath, dirname
1514
from fs import errors
16-
1715
from fs.test import FSTestCases
1816

1917
from six import text_type
@@ -77,7 +75,7 @@ def assert_text(self, path, contents):
7775

7876
def test_not_exists(self):
7977
with self.assertRaises(errors.CreateFailed):
80-
fs = osfs.OSFS("/does/not/exists/")
78+
osfs.OSFS("/does/not/exists/")
8179

8280
def test_expand_vars(self):
8381
self.fs.makedir("TYRIONLANISTER")
@@ -162,3 +160,40 @@ def test_validatepath(self):
162160
with self.assertRaises(errors.InvalidCharsInPath):
163161
with self.fs.open("13 – Marked Register.pdf", "wb") as fh:
164162
fh.write(b"foo")
163+
164+
def test_consume_geturl(self):
165+
self.fs.create("foo")
166+
try:
167+
url = self.fs.geturl("foo", purpose="fs")
168+
except errors.NoURL:
169+
self.assertFalse(self.fs.hasurl("foo"))
170+
else:
171+
self.assertTrue(self.fs.hasurl("foo"))
172+
173+
# Should not throw an error
174+
base_dir = dirname(url)
175+
open_fs(base_dir)
176+
177+
def test_complex_geturl(self):
178+
self.fs.makedirs("foo/bar ha")
179+
test_fixtures = [
180+
# test file, expected url path
181+
["foo", "foo"],
182+
["foo-bar", "foo-bar"],
183+
["foo_bar", "foo_bar"],
184+
["foo/bar ha/barz", "foo/bar%20ha/barz"],
185+
["example b.txt", "example%20b.txt"],
186+
["exampleㄓ.txt", "example%E3%84%93.txt"],
187+
]
188+
file_uri_prefix = "osfs://"
189+
for test_file, relative_url_path in test_fixtures:
190+
self.fs.create(test_file)
191+
expected = file_uri_prefix + self.fs.getsyspath(relative_url_path).replace(
192+
"\\", "/"
193+
)
194+
actual = self.fs.geturl(test_file, purpose="fs")
195+
196+
self.assertEqual(actual, expected)
197+
198+
def test_geturl_return_no_url(self):
199+
self.assertRaises(errors.NoURL, self.fs.geturl, "test/path", "upload")

tests/test_tarfs.py

+34-10
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
import tarfile
88
import tempfile
99
import unittest
10-
1110
import pytest
1211

1312
from fs import tarfs
1413
from fs.enums import ResourceType
1514
from fs.compress import write_tar
1615
from fs.opener import open_fs
1716
from fs.opener.errors import NotWriteable
17+
from fs.errors import NoURL
1818
from fs.test import FSTestCases
1919

2020
from .test_archives import ArchiveTestCases
@@ -93,15 +93,6 @@ def destroy_fs(self, fs):
9393
os.remove(fs._tar_file)
9494
del fs._tar_file
9595

96-
def assert_is_bzip(self):
97-
try:
98-
tarfile.open(fs._tar_file, "r:gz")
99-
except tarfile.ReadError:
100-
self.fail("{} is not a valid gz archive".format(fs._tar_file))
101-
for other_comps in ["xz", "bz2", ""]:
102-
with self.assertRaises(tarfile.ReadError):
103-
tarfile.open(fs._tar_file, "r:{}".format(other_comps))
104-
10596

10697
@pytest.mark.skipif(six.PY2, reason="Python2 does not support LZMA")
10798
class TestWriteXZippedTarFS(FSTestCases, unittest.TestCase):
@@ -181,11 +172,44 @@ def test_read_from_filename(self):
181172
except:
182173
self.fail("Couldn't open tarfs from filename")
183174

175+
def test_read_non_existent_file(self):
176+
fs = tarfs.TarFS(open(self._temp_path, "rb"))
177+
# it has been very difficult to catch exception in __del__()
178+
del fs._tar
179+
try:
180+
fs.close()
181+
except AttributeError:
182+
self.fail("Could not close tar fs properly")
183+
except Exception:
184+
self.fail("Strange exception in closing fs")
185+
184186
def test_getinfo(self):
185187
super(TestReadTarFS, self).test_getinfo()
186188
top = self.fs.getinfo("top.txt", ["tar"])
187189
self.assertTrue(top.get("tar", "is_file"))
188190

191+
def test_geturl_for_fs(self):
192+
test_fixtures = [
193+
# test_file, expected
194+
["foo/bar/egg/foofoo", "foo/bar/egg/foofoo"],
195+
["foo/bar egg/foo foo", "foo/bar%20egg/foo%20foo"],
196+
]
197+
tar_file_path = self._temp_path.replace("\\", "/")
198+
for test_file, expected_file in test_fixtures:
199+
expected = "tar://{tar_file_path}!/{file_inside_tar}".format(
200+
tar_file_path=tar_file_path, file_inside_tar=expected_file
201+
)
202+
self.assertEqual(self.fs.geturl(test_file, purpose="fs"), expected)
203+
204+
def test_geturl_for_fs_but_file_is_binaryio(self):
205+
self.fs._file = six.BytesIO()
206+
self.assertRaises(NoURL, self.fs.geturl, "test", "fs")
207+
208+
def test_geturl_for_download(self):
209+
test_file = "foo/bar/egg/foofoo"
210+
with self.assertRaises(NoURL):
211+
self.fs.geturl(test_file)
212+
189213

190214
class TestBrokenPaths(unittest.TestCase):
191215
@classmethod

0 commit comments

Comments
 (0)