Commit bd67a34

Merge pull request #3028 from activeloopai/fix_
fix
2 parents: d661451 + 5c8c854

5 files changed (+27 -27 lines)

deeplake/core/compression.py (+6 -6)
@@ -507,11 +507,11 @@ def get_compression(header=None, path=None):
         return fmt[1:]
     if header:
         if (
-            header[4:12] == b"\x66\x74\x79\x70\x4D\x53\x4E\x56"
-            or header[4:12] == b"\x66\x74\x79\x70\x69\x73\x6F\x6D"
+            header[4:12] == b"\x66\x74\x79\x70\x4d\x53\x4e\x56"
+            or header[4:12] == b"\x66\x74\x79\x70\x69\x73\x6f\x6d"
         ):
             return "mp4"
-        if header[0:4] == b"\x1A\x45\xDF\xA3":
+        if header[0:4] == b"\x1a\x45\xdf\xa3":
             return "mkv"
         if (
             header[0:2] == b"\xff\xfb"
@@ -525,12 +525,12 @@ def get_compression(header=None, path=None):
             return "wav"
         if header[0:4] == b"\x52\x49\x46\x46" and header[8:12] == b"\x41\x56\x49\x20":
            return "avi"
-        if header[128:132] == b"\x44\x49\x43\x4D":
+        if header[128:132] == b"\x44\x49\x43\x4d":
             return "dcm"
         if header[0:4] == b"\x6e\x2b\x31\x00":
             return "nii"
         if any(
-            header[: len(x)] == x for x in [b"\x73\x6F\x6C\x69", b"numpy-stl", b"solid"]
+            header[: len(x)] == x for x in [b"\x73\x6f\x6c\x69", b"numpy-stl", b"solid"]
         ):
             return "stl"
         if not Image.OPEN:
@@ -811,7 +811,7 @@ def _read_jpeg_shape_from_buffer(buf: bytes) -> Tuple[int, ...]:
 
 
 def _read_dicom_shape_and_dtype(
-    f: Union[bytes, BinaryIO]
+    f: Union[bytes, BinaryIO],
 ) -> Tuple[Tuple[int, ...], str]:
     try:
         from pydicom import dcmread
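The byte-literal edits above are purely cosmetic: hex escape digits are case-insensitive in Python, so b"\x4D" and b"\x4d" denote the same byte, and every magic-number check matches exactly as before. A minimal sketch to confirm this (the header value below is a hypothetical MP4 header prefix, not taken from the codebase):

    # Hex escapes with upper- and lowercase digits produce identical bytes.
    assert b"\x66\x74\x79\x70\x4D\x53\x4E\x56" == b"\x66\x74\x79\x70\x4d\x53\x4e\x56"
    assert b"\x1A\x45\xDF\xA3" == b"\x1a\x45\xdf\xa3"

    # A hypothetical 12-byte MP4 header ("....ftypisom") matches either spelling.
    header = b"\x00\x00\x00\x18" + b"ftypisom"
    assert header[4:12] == b"\x66\x74\x79\x70\x69\x73\x6f\x6d"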

deeplake/core/serialize.py (+2 -2)
@@ -311,7 +311,7 @@ def serialize_linked_tiled_sample(sample: LinkedTiledSample):
 
 
 def deserialize_linked_tiled_sample(
-    byts: Union[bytes, memoryview]
+    byts: Union[bytes, memoryview],
 ) -> LinkedTiledSample:
     byts = memoryview(byts)
     # Read version
@@ -374,7 +374,7 @@ def serialize_chunkids(version: str, arr: np.ndarray) -> memoryview:
 
 
 def deserialize_chunkids(
-    byts: Union[bytes, memoryview]
+    byts: Union[bytes, memoryview],
 ) -> Tuple[str, np.ndarray, type]:
     """Deserializes a chunk ID encoder from the serialized byte stream. This is how the encoder can be accessed/modified after it is read from storage.
 
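Both hunks only add a trailing comma after the sole parameter, the "magic trailing comma" style that formatters such as Black enforce for exploded signatures; runtime behavior is unchanged. A minimal sketch with a hypothetical function, not from the codebase:

    # With or without the trailing comma, the definition behaves identically.
    def length(
        byts: bytes,
    ) -> int:
        return len(byts)

    assert length(b"abc") == 3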

deeplake/core/version_control/test/test_dataset_diff.py (+3 -3)
@@ -4,7 +4,7 @@
 def test_tobytes():
     diff = DatasetDiff()
     diff.tensor_renamed("old1", "newer1")
-    diff.tensor_renamed("old2", "\u604f\u7D59")
+    diff.tensor_renamed("old2", "\u604f\u7d59")
     diff.tensor_deleted("deleted1")
     diff.tensor_deleted("deleted2")
     diff.tensor_deleted("deleted3")
@@ -17,8 +17,8 @@ def test_tobytes():
         len("newer1".encode("utf-8")).to_bytes(8, "big"),
         "old1newer1".encode("utf-8"),
         len("old2".encode("utf-8")).to_bytes(8, "big"),
-        len("\u604f\u7D59".encode("utf-8")).to_bytes(8, "big"),
-        "old2\u604f\u7D59".encode("utf-8"),
+        len("\u604f\u7d59".encode("utf-8")).to_bytes(8, "big"),
+        "old2\u604f\u7d59".encode("utf-8"),
         int(3).to_bytes(8, "big"),
         len("deleted1".encode("utf-8")).to_bytes(8, "big"),
         "deleted1".encode("utf-8"),

deeplake/integrations/labelbox/labelbox_utils.py (+3 -3)
@@ -1,5 +1,5 @@
 import numpy as np
-from typing import Generator, Tuple, Optional, Any
+from typing import Generator, Tuple, Dict, Optional, Any
 import labelbox as lb  # type: ignore
 import av
 import requests
@@ -38,7 +38,7 @@ def filter_video_paths_(video_paths, strategy):
 
 
 def frame_generator_(
-    video_path: str, header: Optional[dict[str, Any]] = None, retries: int = 5
+    video_path: str, header: Optional[Dict[str, Any]] = None, retries: int = 5
 ) -> Generator[Tuple[int, np.ndarray], None, None]:
     """
     Generate frames from a video file.
@@ -76,7 +76,7 @@ def get_video_container(current_retries):
 
 def frames_batch_generator_(
     video_path: str,
-    header: Optional[dict[str, Any]] = None,
+    header: Optional[Dict[str, Any]] = None,
     batch_size=100,
     retries: int = 5,
 ):
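Swapping the built-in dict[str, Any] for typing.Dict[str, Any] restores compatibility with Python 3.8 and earlier, where subscripting the built-in dict raises a TypeError at import time; built-in generics only became subscriptable in Python 3.9 (PEP 585). A minimal sketch of the failure mode, using a hypothetical function:

    from typing import Any, Dict, Optional

    # Fine on any supported Python: typing.Dict has allowed subscripting all along.
    def fetch(header: Optional[Dict[str, Any]] = None) -> None:
        pass

    # On Python <= 3.8 the next definition fails at import time with
    # "TypeError: 'type' object is not subscriptable":
    # def fetch(header: Optional[dict[str, Any]] = None) -> None: ...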

deeplake/integrations/mmdet/mmdet_.py (+13 -13)
@@ -1,14 +1,14 @@
 """
-Deep Lake offers an integration with MMDetection, a popular open-source object detection toolbox based on PyTorch. 
+Deep Lake offers an integration with MMDetection, a popular open-source object detection toolbox based on PyTorch.
 The integration enables users to train models while streaming Deep Lake dataset using the transformation, training, and evaluation tools built by MMDet.
 
 Learn more about MMDetection `here <https://mmdetection.readthedocs.io/en/latest/>`_.
 
 Integration Interface
 ~~~~~~~~~~~~~~~~~~~~~
-MMDetection works with configs. Deeplake adopted this strategy, and in order to train MMDet models, you need to create/specify your model 
-and training/validation config. Deep Lake integration's logic is almost the same as MMDetection's with some minor modifications. The integrations 
-with MMDET occurs in the deeplake.integrations.mmdet module. At a high-level, Deep Lake is responsible for the pytorch dataloader that streams data 
+MMDetection works with configs. Deeplake adopted this strategy, and in order to train MMDet models, you need to create/specify your model
+and training/validation config. Deep Lake integration's logic is almost the same as MMDetection's with some minor modifications. The integrations
+with MMDET occurs in the deeplake.integrations.mmdet module. At a high-level, Deep Lake is responsible for the pytorch dataloader that streams data
 to the training framework, while MMDET is used for the training, transformation, and evaluation logic. Let us take a look at the config with deeplake changes:
 
 Deeplake integration requires the following parameters to be specified in the configuration file:
@@ -17,9 +17,9 @@
 - ``train``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the training data
 - ``val``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the validation data
 - ``pipeline``: List of transformations. This parameter exists for train as well as for val.
-    
+
 - Example:
-    
+
 >>> pipeline = [dict(type="Resize", img_scale=[(320, 320), (608, 608)], keep_ratio=True), dict(type="RandomFlip", flip_ratio=0.5), dict(type="PhotoMetricDistortion")]
 
 - ``deeplake_path``: Path to the deeplake dataset. This parameter exists for train as well as for val.
@@ -38,10 +38,10 @@
 - ``"num_workers"``: Number of workers to use. If not specified, dataloader will use ``workers_per_gpu``.
 
 - ``deeplake_dataloader_type``: Optional parameter. If specified, it represents the type of deeplake dataloader to use.
-- ``deeplake_metrics_format``: Optional parameter. If specified, it represents the format of the deeplake metrics that will be used during evaluation. Defaults to COCO. 
-    Avaliable values are: "COCO", "PascalVOC". If COCO format is used, you can specify whether you want to evaluate on bbox only or also want to evaluate on masks. 
-    To do that you need to specify the format of the metric in metric. 
-    
+- ``deeplake_metrics_format``: Optional parameter. If specified, it represents the format of the deeplake metrics that will be used during evaluation. Defaults to COCO.
+    Avaliable values are: "COCO", "PascalVOC". If COCO format is used, you can specify whether you want to evaluate on bbox only or also want to evaluate on masks.
+    To do that you need to specify the format of the metric in metric.
+
 Example:
 
 >>> deeplake_metrics_format = "COCO"
@@ -105,7 +105,7 @@
 ...     ])
 ... ]
 >>> #--------------------------------------DEEPLAKE INPUTS------------------------------------------------------------#
->>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN" 
+>>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN"
 >>> data = dict(
 ...     # samples_per_gpu=4, # Is used instead of batch_size if deeplake_dataloader is not specified below
 ...     # workers_per_gpu=8, # Is used instead of num_workers if deeplake_dataloader is not specified below
@@ -121,9 +121,9 @@
 ...     deeplake_commit_id="",
 ...     #OPTIONAL - Loads a dataset view for training based on view_id
 ...     deeplake_view_id="",
-...     # OPTIONAL - {"mmdet_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMDET dictionary keys. 
+...     # OPTIONAL - {"mmdet_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMDET dictionary keys.
 ...     # If not specified, Deep Lake will auto-infer the mapping, but it might make mistakes if datasets have many tensors
-...     deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories", "gt_masks": "masks},
+...     deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories", "gt_masks": "masks},
 ...     # OPTIONAL - Parameters to use for the Deep Lake dataloader. If unspecified, the integration uses
 ...     # the parameters in other parts of the cfg file such as samples_per_gpu, and others.
 ...     deeplake_dataloader = {"shuffle": True, "batch_size": 4, 'num_workers': 8}
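Every hunk in this file strips trailing whitespace from the module docstring, so the removed and added lines above render identically; only invisible characters at the end of each line differ. A quick sketch of the equivalence (example strings, not read from the file):

    old = "To do that you need to specify the format of the metric in metric. \n"
    new = "To do that you need to specify the format of the metric in metric.\n"
    assert old != new                    # the raw lines differ...
    assert old.rstrip() == new.rstrip()  # ...but only in trailing whitespace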
