Commit bd67a34

Merge pull request #3028 from activeloopai/fix_
fix
2 parents: d661451 + 5c8c854

5 files changed (+27 -27 lines)

deeplake/core/compression.py (+6 -6)
@@ -507,11 +507,11 @@ def get_compression(header=None, path=None):
         return fmt[1:]
     if header:
         if (
-            header[4:12] == b"\x66\x74\x79\x70\x4D\x53\x4E\x56"
-            or header[4:12] == b"\x66\x74\x79\x70\x69\x73\x6F\x6D"
+            header[4:12] == b"\x66\x74\x79\x70\x4d\x53\x4e\x56"
+            or header[4:12] == b"\x66\x74\x79\x70\x69\x73\x6f\x6d"
         ):
             return "mp4"
-        if header[0:4] == b"\x1A\x45\xDF\xA3":
+        if header[0:4] == b"\x1a\x45\xdf\xa3":
             return "mkv"
         if (
             header[0:2] == b"\xff\xfb"
@@ -525,12 +525,12 @@ def get_compression(header=None, path=None):
             return "wav"
         if header[0:4] == b"\x52\x49\x46\x46" and header[8:12] == b"\x41\x56\x49\x20":
            return "avi"
-        if header[128:132] == b"\x44\x49\x43\x4D":
+        if header[128:132] == b"\x44\x49\x43\x4d":
             return "dcm"
         if header[0:4] == b"\x6e\x2b\x31\x00":
             return "nii"
         if any(
-            header[: len(x)] == x for x in [b"\x73\x6F\x6C\x69", b"numpy-stl", b"solid"]
+            header[: len(x)] == x for x in [b"\x73\x6f\x6c\x69", b"numpy-stl", b"solid"]
         ):
             return "stl"
         if not Image.OPEN:
@@ -811,7 +811,7 @@ def _read_jpeg_shape_from_buffer(buf: bytes) -> Tuple[int, ...]:
 
 
 def _read_dicom_shape_and_dtype(
-    f: Union[bytes, BinaryIO]
+    f: Union[bytes, BinaryIO],
 ) -> Tuple[Tuple[int, ...], str]:
     try:
         from pydicom import dcmread
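The byte-literal edits above are purely cosmetic: hex escape digits are case-insensitive in Python, so b"\x4D" and b"\x4d" denote the same byte, and every magic-number check matches exactly as before. A minimal sketch to confirm this (the header value below is a hypothetical MP4 header prefix, not taken from the codebase):

    # Hex escapes with upper- and lowercase digits produce identical bytes.
    assert b"\x66\x74\x79\x70\x4D\x53\x4E\x56" == b"\x66\x74\x79\x70\x4d\x53\x4e\x56"
    assert b"\x1A\x45\xDF\xA3" == b"\x1a\x45\xdf\xa3"

    # A hypothetical 12-byte MP4 header ("....ftypisom") matches either spelling.
    header = b"\x00\x00\x00\x18" + b"ftypisom"
    assert header[4:12] == b"\x66\x74\x79\x70\x69\x73\x6f\x6d"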

deeplake/core/serialize.py (+2 -2)
@@ -311,7 +311,7 @@ def serialize_linked_tiled_sample(sample: LinkedTiledSample):
 
 
 def deserialize_linked_tiled_sample(
-    byts: Union[bytes, memoryview]
+    byts: Union[bytes, memoryview],
 ) -> LinkedTiledSample:
     byts = memoryview(byts)
     # Read version
@@ -374,7 +374,7 @@ def serialize_chunkids(version: str, arr: np.ndarray) -> memoryview:
 
 
 def deserialize_chunkids(
-    byts: Union[bytes, memoryview]
+    byts: Union[bytes, memoryview],
 ) -> Tuple[str, np.ndarray, type]:
     """Deserializes a chunk ID encoder from the serialized byte stream. This is how the encoder can be accessed/modified after it is read from storage.
 
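Both hunks only add a trailing comma after the sole parameter, the "magic trailing comma" style that formatters such as Black enforce for exploded signatures; runtime behavior is unchanged. A minimal sketch with a hypothetical function, not from the codebase:

    # With or without the trailing comma, the definition behaves identically.
    def length(
        byts: bytes,
    ) -> int:
        return len(byts)

    assert length(b"abc") == 3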

deeplake/core/version_control/test/test_dataset_diff.py (+3 -3)
@@ -4,7 +4,7 @@
 def test_tobytes():
     diff = DatasetDiff()
     diff.tensor_renamed("old1", "newer1")
-    diff.tensor_renamed("old2", "\u604f\u7D59")
+    diff.tensor_renamed("old2", "\u604f\u7d59")
     diff.tensor_deleted("deleted1")
     diff.tensor_deleted("deleted2")
     diff.tensor_deleted("deleted3")
@@ -17,8 +17,8 @@ def test_tobytes():
         len("newer1".encode("utf-8")).to_bytes(8, "big"),
         "old1newer1".encode("utf-8"),
         len("old2".encode("utf-8")).to_bytes(8, "big"),
-        len("\u604f\u7D59".encode("utf-8")).to_bytes(8, "big"),
-        "old2\u604f\u7D59".encode("utf-8"),
+        len("\u604f\u7d59".encode("utf-8")).to_bytes(8, "big"),
+        "old2\u604f\u7d59".encode("utf-8"),
         int(3).to_bytes(8, "big"),
         len("deleted1".encode("utf-8")).to_bytes(8, "big"),
         "deleted1".encode("utf-8"),

deeplake/integrations/labelbox/labelbox_utils.py (+3 -3)
@@ -1,5 +1,5 @@
 import numpy as np
-from typing import Generator, Tuple, Optional, Any
+from typing import Generator, Tuple, Dict, Optional, Any
 import labelbox as lb  # type: ignore
 import av
 import requests
@@ -38,7 +38,7 @@ def filter_video_paths_(video_paths, strategy):
 
 
 def frame_generator_(
-    video_path: str, header: Optional[dict[str, Any]] = None, retries: int = 5
+    video_path: str, header: Optional[Dict[str, Any]] = None, retries: int = 5
 ) -> Generator[Tuple[int, np.ndarray], None, None]:
     """
     Generate frames from a video file.
@@ -76,7 +76,7 @@ def get_video_container(current_retries):
 
 def frames_batch_generator_(
     video_path: str,
-    header: Optional[dict[str, Any]] = None,
+    header: Optional[Dict[str, Any]] = None,
     batch_size=100,
     retries: int = 5,
 ):
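Swapping the built-in dict[str, Any] for typing.Dict[str, Any] restores compatibility with Python 3.8 and earlier, where subscripting the built-in dict raises a TypeError at import time; built-in generics only became subscriptable in Python 3.9 (PEP 585). A minimal sketch of the failure mode, using a hypothetical function:

    from typing import Any, Dict, Optional

    # Fine on any supported Python: typing.Dict has allowed subscripting all along.
    def fetch(header: Optional[Dict[str, Any]] = None) -> None:
        pass

    # On Python <= 3.8 the next definition fails at import time with
    # "TypeError: 'type' object is not subscriptable":
    # def fetch(header: Optional[dict[str, Any]] = None) -> None: ...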

deeplake/integrations/mmdet/mmdet_.py (+13 -13)
@@ -1,14 +1,14 @@
 """
-Deep Lake offers an integration with MMDetection, a popular open-source object detection toolbox based on PyTorch. 
+Deep Lake offers an integration with MMDetection, a popular open-source object detection toolbox based on PyTorch.
 The integration enables users to train models while streaming Deep Lake dataset using the transformation, training, and evaluation tools built by MMDet.
 
 Learn more about MMDetection `here <https://mmdetection.readthedocs.io/en/latest/>`_.
 
 Integration Interface
 ~~~~~~~~~~~~~~~~~~~~~
-MMDetection works with configs. Deeplake adopted this strategy, and in order to train MMDet models, you need to create/specify your model 
-and training/validation config. Deep Lake integration's logic is almost the same as MMDetection's with some minor modifications. The integrations 
-with MMDET occurs in the deeplake.integrations.mmdet module. At a high-level, Deep Lake is responsible for the pytorch dataloader that streams data 
+MMDetection works with configs. Deeplake adopted this strategy, and in order to train MMDet models, you need to create/specify your model
+and training/validation config. Deep Lake integration's logic is almost the same as MMDetection's with some minor modifications. The integrations
+with MMDET occurs in the deeplake.integrations.mmdet module. At a high-level, Deep Lake is responsible for the pytorch dataloader that streams data
 to the training framework, while MMDET is used for the training, transformation, and evaluation logic. Let us take a look at the config with deeplake changes:
 
 Deeplake integration requires the following parameters to be specified in the configuration file:
@@ -17,9 +17,9 @@
 - ``train``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the training data
 - ``val``: Keyword argument of data, a dictionary where one can specify dataset path, credentials, transformations of the validation data
 - ``pipeline``: List of transformations. This parameter exists for train as well as for val.
-    
+
 - Example:
-    
+
 >>> pipeline = [dict(type="Resize", img_scale=[(320, 320), (608, 608)], keep_ratio=True), dict(type="RandomFlip", flip_ratio=0.5), dict(type="PhotoMetricDistortion")]
 
 - ``deeplake_path``: Path to the deeplake dataset. This parameter exists for train as well as for val.
@@ -38,10 +38,10 @@
 - ``"num_workers"``: Number of workers to use. If not specified, dataloader will use ``workers_per_gpu``.
 
 - ``deeplake_dataloader_type``: Optional parameter. If specified, it represents the type of deeplake dataloader to use.
-- ``deeplake_metrics_format``: Optional parameter. If specified, it represents the format of the deeplake metrics that will be used during evaluation. Defaults to COCO. 
-    Avaliable values are: "COCO", "PascalVOC". If COCO format is used, you can specify whether you want to evaluate on bbox only or also want to evaluate on masks. 
-    To do that you need to specify the format of the metric in metric. 
-    
+- ``deeplake_metrics_format``: Optional parameter. If specified, it represents the format of the deeplake metrics that will be used during evaluation. Defaults to COCO.
+    Avaliable values are: "COCO", "PascalVOC". If COCO format is used, you can specify whether you want to evaluate on bbox only or also want to evaluate on masks.
+    To do that you need to specify the format of the metric in metric.
+
 Example:
 
 >>> deeplake_metrics_format = "COCO"
@@ -105,7 +105,7 @@
 ...     ])
 ... ]
 >>> #--------------------------------------DEEPLAKE INPUTS------------------------------------------------------------#
->>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN" 
+>>> TOKEN = "INSERT_YOUR_DEEPLAKE_TOKEN"
 >>> data = dict(
 ...     # samples_per_gpu=4, # Is used instead of batch_size if deeplake_dataloader is not specified below
 ...     # workers_per_gpu=8, # Is used instead of num_workers if deeplake_dataloader is not specified below
@@ -121,9 +121,9 @@
 ...     deeplake_commit_id="",
 ...     #OPTIONAL - Loads a dataset view for training based on view_id
 ...     deeplake_view_id="",
-...     # OPTIONAL - {"mmdet_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMDET dictionary keys. 
+...     # OPTIONAL - {"mmdet_key": "deep_lake_tensor",...} - Maps Deep Lake tensors to MMDET dictionary keys.
 ...     # If not specified, Deep Lake will auto-infer the mapping, but it might make mistakes if datasets have many tensors
-...     deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories", "gt_masks": "masks},
+...     deeplake_tensors = {"img": "images", "gt_bboxes": "boxes", "gt_labels": "categories", "gt_masks": "masks},
 ...     # OPTIONAL - Parameters to use for the Deep Lake dataloader. If unspecified, the integration uses
 ...     # the parameters in other parts of the cfg file such as samples_per_gpu, and others.
 ...     deeplake_dataloader = {"shuffle": True, "batch_size": 4, 'num_workers': 8}
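Every hunk in this file strips trailing whitespace from the module docstring, so the removed and added lines above render identically; only invisible characters at the end of each line differ. A quick sketch of the equivalence (example strings, not read from the file):

    old = "To do that you need to specify the format of the metric in metric. \n"
    new = "To do that you need to specify the format of the metric in metric.\n"
    assert old != new                    # the raw lines differ...
    assert old.rstrip() == new.rstrip()  # ...but only in trailing whitespace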
