Merge pull request #42 from hugovk/cache-processed-activities

hugovk · web-flow · commit 157d0de813a1 · 2024-02-11T13:42:34.000+02:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -61,4 +61,5 @@ version.source = "vcs"
 local_scheme = "no-local-version"
 
 [tool.isort]
+add_imports = "from __future__ import annotations"
 profile = "black"
diff --git a/src/stravavis/__main__.py b/src/stravavis/__main__.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from . import cli
 
 if __name__ == "__main__":
diff --git a/src/stravavis/cli.py b/src/stravavis/cli.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import argparse
 import glob
 import os.path
@@ -89,7 +91,7 @@ def main():
     if os.path.isdir(args.path):
         args.path = os.path.join(args.path, "*")
 
-    filenames = glob.glob(args.path)
+    filenames = sorted(glob.glob(args.path))
     if not filenames:
         sys.exit(f"No files found matching {args.path}")
 
diff --git a/src/stravavis/plot_calendar.py b/src/stravavis/plot_calendar.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import calmap
 import matplotlib.pyplot as plt
 import pandas as pd
diff --git a/src/stravavis/plot_dumbbell.py b/src/stravavis/plot_dumbbell.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import pandas as pd
 from plotnine import (
     aes,
diff --git a/src/stravavis/plot_elevations.py b/src/stravavis/plot_elevations.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import math
 
 import matplotlib.pyplot as plt
diff --git a/src/stravavis/plot_facets.py b/src/stravavis/plot_facets.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import math
 
 import matplotlib.pyplot as plt
diff --git a/src/stravavis/plot_landscape.py b/src/stravavis/plot_landscape.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import matplotlib.pyplot as plt
 import pandas as pd
 from rich.progress import track
diff --git a/src/stravavis/plot_map.py b/src/stravavis/plot_map.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from math import log, pi, tan
 
 import matplotlib.pyplot as plt
diff --git a/src/stravavis/process_activities.py b/src/stravavis/process_activities.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import pandas as pd
 
 
diff --git a/src/stravavis/process_data.py b/src/stravavis/process_data.py
@@ -1,15 +1,18 @@
 from __future__ import annotations
 
+import hashlib
 import math
+import tempfile
 from multiprocessing import Pool
+from pathlib import Path
 
 import fit2gpx
 import gpxpy
 import pandas as pd
 from rich.progress import track
 
 
-def process_file(fpath):
+def process_file(fpath: str) -> pd.DataFrame | None:
     if fpath.endswith(".gpx"):
         return process_gpx(fpath)
     elif fpath.endswith(".fit"):
@@ -18,7 +21,7 @@ def process_file(fpath):
 
 # Function for processing an individual GPX file
 # Ref: https://pypi.org/project/gpxpy/
-def process_gpx(gpxfile):
+def process_gpx(gpxfile: str) -> pd.DataFrame | None:
     with open(gpxfile, encoding="utf-8") as f:
         try:
             activity = gpxpy.parse(f)
@@ -64,7 +67,7 @@ def process_gpx(gpxfile):
 
 # Function for processing an individual FIT file
 # Ref: https://github.com/dodo-saba/fit2gpx
-def process_fit(fitfile):
+def process_fit(fitfile: str) -> pd.DataFrame:
     conv = fit2gpx.Converter()
     df_lap, df = conv.fit_to_dataframes(fname=fitfile)
 
@@ -101,9 +104,33 @@ def process_fit(fitfile):
     return df
 
 
+def load_cache(filenames: list[str]) -> tuple[Path, pd.DataFrame | None]:
+    # Cache key: MD5 hex digest of the concatenated filenames (names, not contents)
+    key = hashlib.md5("".join(filenames).encode("utf-8")).hexdigest()
+
+    # Ensure a "stravavis" cache directory exists under the system temp directory
+    dir_name = Path(tempfile.gettempdir()) / "stravavis"
+    dir_name.mkdir(parents=True, exist_ok=True)
+    cache_filename = dir_name / f"cached_activities_{key}.pkl"
+    print(f"Cache filename: {cache_filename}")
+
+    # Load cache if it exists. NOTE(review): unpickling from a temp dir - confirm trust
+    try:
+        df = pd.read_pickle(cache_filename)
+        print("Loaded cached activities")
+        return cache_filename, df
+    except FileNotFoundError:
+        print("Cache not found")
+        return cache_filename, None
+
+
 # Function for processing (unzipped) GPX and FIT files in a directory (path)
 def process_data(filenames: list[str]) -> pd.DataFrame:
     # Process all files (GPX or FIT)
+    cache_filename, df = load_cache(filenames)
+    if df is not None:
+        return df
+
     with Pool() as pool:
         try:
             it = pool.imap_unordered(process_file, filenames)
@@ -117,4 +144,7 @@ def process_data(filenames: list[str]) -> pd.DataFrame:
 
     df["time"] = pd.to_datetime(df["time"], utc=True)
 
+    # Save cache
+    df.to_pickle(cache_filename)
+
     return df

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+from __future__ import annotations`
	`2`	`+`
`1`	`3`	`from . import cli`
`2`	`4`
`3`	`5`	`if __name__ == "__main__":`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+from __future__ import annotations`
	`2`	`+`
`1`	`3`	`import calmap`
`2`	`4`	`import matplotlib.pyplot as plt`
`3`	`5`	`import pandas as pd`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+from __future__ import annotations`
	`2`	`+`
`1`	`3`	`import math`
`2`	`4`
`3`	`5`	`import matplotlib.pyplot as plt`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+from __future__ import annotations`
	`2`	`+`
`1`	`3`	`from math import log, pi, tan`
`2`	`4`
`3`	`5`	`import matplotlib.pyplot as plt`