Skip to content

Commit 2cc2364

Browse files
committed
Cache processed activities
1 parent 3f05e06 commit 2cc2364

File tree

2 files changed

+35
-4
lines changed

2 files changed

+35
-4
lines changed

src/stravavis/cli.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def main():
8989
if os.path.isdir(args.path):
9090
args.path = os.path.join(args.path, "*")
9191

92-
filenames = glob.glob(args.path)
92+
filenames = sorted(glob.glob(args.path))
9393
if not filenames:
9494
sys.exit(f"No files found matching {args.path}")
9595

src/stravavis/process_data.py

+34-3
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
from __future__ import annotations
22

3+
import glob
4+
import hashlib
35
import math
6+
import tempfile
47
from multiprocessing import Pool
8+
from pathlib import Path
59

610
import fit2gpx
711
import gpxpy
812
import pandas as pd
913
from rich.progress import track
1014

1115

12-
def process_file(fpath):
16+
def process_file(fpath: str) -> pd.DataFrame | None:
1317
if fpath.endswith(".gpx"):
1418
return process_gpx(fpath)
1519
elif fpath.endswith(".fit"):
@@ -18,7 +22,7 @@ def process_file(fpath):
1822

1923
# Function for processing an individual GPX file
2024
# Ref: https://pypi.org/project/gpxpy/
21-
def process_gpx(gpxfile):
25+
def process_gpx(gpxfile: str) -> pd.DataFrame | None:
2226
with open(gpxfile, encoding="utf-8") as f:
2327
try:
2428
activity = gpxpy.parse(f)
@@ -64,7 +68,7 @@ def process_gpx(gpxfile):
6468

6569
# Function for processing an individual FIT file
6670
# Ref: https://github.com/dodo-saba/fit2gpx
67-
def process_fit(fitfile):
71+
def process_fit(fitfile: str) -> pd.DataFrame:
6872
conv = fit2gpx.Converter()
6973
df_lap, df = conv.fit_to_dataframes(fname=fitfile)
7074

@@ -101,9 +105,33 @@ def process_fit(fitfile):
101105
return df
102106

103107

108+
def load_cache(filenames: list[str]) -> tuple[Path, pd.DataFrame | None]:
    """Look up a previously pickled activities DataFrame for *filenames*.

    The cache file lives in a ``stravavis`` directory under the system temp
    directory and is named after an MD5 digest of the filename list, so a
    different list of files maps to a different cache file.

    Args:
        filenames: Paths of the activity files about to be processed. The
            order matters: the same names in a different order produce a
            different cache key.

    Returns:
        A ``(cache_filename, df)`` tuple. ``df`` is the cached DataFrame, or
        ``None`` when no usable cache exists. ``cache_filename`` is always
        returned so the caller can write a fresh cache to it.
    """
    # Local import: only needed to recognise a corrupt cache file below.
    import pickle

    # Join with NUL (which cannot occur inside a path) so that distinct lists
    # such as ["ab", "c"] and ["a", "bc"] never collapse to the same key.
    # "surrogateescape" keeps names with undecodable bytes from raising here.
    joined = "\0".join(filenames)
    key = hashlib.md5(joined.encode("utf-8", errors="surrogateescape")).hexdigest()

    # Create a cache directory
    dir_name = Path(tempfile.gettempdir()) / "stravavis"
    dir_name.mkdir(parents=True, exist_ok=True)
    cache_filename = dir_name / f"cached_activities_{key}.pkl"
    print(f"Cache filename: {cache_filename}")

    # Load cache if it exists
    # NOTE(security): this unpickles a file from the shared temp directory.
    # Unpickling executes arbitrary code, so this is only safe when that
    # directory cannot be written to by untrusted users.
    try:
        df = pd.read_pickle(cache_filename)
    except FileNotFoundError:
        print("Cache not found")
        return cache_filename, None
    except (EOFError, pickle.UnpicklingError):
        # A truncated or corrupt cache (e.g. an interrupted earlier write)
        # is treated as a miss so the caller regenerates and overwrites it.
        print("Cache unreadable, ignoring")
        return cache_filename, None

    print("Loaded cached activities")
    return cache_filename, df
126+
127+
104128
# Function for processing a list of (unzipped) GPX and FIT files (filenames)
105129
def process_data(filenames: list[str]) -> pd.DataFrame:
106130
# Process all files (GPX or FIT)
131+
cache_filename, df = load_cache(filenames)
132+
if df is not None:
133+
return df
134+
107135
with Pool() as pool:
108136
try:
109137
it = pool.imap_unordered(process_file, filenames)
@@ -117,4 +145,7 @@ def process_data(filenames: list[str]) -> pd.DataFrame:
117145

118146
df["time"] = pd.to_datetime(df["time"], utc=True)
119147

148+
# Save cache
149+
df.to_pickle(cache_filename)
150+
120151
return df

0 commit comments

Comments
 (0)