1
1
from __future__ import annotations
2
2
3
+ import hashlib
3
4
import math
5
+ import tempfile
4
6
from multiprocessing import Pool
7
+ from pathlib import Path
5
8
6
9
import fit2gpx
7
10
import gpxpy
8
11
import pandas as pd
9
12
from rich .progress import track
10
13
11
14
12
- def process_file (fpath ) :
15
+ def process_file (fpath : str ) -> pd . DataFrame | None :
13
16
if fpath .endswith (".gpx" ):
14
17
return process_gpx (fpath )
15
18
elif fpath .endswith (".fit" ):
@@ -18,7 +21,7 @@ def process_file(fpath):
18
21
19
22
# Function for processing an individual GPX file
20
23
# Ref: https://pypi.org/project/gpxpy/
21
- def process_gpx (gpxfile ) :
24
+ def process_gpx (gpxfile : str ) -> pd . DataFrame | None :
22
25
with open (gpxfile , encoding = "utf-8" ) as f :
23
26
try :
24
27
activity = gpxpy .parse (f )
@@ -64,7 +67,7 @@ def process_gpx(gpxfile):
64
67
65
68
# Function for processing an individual FIT file
66
69
# Ref: https://github.com/dodo-saba/fit2gpx
67
- def process_fit (fitfile ) :
70
+ def process_fit (fitfile : str ) -> pd . DataFrame :
68
71
conv = fit2gpx .Converter ()
69
72
df_lap , df = conv .fit_to_dataframes (fname = fitfile )
70
73
@@ -101,9 +104,33 @@ def process_fit(fitfile):
101
104
return df
102
105
103
106
107
+ def load_cache (filenames : list [str ]) -> tuple [Path , pd .DataFrame | None ]:
108
+ # Create a cache key from the filenames
109
+ key = hashlib .md5 ("" .join (filenames ).encode ("utf-8" )).hexdigest ()
110
+
111
+ # Create a cache directory
112
+ dir_name = Path (tempfile .gettempdir ()) / "stravavis"
113
+ dir_name .mkdir (parents = True , exist_ok = True )
114
+ cache_filename = dir_name / f"cached_activities_{ key } .pkl"
115
+ print (f"Cache filename: { cache_filename } " )
116
+
117
+ # Load cache if it exists
118
+ try :
119
+ df = pd .read_pickle (cache_filename )
120
+ print ("Loaded cached activities" )
121
+ return cache_filename , df
122
+ except FileNotFoundError :
123
+ print ("Cache not found" )
124
+ return cache_filename , None
125
+
126
+
104
127
# Function for processing a list of (unzipped) GPX and FIT files (filenames)
105
128
def process_data (filenames : list [str ]) -> pd .DataFrame :
106
129
# Process all files (GPX or FIT)
130
+ cache_filename , df = load_cache (filenames )
131
+ if df is not None :
132
+ return df
133
+
107
134
with Pool () as pool :
108
135
try :
109
136
it = pool .imap_unordered (process_file , filenames )
@@ -117,4 +144,7 @@ def process_data(filenames: list[str]) -> pd.DataFrame:
117
144
118
145
df ["time" ] = pd .to_datetime (df ["time" ], utc = True )
119
146
147
+ # Save cache
148
+ df .to_pickle (cache_filename )
149
+
120
150
return df
0 commit comments