 from __future__ import annotations

+import glob
+import hashlib
 import math
+import tempfile
 from multiprocessing import Pool
+from pathlib import Path

 import fit2gpx
 import gpxpy
 import pandas as pd
 from rich.progress import track


-def process_file(fpath):
+def process_file(fpath: str) -> pd.DataFrame | None:
     if fpath.endswith(".gpx"):
         return process_gpx(fpath)
     elif fpath.endswith(".fit"):
@@ -18,7 +22,7 @@ def process_file(fpath):

 # Function for processing an individual GPX file
 # Ref: https://pypi.org/project/gpxpy/
-def process_gpx(gpxfile):
+def process_gpx(gpxfile: str) -> pd.DataFrame | None:
     with open(gpxfile, encoding="utf-8") as f:
         try:
             activity = gpxpy.parse(f)
@@ -64,7 +68,7 @@ def process_gpx(gpxfile):

 # Function for processing an individual FIT file
 # Ref: https://github.com/dodo-saba/fit2gpx
-def process_fit(fitfile):
+def process_fit(fitfile: str) -> pd.DataFrame:
     conv = fit2gpx.Converter()
     df_lap, df = conv.fit_to_dataframes(fname=fitfile)

@@ -101,9 +105,33 @@ def process_fit(fitfile):
     return df


+def load_cache(filenames: list[str]) -> tuple[Path, pd.DataFrame | None]:
+    # Create a cache key from the filenames
+    key = hashlib.md5("".join(filenames).encode("utf-8")).hexdigest()
+
+    # Create a cache directory
+    dir_name = Path(tempfile.gettempdir()) / "stravavis"
+    dir_name.mkdir(parents=True, exist_ok=True)
+    cache_filename = dir_name / f"cached_activities_{key}.pkl"
+    print(f"Cache filename: {cache_filename}")
+
+    # Load cache if it exists
+    try:
+        df = pd.read_pickle(cache_filename)
+        print("Loaded cached activities")
+        return cache_filename, df
+    except FileNotFoundError:
+        print("Cache not found")
+        return cache_filename, None
+
+
 # Function for processing (unzipped) GPX and FIT files in a directory (path)
 def process_data(filenames: list[str]) -> pd.DataFrame:
     # Process all files (GPX or FIT)
+    cache_filename, df = load_cache(filenames)
+    if df is not None:
+        return df
+
     with Pool() as pool:
         try:
             it = pool.imap_unordered(process_file, filenames)
@@ -117,4 +145,7 @@ def process_data(filenames: list[str]) -> pd.DataFrame:

     df["time"] = pd.to_datetime(df["time"], utc=True)

+    # Save cache
+    df.to_pickle(cache_filename)
+
     return df
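Note on the cache key (a minimal sketch, not part of the diff): load_cache derives the key from the MD5 hash of the concatenated filenames, so the cached pickle is reused only when process_data is called again with the same file list in the same order; a different set or ordering of paths produces a new cache file. The paths below are hypothetical, purely to illustrate which cache file would be picked up.

    import hashlib
    import tempfile
    from pathlib import Path

    # Hypothetical activity files; joining and hashing them reproduces load_cache's key.
    filenames = ["activities/morning_ride.gpx", "activities/evening_run.fit"]
    key = hashlib.md5("".join(filenames).encode("utf-8")).hexdigest()

    # Same location the new code writes to: <tmpdir>/stravavis/cached_activities_<key>.pkl
    print(Path(tempfile.gettempdir()) / "stravavis" / f"cached_activities_{key}.pkl")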