1
1
import magic
2
2
import logging
3
+ from timeit import default_timer
3
4
from tempfile import mkdtemp
4
5
from datetime import datetime
5
6
from pkg_resources import get_distribution
15
16
from sentry_sdk import capture_exception
16
17
from followthemoney .helpers import entity_filename
17
18
from followthemoney .namespace import Namespace
19
+ from prometheus_client import Counter , Histogram
18
20
19
21
from ingestors .directory import DirectoryIngestor
20
22
from ingestors .exc import ProcessingException
23
25
24
26
log = logging .getLogger (__name__ )
25
27
28
+ INGEST_SUCCEEDED = Counter (
29
+ "ingest_succeeded_total" ,
30
+ "Successful ingestions" ,
31
+ ["ingestor" ],
32
+ )
33
+ INGEST_FAILED = Counter (
34
+ "ingest_failed_total" ,
35
+ "Failed ingestions" ,
36
+ ["ingestor" ],
37
+ )
38
+ INGEST_DURATION = Histogram (
39
+ "ingest_duration_seconds" ,
40
+ "Ingest duration by ingestor" ,
41
+ ["ingestor" ],
42
+ )
43
+ INGEST_INGESTED_BYTES = Counter (
44
+ "ingest_ingested_bytes_total" ,
45
+ "Total number of bytes ingested" ,
46
+ ["ingestor" ],
47
+ )
48
+
26
49
27
50
class Manager (object ):
28
51
"""Handles the lifecycle of an ingestor. This can be subclassed to embed it
@@ -138,8 +161,10 @@ def ingest_entity(self, entity):
138
161
def ingest (self , file_path , entity , ** kwargs ):
139
162
"""Main execution step of an ingestor."""
140
163
file_path = ensure_path (file_path )
164
+ file_size = None
141
165
if file_path .is_file () and not entity .has ("fileSize" ):
142
- entity .add ("fileSize" , file_path .stat ().st_size )
166
+ file_size = file_path .stat ().st_size # size in bytes
167
+ entity .add ("fileSize" , file_size )
143
168
144
169
now = datetime .now ()
145
170
now_string = now .strftime ("%Y-%m-%dT%H:%M:%S.%f" )
@@ -148,14 +173,32 @@ def ingest(self, file_path, entity, **kwargs):
148
173
entity .set ("processingAgent" , get_distribution ("ingest" ).version )
149
174
entity .set ("processedAt" , now_string )
150
175
176
+ ingestor_class = None
177
+ ingestor_name = None
178
+
151
179
try :
152
180
ingestor_class = self .auction (file_path , entity )
153
- log .info ("Ingestor [%r]: %s" , entity , ingestor_class .__name__ )
181
+ ingestor_name = ingestor_class .__name__
182
+ log .info ("Ingestor [%r]: %s" , entity , ingestor_name )
183
+
184
+ start_time = default_timer ()
154
185
self .delegate (ingestor_class , file_path , entity )
186
+ duration = max (0 , default_timer () - start_time )
187
+
188
+ INGEST_SUCCEEDED .labels (ingestor_name ).inc ()
189
+ INGEST_DURATION .labels (ingestor_name ).observe (duration )
190
+ INGEST_INGESTED_BYTES .labels (ingestor_name ).inc (file_size )
191
+
155
192
entity .set ("processingStatus" , self .STATUS_SUCCESS )
156
193
except ProcessingException as pexc :
157
- entity .set ("processingError" , stringify (pexc ))
158
194
log .exception ("[%r] Failed to process: %s" , entity , pexc )
195
+
196
+ if ingestor_name :
197
+ INGEST_FAILED .labels (ingestor_name ).inc ()
198
+ else :
199
+ INGEST_FAILED .labels (None ).inc ()
200
+
201
+ entity .set ("processingError" , stringify (pexc ))
159
202
capture_exception (pexc )
160
203
finally :
161
204
self .finalize (entity )
0 commit comments