@@ -36,7 +36,6 @@ def cache_lookup(self, url):
         if descr is not None and descr != "":
             return descr
         if url not in DescriptionCache.ignore_set:
-            print(f"cache_lookup: {url}")
             descr = gettitle.get_meta_descr(url)
             if descr is not None:
                 self.map_url_to_descr[url] = descr
@@ -66,6 +65,7 @@ def get_all(self):

         cache_lookup_ok = 0
         cache_lookup_failed = 0
+        failed_lookups = []

         for entry in json_data:
             #print("Category {} SubCategory {}".format(entry.get("c"), entry.get("sc")))
@@ -86,13 +86,18 @@ def get_all(self):

             description = self.desc_cache.cache_lookup(url)  #, self.soup_builder)
             if description is None or description == "":
+                print(f"failed lookup: {url}")
                 cache_lookup_failed += 1
+                failed_lookups.append(url)
             else:
                 cache_lookup_ok += 1

             entry = (entry.get("t"), entry.get("s"), url, description)
             print(f"Cache lookup succeeded: {cache_lookup_ok} failed: {cache_lookup_failed}")
-
+            if failed_lookups:
+                with open("failed_lookups.txt", "w") as failed_lookups_file:
+                    print("failed lookups:\n", "\n".join(failed_lookups))
+                    failed_lookups_file.write("\n".join(failed_lookups))

             sub_cat_obj.append(entry)
             num_entries = num_entries + 1
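
Taken together, the change collects every URL whose description lookup comes back empty and dumps the list to `failed_lookups.txt`. A minimal standalone sketch of that bookkeeping pattern, where `lookup_description` and the sample URLs are hypothetical stand-ins for `DescriptionCache.cache_lookup`:

```python
# Hypothetical stand-in for DescriptionCache.cache_lookup: pretend any URL
# ending in "/" has no meta description.
def lookup_description(url):
    return None if url.endswith("/") else "some description"

def collect_descriptions(urls):
    ok, failed = 0, []
    for url in urls:
        description = lookup_description(url)
        if description is None or description == "":
            print(f"failed lookup: {url}")
            failed.append(url)
        else:
            ok += 1
    print(f"Cache lookup succeeded: {ok} failed: {len(failed)}")
    if failed:
        # Give the file handle its own name so it does not shadow the list.
        with open("failed_lookups.txt", "w") as failed_file:
            failed_file.write("\n".join(failed))

collect_descriptions(["https://example.com/", "https://example.com/page"])
```

Keeping the file handle under a distinct name means the `"\n".join(...)` inside the `with` block still sees the list of failed URLs rather than the open file object.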