@@ -71,44 +71,56 @@ async def acrawl(self):
         await stealth_async(page)
 
         # google trends
-        url = f"https://trends.google.com/trends/trendingsearches/realtime?geo={self.country}&hl={self.language}&category={self.category}"
-        await page.goto(url, wait_until="load")
-        elements = await page.query_selector_all("//div[@class='title']")
-        keywords = [
-            x for e in elements for x in (await e.inner_text()).split(" • ")
-        ]
-        logger.info(f"google_trends() GOT {len(keywords)} keywords")
+        try:
+            await page.goto(
+                f"https://trends.google.com/trends/trendingsearches/realtime?geo={self.country}&hl={self.language}&category={self.category}",
+                wait_until="load",
+            )
+            elements = await page.query_selector_all("//div[@class='title']")
+            keywords = [
+                x for e in elements for x in (await e.inner_text()).split(" • ")
+            ]
+            logger.info(f"google_trends() GOT {len(keywords)} keywords")
+        except Exception as ex:
+            keywords = []
+            logger.warning(f"google_trends() {type(ex).__name__}: {ex}")
 
         # google search
         for keyword in keywords:
-            await page.goto("https://www.google.com")
-            await page.fill('textarea[name="q"]', keyword)
-            await page.press('textarea[name="q"]', "Enter")
-            while True:
-                # Check for a popup window and close it
-                if len(self.browser.pages) > 1:
-                    await self.browser.pages[1].close()
-                # Scroll to the bottom of the page
-                await page.mouse.wheel(0, 1000)
-                await page.wait_for_load_state("networkidle")
-                await asyncio.sleep(0.2)
-                elements = await page.query_selector_all(
-                    "//div[starts-with(@class, 'g ')]//span/a[@href]"
+            try:
+                await page.goto("https://www.google.com", wait_until="load")
+                await page.fill('textarea[name="q"]', keyword)
+                await page.press('textarea[name="q"]', "Enter")
+                while True:
+                    # Check for a popup window and close it
+                    if len(self.browser.pages) > 1:
+                        await self.browser.pages[1].close()
+                    # Scroll to the bottom of the page
+                    await page.mouse.wheel(0, 1000)
+                    await asyncio.sleep(0.25)
+                    elements = await page.query_selector_all(
+                        "//div[starts-with(@class, 'g ')]//span/a[@href]"
+                    )
+                    if len(elements) > 50:
+                        break
+                result_urls = [await link.get_attribute("href") for link in elements]
+                logger.info(
+                    f"google_search() {keyword=} GOT {len(result_urls)} results"
                 )
-                if len(elements) > 50:
-                    break
-            result_urls = [await link.get_attribute("href") for link in elements]
-            logger.info(
-                f"google_search() {keyword=} GOT {len(result_urls)} results"
-            )
+            except Exception as ex:
+                result_urls = []
+                logger.warning(f"google_search() {type(ex).__name__}: {ex}")
 
             # browse urls in parallel
             tasks = [asyncio.create_task(self.abrowse(url)) for url in result_urls]
             await asyncio.gather(*tasks)
 
     def crawl(self):
-        asyncio.run(self.acrawl())
-
+        while True:
+            try:
+                asyncio.run(self.acrawl())
+            except Exception as ex:
+                logger.warning(f"crawl() {type(ex).__name__}: {ex}")
 
 if __name__ == "__main__":
     fake_traffic = FakeTraffic(
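
The patch applies one pattern throughout: wrap each stage in try/except, fall back to an empty result, log the exception with its type name, and keep going; crawl() then restarts the whole async session in an endless loop. Below is a minimal, self-contained sketch of that supervisor shape. The failing acrawl stub, the restart cap, and the one-second pause are illustrative additions for the sketch only, not part of this commit:

import asyncio
import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("fake_traffic")


async def acrawl() -> None:
    # Stand-in for the real crawl session; always raises so the
    # restart path below is exercised.
    raise RuntimeError("browser disconnected")


def crawl(max_restarts: int = 3) -> None:
    # Same supervisor shape as the patched crawl(): run the async
    # session, log any exception as "<Type>: <message>", restart.
    # The restart cap and the pause are illustrative; the patch
    # itself loops unconditionally with no delay.
    for _ in range(max_restarts):
        try:
            asyncio.run(acrawl())
        except Exception as ex:
            logger.warning(f"crawl() {type(ex).__name__}: {ex}")
        time.sleep(1)


if __name__ == "__main__":
    crawl()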
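For completeness, a hypothetical driver for the constructor the hunk cuts off at fake_traffic = FakeTraffic(. The three keyword arguments are assumptions inferred from the attributes acrawl() interpolates into the trends URL (self.country → geo, self.language → hl, self.category → category); the import path and the values are illustrative, not taken from this commit:

from fake_traffic import FakeTraffic  # assumed module path

fake_traffic = FakeTraffic(
    country="US",      # assumed: becomes geo= in the trends URL
    language="en-US",  # assumed: becomes hl= in the trends URL
    category="all",    # assumed: becomes category= in the trends URL
)
fake_traffic.crawl()  # per this patch: restarts acrawl() until killed

With the patched crawl(), this call never returns; a failed Trends fetch or search now degrades to an empty result list and a warning instead of raising out of acrawl().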