Skip to content

Commit 4c66499

Browse files
committed
Update google search pagination
1 parent d1b3958 commit 4c66499

File tree

1 file changed

+14
-18
lines changed

1 file changed

+14
-18
lines changed

fake_traffic/fake_traffic.py

+14 -18
Original file line number | Diff line number | Diff line change
@@ -74,34 +74,30 @@ async def acrawl(self):
7474

7575
# google search
7676
for keyword in keywords:
77+
search_urls = []
7778
try:
7879
await page.goto("https://www.google.com", wait_until="load")
7980
await page.fill('textarea[name="q"]', keyword)
8081
await page.press('textarea[name="q"]', "Enter")
81-
for _ in range(30):
82-
# Check for a popup window and close it
83-
if len(self.browser.pages) > 1:
84-
await self.browser.pages[1].close()
85-
# Scroll to the bottom of the page
86-
await page.mouse.wheel(0, 1000)
87-
await asyncio.sleep(0.25)
88-
elements = await page.query_selector_all(
89-
"//div[starts-with(@class, 'g ')]//span/a[@href]"
90-
)
91-
if len(elements) > 100:
92-
break
93-
result_urls = [
94-
await link.get_attribute("href") for link in elements
95-
]
82+
# pagination
83+
for _ in range(10):
84+
await page.wait_for_load_state("load")
85+
# parse urls
86+
elements = await page.locator(
87+
"xpath=//div[starts-with(@class, 'g ')]//span/a[@href]"
88+
).all()
89+
page_urls = [await e.get_attribute("href") for e in elements]
90+
search_urls.extend(page_urls)
91+
# click the "Next" button
92+
await page.locator("xpath=//td[@role='heading']").last.click()
9693
logger.info(
97-
f"google_search() {keyword=} GOT {len(result_urls)} results"
94+
f"google_search() {keyword=} GOT {len(search_urls)} results"
9895
)
9996
except Exception as ex:
100-
result_urls = []
10197
logger.warning(f"google_search() {type(ex).__name__}: {ex}")
10298

10399
# browse urls in parallel
104-
tasks = [asyncio.create_task(self.abrowse(url)) for url in result_urls]
100+
tasks = [asyncio.create_task(self.abrowse(url)) for url in search_urls]
105101
await asyncio.gather(*tasks)
106102

107103
def crawl(self):

0 commit comments

Comments (0)