Skip to content

Commit 7e441c1

Browse files
committed
feat:
- Adding local testing options
1 parent 2fc66c5 commit 7e441c1

File tree

4 files changed

+86
-55
lines changed

4 files changed

+86
-55
lines changed

.github/workflows.yml

+22
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
name: Go Test
2+
3+
on: [ push, pull_request ]
4+
5+
jobs:
6+
test:
7+
name: Run Tests
8+
runs-on: ubuntu-latest
9+
steps:
10+
11+
- name: Set up Go 1.x
12+
uses: actions/setup-go@v2
13+
with:
14+
go-version: ^1.21 # SemVer range, or 1.x to match the latest stable Go version
15+
16+
- name: Check out code into the Go module directory
17+
uses: actions/checkout@v2
18+
19+
- name: Test
20+
run: |
21+
cd mydirectory
22+
go test ./...

dynamoDBx/update.go

+9-4
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,18 @@ func (db *DB) UpdateWebsite(page site.Page, website site.Website) error {
1515
if err != nil {
1616
return err
1717
}
18-
if reflect.DeepEqual(websiteDB, site.Website{}) {
19-
return db.AddWebsite(website)
18+
if reflect.DeepEqual(websiteDB, &site.Website{}) && err == nil {
19+
println("Website not found")
20+
websiteDB = &website
21+
} else {
22+
website.Links = append(websiteDB.Links, page.Url)
2023
}
2124

22-
websiteDB.Links = append(websiteDB.Links, page.Url)
2325
websiteDB.ProminenceValue += 1
24-
av, err := attributevalue.MarshalMap(websiteDB)
26+
websiteDB.Links = append(websiteDB.Links, page.Url)
27+
website = *websiteDB
28+
29+
av, err := attributevalue.MarshalMap(website)
2530
if err != nil {
2631
return err
2732
}

formating/text.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,9 @@ func resolveURL(baseURL, relURL string) string {
4747
func removeDuplicates(links []string) []string {
4848
encountered := map[string]bool{}
4949
var result []string
50+
5051
for v := range links {
51-
if encountered[links[v]] == true {
52-
} else {
52+
if !encountered[links[v]] {
5353
encountered[links[v]] = true
5454
result = append(result, links[v])
5555
}

handler/flow.go

+53-49
Original file line number | Diff line number | Diff line change
@@ -3,70 +3,74 @@ package handler
33
import (
44
"context"
55
"log"
6+
"sync"
67
"webcrawler/formating"
78
"webcrawler/site"
89
)
910

1011
func (h *Server) Scan(ctx context.Context) {
1112

12-
//Wait group
13-
for i := 0; i < 5; i++ {
13+
for i := 0; i < 20; i++ {
1414

1515
links, err := h.Queue.Fetch(ctx)
1616

1717
log.Printf("Length of links %d", len(links))
18+
1819
if err != nil {
1920
log.Printf("fetching %v", err)
2021
}
2122

23+
wg := sync.WaitGroup{}
2224
for _, link := range links {
23-
//item, err := h.Db.FetchWebsite(link.Url)
24-
//if err == nil && reflect.DeepEqual(item, site.Website{}) {
25-
// h.Queue.Remove(ctx, *link.Handler)
26-
// log.Printf("Skipping")
27-
// continue
28-
//
29-
//}
30-
log.Printf("Scanning %s", link.Url)
31-
valid, err := site.FetchRobots(link.Url)
32-
if err != nil {
33-
log.Printf("fetching robots %v", err)
34-
}
35-
if !valid {
36-
log.Printf("Robots disallowed")
37-
h.Queue.Remove(ctx, *link.Handler)
38-
continue
39-
}
40-
41-
page, resp, err := site.NewPage(link.Url)
42-
if err != nil {
43-
log.Printf("creating page %v", err)
44-
}
45-
err = h.Db.AddPage(page)
46-
47-
if err != nil {
48-
log.Printf("adding page %v", err)
49-
}
50-
err = h.Queue.Remove(ctx, *link.Handler)
51-
if err != nil {
52-
log.Printf("failed to remove item from queue for url %s with error %s", link.Url, err)
53-
}
54-
55-
linksNew, err := formating.GetLinks(link.Url, resp)
56-
if err != nil {
57-
log.Printf("getting links %v", err)
58-
}
59-
website := site.NewWebsite(link.Url, linksNew)
60-
61-
err = h.Queue.BatchAdd(ctx, linksNew)
62-
if err != nil {
63-
log.Printf("adding links to queue %v", err)
64-
}
65-
66-
err = h.Db.UpdateWebsite(page, website)
67-
if err != nil {
68-
log.Printf("updating website %v", err)
69-
}
25+
26+
wg.Add(1)
27+
go func() {
28+
29+
defer wg.Done()
30+
valid, err := site.FetchRobots(link.Url)
31+
if err != nil {
32+
log.Printf("fetching robots %v", err)
33+
}
34+
if !valid {
35+
log.Printf("Robots disallowed")
36+
h.Queue.Remove(ctx, *link.Handler)
37+
return
38+
}
39+
40+
page, resp, err := site.NewPage(link.Url)
41+
if err != nil {
42+
return
43+
}
44+
if err := h.Db.AddPage(page); err != nil {
45+
log.Printf("adding page %v", err)
46+
return
47+
}
48+
49+
linksNew, err := formating.GetLinks(link.Url, resp)
50+
if err != nil {
51+
log.Printf("getting links %v", err)
52+
return
53+
}
54+
website := site.NewWebsite(link.Url, linksNew)
55+
56+
err = h.Queue.BatchAdd(ctx, linksNew)
57+
if err != nil {
58+
log.Printf("adding links to queue %v", err)
59+
return
60+
}
61+
62+
err = h.Db.UpdateWebsite(page, website)
63+
if err != nil {
64+
log.Printf("updating website %v", err)
65+
}
66+
67+
if err := h.Queue.Remove(ctx, *link.Handler); err != nil {
68+
log.Printf("removing link from queue %v", err)
69+
}
70+
71+
}()
72+
73+
wg.Wait()
7074
}
7175

7276
}

0 commit comments

Comments
 (0)