@@ -3,70 +3,74 @@ package handler
import (
	"context"
	"log"
+	"sync"
	"webcrawler/formating"
	"webcrawler/site"
)

func (h *Server) Scan(ctx context.Context) {

-	//Wait group
-	for i := 0; i < 5; i++ {
+	for i := 0; i < 20; i++ {

		links, err := h.Queue.Fetch(ctx)

		log.Printf("Length of links %d", len(links))
+
		if err != nil {
			log.Printf("fetching %v", err)
		}

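+		// wg tracks the goroutines spawned for this batch of links.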
+		wg := sync.WaitGroup{}
		for _, link := range links {
-			//item, err := h.Db.FetchWebsite(link.Url)
-			//if err == nil && reflect.DeepEqual(item, site.Website{}) {
-			//	h.Queue.Remove(ctx, *link.Handler)
-			//	log.Printf("Skipping")
-			//	continue
-			//
-			//}
-			log.Printf("Scanning %s", link.Url)
-			valid, err := site.FetchRobots(link.Url)
-			if err != nil {
-				log.Printf("fetching robots %v", err)
-			}
-			if !valid {
-				log.Printf("Robots disallowed")
-				h.Queue.Remove(ctx, *link.Handler)
-				continue
-			}
-
-			page, resp, err := site.NewPage(link.Url)
-			if err != nil {
-				log.Printf("creating page %v", err)
-			}
-			err = h.Db.AddPage(page)
-
-			if err != nil {
-				log.Printf("adding page %v", err)
-			}
-			err = h.Queue.Remove(ctx, *link.Handler)
-			if err != nil {
-				log.Printf("failed to remove item from queue for url %s with error %s", link.Url, err)
-			}
-
-			linksNew, err := formating.GetLinks(link.Url, resp)
-			if err != nil {
-				log.Printf("getting links %v", err)
-			}
-			website := site.NewWebsite(link.Url, linksNew)
-
-			err = h.Queue.BatchAdd(ctx, linksNew)
-			if err != nil {
-				log.Printf("adding links to queue %v", err)
-			}
-
-			err = h.Db.UpdateWebsite(page, website)
-			if err != nil {
-				log.Printf("updating website %v", err)
-			}
+
+			wg.Add(1)
+			link := link // capture the loop variable so each goroutine scans its own link (needed before Go 1.22)
+			go func() {
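+				// Each goroutine processes one link: robots check, page fetch,
+				// persistence, and queueing of newly discovered links.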
+
+				defer wg.Done()
+				valid, err := site.FetchRobots(link.Url)
+				if err != nil {
+					log.Printf("fetching robots %v", err)
+				}
+				if !valid {
+					log.Printf("Robots disallowed")
+					h.Queue.Remove(ctx, *link.Handler)
+					return
+				}
+
+				page, resp, err := site.NewPage(link.Url)
+				if err != nil {
+					log.Printf("creating page %v", err)
+					return
+				}
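+				// Store the scanned page, then extract and enqueue its outbound links.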
+				if err := h.Db.AddPage(page); err != nil {
+					log.Printf("adding page %v", err)
+					return
+				}
+
+				linksNew, err := formating.GetLinks(link.Url, resp)
+				if err != nil {
+					log.Printf("getting links %v", err)
+					return
+				}
+				website := site.NewWebsite(link.Url, linksNew)
+
+				err = h.Queue.BatchAdd(ctx, linksNew)
+				if err != nil {
+					log.Printf("adding links to queue %v", err)
+					return
+				}
+
+				err = h.Db.UpdateWebsite(page, website)
+				if err != nil {
+					log.Printf("updating website %v", err)
+				}
+
+				if err := h.Queue.Remove(ctx, *link.Handler); err != nil {
+					log.Printf("removing link from queue %v", err)
+				}
+
+			}()
+
		}
+
+		// Wait for this batch of goroutines to finish before fetching the
+		// next set of links from the queue.
+		wg.Wait()

	}