forked from earthboundkid/flowmatic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
manage_tasks_example_test.go
102 lines (93 loc) · 2.08 KB
/
manage_tasks_example_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package flowmatic_test
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"slices"
"strings"
"testing/fstest"
"github.com/carlmjohnson/flowmatic"
)
// ExampleManageTasks crawls a small test site whose pages link to one
// another (including a link back to the root). A task function fetches a page
// and returns the links found in it; a manager function records the results,
// decides which links still need to be fetched, and retries failed fetches.
func ExampleManageTasks() {
	// Example site to crawl with recursive links
	srv := httptest.NewServer(http.FileServer(http.FS(fstest.MapFS{
		"index.html": &fstest.MapFile{
			Data: []byte("/a.html"),
		},
		"a.html": &fstest.MapFile{
			Data: []byte("/b1.html\n/b2.html"),
		},
		"b1.html": &fstest.MapFile{
			Data: []byte("/c.html"),
		},
		"b2.html": &fstest.MapFile{
			Data: []byte("/c.html"),
		},
		"c.html": &fstest.MapFile{
			Data: []byte("/"),
		},
	})))
	defer srv.Close()
	cl := srv.Client()

	// Task fetches a page and extracts the URLs (one per line of the body)
	task := func(u string) ([]string, error) {
		res, err := cl.Get(srv.URL + u)
		if err != nil {
			return nil, err
		}
		defer res.Body.Close()
		// Get reports no error for non-2xx responses, so check the status
		// explicitly; otherwise the text of an error page would be treated
		// as a list of links.
		if res.StatusCode != http.StatusOK {
			return nil, fmt.Errorf("fetching %q: unexpected status %s", u, res.Status)
		}
		body, err := io.ReadAll(res.Body)
		if err != nil {
			return nil, err
		}
		return strings.Split(string(body), "\n"), nil
	}

	// Manager keeps track of which pages have been visited and the results graph
	const root = "/"
	// tried counts how many times each page has been queued. The root is
	// seeded with 1 because it is queued as the initial task below; without
	// this, the link from /c.html back to / would cause a second fetch.
	tried := map[string]int{root: 1}
	results := map[string][]string{}
	manager := func(req string, urls []string, err error) ([]string, bool) {
		if err != nil {
			// If there's a problem fetching a page, try three times
			if tried[req] < 3 {
				tried[req]++
				return []string{req}, true
			}
			// Give up: returning false tells ManageTasks to stop.
			return nil, false
		}
		results[req] = urls
		// Only queue links that have not been queued before.
		var newurls []string
		for _, u := range urls {
			if tried[u] == 0 {
				newurls = append(newurls, u)
				tried[u]++
			}
		}
		return newurls, true
	}

	// Process the tasks with as many workers as GOMAXPROCS
	flowmatic.ManageTasks(flowmatic.MaxProcs, task, manager, root)

	// Map iteration order is random, so sort the keys for stable output.
	keys := make([]string, 0, len(results))
	for key := range results {
		keys = append(keys, key)
	}
	slices.Sort(keys)
	for _, key := range keys {
		fmt.Println(key, "links to:")
		for _, v := range results[key] {
			// Println already inserts a space between operands, so the
			// dash carries no trailing space of its own.
			fmt.Println("-", v)
		}
	}
	// Output:
	// / links to:
	// - /a.html
	// /a.html links to:
	// - /b1.html
	// - /b2.html
	// /b1.html links to:
	// - /c.html
	// /b2.html links to:
	// - /c.html
	// /c.html links to:
	// - /
}