Skip to content

Commit 498a5df

Browse files
committed
find many attrs from one single query
1 parent d3e6d74 commit 498a5df

File tree

2 files changed

+110
-26
lines changed

2 files changed

+110
-26
lines changed

gluahttpscrape.go

+59-14
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,15 @@ func NewHttpScrapeModule() *httpScrapeModule {
1818

1919
func (h *httpScrapeModule) Loader(L *lua.LState) int {
2020
mod := L.SetFuncs(L.NewTable(), map[string]lua.LGFunction{
21-
"find_attr_by_class": h.findAttrByClass,
22-
"find_text_by_class": h.findTextByClass,
23-
"find_attr_by_id": h.findAttrById,
24-
"find_text_by_id": h.findTextById,
25-
"find_attr_by_tag": h.findAttrByTag,
26-
"find_text_by_tag": h.findTextByTag,
21+
"find_attr_by_class": h.findAttrByClass,
22+
"find_attr_by_id": h.findAttrById,
23+
"find_attr_by_tag": h.findAttrByTag,
24+
"find_attrs_by_class": h.findAttrsByClass,
25+
"find_attrs_by_id": h.findAttrsById,
26+
"find_attrs_by_tag": h.findAttrsByTag,
27+
"find_text_by_id": h.findTextById,
28+
"find_text_by_class": h.findTextByClass,
29+
"find_text_by_tag": h.findTextByTag,
2730
})
2831
L.Push(mod)
2932
return 1
@@ -62,6 +65,36 @@ func (h *httpScrapeModule) findAttr(selector string, L *lua.LState) int {
6265
return 2
6366
}
6467

68+
func (h *httpScrapeModule) findAttrs(selector string, L *lua.LState) int {
69+
body := L.ToString(1)
70+
attrsCount := L.ToInt(2)
71+
attrs := []string{}
72+
for i := 1; i <= attrsCount; i++ {
73+
attrNow := L.ToString(2 + i)
74+
attrs = append(attrs, attrNow)
75+
}
76+
query := L.ToString(2 + attrsCount + 1)
77+
L.Pop(2 + attrsCount + 1)
78+
root, err := html.Parse(strings.NewReader(body))
79+
if err != nil {
80+
L.Push(lua.LNil)
81+
L.Push(lua.LString(err.Error()))
82+
return 2
83+
}
84+
results := scrape.FindAll(root, getMatcher(selector, query))
85+
attrResults := []map[string]string{}
86+
for _, result := range results {
87+
attrResults = append(attrResults, make(map[string]string))
88+
idx := len(attrResults) - 1
89+
for _, attr := range attrs {
90+
attrResults[idx][attr] = scrape.Attr(result, attr)
91+
}
92+
}
93+
L.Push(luar.New(L, attrResults))
94+
L.Push(lua.LNil)
95+
return 2
96+
}
97+
6598
func (h *httpScrapeModule) findText(selector string, L *lua.LState) int {
6699
body := L.ToString(1)
67100
query := L.ToString(2)
@@ -86,22 +119,34 @@ func (h *httpScrapeModule) findAttrByClass(L *lua.LState) int {
86119
return h.findAttr("class", L)
87120
}
88121

89-
func (h *httpScrapeModule) findTextByClass(L *lua.LState) int {
90-
return h.findText("class", L)
91-
}
92-
93122
// findAttrById backs the Lua function find_attr_by_id by forwarding the call
// to findAttr with the "id" selector kind.
func (h *httpScrapeModule) findAttrById(L *lua.LState) int {
	const selector = "id"
	return h.findAttr(selector, L)
}
96125

97-
func (h *httpScrapeModule) findTextById(L *lua.LState) int {
98-
return h.findText("id", L)
99-
}
100-
101126
// findAttrByTag backs the Lua function find_attr_by_tag by forwarding the call
// to findAttr with the "tag" selector kind.
func (h *httpScrapeModule) findAttrByTag(L *lua.LState) int {
	const selector = "tag"
	return h.findAttr(selector, L)
}
104129

130+
func (h *httpScrapeModule) findAttrsByClass(L *lua.LState) int {
131+
return h.findAttrs("class", L)
132+
}
133+
134+
func (h *httpScrapeModule) findAttrsById(L *lua.LState) int {
135+
return h.findAttrs("id", L)
136+
}
137+
138+
func (h *httpScrapeModule) findAttrsByTag(L *lua.LState) int {
139+
return h.findAttrs("tag", L)
140+
}
141+
142+
func (h *httpScrapeModule) findTextByClass(L *lua.LState) int {
143+
return h.findText("class", L)
144+
}
145+
146+
func (h *httpScrapeModule) findTextById(L *lua.LState) int {
147+
return h.findText("id", L)
148+
}
149+
105150
// findTextByTag backs the Lua function find_text_by_tag by forwarding the call
// to findText with the "tag" selector kind.
func (h *httpScrapeModule) findTextByTag(L *lua.LState) int {
	const selector = "tag"
	return h.findText(selector, L)
}

gluahttpscrape_test.go

+51-12
Original file line numberDiff line numberDiff line change
@@ -20,45 +20,84 @@ func TestFindAttrByClass(t *testing.T) {
2020
}
2121
}
2222

23-
func TestFindTextByClass(t *testing.T) {
23+
func TestFindAttrById(t *testing.T) {
2424
if err := evalLua(t, `
2525
local scrape = require("scrape")
26-
response, error = scrape.find_text_by_class("`+httpBody+`", "testclass")
27-
assert_equal("My First Heading", response[1])
28-
assert_equal("My First Heading", response[2])
26+
response, error = scrape.find_attr_by_id("`+httpBody+`", "href", "testid")
27+
assert_equal("testhref", response[1])
28+
assert_equal("testhref2", response[2])
2929
`); err != nil {
3030
t.Errorf("Failed to evaluate script: %s", err)
3131
}
3232
}
3333

34-
func TestFindAttrById(t *testing.T) {
34+
func TestFindAttrByTag(t *testing.T) {
3535
if err := evalLua(t, `
3636
local scrape = require("scrape")
37-
response, error = scrape.find_attr_by_id("`+httpBody+`", "href", "testid")
37+
response, error = scrape.find_attr_by_tag("`+httpBody+`", "href", "h1")
3838
assert_equal("testhref", response[1])
3939
assert_equal("testhref2", response[2])
4040
`); err != nil {
4141
t.Errorf("Failed to evaluate script: %s", err)
4242
}
4343
}
4444

45-
func TestFindTextById(t *testing.T) {
45+
func TestFindAttrsByClass(t *testing.T) {
4646
if err := evalLua(t, `
4747
local scrape = require("scrape")
48-
response, error = scrape.find_text_by_id("`+httpBody+`", "testid")
48+
response, error = scrape.find_attrs_by_class("`+httpBody+`", 2, "id", "href", "testclass")
49+
assert_equal("testhref", response[1]["href"])
50+
assert_equal("testid", response[1]["id"])
51+
assert_equal("testid", response[2]["id"])
52+
assert_equal("testhref2", response[2]["href"])
53+
`); err != nil {
54+
t.Errorf("Failed to evaluate script: %s", err)
55+
}
56+
}
57+
58+
func TestFindAttrsById(t *testing.T) {
59+
if err := evalLua(t, `
60+
local scrape = require("scrape")
61+
response, error = scrape.find_attrs_by_id("`+httpBody+`", 2, "id", "href", "testid")
62+
assert_equal("testhref", response[1]["href"])
63+
assert_equal("testid", response[1]["id"])
64+
assert_equal("testid", response[2]["id"])
65+
assert_equal("testhref2", response[2]["href"])
66+
`); err != nil {
67+
t.Errorf("Failed to evaluate script: %s", err)
68+
}
69+
}
70+
71+
func TestFindAttrsByTag(t *testing.T) {
72+
if err := evalLua(t, `
73+
local scrape = require("scrape")
74+
response, error = scrape.find_attrs_by_tag("`+httpBody+`", 2, "id", "href", "h1")
75+
assert_equal("testhref", response[1]["href"])
76+
assert_equal("testid", response[1]["id"])
77+
assert_equal("testid", response[2]["id"])
78+
assert_equal("testhref2", response[2]["href"])
79+
`); err != nil {
80+
t.Errorf("Failed to evaluate script: %s", err)
81+
}
82+
}
83+
84+
func TestFindTextByClass(t *testing.T) {
85+
if err := evalLua(t, `
86+
local scrape = require("scrape")
87+
response, error = scrape.find_text_by_class("`+httpBody+`", "testclass")
4988
assert_equal("My First Heading", response[1])
5089
assert_equal("My First Heading", response[2])
5190
`); err != nil {
5291
t.Errorf("Failed to evaluate script: %s", err)
5392
}
5493
}
5594

56-
func TestFindAttrByTag(t *testing.T) {
95+
func TestFindTextById(t *testing.T) {
5796
if err := evalLua(t, `
5897
local scrape = require("scrape")
59-
response, error = scrape.find_attr_by_tag("`+httpBody+`", "href", "h1")
60-
assert_equal("testhref", response[1])
61-
assert_equal("testhref2", response[2])
98+
response, error = scrape.find_text_by_id("`+httpBody+`", "testid")
99+
assert_equal("My First Heading", response[1])
100+
assert_equal("My First Heading", response[2])
62101
`); err != nil {
63102
t.Errorf("Failed to evaluate script: %s", err)
64103
}

0 commit comments

Comments
 (0)