2024-12-13 13:11:02 +01:00
|
|
|
package lwb
|
|
|
|
|
|
|
|
import (
|
|
|
|
"net/http"
|
2024-12-13 21:33:27 +01:00
|
|
|
"net/http/cookiejar"
|
2024-12-13 13:11:02 +01:00
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/tsukinoko-kun/lwb/util"
|
|
|
|
"golang.org/x/net/html"
|
|
|
|
)
|
|
|
|
|
2024-12-13 21:33:27 +01:00
|
|
|
type Browser struct {
|
|
|
|
url string
|
|
|
|
userAgent string
|
|
|
|
http *http.Client
|
|
|
|
document *html.Node
|
|
|
|
cookies *cookiejar.Jar
|
|
|
|
mut sync.RWMutex
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewBrowser(userAgent string) (*Browser, error) {
|
|
|
|
cj, err := cookiejar.New(nil)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2024-12-13 13:11:02 +01:00
|
|
|
}
|
|
|
|
|
2024-12-13 21:33:27 +01:00
|
|
|
hc := &http.Client{
|
|
|
|
Jar: cj,
|
2024-12-13 13:11:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
b := &Browser{
|
|
|
|
userAgent: userAgent,
|
2024-12-13 21:33:27 +01:00
|
|
|
cookies: cj,
|
|
|
|
http: hc,
|
2024-12-13 13:11:02 +01:00
|
|
|
}
|
|
|
|
|
2024-12-13 21:33:27 +01:00
|
|
|
return b, nil
|
2024-12-13 13:11:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (b *Browser) Get(url string) error {
|
|
|
|
b.mut.Lock()
|
|
|
|
defer b.mut.Unlock()
|
|
|
|
|
|
|
|
b.url = url
|
|
|
|
|
2024-12-13 21:33:27 +01:00
|
|
|
resp, err := b.http.Get(url)
|
2024-12-13 13:11:02 +01:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2024-12-13 21:33:27 +01:00
|
|
|
defer resp.Body.Close()
|
2024-12-13 13:11:02 +01:00
|
|
|
|
2024-12-13 21:33:27 +01:00
|
|
|
// Parse the HTML document
|
|
|
|
b.document, err = html.Parse(resp.Body)
|
2024-12-13 13:11:02 +01:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *Browser) GetElementById(id string) *Element {
|
|
|
|
b.mut.RLock()
|
|
|
|
defer b.mut.RUnlock()
|
|
|
|
|
|
|
|
var nodes util.Stack[*html.Node] = []*html.Node{b.document}
|
|
|
|
for !nodes.Empty() {
|
|
|
|
node := nodes.Pop()
|
|
|
|
for _, a := range node.Attr {
|
|
|
|
if strings.ToLower(a.Key) != "id" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if a.Val != id {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
return &Element{node: node}
|
|
|
|
}
|
|
|
|
|
|
|
|
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
nodes.Push(c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *Browser) GetElementsByClassName(class string) []*Element {
|
|
|
|
b.mut.RLock()
|
|
|
|
defer b.mut.RUnlock()
|
|
|
|
|
|
|
|
var elements []*Element
|
|
|
|
var nodes util.Stack[*html.Node] = []*html.Node{b.document}
|
|
|
|
for !nodes.Empty() {
|
|
|
|
node := nodes.Pop()
|
|
|
|
attr_loop:
|
|
|
|
for _, a := range node.Attr {
|
|
|
|
if strings.ToLower(a.Key) != "class" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
for _, c := range classNames(a.Val) {
|
|
|
|
if c == class {
|
|
|
|
elements = append(elements, &Element{node: node})
|
|
|
|
break attr_loop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
nodes.Push(c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// classNames splits the value of an HTML class attribute into its individual
// class names.
//
// Names are separated by runs of ASCII whitespace (space, tab, line feed,
// form feed, carriage return). Leading, trailing and repeated separators
// produce no empty names, so an empty or all-whitespace value yields an
// empty result.
func classNames(class string) []string {
	return strings.FieldsFunc(class, func(r rune) bool {
		switch r {
		case ' ', '\t', '\n', '\f', '\r':
			return true
		}
		return false
	})
}
|