lwb/browser.go

140 lines
2.5 KiB
Go
Raw Normal View History

2024-12-13 13:11:02 +01:00
package lwb
import (
"net/http"
2024-12-13 21:33:27 +01:00
"net/http/cookiejar"
2024-12-13 13:11:02 +01:00
"strings"
"sync"
2025-02-06 19:31:32 +01:00
"git.frankmayer.dev/tsukinoko-kun/lwb/util"
2024-12-13 13:11:02 +01:00
"golang.org/x/net/html"
)
2024-12-13 21:33:27 +01:00
type Browser struct {
url string
userAgent string
http *http.Client
document *html.Node
cookies *cookiejar.Jar
2025-02-06 19:31:32 +01:00
mut *sync.RWMutex
2024-12-13 21:33:27 +01:00
}
func NewBrowser(userAgent string) (*Browser, error) {
cj, err := cookiejar.New(nil)
if err != nil {
return nil, err
2024-12-13 13:11:02 +01:00
}
2024-12-13 21:33:27 +01:00
hc := &http.Client{
Jar: cj,
2024-12-13 13:11:02 +01:00
}
b := &Browser{
2025-02-06 19:31:32 +01:00
url: "about:blank",
2024-12-13 13:11:02 +01:00
userAgent: userAgent,
2024-12-13 21:33:27 +01:00
http: hc,
2025-02-06 19:31:32 +01:00
document: nil,
cookies: cj,
mut: &sync.RWMutex{},
2024-12-13 13:11:02 +01:00
}
2024-12-13 21:33:27 +01:00
return b, nil
2024-12-13 13:11:02 +01:00
}
func (b *Browser) Get(url string) error {
b.mut.Lock()
defer b.mut.Unlock()
b.url = url
2024-12-13 21:33:27 +01:00
resp, err := b.http.Get(url)
2024-12-13 13:11:02 +01:00
if err != nil {
return err
}
2024-12-13 21:33:27 +01:00
defer resp.Body.Close()
2024-12-13 13:11:02 +01:00
2024-12-13 21:33:27 +01:00
// Parse the HTML document
b.document, err = html.Parse(resp.Body)
2024-12-13 13:11:02 +01:00
if err != nil {
return err
}
return nil
}
2025-02-06 19:31:32 +01:00
// FindElementByContent searches the current document for a node whose Data,
// with surrounding whitespace trimmed, equals textContent trimmed the same
// way. It returns an Element wrapping the first such node the traversal
// encounters, or nil when nothing matches or no document has been loaded.
//
// The comparison is applied to every node in the tree, not only to text
// nodes. NOTE(review): in golang.org/x/net/html, Data holds the tag name for
// element nodes, so an element whose tag name equals textContent would also
// match — confirm this is intended.
//
// Nodes are visited in the order util.Stack hands them back, which is not
// necessarily document order.
func (b *Browser) FindElementByContent(textContent string) *Element {
	b.mut.RLock()
	defer b.mut.RUnlock()

	// Before the first successful Get there is no tree to walk; without this
	// guard a nil root would be popped and dereferenced below.
	if b.document == nil {
		return nil
	}

	want := strings.TrimSpace(textContent)
	var nodes util.Stack[*html.Node] = []*html.Node{b.document}
	for !nodes.Empty() {
		node := nodes.Pop()
		if strings.TrimSpace(node.Data) == want {
			return &Element{node: node, browser: b}
		}
		for c := node.FirstChild; c != nil; c = c.NextSibling {
			nodes.Push(c)
		}
	}
	return nil
}
2024-12-13 13:11:02 +01:00
func (b *Browser) GetElementById(id string) *Element {
b.mut.RLock()
defer b.mut.RUnlock()
var nodes util.Stack[*html.Node] = []*html.Node{b.document}
for !nodes.Empty() {
node := nodes.Pop()
for _, a := range node.Attr {
if strings.ToLower(a.Key) != "id" {
continue
}
if a.Val != id {
break
}
2025-02-06 19:31:32 +01:00
return &Element{node: node, browser: b}
2024-12-13 13:11:02 +01:00
}
for c := node.FirstChild; c != nil; c = c.NextSibling {
nodes.Push(c)
}
}
return nil
}
func (b *Browser) GetElementsByClassName(class string) []*Element {
b.mut.RLock()
defer b.mut.RUnlock()
var elements []*Element
var nodes util.Stack[*html.Node] = []*html.Node{b.document}
for !nodes.Empty() {
node := nodes.Pop()
attr_loop:
for _, a := range node.Attr {
if strings.ToLower(a.Key) != "class" {
continue
}
for _, c := range classNames(a.Val) {
if c == class {
2025-02-06 19:31:32 +01:00
elements = append(elements, &Element{node: node, browser: b})
2024-12-13 13:11:02 +01:00
break attr_loop
}
}
}
for c := node.FirstChild; c != nil; c = c.NextSibling {
nodes.Push(c)
}
}
return nil
}
// classNames splits the value of an HTML class attribute into its individual
// class names.
//
// Names are separated by runs of ASCII whitespace (space, tab, line feed,
// form feed, carriage return). Leading, trailing and repeated separators
// produce no empty names, so an empty or all-whitespace value yields an empty
// slice.
func classNames(class string) []string {
	return strings.FieldsFunc(class, func(r rune) bool {
		switch r {
		case ' ', '\t', '\n', '\f', '\r':
			return true
		}
		return false
	})
}