This commit is contained in:
Frank Mayer 2025-02-06 19:31:32 +01:00
parent a2a1e7b63c
commit b4bcc7406a
Signed by: tsukinoko-kun
GPG Key ID: 427B3E61E69C2E51
7 changed files with 146 additions and 6 deletions

@ -6,7 +6,7 @@ import (
"strings" "strings"
"sync" "sync"
"github.com/tsukinoko-kun/lwb/util" "git.frankmayer.dev/tsukinoko-kun/lwb/util"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
@ -16,7 +16,7 @@ type Browser struct {
http *http.Client http *http.Client
document *html.Node document *html.Node
cookies *cookiejar.Jar cookies *cookiejar.Jar
mut sync.RWMutex mut *sync.RWMutex
} }
func NewBrowser(userAgent string) (*Browser, error) { func NewBrowser(userAgent string) (*Browser, error) {
@ -30,9 +30,12 @@ func NewBrowser(userAgent string) (*Browser, error) {
} }
b := &Browser{ b := &Browser{
url: "about:blank",
userAgent: userAgent, userAgent: userAgent,
cookies: cj,
http: hc, http: hc,
document: nil,
cookies: cj,
mut: &sync.RWMutex{},
} }
return b, nil return b, nil
@ -59,6 +62,26 @@ func (b *Browser) Get(url string) error {
return nil return nil
} }
// FindElementByContent searches the current document for a node whose Data,
// with surrounding whitespace trimmed, equals the trimmed textContent.
//
// The tree is walked with an explicit stack while the browser's read lock is
// held. Children are pushed in sibling order and popped from the stack, so
// the first match found is not necessarily the first one in document order.
//
// It returns nil when no document is set or when no node matches.
//
// NOTE(review): html.Node.Data holds the tag name for element nodes, so a
// search string such as "div" would also match an element by its tag name.
// Confirm whether matching should be restricted to text nodes.
func (b *Browser) FindElementByContent(textContent string) *Element {
	b.mut.RLock()
	defer b.mut.RUnlock()

	// NewBrowser initialises document to nil; without this guard the loop
	// below would pop a nil node and dereference it.
	if b.document == nil {
		return nil
	}

	textContentTrimed := strings.TrimSpace(textContent)

	var nodes util.Stack[*html.Node] = []*html.Node{b.document}
	for !nodes.Empty() {
		node := nodes.Pop()
		if strings.TrimSpace(node.Data) == textContentTrimed {
			return &Element{node: node, browser: b}
		}
		for c := node.FirstChild; c != nil; c = c.NextSibling {
			nodes.Push(c)
		}
	}

	return nil
}
func (b *Browser) GetElementById(id string) *Element { func (b *Browser) GetElementById(id string) *Element {
b.mut.RLock() b.mut.RLock()
defer b.mut.RUnlock() defer b.mut.RUnlock()
@ -73,7 +96,7 @@ func (b *Browser) GetElementById(id string) *Element {
if a.Val != id { if a.Val != id {
break break
} }
return &Element{node: node} return &Element{node: node, browser: b}
} }
for c := node.FirstChild; c != nil; c = c.NextSibling { for c := node.FirstChild; c != nil; c = c.NextSibling {
@ -98,7 +121,7 @@ func (b *Browser) GetElementsByClassName(class string) []*Element {
} }
for _, c := range classNames(a.Val) { for _, c := range classNames(a.Val) {
if c == class { if c == class {
elements = append(elements, &Element{node: node}) elements = append(elements, &Element{node: node, browser: b})
break attr_loop break attr_loop
} }
} }

@ -30,3 +30,10 @@ func (self *Element) Click() error {
return ErrorNotClickable return ErrorNotClickable
} }
// TextContent returns the Data of the element's underlying node with leading
// and trailing whitespace removed. Calling it on a nil *Element is safe and
// yields the empty string.
func (self *Element) TextContent() string {
	if self != nil {
		return strings.TrimSpace(self.node.Data)
	}
	return ""
}

27
example/example_test.go Normal file

@ -0,0 +1,27 @@
package example_test
import (
"testing"
"git.frankmayer.dev/tsukinoko-kun/lwb"
)
// TestExample exercises the library end to end: it creates a browser with a
// Firefox user agent, fetches https://example.com/, looks up the element whose
// content is "More information..." and clicks it.
//
// The test performs a real HTTP request, so it needs network access.
func TestExample(t *testing.T) {
	b, err := lwb.NewBrowser(lwb.BuildFirefoxUserAgent("135.0"))
	if err != nil {
		t.Fatal("failed to start virtual browser", err)
	}

	if err := b.Get("https://example.com/"); err != nil {
		// The message names the host that is actually requested above.
		t.Fatal("failed to get example.com", err)
	}

	moreInfoEl := b.FindElementByContent("More information...")
	if moreInfoEl == nil {
		t.Fatal("element not found")
	}

	if err := moreInfoEl.Click(); err != nil {
		t.Fatal("click failed", err)
	}
}

9
example/go.mod Normal file

@ -0,0 +1,9 @@
module example
go 1.23.5
replace git.frankmayer.dev/tsukinoko-kun/lwb v0.0.0 => ../
require git.frankmayer.dev/tsukinoko-kun/lwb v0.0.0
require golang.org/x/net v0.34.0 // indirect

2
example/go.sum Normal file

@ -0,0 +1,2 @@
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=

2
go.mod

@ -1,4 +1,4 @@
module github.com/tsukinoko-kun/lwb module git.frankmayer.dev/tsukinoko-kun/lwb
go 1.23 go 1.23

72
useragent.go Normal file

@ -0,0 +1,72 @@
package lwb
import (
"fmt"
"runtime"
)
// getSystemInfo builds the platform fragment of a browser-style user agent
// string from the operating system and architecture this binary was compiled
// for (runtime.GOOS and runtime.GOARCH).
func getSystemInfo() string {
	goos, goarch := runtime.GOOS, runtime.GOARCH

	switch goos {
	case "windows":
		// Windows NT 10.0 is assumed for simplicity.
		switch goarch {
		case "amd64":
			return "Windows NT 10.0; Win64; x64"
		case "386":
			return "Windows NT 10.0"
		}
		return "Windows NT 10.0; " + goarch

	case "linux":
		// Many Linux browsers put "X11" in front of the platform name.
		switch goarch {
		case "amd64":
			return "X11; Linux x86_64"
		case "386":
			return "X11; Linux i686"
		}
		return "X11; Linux " + goarch

	case "darwin":
		// Go does not expose the macOS version, so a fixed, reasonably
		// recent version number is used instead.
		switch goarch {
		case "amd64":
			return "Macintosh; Intel Mac OS X 10_15_7"
		case "arm64":
			return "Macintosh; ARM Mac OS X 11_0"
		}
		return "Macintosh; Mac OS X"
	}

	// Any other operating system: fall back to the raw GOOS and GOARCH values.
	return fmt.Sprintf("%s; %s", goos, goarch)
}
// BuildFirefoxUserAgent returns a Firefox-style user agent string for the
// given Firefox version. The version is inserted twice: once as the "rv:"
// token and once after "Firefox/". The platform part comes from getSystemInfo.
//
// Example output on Windows:
// "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0"
func BuildFirefoxUserAgent(firefoxVersion string) string {
	const layout = "Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s"
	return fmt.Sprintf(layout, getSystemInfo(), firefoxVersion, firefoxVersion)
}
// BuildChromeUserAgent returns a Chrome-style user agent string for the given
// Chrome version. The platform part comes from getSystemInfo; the version is
// placed after "Chrome/".
//
// Example output on Windows:
// "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.6943.53 Safari/537.36"
func BuildChromeUserAgent(chromeVersion string) string {
	const layout = "Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36"
	return fmt.Sprintf(layout, getSystemInfo(), chromeVersion)
}