parent
a2a1e7b63c
commit
b4bcc7406a
33
browser.go
33
browser.go
@ -6,7 +6,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/tsukinoko-kun/lwb/util"
|
"git.frankmayer.dev/tsukinoko-kun/lwb/util"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -16,7 +16,7 @@ type Browser struct {
|
|||||||
http *http.Client
|
http *http.Client
|
||||||
document *html.Node
|
document *html.Node
|
||||||
cookies *cookiejar.Jar
|
cookies *cookiejar.Jar
|
||||||
mut sync.RWMutex
|
mut *sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewBrowser(userAgent string) (*Browser, error) {
|
func NewBrowser(userAgent string) (*Browser, error) {
|
||||||
@ -30,9 +30,12 @@ func NewBrowser(userAgent string) (*Browser, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
b := &Browser{
|
b := &Browser{
|
||||||
|
url: "about:blank",
|
||||||
userAgent: userAgent,
|
userAgent: userAgent,
|
||||||
cookies: cj,
|
|
||||||
http: hc,
|
http: hc,
|
||||||
|
document: nil,
|
||||||
|
cookies: cj,
|
||||||
|
mut: &sync.RWMutex{},
|
||||||
}
|
}
|
||||||
|
|
||||||
return b, nil
|
return b, nil
|
||||||
@ -59,6 +62,26 @@ func (b *Browser) Get(url string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *Browser) FindElementByContent(textContent string) *Element {
|
||||||
|
b.mut.RLock()
|
||||||
|
defer b.mut.RUnlock()
|
||||||
|
|
||||||
|
textContentTrimed := strings.TrimSpace(textContent)
|
||||||
|
|
||||||
|
var nodes util.Stack[*html.Node] = []*html.Node{b.document}
|
||||||
|
for !nodes.Empty() {
|
||||||
|
node := nodes.Pop()
|
||||||
|
if strings.TrimSpace(node.Data) == textContentTrimed {
|
||||||
|
return &Element{node: node, browser: b}
|
||||||
|
}
|
||||||
|
|
||||||
|
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
nodes.Push(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (b *Browser) GetElementById(id string) *Element {
|
func (b *Browser) GetElementById(id string) *Element {
|
||||||
b.mut.RLock()
|
b.mut.RLock()
|
||||||
defer b.mut.RUnlock()
|
defer b.mut.RUnlock()
|
||||||
@ -73,7 +96,7 @@ func (b *Browser) GetElementById(id string) *Element {
|
|||||||
if a.Val != id {
|
if a.Val != id {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
return &Element{node: node}
|
return &Element{node: node, browser: b}
|
||||||
}
|
}
|
||||||
|
|
||||||
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
||||||
@ -98,7 +121,7 @@ func (b *Browser) GetElementsByClassName(class string) []*Element {
|
|||||||
}
|
}
|
||||||
for _, c := range classNames(a.Val) {
|
for _, c := range classNames(a.Val) {
|
||||||
if c == class {
|
if c == class {
|
||||||
elements = append(elements, &Element{node: node})
|
elements = append(elements, &Element{node: node, browser: b})
|
||||||
break attr_loop
|
break attr_loop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -30,3 +30,10 @@ func (self *Element) Click() error {
|
|||||||
|
|
||||||
return ErrorNotClickable
|
return ErrorNotClickable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self *Element) TextContent() string {
|
||||||
|
if self == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(self.node.Data)
|
||||||
|
}
|
||||||
|
27
example/example_test.go
Normal file
27
example/example_test.go
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
package example_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.frankmayer.dev/tsukinoko-kun/lwb"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestExample(t *testing.T) {
|
||||||
|
b, err := lwb.NewBrowser(lwb.BuildFirefoxUserAgent("135.0"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal("failed to start virtual browser", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := b.Get("https://example.com/"); err != nil {
|
||||||
|
t.Fatal("failed to get github.com", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
moreInfoEl := b.FindElementByContent("More information...")
|
||||||
|
if moreInfoEl == nil {
|
||||||
|
t.Fatal("element not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := moreInfoEl.Click(); err != nil {
|
||||||
|
t.Fatal("click failed", err)
|
||||||
|
}
|
||||||
|
}
|
9
example/go.mod
Normal file
9
example/go.mod
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
module example
|
||||||
|
|
||||||
|
go 1.23.5
|
||||||
|
|
||||||
|
replace git.frankmayer.dev/tsukinoko-kun/lwb v0.0.0 => ../
|
||||||
|
|
||||||
|
require git.frankmayer.dev/tsukinoko-kun/lwb v0.0.0
|
||||||
|
|
||||||
|
require golang.org/x/net v0.34.0 // indirect
|
2
example/go.sum
Normal file
2
example/go.sum
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
|
||||||
|
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
|
2
go.mod
2
go.mod
@ -1,4 +1,4 @@
|
|||||||
module github.com/tsukinoko-kun/lwb
|
module git.frankmayer.dev/tsukinoko-kun/lwb
|
||||||
|
|
||||||
go 1.23
|
go 1.23
|
||||||
|
|
||||||
|
72
useragent.go
Normal file
72
useragent.go
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
package lwb
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"runtime"
|
||||||
|
)
|
||||||
|
|
||||||
|
// getSystemInfo returns a string describing the OS and architecture of the
// running program (runtime.GOOS / runtime.GOARCH) in a style similar to what
// many browsers use in their UA string.
func getSystemInfo() string {
	return systemInfoFor(runtime.GOOS, runtime.GOARCH)
}

// systemInfoFor builds the UA platform token for an explicit GOOS/GOARCH
// pair. It is kept separate from getSystemInfo so the mapping does not depend
// on the machine it runs on.
func systemInfoFor(goos, goarch string) string {
	switch goos {
	case "windows":
		// We assume Windows NT 10.0 for simplicity.
		switch goarch {
		case "amd64":
			return "Windows NT 10.0; Win64; x64"
		case "386":
			return "Windows NT 10.0"
		default:
			return "Windows NT 10.0; " + goarch
		}
	case "linux":
		// Many Linux browsers use "X11" in the UA string, followed by the
		// machine name rather than Go's architecture name.
		switch goarch {
		case "amd64":
			return "X11; Linux x86_64"
		case "386":
			return "X11; Linux i686"
		case "arm64":
			// The machine name for 64-bit ARM is "aarch64", not "arm64".
			return "X11; Linux aarch64"
		default:
			return "X11; Linux " + goarch
		}
	case "darwin":
		// For macOS we have to invent a version number since Go doesn't
		// provide one. Mainstream browsers keep the "Intel" token even on
		// Apple Silicon, so both supported architectures share one string.
		switch goarch {
		case "amd64", "arm64":
			return "Macintosh; Intel Mac OS X 10_15_7"
		default:
			return "Macintosh; Mac OS X"
		}
	default:
		// For other operating systems, just output the raw GOOS and GOARCH.
		return goos + "; " + goarch
	}
}
|
||||||
|
|
||||||
|
// BuildFirefoxUserAgent builds a Firefox UA string using the given version.
|
||||||
|
// Example output on Windows:
|
||||||
|
// "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0"
|
||||||
|
func BuildFirefoxUserAgent(firefoxVersion string) string {
|
||||||
|
systemInfo := getSystemInfo()
|
||||||
|
return fmt.Sprintf(
|
||||||
|
"Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s",
|
||||||
|
systemInfo,
|
||||||
|
firefoxVersion,
|
||||||
|
firefoxVersion,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildChromeUserAgent builds a Chrome UA string using the given version.
|
||||||
|
// Example output on Windows:
|
||||||
|
// "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.6943.53 Safari/537.36"
|
||||||
|
func BuildChromeUserAgent(chromeVersion string) string {
|
||||||
|
systemInfo := getSystemInfo()
|
||||||
|
return fmt.Sprintf(
|
||||||
|
"Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36",
|
||||||
|
systemInfo,
|
||||||
|
chromeVersion,
|
||||||
|
)
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user