diff --git a/browser.go b/browser.go
index 65832c7..fdf7b48 100644
--- a/browser.go
+++ b/browser.go
@@ -6,7 +6,7 @@ import (
 	"strings"
 	"sync"
 
-	"github.com/tsukinoko-kun/lwb/util"
+	"git.frankmayer.dev/tsukinoko-kun/lwb/util"
 	"golang.org/x/net/html"
 )
 
@@ -16,7 +16,7 @@ type Browser struct {
 	http      *http.Client
 	document  *html.Node
 	cookies   *cookiejar.Jar
-	mut       sync.RWMutex
+	mut       *sync.RWMutex
 }
 
 func NewBrowser(userAgent string) (*Browser, error) {
@@ -30,9 +30,12 @@ func NewBrowser(userAgent string) (*Browser, error) {
 	}
 
 	b := &Browser{
+		url:       "about:blank",
 		userAgent: userAgent,
-		cookies:   cj,
 		http:      hc,
+		document:  nil,
+		cookies:   cj,
+		mut:       &sync.RWMutex{},
 	}
 
 	return b, nil
@@ -59,6 +62,35 @@ func (b *Browser) Get(url string) error {
 	return nil
 }
 
+// FindElementByContent returns an Element wrapping the first text node visited
+// whose whitespace-trimmed content equals the trimmed textContent. It returns
+// nil when the browser holds no document or when nothing matches.
+func (b *Browser) FindElementByContent(textContent string) *Element {
+	b.mut.RLock()
+	defer b.mut.RUnlock()
+
+	// NewBrowser leaves document nil, so there is nothing to search yet.
+	if b.document == nil {
+		return nil
+	}
+
+	want := strings.TrimSpace(textContent)
+
+	var nodes util.Stack[*html.Node] = []*html.Node{b.document}
+	for !nodes.Empty() {
+		node := nodes.Pop()
+		// Data is the tag name on element nodes, so only text nodes are compared.
+		if node.Type == html.TextNode && strings.TrimSpace(node.Data) == want {
+			return &Element{node: node, browser: b}
+		}
+
+		for c := node.FirstChild; c != nil; c = c.NextSibling {
+			nodes.Push(c)
+		}
+	}
+	return nil
+}
+
 func (b *Browser) GetElementById(id string) *Element {
 	b.mut.RLock()
 	defer b.mut.RUnlock()
@@ -73,7 +105,7 @@ func (b *Browser) GetElementById(id string) *Element {
 			if a.Val != id {
 				break
 			}
-			return &Element{node: node}
+			return &Element{node: node, browser: b}
 		}
 
 		for c := node.FirstChild; c != nil; c = c.NextSibling {
@@ -98,7 +130,7 @@ func (b *Browser) GetElementsByClassName(class string) []*Element {
 			}
 			for _, c := range classNames(a.Val) {
 				if c == class {
-					elements = append(elements, &Element{node: node})
+					elements = append(elements, &Element{node: node, browser: b})
 					break attr_loop
 				}
 			}
diff --git a/element.go b/element.go
index 9515152..02c9a1a 100644
--- a/element.go
+++ b/element.go
@@ -30,3 +30,14 @@ func (self *Element) Click() error {
 
 	return ErrorNotClickable
 }
+
+// TextContent returns the whitespace-trimmed Data of the wrapped node, or ""
+// when called on a nil *Element.
+// NOTE(review): on element nodes html.Node.Data holds the tag name, not the
+// text of the children — confirm this is intended for non-text nodes.
+func (self *Element) TextContent() string {
+	if self == nil {
+		return ""
+	}
+	return strings.TrimSpace(self.node.Data)
+}
diff --git a/example/example_test.go b/example/example_test.go
new file mode 100644
index 0000000..e30a514
--- /dev/null
+++ b/example/example_test.go
@@ -0,0 +1,29 @@
+package example_test
+
+import (
+	"testing"
+
+	"git.frankmayer.dev/tsukinoko-kun/lwb"
+)
+
+// TestExample loads https://example.com/ through a Browser, looks up the
+// "More information..." text and clicks it.
+func TestExample(t *testing.T) {
+	b, err := lwb.NewBrowser(lwb.BuildFirefoxUserAgent("135.0"))
+	if err != nil {
+		t.Fatal("failed to start virtual browser", err)
+	}
+
+	if err := b.Get("https://example.com/"); err != nil {
+		t.Fatal("failed to get example.com", err)
+	}
+
+	moreInfoEl := b.FindElementByContent("More information...")
+	if moreInfoEl == nil {
+		t.Fatal("element not found")
+	}
+
+	if err := moreInfoEl.Click(); err != nil {
+		t.Fatal("click failed", err)
+	}
+}
diff --git a/example/go.mod b/example/go.mod
new file mode 100644
index 0000000..746f950
--- /dev/null
+++ b/example/go.mod
@@ -0,0 +1,9 @@
+module example
+
+go 1.23.5
+
+replace git.frankmayer.dev/tsukinoko-kun/lwb v0.0.0 => ../
+
+require git.frankmayer.dev/tsukinoko-kun/lwb v0.0.0
+
+require golang.org/x/net v0.34.0 // indirect
diff --git a/example/go.sum b/example/go.sum
new file mode 100644
index 0000000..4db452a
--- /dev/null
+++ b/example/go.sum
@@ -0,0 +1,2 @@
+golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
+golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
diff --git a/go.mod b/go.mod
index 37e9120..562e991 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module github.com/tsukinoko-kun/lwb
+module git.frankmayer.dev/tsukinoko-kun/lwb
 
 go 1.23
 
diff --git a/useragent.go b/useragent.go
new file mode 100644
index 0000000..0c37e6d
--- /dev/null
+++ b/useragent.go
@@ -0,0 +1,74 @@
+package lwb
+
+import (
+	"fmt"
+	"runtime"
+)
+
+// getSystemInfo returns the platform part of a User-Agent string (the text
+// inside the first parentheses), derived from runtime.GOOS and runtime.GOARCH.
+func getSystemInfo() string {
+	goos := runtime.GOOS
+	arch := runtime.GOARCH
+
+	switch goos {
+	case "windows":
+		// We assume Windows NT 10.0 for simplicity.
+		switch arch {
+		case "amd64":
+			return "Windows NT 10.0; Win64; x64"
+		case "386":
+			return "Windows NT 10.0"
+		default:
+			return "Windows NT 10.0; " + arch
+		}
+	case "linux":
+		// Many Linux browsers use "X11" in the UA string.
+		switch arch {
+		case "amd64":
+			return "X11; Linux x86_64"
+		case "386":
+			return "X11; Linux i686"
+		default:
+			return "X11; Linux " + arch
+		}
+	case "darwin":
+		// Go does not expose the macOS version, so a fixed one is reported.
+		// Mainstream browsers on Apple Silicon still identify as an Intel Mac,
+		// so arm64 shares the amd64 token instead of a made-up "ARM" one.
+		switch arch {
+		case "amd64", "arm64":
+			return "Macintosh; Intel Mac OS X 10_15_7"
+		default:
+			return "Macintosh; Mac OS X"
+		}
+	default:
+		// For other operating systems, just output the raw GOOS and GOARCH.
+		return fmt.Sprintf("%s; %s", goos, arch)
+	}
+}
+
+// BuildFirefoxUserAgent builds a Firefox UA string using the given version.
+// Example output on Windows:
+// "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0"
+func BuildFirefoxUserAgent(firefoxVersion string) string {
+	systemInfo := getSystemInfo()
+	return fmt.Sprintf(
+		"Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s",
+		systemInfo,
+		firefoxVersion,
+		firefoxVersion,
+	)
+}
+
+// BuildChromeUserAgent builds a Chrome UA string using the given version.
+// Example output on Windows:
+// "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.6943.53 Safari/537.36"
+func BuildChromeUserAgent(chromeVersion string) string {
+	systemInfo := getSystemInfo()
+	return fmt.Sprintf(
+		"Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36",
+		systemInfo,
+		chromeVersion,
+	)
+}