commit f9082aa509d8bf82963c29e07a250dfee8ce0be4
Author: Frank Mayer
Date:   Fri Dec 13 13:11:02 2024 +0100

    init

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e43b0f9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..aa2ff83
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,26 @@
+MIT NON-AI License
+
+Copyright (c) 2024, Frank Mayer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of the software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions.
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+In addition, the following restrictions apply:
+
+1. The Software and any modifications made to it may not be used for the purpose of training or improving machine learning algorithms,
+including but not limited to artificial intelligence, natural language processing, or data mining. This condition applies to any derivatives,
+modifications, or updates based on the Software code. Any usage of the Software in an AI-training dataset is considered a breach of this License.
+
+2. The Software may not be included in any dataset used for training or improving machine learning algorithms,
+including but not limited to artificial intelligence, natural language processing, or data mining.
+
+3. Any person or organization found to be in violation of these restrictions will be subject to legal action and may be held liable
+for any damages resulting from such use.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..29d382b
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module github.com/tsukinoko-kun/lwb
+
+go 1.23.2
+
+require golang.org/x/net v0.32.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..bd400ed
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
+golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..02e3dc4
--- /dev/null
+++ b/main.go
@@ -0,0 +1,150 @@
+// Package lwb fetches HTML documents over HTTP and provides DOM-style element
+// lookups on the parsed tree.
+package lwb
+
+import (
+	"net/http"
+	"slices"
+	"strings"
+	"sync"
+
+	"github.com/tsukinoko-kun/lwb/util"
+	"golang.org/x/net/html"
+)
+
+type (
+	// Browser holds the most recently loaded document. mut guards url and
+	// document so lookups can run while another goroutine calls Get.
+	Browser struct {
+		url       string
+		userAgent string
+		document  *html.Node
+		mut       sync.RWMutex
+	}
+
+	// Element wraps a single node of a parsed document.
+	Element struct {
+		node *html.Node
+	}
+)
+
+// NewBrowser returns a Browser that sends userAgent as its User-Agent header.
+// An empty userAgent leaves the net/http default in place.
+func NewBrowser(userAgent string) *Browser {
+	return &Browser{userAgent: userAgent}
+}
+
+// Get fetches url and parses the response body as HTML. On success the parsed
+// tree becomes the current document; on failure the previously loaded document
+// and URL are left untouched.
+func (b *Browser) Get(url string) error {
+	req, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		return err
+	}
+	if b.userAgent != "" {
+		req.Header.Set("User-Agent", b.userAgent)
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		return err
+	}
+
+	// Take the write lock only to publish the result, so readers are not
+	// blocked for the duration of the network round trip.
+	b.mut.Lock()
+	defer b.mut.Unlock()
+	b.url = url
+	b.document = doc
+	return nil
+}
+
+// GetElementById returns the first node, in document order, whose id attribute
+// equals id. It returns nil when nothing matches or no document is loaded.
+func (b *Browser) GetElementById(id string) *Element {
+	b.mut.RLock()
+	defer b.mut.RUnlock()
+
+	var found *Element
+	walk(b.document, func(n *html.Node) bool {
+		if v, ok := attrValue(n, "id"); ok && v == id {
+			found = &Element{node: n}
+			return false
+		}
+		return true
+	})
+	return found
+}
+
+// GetElementsByClassName returns, in document order, every node whose class
+// attribute lists class. The result is nil when nothing matches or no document
+// is loaded.
+func (b *Browser) GetElementsByClassName(class string) []*Element {
+	b.mut.RLock()
+	defer b.mut.RUnlock()
+
+	var elements []*Element
+	walk(b.document, func(n *html.Node) bool {
+		if hasClass(n, class) {
+			elements = append(elements, &Element{node: n})
+		}
+		return true
+	})
+	return elements
+}
+
+// walk visits root and its descendants depth-first in document order, calling
+// visit on each node until visit returns false. A nil root is a no-op.
+func walk(root *html.Node, visit func(*html.Node) bool) {
+	if root == nil {
+		return
+	}
+
+	nodes := util.Stack[*html.Node]{root}
+	for !nodes.Empty() {
+		n := nodes.Pop()
+		if !visit(n) {
+			return
+		}
+		// Push children last-to-first so the first child is popped next,
+		// which yields document order.
+		for c := n.LastChild; c != nil; c = c.PrevSibling {
+			nodes.Push(c)
+		}
+	}
+}
+
+// attrValue returns the value of the first attribute of n whose lowercased key
+// equals key, and whether such an attribute exists.
+func attrValue(n *html.Node, key string) (string, bool) {
+	for _, a := range n.Attr {
+		if strings.ToLower(a.Key) == key {
+			return a.Val, true
+		}
+	}
+	return "", false
+}
+
+// hasClass reports whether any class attribute of n lists class.
+func hasClass(n *html.Node, class string) bool {
+	for _, a := range n.Attr {
+		if strings.ToLower(a.Key) == "class" && slices.Contains(classNames(a.Val), class) {
+			return true
+		}
+	}
+	return false
+}
+
+// classNames splits a class attribute value into its individual names. It uses
+// strings.Fields so tabs, newlines and runs of spaces all separate names and
+// never produce empty ones.
+func classNames(class string) []string {
+	return strings.Fields(class)
+}
diff --git a/util/stack.go b/util/stack.go
new file mode 100644
index 0000000..1d77ee8
--- /dev/null
+++ b/util/stack.go
@@ -0,0 +1,33 @@
+// Package util provides a generic stack.
+package util
+
+// Stack is a LIFO stack backed by a slice. The zero value is an empty stack.
+type Stack[T any] []T
+
+// Push adds v to the top of the stack.
+func (s *Stack[T]) Push(v T) {
+	*s = append(*s, v)
+}
+
+// Pop removes and returns the top element. It panics if the stack is empty.
+func (s *Stack[T]) Pop() T {
+	last := len(*s) - 1
+	v := (*s)[last]
+	// Clear the vacated slot so the backing array does not keep the popped
+	// value reachable.
+	var zero T
+	(*s)[last] = zero
+	*s = (*s)[:last]
+	return v
+}
+
+// Peek returns the top element without removing it. It panics if the stack
+// is empty.
+func (s Stack[T]) Peek() T {
+	return s[len(s)-1]
+}
+
+// Empty reports whether the stack holds no elements.
+func (s Stack[T]) Empty() bool {
+	return len(s) == 0
+}