mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender-pwa.git
synced 2025-07-16 09:38:51 +02:00
203 lines
4.8 KiB
Go
203 lines
4.8 KiB
Go
package fetch
|
|
|
|
import (
|
|
"golang.org/x/net/html"
|
|
"strings"
|
|
)
|
|
|
|
// Find the first <table> element in the HTML document
|
|
func findFirstTable(node *html.Node) *html.Node {
|
|
if node.Type == html.ElementNode && node.Data == "table" {
|
|
return node
|
|
}
|
|
// Traverse child nodes recursively
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
found := findFirstTable(child)
|
|
if found != nil {
|
|
return found
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Find the first <span> element with the specified class attribute value
|
|
func findFirstSpanWithClass(node *html.Node, classValue string) *html.Node {
|
|
|
|
// Check if the current node is a <span> element with the specified class attribute value
|
|
if node.Type == html.ElementNode && node.Data == "span" {
|
|
if hasClassAttribute(node, classValue) {
|
|
return node
|
|
}
|
|
}
|
|
|
|
// Traverse child nodes recursively
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
found := findFirstSpanWithClass(child, classValue)
|
|
if found != nil {
|
|
return found
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Check if the specified element has the specified class attribute value
|
|
func hasClassAttribute(node *html.Node, classValue string) bool {
|
|
for _, attr := range node.Attr {
|
|
if attr.Key == "class" && strings.Contains(attr.Val, classValue) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Get Tables with days
|
|
func getEventTables(node *html.Node) [][]*html.Node {
|
|
var eventTables [][]*html.Node
|
|
tables := findTables(node)
|
|
// get all tables with events
|
|
for events := range tables {
|
|
rows := findTableRows(tables[events])
|
|
// check that a first row exists
|
|
if len(rows) > 0 {
|
|
rows = rows[1:]
|
|
eventTables = append(eventTables, rows)
|
|
}
|
|
}
|
|
return eventTables
|
|
}
|
|
|
|
// Get Tables with days
|
|
func getAllDayLabels(node *html.Node) []string {
|
|
paragraphs := findParagraphs(node)
|
|
var dayArray []string
|
|
|
|
for _, p := range paragraphs {
|
|
label := getDayLabel(p)
|
|
if label != "" {
|
|
dayArray = append(dayArray, label)
|
|
}
|
|
}
|
|
return dayArray
|
|
}
|
|
|
|
// Find all <p> elements in the HTML document
|
|
func findParagraphs(node *html.Node) []*html.Node {
|
|
var paragraphs []*html.Node
|
|
|
|
if node.Type == html.ElementNode && node.Data == "p" {
|
|
paragraphs = append(paragraphs, node)
|
|
}
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
paragraphs = append(paragraphs, findParagraphs(child)...)
|
|
}
|
|
|
|
return paragraphs
|
|
}
|
|
|
|
// Find all <tr> elements in <tbody>, excluding the first one
|
|
func findTableRows(node *html.Node) []*html.Node {
|
|
var tableRows []*html.Node
|
|
|
|
if node.Type == html.ElementNode && node.Data == "tbody" {
|
|
child := node.FirstChild
|
|
for child != nil {
|
|
if child.Type == html.ElementNode && child.Data == "tr" {
|
|
tableRows = append(tableRows, child)
|
|
}
|
|
child = child.NextSibling
|
|
}
|
|
}
|
|
|
|
// Traverse child nodes recursively
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
var tableRowElement = findTableRows(child)
|
|
if tableRowElement != nil {
|
|
tableRows = append(tableRows, tableRowElement...)
|
|
}
|
|
}
|
|
|
|
// check if tableRows is nil
|
|
if tableRows == nil {
|
|
return []*html.Node{}
|
|
} else {
|
|
return tableRows
|
|
}
|
|
}
|
|
|
|
// Find all <p> elements in the HTML document
|
|
func findTables(node *html.Node) []*html.Node {
|
|
var tables []*html.Node
|
|
|
|
if node.Type == html.ElementNode && node.Data == "table" {
|
|
tables = append(tables, node)
|
|
}
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
tables = append(tables, findDayTables(child)...)
|
|
}
|
|
|
|
return tables
|
|
}
|
|
|
|
// Find all <p> elements in the HTML document
|
|
func findDayTables(node *html.Node) []*html.Node {
|
|
var tables []*html.Node
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
tables = append(tables, findDayTables(child)...)
|
|
}
|
|
|
|
if node.Type == html.ElementNode && node.Data == "table" && hasClassAttribute(node, "spreadsheet") {
|
|
tables = append(tables, node)
|
|
}
|
|
|
|
return tables
|
|
}
|
|
|
|
// Get the text content of the specified node and its descendants
|
|
func getDayLabel(node *html.Node) string {
|
|
|
|
child := node.FirstChild
|
|
if child != nil {
|
|
if child.Type == html.ElementNode && child.Data == "span" {
|
|
if child.FirstChild != nil {
|
|
return child.FirstChild.Data
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Find all <td> elements in the current <tr>
|
|
func findTableData(node *html.Node) []*html.Node {
|
|
var tableData []*html.Node
|
|
|
|
if node.Type == html.ElementNode && node.Data == "tr" {
|
|
child := node.FirstChild
|
|
for child != nil {
|
|
if child.Type == html.ElementNode && child.Data == "td" {
|
|
tableData = append(tableData, child)
|
|
}
|
|
child = child.NextSibling
|
|
}
|
|
}
|
|
|
|
return tableData
|
|
}
|
|
|
|
// Get the text content of the specified node and its descendants
|
|
func getTextContent(node *html.Node) string {
|
|
var textContent string
|
|
|
|
if node.Type == html.TextNode {
|
|
textContent = node.Data
|
|
}
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
textContent += getTextContent(child)
|
|
}
|
|
|
|
return textContent
|
|
}
|