feat:#52 added new fetcher from new endpoint

This commit is contained in:
masterElmar
2023-11-30 00:00:38 +01:00
parent d887927645
commit 921a2c7a28
12 changed files with 610 additions and 63 deletions

View File

@@ -0,0 +1,296 @@
package v1
import (
"fmt"
"htwkalender/model"
"htwkalender/service/date"
"htwkalender/service/db"
"htwkalender/service/fetch"
"net/http"
"regexp"
"strconv"
"strings"
"time"
"github.com/google/uuid"
"github.com/labstack/echo/v5"
"github.com/pocketbase/pocketbase"
"github.com/pocketbase/pocketbase/apis"
"github.com/pocketbase/pocketbase/tools/types"
"golang.org/x/net/html"
)
func GetSeminarEvents(c echo.Context, app *pocketbase.PocketBase) error {
seminarGroupsLabel := db.GetAllCourses(app)
seminarGroups := GetSeminarGroupsEventsFromHTML(seminarGroupsLabel)
seminarGroups = ClearEmptySeminarGroups(seminarGroups)
seminarGroups = ReplaceEmptyEventNames(seminarGroups)
savedRecords, dbError := db.SaveSeminarGroupEvents(seminarGroups, app)
if dbError != nil {
return apis.NewNotFoundError("Events could not be saved", dbError.Error())
}
return c.JSON(http.StatusOK, savedRecords)
}
func ReplaceEmptyEventNames(groups []model.SeminarGroup) []model.SeminarGroup {
for i, group := range groups {
for j, event := range group.Events {
if event.Name == "" {
groups[i].Events[j].Name = "Sonderveranstaltungen"
}
}
}
return groups
}
func ClearEmptySeminarGroups(seminarGroups []model.SeminarGroup) []model.SeminarGroup {
var newSeminarGroups []model.SeminarGroup
for _, seminarGroup := range seminarGroups {
if len(seminarGroup.Events) > 0 && seminarGroup.Course != "" {
newSeminarGroups = append(newSeminarGroups, seminarGroup)
}
}
return newSeminarGroups
}
func GetSeminarGroupsEventsFromHTML(seminarGroupsLabel []string) []model.SeminarGroup {
var seminarGroups []model.SeminarGroup
for _, seminarGroupLabel := range seminarGroupsLabel {
ssUrl := "https://stundenplan.htwk-leipzig.de/" + string("ss") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65"
result, getError := fetch.GetHTML(ssUrl)
if getError == nil {
seminarGroup := parseSeminarGroup(result)
seminarGroups = append(seminarGroups, seminarGroup)
}
wsUrl := "https://stundenplan.htwk-leipzig.de/" + string("ws") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65"
result, getError = fetch.GetHTML(wsUrl)
if getError == nil {
seminarGroup := parseSeminarGroup(result)
seminarGroups = append(seminarGroups, seminarGroup)
}
}
return seminarGroups
}
func splitEventType(events []model.Event) []model.Event {
for i, event := range events {
matched, _ := regexp.Match("^([VPS])([wp])$", []byte(event.EventType))
if matched {
eventType := event.EventType
event.EventType = eventType[0:1]
event.Compulsory = eventType[1:2]
events[i] = event
}
}
return events
}
func parseSeminarGroup(result string) model.SeminarGroup {
doc, err := html.Parse(strings.NewReader(result))
if err != nil {
fmt.Printf("Error occurred while parsing the HTML document: %s\n", err.Error())
return model.SeminarGroup{}
}
table := findFirstTable(doc)
eventTables := getEventTables(doc)
allDayLabels := getAllDayLabels(doc)
if eventTables == nil || allDayLabels == nil {
return model.SeminarGroup{}
}
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
events := splitEventsBySingleWeek(splitEventsByWeekVal)
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
course := findFirstSpanWithClass(table, "header-2-0-1").FirstChild.Data
semester, year := extractSemesterAndYear(semesterString)
events = convertWeeksToDates(events, semester, year)
events = generateUUIDs(events, course)
events = splitEventType(events)
var seminarGroup = model.SeminarGroup{
University: findFirstSpanWithClass(table, "header-1-0-0").FirstChild.Data,
Course: course,
Events: events,
}
return seminarGroup
}
func generateUUIDs(events []model.Event, course string) []model.Event {
for i, event := range events {
// generate a hash value from the event name, course and semester
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+course))
events[i].UUID = hash.String()
}
return events
}
// convertWeeksToDates converts the week and year to a date
// The date is calculated based on the week and the year
// The time is unset and 23:00 is used as default
// Additionally the semester is added to the event
func convertWeeksToDates(events []model.Event, semester string, year string) []model.Event {
var newEvents []model.Event
eventYear, _ := strconv.Atoi(year)
// for each event we need to calculate the start and end date based on the week and the year
for _, event := range events {
eventWeek, _ := strconv.Atoi(event.Week)
eventDay, _ := date.GetDateFromWeekNumber(eventYear, eventWeek, event.Day)
start := replaceTimeForDate(eventDay, event.Start.Time())
end := replaceTimeForDate(eventDay, event.End.Time())
//Check if end is before start
if end.Before(start) {
end = end.AddDate(0, 0, 1)
}
newEvent := event
newEvent.Start, _ = types.ParseDateTime(start.In(time.UTC))
newEvent.End, _ = types.ParseDateTime(end.In(time.UTC))
newEvent.Semester = semester
newEvents = append(newEvents, newEvent)
}
return newEvents
}
// replaceTimeForDate replaces hour, minute, second, nsec for the selected date
func replaceTimeForDate(date time.Time, replacementTime time.Time) time.Time {
return time.Date(date.Year(), date.Month(), date.Day(), replacementTime.Hour(), replacementTime.Minute(), replacementTime.Second(), replacementTime.Nanosecond(), date.Location())
}
func extractSemesterAndYear(semesterString string) (string, string) {
winterPattern := "Wintersemester"
summerPattern := "Sommersemester"
winterMatch := strings.Contains(semesterString, winterPattern)
summerMatch := strings.Contains(semesterString, summerPattern)
semester := ""
semesterShortcut := ""
if winterMatch {
semester = "Wintersemester"
semesterShortcut = "ws"
} else if summerMatch {
semester = "Sommersemester"
semesterShortcut = "ss"
} else {
return "", ""
}
yearPattern := `\d{4}`
combinedPattern := semester + `\s` + yearPattern
re := regexp.MustCompile(combinedPattern)
match := re.FindString(semesterString)
year := ""
if match != "" {
reYear := regexp.MustCompile(yearPattern)
year = reYear.FindString(match)
}
return semesterShortcut, year
}
func toEvents(tables [][]*html.Node, days []string) []model.Event {
var events []model.Event
for table := range tables {
for row := range tables[table] {
tableData := findTableData(tables[table][row])
if len(tableData) > 0 {
start, _ := types.ParseDateTime(createTimeFromHourAndMinuteString(getTextContent(tableData[1])))
end, _ := types.ParseDateTime(createTimeFromHourAndMinuteString(getTextContent(tableData[2])))
events = append(events, model.Event{
Day: days[table],
Week: getTextContent(tableData[0]),
Start: start,
End: end,
Name: getTextContent(tableData[3]),
EventType: getTextContent(tableData[4]),
Prof: getTextContent(tableData[5]),
Rooms: getTextContent(tableData[6]),
Notes: getTextContent(tableData[7]),
BookedAt: getTextContent(tableData[8]),
})
}
}
}
return events
}
// createEventFromTableData should create an event from the table data
// tableTime represents Hour and Minute like HH:MM
// tableDate returns a Time
func createTimeFromHourAndMinuteString(tableTime string) time.Time {
timeParts := strings.Split(tableTime, ":")
hour, _ := strconv.Atoi(timeParts[0])
minute, _ := strconv.Atoi(timeParts[1])
return time.Date(0, 0, 0, hour, minute, 0, 0, time.UTC)
}
func splitEventsByWeek(events []model.Event) []model.Event {
var newEvents []model.Event
for _, event := range events {
weeks := strings.Split(event.Week, ",")
for _, week := range weeks {
newEvent := event
newEvent.Week = strings.TrimSpace(week)
newEvents = append(newEvents, newEvent)
}
}
return newEvents
}
func splitEventsBySingleWeek(events []model.Event) []model.Event {
var newEvents []model.Event
for _, event := range events {
if strings.Contains(event.Week, "-") {
weeks := splitWeekRange(event.Week)
for _, week := range weeks {
newEvent := event
newEvent.Week = week
newEvents = append(newEvents, newEvent)
}
} else {
newEvents = append(newEvents, event)
}
}
return newEvents
}
func splitWeekRange(weekRange string) []string {
parts := strings.Split(weekRange, "-")
if len(parts) != 2 {
return nil // Invalid format
}
start, errStart := strconv.Atoi(strings.TrimSpace(parts[0]))
end, errEnd := strconv.Atoi(strings.TrimSpace(parts[1]))
if errStart != nil || errEnd != nil {
return nil // Error converting to integers
}
var weeks []string
for i := start; i <= end; i++ {
weeks = append(weeks, strconv.Itoa(i))
}
return weeks
}

View File

@@ -0,0 +1,496 @@
package v1
import (
"fmt"
"github.com/pocketbase/pocketbase/tools/types"
"htwkalender/model"
"reflect"
"testing"
"time"
)
func Test_extractSemesterAndYear(t *testing.T) {
type args struct {
semesterString string
}
tests := []struct {
name string
args args
want string
want1 string
}{
{
name: "Test 1",
args: args{
semesterString: "Wintersemester 2023/24 (Planungszeitraum 01.09.2023 bis 03.03.2024)",
},
want: "ws",
want1: "2023",
},
{
name: "Test 2",
args: args{
semesterString: "Sommersemester 2023 (Planungszeitraum 06.03. bis 31.08.2023)",
},
want: "ss",
want1: "2023",
},
{
name: "Test 3",
args: args{
semesterString: "Sommersemester 2010 (Planungszeitraum 06.03. bis 31.08.2023)",
},
want: "ss",
want1: "2010",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, got1 := extractSemesterAndYear(tt.args.semesterString)
if got != tt.want {
t.Errorf("extractSemesterAndYear() got = %v, want %v", got, tt.want)
}
if got1 != tt.want1 {
t.Errorf("extractSemesterAndYear() got1 = %v, want %v", got1, tt.want1)
}
})
}
}
func Test_replaceEmptyEventNames(t *testing.T) {
type args struct {
groups []model.SeminarGroup
}
tests := []struct {
name string
args args
want []model.SeminarGroup
}{
{
name: "Test 1",
args: args{
groups: []model.SeminarGroup{
{
Events: []model.Event{
{
Name: "Test",
},
},
},
},
},
want: []model.SeminarGroup{
{
Events: []model.Event{
{
Name: "Test",
},
},
},
},
},
{
name: "Test 1",
args: args{
groups: []model.SeminarGroup{
{
Events: []model.Event{
{
Name: "",
},
},
},
},
},
want: []model.SeminarGroup{
{
Events: []model.Event{
{
Name: "Sonderveranstaltungen",
},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := ReplaceEmptyEventNames(tt.args.groups); !reflect.DeepEqual(got, tt.want) {
t.Errorf("ReplaceEmptyEventNames() = %v, want %v", got, tt.want)
}
})
}
}
func Test_splitEventType(t *testing.T) {
type args struct {
events []model.Event
}
tests := []struct {
name string
args args
want []model.Event
}{
{
name: "Test 1",
args: args{
events: []model.Event{
{
EventType: "V",
},
},
},
want: []model.Event{
{
EventType: "V",
Compulsory: "",
},
},
},
{
name: "Test 2",
args: args{
events: []model.Event{
{
EventType: "Vw",
},
},
},
want: []model.Event{
{
EventType: "V",
Compulsory: "w",
},
},
},
{
name: "Test 3",
args: args{
events: []model.Event{
{
EventType: "Sperr",
},
},
},
want: []model.Event{
{
EventType: "Sperr",
Compulsory: "",
},
},
},
{
name: "Test 4",
args: args{
events: []model.Event{
{
EventType: "Sperr",
},
{
EventType: "Vw",
},
},
},
want: []model.Event{
{
EventType: "Sperr",
Compulsory: "",
},
{
EventType: "V",
Compulsory: "w",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := splitEventType(tt.args.events); !reflect.DeepEqual(got, tt.want) {
t.Errorf("splitEventType() = %v, want %v", got, tt.want)
}
})
}
}
func Test_generateUUIDs(t *testing.T) {
type args struct {
events []model.Event
course string
}
tests := []struct {
name string
args args
want []model.Event
}{
{
name: "Test 1",
args: args{
events: []model.Event{
{
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
},
},
course: "21BIB-2a",
},
want: []model.Event{
{
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
UUID: "3720afdc-10c7-5b72-9489-cffb70cb0c13",
},
},
},
{
name: "Test 2",
args: args{
events: []model.Event{
{
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
},
},
course: "21BIB-2b",
},
want: []model.Event{
{
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
UUID: "81083480-bcf1-5452-af84-bb27d79282d8",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := generateUUIDs(tt.args.events, tt.args.course); !reflect.DeepEqual(got, tt.want) {
t.Errorf("generateUUIDs() = %v, want %v", got, tt.want)
}
})
}
}
func Test_createTimeFromHourAndMinuteString(t *testing.T) {
europeTime, _ := time.LoadLocation("Europe/Berlin")
type args struct {
tableTime string
}
tests := []struct {
name string
args args
want time.Time
}{
{
name: "Test 1",
args: args{
tableTime: "08:00",
},
want: time.Date(0, 0, 0, 8, 0, 0, 0, europeTime),
},
{
name: "Test 2",
args: args{
tableTime: "08:15",
},
want: time.Date(0, 0, 0, 8, 15, 0, 0, europeTime),
},
{
name: "Test 3",
args: args{
tableTime: "08:30",
},
want: time.Date(0, 0, 0, 8, 30, 0, 0, europeTime),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := createTimeFromHourAndMinuteString(tt.args.tableTime); !reflect.DeepEqual(got, tt.want) {
t.Errorf("createTimeFromHourAndMinuteString() = %v, want %v", got, tt.want)
}
})
}
}
func Test_replaceTimeInDate(t *testing.T) {
type args struct {
date time.Time
time time.Time
}
tests := []struct {
name string
args args
want time.Time
}{
{
name: "Test 1",
args: args{
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
time: time.Date(0, 0, 0, 8, 0, 0, 0, time.UTC),
},
want: time.Date(2021, 1, 1, 8, 0, 0, 0, time.UTC),
},
{
name: "Test 2",
args: args{
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
time: time.Date(0, 0, 0, 8, 15, 0, 0, time.UTC),
},
want: time.Date(2021, 1, 1, 8, 15, 0, 0, time.UTC),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := replaceTimeForDate(tt.args.date, tt.args.time); !reflect.DeepEqual(got, tt.want) {
t.Errorf("addTimeToDate() = %v, want %v", got, tt.want)
}
})
}
}
func Test_convertWeeksToDates(t *testing.T) {
type args struct {
events []model.Event
semester string
year string
}
returnDateTime := func(date time.Time) types.DateTime {
dateTime, err := types.ParseDateTime(date)
if err != nil {
fmt.Println(err)
}
return dateTime
}
tests := []struct {
name string
args args
want []model.Event
}{
{
name: "Test Wintertime",
args: args{
events: []model.Event{
{
Week: "1",
Day: "Montag",
Start: returnDateTime(time.Date(0, 0, 0, 7, 30, 0, 0, time.UTC)),
End: returnDateTime(time.Date(0, 0, 0, 9, 0, 0, 0, time.UTC)),
},
},
semester: "ws",
year: "2021",
},
want: []model.Event{
{
Week: "1",
Day: "Montag",
Start: returnDateTime(time.Date(2021, 1, 4, 6, 30, 0, 0, time.UTC)),
End: returnDateTime(time.Date(2021, 1, 4, 8, 0, 0, 0, time.UTC)),
Semester: "ws",
},
},
},
{
name: "Test Summertime",
args: args{
events: []model.Event{
{
Week: "30",
Day: "Donnerstag",
Start: returnDateTime(time.Date(0, 0, 0, 7, 30, 0, 0, time.UTC)),
End: returnDateTime(time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC)),
},
},
semester: "ws",
year: "2023",
},
want: []model.Event{
{
Week: "30",
Day: "Donnerstag",
Start: returnDateTime(time.Date(2023, 7, 27, 5, 30, 0, 0, time.UTC)),
End: returnDateTime(time.Date(2023, 7, 27, 22, 0, 0, 0, time.UTC)),
Semester: "ws",
},
},
},
{
name: "Test NextDay",
args: args{
events: []model.Event{
{
Week: "45",
Day: "Donnerstag",
Start: returnDateTime(time.Date(0, 0, 0, 7, 30, 0, 0, time.UTC)),
End: returnDateTime(time.Date(0, 0, 0, 4, 0, 0, 0, time.UTC)),
},
},
semester: "ws",
year: "2023",
},
want: []model.Event{
{
Week: "45",
Day: "Donnerstag",
Start: returnDateTime(time.Date(2023, 11, 9, 6, 30, 0, 0, time.UTC)),
End: returnDateTime(time.Date(2023, 11, 10, 3, 0, 0, 0, time.UTC)),
Semester: "ws",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := convertWeeksToDates(tt.args.events, tt.args.semester, tt.args.year); !reflect.DeepEqual(got, tt.want) {
t.Errorf("convertWeeksToDates() = %v, want %v", got, tt.want)
}
})
}
}
func Test_replaceTimeForDate(t *testing.T) {
type args struct {
date time.Time
replacementTime time.Time
}
tests := []struct {
name string
args args
want time.Time
}{
{
name: "Replace Hour",
args: args{
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
replacementTime: time.Date(0, 0, 0, 8, 0, 0, 0, time.UTC),
},
want: time.Date(2021, 1, 1, 8, 0, 0, 0, time.UTC),
},
{
name: "Replace Hour and Minute",
args: args{
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
replacementTime: time.Date(0, 0, 0, 8, 15, 0, 0, time.UTC),
},
want: time.Date(2021, 1, 1, 8, 15, 0, 0, time.UTC),
},
{
name: "Replace Hour and Minute",
args: args{
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
replacementTime: time.Date(0, 0, 0, 8, 30, 0, 0, time.UTC),
},
want: time.Date(2021, 1, 1, 8, 30, 0, 0, time.UTC),
},
{
name: "Replace Hour and Minute without Year, Month, Day",
args: args{
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
replacementTime: time.Date(2023, 10, 3, 8, 30, 0, 0, time.UTC),
},
want: time.Date(2021, 1, 1, 8, 30, 0, 0, time.UTC),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := replaceTimeForDate(tt.args.date, tt.args.replacementTime); !reflect.DeepEqual(got, tt.want) {
t.Errorf("replaceTimeForDate() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -0,0 +1,114 @@
package v1
import (
"encoding/xml"
"fmt"
"github.com/labstack/echo/v5"
"github.com/pocketbase/pocketbase"
"github.com/pocketbase/pocketbase/apis"
"github.com/pocketbase/pocketbase/models"
"htwkalender/model"
"htwkalender/service/db"
"io"
"net/http"
)
func getSeminarHTML(semester string) (string, error) {
url := "https://stundenplan.htwk-leipzig.de/stundenplan/xml/public/semgrp_" + semester + ".xml"
// Send GET request
response, err := http.Get(url)
if err != nil {
fmt.Printf("Error occurred while making the request: %s\n", err.Error())
return "", err
}
defer func(Body io.ReadCloser) {
err := Body.Close()
if err != nil {
return
}
}(response.Body)
// Read the response body
body, err := io.ReadAll(response.Body)
if err != nil {
fmt.Printf("Error occurred while reading the response: %s\n", err.Error())
return "", err
}
return string(body), err
}
func SeminarGroups(c echo.Context, app *pocketbase.PocketBase) error {
var groups []model.SeminarGroup
resultSummer, _ := getSeminarHTML("ss")
resultWinter, _ := getSeminarHTML("ws")
groups = parseSeminarGroups(resultSummer)
groups = append(groups, parseSeminarGroups(resultWinter)...)
// filter duplicates
groups = removeDuplicates(groups)
collection, dbError := db.FindCollection(app, "groups")
if dbError != nil {
return apis.NewNotFoundError("Collection not found", dbError)
}
var insertedGroups []*models.Record
insertedGroups, dbError = db.SaveGroups(groups, collection, app)
if dbError != nil {
return apis.NewNotFoundError("Records could not be saved", dbError)
}
return c.JSON(http.StatusOK, insertedGroups)
}
func removeDuplicates(groups []model.SeminarGroup) []model.SeminarGroup {
var uniqueGroups []model.SeminarGroup
for _, group := range groups {
if !contains(uniqueGroups, group) {
uniqueGroups = append(uniqueGroups, group)
}
}
return uniqueGroups
}
func contains(groups []model.SeminarGroup, group model.SeminarGroup) bool {
for _, a := range groups {
if a.Course == group.Course {
return true
}
}
return false
}
func parseSeminarGroups(result string) []model.SeminarGroup {
var studium model.Studium
err := xml.Unmarshal([]byte(result), &studium)
if err != nil {
return nil
}
var seminarGroups []model.SeminarGroup
for _, Fakultaet := range studium.Fakultaet {
for _, Studiengang := range Fakultaet.Studiengang {
for _, Studienrichtung := range Studiengang.Semgrp {
seminarGroup := model.SeminarGroup{
University: "HTWK-Leipzig",
GroupShortcut: Studiengang.Name,
GroupId: Studiengang.ID,
Course: Studienrichtung.Name,
Faculty: Fakultaet.Name,
FacultyId: Fakultaet.ID,
}
seminarGroups = append(seminarGroups, seminarGroup)
}
}
}
return seminarGroups
}

View File

@@ -0,0 +1,201 @@
package v1
import (
"golang.org/x/net/html"
"strings"
)
// Find the first <table> element in the HTML document
func findFirstTable(node *html.Node) *html.Node {
if node.Type == html.ElementNode && node.Data == "table" {
return node
}
// Traverse child nodes recursively
for child := node.FirstChild; child != nil; child = child.NextSibling {
found := findFirstTable(child)
if found != nil {
return found
}
}
return nil
}
// Find the first <span> element with the specified class attribute value
func findFirstSpanWithClass(node *html.Node, classValue string) *html.Node {
// Check if the current node is a <span> element with the specified class attribute value
if node.Type == html.ElementNode && node.Data == "span" {
if hasClassAttribute(node, classValue) {
return node
}
}
// Traverse child nodes recursively
for child := node.FirstChild; child != nil; child = child.NextSibling {
found := findFirstSpanWithClass(child, classValue)
if found != nil {
return found
}
}
return nil
}
// Check if the specified element has the specified class attribute value
func hasClassAttribute(node *html.Node, classValue string) bool {
for _, attr := range node.Attr {
if attr.Key == "class" && strings.Contains(attr.Val, classValue) {
return true
}
}
return false
}
// Get Tables with days
func getEventTables(node *html.Node) [][]*html.Node {
var eventTables [][]*html.Node
tables := findTables(node)
// get all tables with events
for events := range tables {
rows := findTableRows(tables[events])
// check that a first row exists
if len(rows) > 0 {
rows = rows[1:]
eventTables = append(eventTables, rows)
}
}
return eventTables
}
// Get Tables with days
func getAllDayLabels(node *html.Node) []string {
paragraphs := findParagraphs(node)
var dayArray []string
for _, p := range paragraphs {
label := getDayLabel(p)
if label != "" {
dayArray = append(dayArray, label)
}
}
return dayArray
}
// Find all <p> elements in the HTML document
func findParagraphs(node *html.Node) []*html.Node {
var paragraphs []*html.Node
if node.Type == html.ElementNode && node.Data == "p" {
paragraphs = append(paragraphs, node)
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
paragraphs = append(paragraphs, findParagraphs(child)...)
}
return paragraphs
}
// Find all <tr> elements in <tbody>, excluding the first one
func findTableRows(node *html.Node) []*html.Node {
var tableRows []*html.Node
if node.Type == html.ElementNode && node.Data == "tbody" {
child := node.FirstChild
for child != nil {
if child.Type == html.ElementNode && child.Data == "tr" {
tableRows = append(tableRows, child)
}
child = child.NextSibling
}
}
// Traverse child nodes recursively
for child := node.FirstChild; child != nil; child = child.NextSibling {
var tableRowElement = findTableRows(child)
if tableRowElement != nil {
tableRows = append(tableRows, tableRowElement...)
}
}
// check if tableRows is nil
if tableRows == nil {
return []*html.Node{}
} else {
return tableRows
}
}
// Find all <p> elements in the HTML document
func findTables(node *html.Node) []*html.Node {
var tables []*html.Node
if node.Type == html.ElementNode && node.Data == "table" {
tables = append(tables, node)
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
tables = append(tables, findDayTables(child)...)
}
return tables
}
// Find all <p> elements in the HTML document
func findDayTables(node *html.Node) []*html.Node {
var tables []*html.Node
for child := node.FirstChild; child != nil; child = child.NextSibling {
tables = append(tables, findDayTables(child)...)
}
if node.Type == html.ElementNode && node.Data == "table" && hasClassAttribute(node, "spreadsheet") {
tables = append(tables, node)
}
return tables
}
// Get the text content of the specified node and its descendants
func getDayLabel(node *html.Node) string {
child := node.FirstChild
if child != nil {
if child.Type == html.ElementNode && child.Data == "span" {
if child.FirstChild != nil {
return child.FirstChild.Data
}
}
}
return ""
}
// Find all <td> elements in the current <tr>
func findTableData(node *html.Node) []*html.Node {
var tableData []*html.Node
if node.Type == html.ElementNode && node.Data == "tr" {
child := node.FirstChild
for child != nil {
if child.Type == html.ElementNode && child.Data == "td" {
tableData = append(tableData, child)
}
child = child.NextSibling
}
}
return tableData
}
// Get the text content of the specified node and its descendants
func getTextContent(node *html.Node) string {
var textContent string
if node.Type == html.TextNode {
textContent = node.Data
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
textContent += getTextContent(child)
}
return textContent
}