// File: htwkalender/backend/service/fetch/v2/fetcher.go
// Snapshot: 2024-01-21 17:53:49 +01:00 (121 lines, 3.5 KiB, Go)

package v2
import (
"fmt"
"github.com/google/uuid"
"github.com/pocketbase/pocketbase"
"golang.org/x/net/html"
"htwkalender/model"
"htwkalender/service/db"
"htwkalender/service/fetch"
localTime "htwkalender/service/functions/time"
"strings"
)
// ParseEventsFromRemote fetches and stores all currently relevant events by
// calling FetchAllEventsAndSave with a localTime.RealClock.
//
// It returns the events handed back by FetchAllEventsAndSave, or nil together
// with the error when that call fails.
func ParseEventsFromRemote(app *pocketbase.PocketBase) (model.Events, error) {
	clock := localTime.RealClock{}

	events, fetchErr := FetchAllEventsAndSave(app, clock)
	if fetchErr != nil {
		return nil, fetchErr
	}

	return events, nil
}
// FetchAllEventsAndSave downloads the event listings of every semester that is
// relevant for the current month, passes them to db.SaveEvents and returns the
// records that db.SaveEvents handed back.
//
// The summer semester is fetched from March through October and the winter
// semester from September through April. In the overlapping months (March,
// April, September and October) both are fetched and the saved records of both
// are returned together.
//
// The first parse or save error aborts the run and is returned wrapped. Nothing
// that db.SaveEvents did for an earlier semester is undone here.
func FetchAllEventsAndSave(app *pocketbase.PocketBase, clock localTime.Clock) ([]model.Event, error) {
	// Read the clock once so both range checks are evaluated for the same month.
	month := clock.Now().Month()

	semesters := []struct {
		name   string
		active bool
		url    string
	}{
		{
			name:   "summer",
			active: month >= 3 && month <= 10, // March through October
			url:    "https://stundenplan.htwk-leipzig.de/ss/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes",
		},
		{
			name:   "winter",
			active: month >= 9 || month <= 4, // September through April
			url:    "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes",
		},
	}

	var savedRecords []model.Event
	for _, semester := range semesters {
		if !semester.active {
			continue
		}

		events, err := parseEventForOneSemester(semester.url)
		if err != nil {
			return nil, fmt.Errorf("failed to parse events for %s semester: %w", semester.name, err)
		}

		savedEvents, err := db.SaveEvents(events, app)
		if err != nil {
			return nil, fmt.Errorf("failed to save events: %w", err)
		}

		// Accumulate across semesters; only the saved records are returned, so
		// an event is not listed once as parsed and once as saved.
		savedRecords = append(savedRecords, savedEvents...)
	}

	return savedRecords, nil
}
// parseEventForOneSemester downloads the timetable report at url and turns it
// into a list of events.
//
// The page is fetched and parsed into a node tree, the event tables and day
// labels are converted to events, combined week entries are split, and the
// semester and year found in the header span "header-0-2-0" of the first table
// are used to convert weeks to dates. Finally UUIDs are assigned and event
// types are split.
//
// It returns (nil, nil) when the week splitting yields a nil event slice. It
// returns an error when fetching or parsing fails, when the document has no
// table, or when the semester header span is missing or empty.
func parseEventForOneSemester(url string) ([]model.Event, error) {
	// Fetch webpage from URL
	webpage, err := fetch.GetHTML(url)
	if err != nil {
		return nil, err
	}

	// Parse HTML to node tree
	doc, err := parseHTML(webpage, nil)
	if err != nil {
		return nil, err
	}

	// Get all event tables and all day labels
	eventTables := getEventTables(doc)
	allDayLabels := getAllDayLabels(doc)

	eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
	splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
	events := splitEventsBySingleWeek(splitEventsByWeekVal)
	if events == nil {
		// Nothing to convert; this is not treated as an error.
		return nil, nil
	}

	table := findFirstTable(doc)
	if table == nil {
		return nil, fmt.Errorf("failed to find first table")
	}

	// Guard the header lookup: an unexpected page layout must surface as an
	// error instead of a nil-pointer panic.
	semesterSpan := findFirstSpanWithClass(table, "header-0-2-0")
	if semesterSpan == nil || semesterSpan.FirstChild == nil {
		return nil, fmt.Errorf("failed to find semester header")
	}
	semester, year := extractSemesterAndYear(semesterSpan.FirstChild.Data)

	events = convertWeeksToDates(events, semester, year)
	events = generateUUIDs(events)
	events = splitEventType(events)

	return events, nil
}
// parseHTML parses webpage as an HTML document and returns its root node.
//
// The err parameter is not read; it is kept so existing callers keep
// compiling. On a parse failure nil and the parser's error are returned.
func parseHTML(webpage string, err error) (*html.Node, error) {
	reader := strings.NewReader(webpage)

	root, parseErr := html.Parse(reader)
	if parseErr != nil {
		return nil, parseErr
	}

	return root, nil
}
// generateUUIDs gives every event a deterministic identifier: a SHA-1 based
// UUID in the OID namespace, computed from the event's name immediately
// followed by its course.
//
// The elements of events are updated in place and the same slice is returned.
func generateUUIDs(events []model.Event) []model.Event {
	for idx := range events {
		current := &events[idx]
		// Only name and course feed the hash, so equal name/course pairs map
		// to the same UUID.
		seed := []byte(current.Name + current.Course)
		current.UUID = uuid.NewSHA1(uuid.NameSpaceOID, seed).String()
	}
	return events
}