Files
htwkalender/services/data-manager/service/fetch/v2/fetcher.go
2025-04-20 14:04:23 +02:00

208 lines
5.7 KiB
Go

//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
//Copyright (C) 2024 HTWKalender support@htwkalender.de
//This program is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Affero General Public License for more details.
//You should have received a copy of the GNU Affero General Public License
//along with this program. If not, see <https://www.gnu.org/licenses/>.
package v2
import (
"fmt"
"github.com/google/uuid"
"github.com/pocketbase/pocketbase"
"github.com/pocketbase/pocketbase/core"
"golang.org/x/net/html"
"htwkalender/data-manager/model"
"htwkalender/data-manager/service/db"
"htwkalender/data-manager/service/fetch"
v1 "htwkalender/data-manager/service/fetch/v1"
"htwkalender/data-manager/service/functions"
localTime "htwkalender/data-manager/service/functions/time"
"log/slog"
"strings"
)
func ParseEventsFromRemote(app *pocketbase.PocketBase) (model.Events, error) {
savedRecords, err := FetchAllEventsAndSave(app, localTime.RealClock{})
if err != nil {
return nil, err
}
return savedRecords, nil
}
func FetchAllEventsAndSave(app *pocketbase.PocketBase, clock localTime.Clock) ([]model.Event, error) {
var savedRecords []model.Event
var err error = nil
var stubUrl = [2]string{
"https://stundenplan.htwk-leipzig.de/",
"/Berichte/Text-Listen;Veranstaltungsarten;name;" +
"Vp%0A" +
"Vw%0A" +
"V%0A" +
"Sp%0A" +
"Sw%0A" +
"S%0A" +
"Pp%0A" +
"Pw%0A" +
"P%0A" +
"ZV%0A" +
"Tut%0A" +
"Sperr%0A" +
"pf%0A" +
"wpf%0A" +
"fak%0A" +
"Pruefung%0A" +
"gebucht%0A" +
"Vertretung%0A" +
"Fremdveranst.%0A" +
"Buchen%0A" +
"%0A?&template=sws_modul&weeks=1-65&combined=yes",
}
// Fetch and save events for all semesters
for _, semester := range functions.CalculateSemesterList(clock) {
events, fetchErr := fetchAndSaveAllEventsForSemester(app, semester, stubUrl)
if fetchErr != nil {
return nil, fmt.Errorf("failed to fetch and save events for "+semester+": %w", err)
}
savedRecords = append(savedRecords, events...)
}
return savedRecords, err
}
func fetchAndSaveAllEventsForSemester(
app *pocketbase.PocketBase,
semester string,
stubUrl [2]string,
) ([]model.Event, error) {
var savedRecords []model.Event
url := stubUrl[0] + semester + stubUrl[1]
events, err := parseEventForOneSemester(url)
if err != nil {
return nil, fmt.Errorf("failed to parse events for "+semester+": %w", err)
}
err = updateDatabase(app, events, "Sport", semester)
return savedRecords, err
}
func updateDatabase(base *pocketbase.PocketBase, eventsToBeAdded []model.Event, course string, semester string) error {
var addedEvents []model.Event
var err error
// to in transaction the events will be added and deleted
err = base.App.RunInTransaction(func(app core.App) error {
err = db.DeleteAllEventsRatherThenCourse(app, course, semester)
if err != nil {
return err
}
addedEvents, err = db.SaveEvents(eventsToBeAdded, base)
if err != nil {
return err
}
return nil
})
if err != nil {
return err
}
slog.Info("Added events: ", "events", len(addedEvents))
return nil
}
func parseEventForOneSemester(url string) ([]model.Event, error) {
// Fetch Webpage from URL
webpage, err := fetch.GetHTML(url)
if err != nil {
return nil, err
}
// Parse HTML to Node Tree
var doc *html.Node
doc, err = parseHTML(webpage)
if err != nil {
return nil, err
}
// Get all event tables and all day labels
eventTables := getEventTables(doc)
allDayLabels := getAllDayLabels(doc)
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
events := splitEventsBySingleWeek(splitEventsByWeekVal)
if events == nil {
return nil, err
}
table := findFirstTable(doc)
if table == nil {
return nil, fmt.Errorf("failed to find first table")
}
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
semester, year := extractSemesterAndYear(semesterString)
events = convertWeeksToDates(events, semester, year)
events, err = v1.SplitEventType(events)
if err != nil {
slog.Error("Error occurred while splitting event types: ", "error", err)
return nil, err
}
events = switchNameAndNotesForExam(events)
events = generateUUIDs(events)
return events, nil
}
// switch name and notes for Pruefung events when Note is not empty and Name starts with "Prüfungen" and contains email
func switchNameAndNotesForExam(events []model.Event) []model.Event {
for i, event := range events {
if event.EventType == "Pruefung" {
if event.Notes != "" && strings.HasPrefix(event.Name, "Prüfungen") && strings.Contains(event.Name, "@") {
events[i].Name = event.Notes
events[i].Notes = event.Name
}
}
}
return events
}
func parseHTML(webpage string) (*html.Node, error) {
doc, err := html.Parse(strings.NewReader(webpage))
if err != nil {
return nil, err
}
return doc, nil
}
// generateUUIDs generates a UUID for each event based on the event name, course and semester
// the UUID is used to identify the event in the database
func generateUUIDs(events []model.Event) []model.Event {
for i, event := range events {
// generate a hash value from the event name, course and semester
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+event.Course))
events[i].UUID = hash.String()
}
return events
}