mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2025-08-08 04:39:13 +02:00
feat:#34 refactored function to intended service, fixed docker files
This commit is contained in:
181
services/data-manager/service/fetch/v2/fetcher.go
Normal file
181
services/data-manager/service/fetch/v2/fetcher.go
Normal file
@@ -0,0 +1,181 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/pocketbase/pocketbase"
|
||||
"golang.org/x/net/html"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/db"
|
||||
"htwkalender/service/fetch"
|
||||
v1 "htwkalender/service/fetch/v1"
|
||||
"htwkalender/service/functions"
|
||||
localTime "htwkalender/service/functions/time"
|
||||
"log/slog"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func ParseEventsFromRemote(app *pocketbase.PocketBase) (model.Events, error) {
|
||||
savedRecords, err := FetchAllEventsAndSave(app, localTime.RealClock{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return savedRecords, nil
|
||||
}
|
||||
|
||||
func FetchAllEventsAndSave(app *pocketbase.PocketBase, clock localTime.Clock) ([]model.Event, error) {
|
||||
var savedRecords []model.Event
|
||||
var err error = nil
|
||||
|
||||
var stubUrl = [2]string{
|
||||
"https://stundenplan.htwk-leipzig.de/",
|
||||
"/Berichte/Text-Listen;Veranstaltungsarten;name;" +
|
||||
"Vp%0A" +
|
||||
"Vw%0A" +
|
||||
"V%0A" +
|
||||
"Sp%0A" +
|
||||
"Sw%0A" +
|
||||
"S%0A" +
|
||||
"Pp%0A" +
|
||||
"Pw%0A" +
|
||||
"P%0A" +
|
||||
"ZV%0A" +
|
||||
"Tut%0A" +
|
||||
"Sperr%0A" +
|
||||
"pf%0A" +
|
||||
"wpf%0A" +
|
||||
"fak%0A" +
|
||||
"Pruefung%0A" +
|
||||
"gebucht%0A" +
|
||||
"Vertretung%0A" +
|
||||
"Fremdveranst.%0A" +
|
||||
"Buchen%0A" +
|
||||
"%0A?&template=sws_modul&weeks=1-65&combined=yes",
|
||||
}
|
||||
|
||||
// Fetch and save events for all semesters
|
||||
for _, semester := range functions.CalculateSemesterList(clock) {
|
||||
events, fetchErr := fetchAndSaveAllEventsForSemester(app, semester, stubUrl)
|
||||
if fetchErr != nil {
|
||||
return nil, fmt.Errorf("failed to fetch and save events for "+semester+": %w", err)
|
||||
}
|
||||
savedRecords = append(savedRecords, events...)
|
||||
}
|
||||
|
||||
return savedRecords, err
|
||||
}
|
||||
|
||||
func fetchAndSaveAllEventsForSemester(
|
||||
app *pocketbase.PocketBase,
|
||||
semester string,
|
||||
stubUrl [2]string,
|
||||
) ([]model.Event, error) {
|
||||
var savedRecords []model.Event
|
||||
url := stubUrl[0] + semester + stubUrl[1]
|
||||
events, err := parseEventForOneSemester(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse events for "+semester+": %w", err)
|
||||
}
|
||||
err = db.DeleteAllEventsBySemesterWithoutCourse(app, "Sport", semester)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to delete all events for "+semester+": %w", err)
|
||||
}
|
||||
savedEvents, dbError := db.SaveEvents(events, app)
|
||||
if dbError != nil {
|
||||
return nil, fmt.Errorf("failed to save events for "+semester+": %w", dbError)
|
||||
}
|
||||
savedRecords = append(savedRecords, savedEvents...)
|
||||
return savedRecords, err
|
||||
}
|
||||
|
||||
func parseEventForOneSemester(url string) ([]model.Event, error) {
|
||||
// Fetch Webpage from URL
|
||||
webpage, err := fetch.GetHTML(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse HTML to Node Tree
|
||||
var doc *html.Node
|
||||
doc, err = parseHTML(webpage)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get all event tables and all day labels
|
||||
eventTables := getEventTables(doc)
|
||||
allDayLabels := getAllDayLabels(doc)
|
||||
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
|
||||
|
||||
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
|
||||
events := splitEventsBySingleWeek(splitEventsByWeekVal)
|
||||
|
||||
if events == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
table := findFirstTable(doc)
|
||||
|
||||
if table == nil {
|
||||
return nil, fmt.Errorf("failed to find first table")
|
||||
}
|
||||
|
||||
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
|
||||
semester, year := extractSemesterAndYear(semesterString)
|
||||
events = convertWeeksToDates(events, semester, year)
|
||||
events, err = v1.SplitEventType(events)
|
||||
if err != nil {
|
||||
slog.Error("Error occurred while splitting event types: ", "error", err)
|
||||
return nil, err
|
||||
}
|
||||
events = switchNameAndNotesForExam(events)
|
||||
events = generateUUIDs(events)
|
||||
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// switch name and notes for Pruefung events when Note is not empty and Name starts with "Prüfungen" and contains email
|
||||
func switchNameAndNotesForExam(events []model.Event) []model.Event {
|
||||
for i, event := range events {
|
||||
if event.EventType == "Pruefung" {
|
||||
if event.Notes != "" && strings.HasPrefix(event.Name, "Prüfungen") && strings.Contains(event.Name, "@") {
|
||||
events[i].Name = event.Notes
|
||||
events[i].Notes = event.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
func parseHTML(webpage string) (*html.Node, error) {
|
||||
doc, err := html.Parse(strings.NewReader(webpage))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
func generateUUIDs(events []model.Event) []model.Event {
|
||||
for i, event := range events {
|
||||
// generate a hash value from the event name, course and semester
|
||||
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+event.Course))
|
||||
events[i].UUID = hash.String()
|
||||
}
|
||||
return events
|
||||
}
|
Reference in New Issue
Block a user