Files
htwkalender/backend/service/fetch/v2/fetcher.go
2023-12-08 11:39:45 +01:00

157 lines
3.7 KiB
Go

package v2
import (
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/labstack/echo/v5"
	"github.com/pocketbase/pocketbase"
	"golang.org/x/net/html"

	"htwkalender/model"
	"htwkalender/service/db"
	"htwkalender/service/fetch"
)
// ParseEventsFromRemote is an HTTP handler that triggers FetchAllEventsAndSave
// and reports the outcome to the client.
//
// On success it answers with status 200 and a JSON string stating how many
// events were saved. If FetchAllEventsAndSave reports an error, that error is
// returned unchanged and no response body is written here.
func ParseEventsFromRemote(c echo.Context, app *pocketbase.PocketBase) error {
	fetchErr, saved := FetchAllEventsAndSave(app)
	if fetchErr != nil {
		return fetchErr
	}

	message := "Successfully saved " + strconv.Itoa(len(saved)) + " events"
	return c.JSON(200, message)
}
// FetchAllEventsAndSave downloads the timetable of every semester that is
// relevant for the current month, stores the parsed events via db.SaveEvents
// and returns everything that was saved.
//
// The summer semester ("ss") is processed from March through October and the
// winter semester ("ws") from September through April; in the overlapping
// months both are processed, summer first.
//
// A failure in one semester does not stop the other one from being
// processed. The first error that occurred (while fetching/parsing or while
// saving) is returned together with the records that were saved regardless.
//
// Note the result order (error first, then the events): it is unusual for Go
// but kept because existing callers depend on it.
func FetchAllEventsAndSave(app *pocketbase.PocketBase) (error, []model.Event) {
	const (
		baseURL    = "https://stundenplan.htwk-leipzig.de/"
		reportPath = "/Berichte/Text-Listen;Veranstaltungsarten;name;" +
			"Vp%0A" +
			"Vw%0A" +
			"V%0A" +
			"Sp%0A" +
			"Sw%0A" +
			"S%0A" +
			"Pp%0A" +
			"Pw%0A" +
			"P%0A" +
			"ZV%0A" +
			"Tut%0A" +
			"Sperr%0A" +
			"pf%0A" +
			"wpf%0A" +
			"fak%0A" +
			"Pruefung%0A" +
			"Vertretung%0A" +
			"Fremdveranst.%0A" +
			"Buchen%0A" +
			"%0A?&template=sws_modul&weeks=1-65&combined=yes"
	)

	// Read the clock once so both semester checks see the same month.
	month := time.Now().Month()

	var semesters []string
	if month >= time.March && month <= time.October {
		semesters = append(semesters, "ss")
	}
	if month >= time.September || month <= time.April {
		semesters = append(semesters, "ws")
	}

	var (
		firstErr     error
		savedRecords []model.Event
	)
	for _, semester := range semesters {
		saved, err := fetchAndSaveSemesterEvents(baseURL+semester+reportPath, app)
		savedRecords = append(savedRecords, saved...)
		// Keep the first failure instead of letting a later success hide it.
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr, savedRecords
}

// fetchAndSaveSemesterEvents parses the timetable page at url and hands the
// resulting events to db.SaveEvents. A fetch/parse error is returned without
// touching the database; otherwise the results of db.SaveEvents are passed
// through unchanged.
func fetchAndSaveSemesterEvents(url string, app *pocketbase.PocketBase) ([]model.Event, error) {
	events, err := parseEventForOneSemester(url)
	if err != nil {
		return nil, err
	}
	return db.SaveEvents(events, app)
}
// parseEventForOneSemester fetches the timetable page at url and turns it
// into a list of events.
//
// The page is parsed into an HTML node tree, the event tables and day labels
// are extracted and converted to events, and the events are then run through
// the week splitting, week-to-date conversion, UUID generation, event type
// splitting and "Pruefung" name/notes swap, in that order.
//
// It returns (nil, nil) when the page yields no event tables, no day labels
// or no events, so callers must treat a nil slice as "nothing to import"
// rather than as a failure. Fetch and HTML parse errors are returned
// unchanged. A page that contains events but no semester header is reported
// as an error, because the semester and year needed for the date conversion
// are read from that header.
func parseEventForOneSemester(url string) ([]model.Event, error) {
	// Fetch the web page.
	webpage, err := fetch.GetHTML(url)
	if err != nil {
		return nil, err
	}

	// Parse the HTML into a node tree. parseHTML ignores its first argument.
	doc, err := parseHTML(nil, webpage)
	if err != nil {
		return nil, err
	}

	// Collect all event tables and all day labels.
	eventTables := getEventTables(doc)
	allDayLabels := getAllDayLabels(doc)
	if eventTables == nil || allDayLabels == nil {
		return nil, nil
	}

	eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
	if eventsWithCombinedWeeks == nil {
		return nil, nil
	}

	events := splitEventsBySingleWeek(splitEventsByWeek(eventsWithCombinedWeeks))
	if events == nil {
		return nil, nil
	}

	// The semester header lives in the first table of the page. Check every
	// step of the lookup so a changed page layout yields an error instead of
	// a nil pointer panic.
	table := findFirstTable(doc)
	if table == nil {
		return nil, fmt.Errorf("timetable page %q: header table not found", url)
	}
	semesterSpan := findFirstSpanWithClass(table, "header-0-2-0")
	if semesterSpan == nil || semesterSpan.FirstChild == nil {
		return nil, fmt.Errorf("timetable page %q: semester header not found", url)
	}
	semester, year := extractSemesterAndYear(semesterSpan.FirstChild.Data)

	events = convertWeeksToDates(events, semester, year)
	events = generateUUIDs(events)
	events = splitEventType(events)
	events = switchNameAndNotesForPruefung(events)

	return events, nil
}
// switchNameAndNotesForPruefung swaps the Name and Notes fields of every
// event whose EventType is exactly "Pruefung". All other events are left
// untouched. The slice is modified in place and returned for convenience.
func switchNameAndNotesForPruefung(events []model.Event) []model.Event {
	for idx := range events {
		if events[idx].EventType != "Pruefung" {
			continue
		}
		events[idx].Name, events[idx].Notes = events[idx].Notes, events[idx].Name
	}
	return events
}
// parseHTML parses webpage as an HTML document and returns the root node of
// the resulting tree. If parsing fails, it returns a nil node and the error
// from html.Parse.
//
// The err parameter is not read; it only exists in the signature.
func parseHTML(err error, webpage string) (*html.Node, error) {
	reader := strings.NewReader(webpage)
	root, parseErr := html.Parse(reader)
	if parseErr != nil {
		return nil, parseErr
	}
	return root, nil
}
// generateUUIDs sets the UUID field of every event to a name-based (SHA-1)
// UUID in the OID namespace, computed from the event's Name directly
// followed by its Course. Events with the same name and course therefore
// receive the same UUID. The slice is modified in place and returned.
func generateUUIDs(events []model.Event) []model.Event {
	for idx := range events {
		key := events[idx].Name + events[idx].Course
		id := uuid.NewSHA1(uuid.NameSpaceOID, []byte(key))
		events[idx].UUID = id.String()
	}
	return events
}