mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2025-08-03 02:09:15 +02:00
feat:#52 added new fetcher from new endpoint
This commit is contained in:
95
backend/service/fetch/v2/fetcher.go
Normal file
95
backend/service/fetch/v2/fetcher.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package v2
|
||||
|
||||
import (
|
||||
"github.com/google/uuid"
|
||||
"github.com/labstack/echo/v5"
|
||||
"github.com/pocketbase/pocketbase"
|
||||
"golang.org/x/net/html"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/db"
|
||||
"htwkalender/service/fetch"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func ParseEventsFromRemote(c echo.Context, app *pocketbase.PocketBase) error {
|
||||
|
||||
url := "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes"
|
||||
|
||||
// Fetch Webpage from URL
|
||||
webpage, err := fetch.GetHTML(url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Parse HTML to Node Tree
|
||||
doc, err2 := parseHTML(err, webpage)
|
||||
if err2 != nil {
|
||||
return err2
|
||||
}
|
||||
|
||||
// Get all event tables and all day labels
|
||||
eventTables := getEventTables(doc)
|
||||
allDayLabels := getAllDayLabels(doc)
|
||||
|
||||
if eventTables == nil || allDayLabels == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
|
||||
|
||||
if eventsWithCombinedWeeks == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
|
||||
events := splitEventsBySingleWeek(splitEventsByWeekVal)
|
||||
|
||||
if events == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Search Semester String in Page Head
|
||||
table := findFirstTable(doc)
|
||||
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
|
||||
|
||||
semester, year := extractSemesterAndYear(semesterString)
|
||||
events = convertWeeksToDates(events, semester, year)
|
||||
events = generateUUIDs(events)
|
||||
events = splitEventType(events)
|
||||
|
||||
var seminarGroup = model.SeminarGroup{
|
||||
University: findFirstSpanWithClass(table, "header-1-0-0").FirstChild.Data,
|
||||
Events: events,
|
||||
}
|
||||
|
||||
if seminarGroup.Events == nil && seminarGroup.University == "" {
|
||||
return err
|
||||
}
|
||||
|
||||
savedRecords, dbError := db.SaveEvents(events, app)
|
||||
|
||||
if dbError != nil {
|
||||
return dbError
|
||||
} else {
|
||||
savedRecordsLength := strconv.FormatInt(int64(len(savedRecords)), 10)
|
||||
return c.JSON(200, "Successfully saved "+savedRecordsLength+" events")
|
||||
}
|
||||
}
|
||||
|
||||
func parseHTML(err error, webpage string) (*html.Node, error) {
|
||||
doc, err := html.Parse(strings.NewReader(webpage))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
func generateUUIDs(events []model.Event) []model.Event {
|
||||
for i, event := range events {
|
||||
// generate a hash value from the event name, course and semester
|
||||
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+event.Course))
|
||||
events[i].UUID = hash.String()
|
||||
}
|
||||
return events
|
||||
}
|
Reference in New Issue
Block a user