mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2025-08-06 19:59:13 +02:00
feat:#52 added new fetcher from new endpoint
This commit is contained in:
@@ -2,7 +2,8 @@ package service
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"htwkalender/service/events"
|
"htwkalender/service/events"
|
||||||
"htwkalender/service/fetch"
|
"htwkalender/service/fetch/v1"
|
||||||
|
v2 "htwkalender/service/fetch/v2"
|
||||||
"htwkalender/service/ical"
|
"htwkalender/service/ical"
|
||||||
"htwkalender/service/room"
|
"htwkalender/service/room"
|
||||||
"io"
|
"io"
|
||||||
@@ -21,11 +22,28 @@ func AddRoutes(app *pocketbase.PocketBase) {
|
|||||||
Method: http.MethodGet,
|
Method: http.MethodGet,
|
||||||
Path: "/api/fetchPlans",
|
Path: "/api/fetchPlans",
|
||||||
Handler: func(c echo.Context) error {
|
Handler: func(c echo.Context) error {
|
||||||
return fetch.GetSeminarEvents(c, app)
|
return v1.GetSeminarEvents(c, app)
|
||||||
|
},
|
||||||
|
Middlewares: []echo.MiddlewareFunc{
|
||||||
|
apis.ActivityLogger(app),
|
||||||
|
//apis.RequireAdminAuth(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
app.OnBeforeServe().Add(func(e *core.ServeEvent) error {
|
||||||
|
_, err := e.Router.AddRoute(echo.Route{
|
||||||
|
Method: http.MethodGet,
|
||||||
|
Path: "/api/v2/fetch",
|
||||||
|
Handler: func(c echo.Context) error {
|
||||||
|
return v2.ParseEventsFromRemote(c, app)
|
||||||
},
|
},
|
||||||
Middlewares: []echo.MiddlewareFunc{
|
Middlewares: []echo.MiddlewareFunc{
|
||||||
apis.ActivityLogger(app),
|
apis.ActivityLogger(app),
|
||||||
apis.RequireAdminAuth(),
|
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -57,7 +75,7 @@ func AddRoutes(app *pocketbase.PocketBase) {
|
|||||||
Method: http.MethodGet,
|
Method: http.MethodGet,
|
||||||
Path: "/api/fetchGroups",
|
Path: "/api/fetchGroups",
|
||||||
Handler: func(c echo.Context) error {
|
Handler: func(c echo.Context) error {
|
||||||
return fetch.SeminarGroups(c, app)
|
return v1.SeminarGroups(c, app)
|
||||||
},
|
},
|
||||||
Middlewares: []echo.MiddlewareFunc{
|
Middlewares: []echo.MiddlewareFunc{
|
||||||
apis.ActivityLogger(app),
|
apis.ActivityLogger(app),
|
||||||
|
@@ -1,6 +1,10 @@
|
|||||||
package date
|
package date
|
||||||
|
|
||||||
import "time"
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
func GetDateFromWeekNumber(year int, weekNumber int, dayName string) (time.Time, error) {
|
func GetDateFromWeekNumber(year int, weekNumber int, dayName string) (time.Time, error) {
|
||||||
// Create a time.Date for the first day of the year
|
// Create a time.Date for the first day of the year
|
||||||
@@ -46,3 +50,13 @@ func GetDateFromWeekNumber(year int, weekNumber int, dayName string) (time.Time,
|
|||||||
|
|
||||||
return desiredDate, nil
|
return desiredDate, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CreateTimeFromHourAndMinuteString parses a clock time written as "HH:MM"
// and returns it as a time.Time in UTC.
//
// Only the hour and minute of the result are meaningful: the date part is
// built from year 0, month 0 and day 0, which time.Date normalizes to a
// placeholder date, so callers should read the clock fields only.
//
// The function never fails. A component that is missing or not a number is
// treated as 0, so "" yields 00:00 and "17" yields 17:00. Surrounding
// whitespace around each component is ignored.
func CreateTimeFromHourAndMinuteString(tableTime string) time.Time {
	// strings.Split always returns at least one element for a non-empty
	// separator, so parts[0] is safe; parts[1] only exists if a ":" was found.
	parts := strings.Split(tableTime, ":")

	// Conversion errors are deliberately ignored: Atoi returns 0 on failure,
	// which is the documented fallback for an unparsable component.
	hour, _ := strconv.Atoi(strings.TrimSpace(parts[0]))

	minute := 0
	if len(parts) > 1 {
		minute, _ = strconv.Atoi(strings.TrimSpace(parts[1]))
	}

	return time.Date(0, 0, 0, hour, minute, 0, 0, time.UTC)
}
|
||||||
|
@@ -7,7 +7,7 @@ import (
|
|||||||
"github.com/pocketbase/pocketbase"
|
"github.com/pocketbase/pocketbase"
|
||||||
)
|
)
|
||||||
|
|
||||||
func SaveEvents(seminarGroup []model.SeminarGroup, app *pocketbase.PocketBase) ([]model.Event, error) {
|
func SaveSeminarGroupEvents(seminarGroup []model.SeminarGroup, app *pocketbase.PocketBase) ([]model.Event, error) {
|
||||||
var toBeSavedEvents model.Events
|
var toBeSavedEvents model.Events
|
||||||
var savedRecords model.Events
|
var savedRecords model.Events
|
||||||
|
|
||||||
@@ -43,6 +43,38 @@ func SaveEvents(seminarGroup []model.SeminarGroup, app *pocketbase.PocketBase) (
|
|||||||
return savedRecords, nil
|
return savedRecords, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func SaveEvents(events []model.Event, app *pocketbase.PocketBase) ([]model.Event, error) {
|
||||||
|
var toBeSavedEvents model.Events
|
||||||
|
var savedRecords model.Events
|
||||||
|
|
||||||
|
// check if event is already in database and add to toBeSavedEvents if not
|
||||||
|
for _, event := range events {
|
||||||
|
existsInDatabase, err := findEventByDayWeekStartEndNameCourse(event, event.Course, app)
|
||||||
|
alreadyAddedToSave := toBeSavedEvents.Contains(event)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !existsInDatabase && !alreadyAddedToSave {
|
||||||
|
toBeSavedEvents = append(toBeSavedEvents, event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// create record for each event that's not already in the database
|
||||||
|
for _, event := range toBeSavedEvents {
|
||||||
|
event.MarkAsNew()
|
||||||
|
// auto mapping for event fields to record fields
|
||||||
|
err := app.Dao().Save(&event)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
} else {
|
||||||
|
savedRecords = append(savedRecords, event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return savedRecords, nil
|
||||||
|
}
|
||||||
|
|
||||||
// check if event is already in database and return true if it is and false if it's not
|
// check if event is already in database and return true if it is and false if it's not
|
||||||
func findEventByDayWeekStartEndNameCourse(event model.Event, course string, app *pocketbase.PocketBase) (bool, error) {
|
func findEventByDayWeekStartEndNameCourse(event model.Event, course string, app *pocketbase.PocketBase) (bool, error) {
|
||||||
|
|
||||||
@@ -165,7 +197,7 @@ func GetAllModulesForCourse(app *pocketbase.PocketBase, course string, semester
|
|||||||
func GetAllModulesDistinctByNameAndCourse(app *pocketbase.PocketBase) (model.Events, error) {
|
func GetAllModulesDistinctByNameAndCourse(app *pocketbase.PocketBase) (model.Events, error) {
|
||||||
var events model.Events
|
var events model.Events
|
||||||
|
|
||||||
err := app.Dao().DB().Select("*").From("events").GroupBy("Name", "course").Distinct(true).All(&events)
|
err := app.Dao().DB().Select("*").From("events").GroupBy("Name").Distinct(true).All(&events)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
print("Error while getting events from database: ", err)
|
print("Error while getting events from database: ", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@@ -6,7 +6,7 @@ import (
|
|||||||
"github.com/pocketbase/pocketbase/apis"
|
"github.com/pocketbase/pocketbase/apis"
|
||||||
"htwkalender/model"
|
"htwkalender/model"
|
||||||
"htwkalender/service/db"
|
"htwkalender/service/db"
|
||||||
"htwkalender/service/fetch"
|
"htwkalender/service/fetch/v1"
|
||||||
"htwkalender/service/functions"
|
"htwkalender/service/functions"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -95,11 +95,11 @@ func UpdateModulesForCourse(app *pocketbase.PocketBase, course string) error {
|
|||||||
var courses []string
|
var courses []string
|
||||||
courses = append(courses, course)
|
courses = append(courses, course)
|
||||||
|
|
||||||
seminarGroups := fetch.GetSeminarGroupsEventsFromHTML(courses)
|
seminarGroups := v1.GetSeminarGroupsEventsFromHTML(courses)
|
||||||
|
|
||||||
seminarGroups = fetch.ClearEmptySeminarGroups(seminarGroups)
|
seminarGroups = v1.ClearEmptySeminarGroups(seminarGroups)
|
||||||
|
|
||||||
seminarGroups = fetch.ReplaceEmptyEventNames(seminarGroups)
|
seminarGroups = v1.ReplaceEmptyEventNames(seminarGroups)
|
||||||
|
|
||||||
//check if events in the seminarGroups Events are already in the database
|
//check if events in the seminarGroups Events are already in the database
|
||||||
//if yes, keep the database as it is
|
//if yes, keep the database as it is
|
||||||
@@ -122,7 +122,7 @@ func UpdateModulesForCourse(app *pocketbase.PocketBase, course string) error {
|
|||||||
|
|
||||||
//if there are no events in the database, save the new events
|
//if there are no events in the database, save the new events
|
||||||
if len(events) == 0 {
|
if len(events) == 0 {
|
||||||
_, dbError := db.SaveEvents(seminarGroups, app)
|
_, dbError := db.SaveSeminarGroupEvents(seminarGroups, app)
|
||||||
if dbError != nil {
|
if dbError != nil {
|
||||||
return apis.NewNotFoundError("Events could not be saved", dbError)
|
return apis.NewNotFoundError("Events could not be saved", dbError)
|
||||||
}
|
}
|
||||||
@@ -148,7 +148,7 @@ func UpdateModulesForCourse(app *pocketbase.PocketBase, course string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//save the new events
|
//save the new events
|
||||||
_, dbError := db.SaveEvents(seminarGroups, app)
|
_, dbError := db.SaveSeminarGroupEvents(seminarGroups, app)
|
||||||
if dbError != nil {
|
if dbError != nil {
|
||||||
return apis.NewNotFoundError("Events could not be saved", dbError)
|
return apis.NewNotFoundError("Events could not be saved", dbError)
|
||||||
}
|
}
|
||||||
|
44
backend/service/fetch/htmlDownloader.go
Normal file
44
backend/service/fetch/htmlDownloader.go
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
package fetch
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// getPlanHTML Get the HTML document from the specified URL
|
||||||
|
|
||||||
|
func GetHTML(url string) (string, error) {
|
||||||
|
|
||||||
|
// Send GET request
|
||||||
|
response, err := http.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Error occurred while making the request: %s\n", err.Error())
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer func(Body io.ReadCloser) {
|
||||||
|
err := Body.Close()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}(response.Body)
|
||||||
|
|
||||||
|
// Read the response body
|
||||||
|
body, err := io.ReadAll(response.Body)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Error occurred while reading the response: %s\n", err.Error())
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return toUtf8(body), err
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// toUtf8 converts a byte slice to a UTF-8 string by treating every byte as
// the Unicode code point with the same numeric value, which is how
// ISO-8859-1 (Latin-1) maps onto Unicode.
func toUtf8(iso88591Buf []byte) string {
	runes := make([]rune, 0, len(iso88591Buf))
	for idx := 0; idx < len(iso88591Buf); idx++ {
		runes = append(runes, rune(iso88591Buf[idx]))
	}
	return string(runes)
}
|
@@ -1,11 +1,11 @@
|
|||||||
package fetch
|
package v1
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"htwkalender/model"
|
"htwkalender/model"
|
||||||
"htwkalender/service/date"
|
"htwkalender/service/date"
|
||||||
"htwkalender/service/db"
|
"htwkalender/service/db"
|
||||||
"io"
|
"htwkalender/service/fetch"
|
||||||
"net/http"
|
"net/http"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -30,7 +30,7 @@ func GetSeminarEvents(c echo.Context, app *pocketbase.PocketBase) error {
|
|||||||
|
|
||||||
seminarGroups = ReplaceEmptyEventNames(seminarGroups)
|
seminarGroups = ReplaceEmptyEventNames(seminarGroups)
|
||||||
|
|
||||||
savedRecords, dbError := db.SaveEvents(seminarGroups, app)
|
savedRecords, dbError := db.SaveSeminarGroupEvents(seminarGroups, app)
|
||||||
|
|
||||||
if dbError != nil {
|
if dbError != nil {
|
||||||
return apis.NewNotFoundError("Events could not be saved", dbError.Error())
|
return apis.NewNotFoundError("Events could not be saved", dbError.Error())
|
||||||
@@ -64,13 +64,15 @@ func GetSeminarGroupsEventsFromHTML(seminarGroupsLabel []string) []model.Seminar
|
|||||||
var seminarGroups []model.SeminarGroup
|
var seminarGroups []model.SeminarGroup
|
||||||
for _, seminarGroupLabel := range seminarGroupsLabel {
|
for _, seminarGroupLabel := range seminarGroupsLabel {
|
||||||
|
|
||||||
result, getError := getPlanHTML("ss", seminarGroupLabel)
|
ssUrl := "https://stundenplan.htwk-leipzig.de/" + string("ss") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65"
|
||||||
|
result, getError := fetch.GetHTML(ssUrl)
|
||||||
if getError == nil {
|
if getError == nil {
|
||||||
seminarGroup := parseSeminarGroup(result)
|
seminarGroup := parseSeminarGroup(result)
|
||||||
seminarGroups = append(seminarGroups, seminarGroup)
|
seminarGroups = append(seminarGroups, seminarGroup)
|
||||||
}
|
}
|
||||||
|
|
||||||
result, getError = getPlanHTML("ws", seminarGroupLabel)
|
wsUrl := "https://stundenplan.htwk-leipzig.de/" + string("ws") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65"
|
||||||
|
result, getError = fetch.GetHTML(wsUrl)
|
||||||
if getError == nil {
|
if getError == nil {
|
||||||
seminarGroup := parseSeminarGroup(result)
|
seminarGroup := parseSeminarGroup(result)
|
||||||
seminarGroups = append(seminarGroups, seminarGroup)
|
seminarGroups = append(seminarGroups, seminarGroup)
|
||||||
@@ -80,7 +82,6 @@ func GetSeminarGroupsEventsFromHTML(seminarGroupsLabel []string) []model.Seminar
|
|||||||
}
|
}
|
||||||
|
|
||||||
func splitEventType(events []model.Event) []model.Event {
|
func splitEventType(events []model.Event) []model.Event {
|
||||||
|
|
||||||
for i, event := range events {
|
for i, event := range events {
|
||||||
matched, _ := regexp.Match("^([VPS])([wp])$", []byte(event.EventType))
|
matched, _ := regexp.Match("^([VPS])([wp])$", []byte(event.EventType))
|
||||||
if matched {
|
if matched {
|
||||||
@@ -90,7 +91,6 @@ func splitEventType(events []model.Event) []model.Event {
|
|||||||
events[i] = event
|
events[i] = event
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return events
|
return events
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -133,7 +133,6 @@ func generateUUIDs(events []model.Event, course string) []model.Event {
|
|||||||
events[i].UUID = hash.String()
|
events[i].UUID = hash.String()
|
||||||
}
|
}
|
||||||
return events
|
return events
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertWeeksToDates converts the week and year to a date
|
// convertWeeksToDates converts the week and year to a date
|
||||||
@@ -230,7 +229,6 @@ func toEvents(tables [][]*html.Node, days []string) []model.Event {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return events
|
return events
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -296,40 +294,3 @@ func splitWeekRange(weekRange string) []string {
|
|||||||
|
|
||||||
return weeks
|
return weeks
|
||||||
}
|
}
|
||||||
|
|
||||||
func toUtf8(iso88591Buf []byte) string {
|
|
||||||
buf := make([]rune, len(iso88591Buf))
|
|
||||||
for i, b := range iso88591Buf {
|
|
||||||
buf[i] = rune(b)
|
|
||||||
}
|
|
||||||
return string(buf)
|
|
||||||
}
|
|
||||||
|
|
||||||
// getPlanHTML Get the HTML document from the specified URL
|
|
||||||
func getPlanHTML(semester string, matrikel string) (string, error) {
|
|
||||||
url := "https://stundenplan.htwk-leipzig.de/" + string(semester) + "/Berichte/Text-Listen;Studenten-Sets;name;" + matrikel + "?template=sws_semgrp&weeks=1-65"
|
|
||||||
|
|
||||||
// Send GET request
|
|
||||||
response, err := http.Get(url)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("Error occurred while making the request: %s\n", err.Error())
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
defer func(Body io.ReadCloser) {
|
|
||||||
err := Body.Close()
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}(response.Body)
|
|
||||||
|
|
||||||
// Read the response body
|
|
||||||
body, err := io.ReadAll(response.Body)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("Error occurred while reading the response: %s\n", err.Error())
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
return toUtf8(body), err
|
|
||||||
|
|
||||||
}
|
|
@@ -1,4 +1,4 @@
|
|||||||
package fetch
|
package v1
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
@@ -1,4 +1,4 @@
|
|||||||
package fetch
|
package v1
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/xml"
|
"encoding/xml"
|
@@ -1,4 +1,4 @@
|
|||||||
package fetch
|
package v1
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
@@ -182,7 +182,6 @@ func findTableData(node *html.Node) []*html.Node {
|
|||||||
child = child.NextSibling
|
child = child.NextSibling
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return tableData
|
return tableData
|
||||||
}
|
}
|
||||||
|
|
60
backend/service/fetch/v2/eventParser.go
Normal file
60
backend/service/fetch/v2/eventParser.go
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
package v2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/pocketbase/pocketbase/tools/types"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
"htwkalender/model"
|
||||||
|
"htwkalender/service/date"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func toEvents(tables [][]*html.Node, days []string) []model.Event {
|
||||||
|
var events []model.Event
|
||||||
|
|
||||||
|
for table := range tables {
|
||||||
|
for row := range tables[table] {
|
||||||
|
|
||||||
|
tableData := findTableData(tables[table][row])
|
||||||
|
if len(tableData) > 0 {
|
||||||
|
start, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[1])))
|
||||||
|
end, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[2])))
|
||||||
|
|
||||||
|
courses := getTextContent(tableData[7])
|
||||||
|
|
||||||
|
if len(courses) > 0 {
|
||||||
|
for _, course := range strings.Split(courses, " ") {
|
||||||
|
events = append(events, model.Event{
|
||||||
|
Day: days[table],
|
||||||
|
Week: getTextContent(tableData[0]),
|
||||||
|
Start: start,
|
||||||
|
End: end,
|
||||||
|
Name: getTextContent(tableData[3]),
|
||||||
|
EventType: getTextContent(tableData[4]),
|
||||||
|
Notes: getTextContent(tableData[5]),
|
||||||
|
Prof: getTextContent(tableData[6]),
|
||||||
|
Rooms: getTextContent(tableData[8]),
|
||||||
|
BookedAt: getTextContent(tableData[10]),
|
||||||
|
Course: course,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return events
|
||||||
|
}
|
||||||
|
|
||||||
|
func splitEventType(events []model.Event) []model.Event {
|
||||||
|
for i, event := range events {
|
||||||
|
matched, _ := regexp.Match("^([VPS])([wp])$", []byte(event.EventType))
|
||||||
|
if matched {
|
||||||
|
eventType := event.EventType
|
||||||
|
event.EventType = eventType[0:1]
|
||||||
|
event.Compulsory = eventType[1:2]
|
||||||
|
events[i] = event
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return events
|
||||||
|
}
|
95
backend/service/fetch/v2/fetcher.go
Normal file
95
backend/service/fetch/v2/fetcher.go
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
package v2
|
||||||
|
|
||||||
|
import (
	"errors"
	"strconv"
	"strings"

	"github.com/google/uuid"
	"github.com/labstack/echo/v5"
	"github.com/pocketbase/pocketbase"
	"golang.org/x/net/html"

	"htwkalender/model"
	"htwkalender/service/db"
	"htwkalender/service/fetch"
)
|
||||||
|
|
||||||
|
func ParseEventsFromRemote(c echo.Context, app *pocketbase.PocketBase) error {
|
||||||
|
|
||||||
|
url := "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes"
|
||||||
|
|
||||||
|
// Fetch Webpage from URL
|
||||||
|
webpage, err := fetch.GetHTML(url)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse HTML to Node Tree
|
||||||
|
doc, err2 := parseHTML(err, webpage)
|
||||||
|
if err2 != nil {
|
||||||
|
return err2
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all event tables and all day labels
|
||||||
|
eventTables := getEventTables(doc)
|
||||||
|
allDayLabels := getAllDayLabels(doc)
|
||||||
|
|
||||||
|
if eventTables == nil || allDayLabels == nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
|
||||||
|
|
||||||
|
if eventsWithCombinedWeeks == nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
|
||||||
|
events := splitEventsBySingleWeek(splitEventsByWeekVal)
|
||||||
|
|
||||||
|
if events == nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search Semester String in Page Head
|
||||||
|
table := findFirstTable(doc)
|
||||||
|
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
|
||||||
|
|
||||||
|
semester, year := extractSemesterAndYear(semesterString)
|
||||||
|
events = convertWeeksToDates(events, semester, year)
|
||||||
|
events = generateUUIDs(events)
|
||||||
|
events = splitEventType(events)
|
||||||
|
|
||||||
|
var seminarGroup = model.SeminarGroup{
|
||||||
|
University: findFirstSpanWithClass(table, "header-1-0-0").FirstChild.Data,
|
||||||
|
Events: events,
|
||||||
|
}
|
||||||
|
|
||||||
|
if seminarGroup.Events == nil && seminarGroup.University == "" {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
savedRecords, dbError := db.SaveEvents(events, app)
|
||||||
|
|
||||||
|
if dbError != nil {
|
||||||
|
return dbError
|
||||||
|
} else {
|
||||||
|
savedRecordsLength := strconv.FormatInt(int64(len(savedRecords)), 10)
|
||||||
|
return c.JSON(200, "Successfully saved "+savedRecordsLength+" events")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseHTML(err error, webpage string) (*html.Node, error) {
|
||||||
|
doc, err := html.Parse(strings.NewReader(webpage))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return doc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateUUIDs(events []model.Event) []model.Event {
|
||||||
|
for i, event := range events {
|
||||||
|
// generate a hash value from the event name, course and semester
|
||||||
|
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+event.Course))
|
||||||
|
events[i].UUID = hash.String()
|
||||||
|
}
|
||||||
|
return events
|
||||||
|
}
|
324
backend/service/fetch/v2/htmlParsingFunctions.go
Normal file
324
backend/service/fetch/v2/htmlParsingFunctions.go
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
package v2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/pocketbase/pocketbase/tools/types"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
"htwkalender/model"
|
||||||
|
"htwkalender/service/date"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Find the first <table> element in the HTML document
|
||||||
|
func findFirstTable(node *html.Node) *html.Node {
|
||||||
|
if node.Type == html.ElementNode && node.Data == "table" {
|
||||||
|
return node
|
||||||
|
}
|
||||||
|
// Traverse child nodes recursively
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
found := findFirstTable(child)
|
||||||
|
if found != nil {
|
||||||
|
return found
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the first <span> element with the specified class attribute value
|
||||||
|
func findFirstSpanWithClass(node *html.Node, classValue string) *html.Node {
|
||||||
|
|
||||||
|
// Check if the current node is a <span> element with the specified class attribute value
|
||||||
|
if node.Type == html.ElementNode && node.Data == "span" {
|
||||||
|
if hasClassAttribute(node, classValue) {
|
||||||
|
return node
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Traverse child nodes recursively
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
found := findFirstSpanWithClass(child, classValue)
|
||||||
|
if found != nil {
|
||||||
|
return found
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the specified element has the specified class attribute value
|
||||||
|
func hasClassAttribute(node *html.Node, classValue string) bool {
|
||||||
|
for _, attr := range node.Attr {
|
||||||
|
if attr.Key == "class" && strings.Contains(attr.Val, classValue) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get Tables with days
|
||||||
|
func getEventTables(node *html.Node) [][]*html.Node {
|
||||||
|
var eventTables [][]*html.Node
|
||||||
|
tables := findTables(node)
|
||||||
|
// get all tables with events
|
||||||
|
for events := range tables {
|
||||||
|
rows := findTableRows(tables[events])
|
||||||
|
// check that a first row exists
|
||||||
|
if len(rows) > 0 {
|
||||||
|
rows = rows[1:]
|
||||||
|
eventTables = append(eventTables, rows)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return eventTables
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get Tables with days
|
||||||
|
func getAllDayLabels(node *html.Node) []string {
|
||||||
|
paragraphs := findParagraphs(node)
|
||||||
|
var dayArray []string
|
||||||
|
|
||||||
|
for _, p := range paragraphs {
|
||||||
|
label := getDayLabel(p)
|
||||||
|
if label != "" {
|
||||||
|
dayArray = append(dayArray, label)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dayArray
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find all <p> elements in the HTML document
|
||||||
|
func findParagraphs(node *html.Node) []*html.Node {
|
||||||
|
var paragraphs []*html.Node
|
||||||
|
|
||||||
|
if node.Type == html.ElementNode && node.Data == "p" {
|
||||||
|
paragraphs = append(paragraphs, node)
|
||||||
|
}
|
||||||
|
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
paragraphs = append(paragraphs, findParagraphs(child)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return paragraphs
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find all <tr> elements in <tbody>, excluding the first one
|
||||||
|
func findTableRows(node *html.Node) []*html.Node {
|
||||||
|
var tableRows []*html.Node
|
||||||
|
|
||||||
|
if node.Type == html.ElementNode && node.Data == "tbody" {
|
||||||
|
child := node.FirstChild
|
||||||
|
for child != nil {
|
||||||
|
if child.Type == html.ElementNode && child.Data == "tr" {
|
||||||
|
tableRows = append(tableRows, child)
|
||||||
|
}
|
||||||
|
child = child.NextSibling
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Traverse child nodes recursively
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
var tableRowElement = findTableRows(child)
|
||||||
|
if tableRowElement != nil {
|
||||||
|
tableRows = append(tableRows, tableRowElement...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if tableRows is nil
|
||||||
|
if tableRows == nil {
|
||||||
|
return []*html.Node{}
|
||||||
|
} else {
|
||||||
|
return tableRows
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find all <p> elements in the HTML document
|
||||||
|
func findTables(node *html.Node) []*html.Node {
|
||||||
|
var tables []*html.Node
|
||||||
|
|
||||||
|
if node.Type == html.ElementNode && node.Data == "table" {
|
||||||
|
tables = append(tables, node)
|
||||||
|
}
|
||||||
|
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
tables = append(tables, findDayTables(child)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tables
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find all <p> elements in the HTML document
|
||||||
|
func findDayTables(node *html.Node) []*html.Node {
|
||||||
|
var tables []*html.Node
|
||||||
|
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
tables = append(tables, findDayTables(child)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.Type == html.ElementNode && node.Data == "table" && hasClassAttribute(node, "spreadsheet") {
|
||||||
|
tables = append(tables, node)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tables
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the text content of the specified node and its descendants
|
||||||
|
func getDayLabel(node *html.Node) string {
|
||||||
|
|
||||||
|
child := node.FirstChild
|
||||||
|
if child != nil {
|
||||||
|
if child.Type == html.ElementNode && child.Data == "span" {
|
||||||
|
if child.FirstChild != nil {
|
||||||
|
return child.FirstChild.Data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find all <td> elements in the current <tr>
|
||||||
|
func findTableData(node *html.Node) []*html.Node {
|
||||||
|
var tableData []*html.Node
|
||||||
|
|
||||||
|
if node.Type == html.ElementNode && node.Data == "tr" {
|
||||||
|
child := node.FirstChild
|
||||||
|
for child != nil {
|
||||||
|
if child.Type == html.ElementNode && child.Data == "td" {
|
||||||
|
tableData = append(tableData, child)
|
||||||
|
}
|
||||||
|
child = child.NextSibling
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tableData
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the text content of the specified node and its descendants
|
||||||
|
func getTextContent(node *html.Node) string {
|
||||||
|
var textContent string
|
||||||
|
|
||||||
|
if node.Type == html.TextNode {
|
||||||
|
textContent = node.Data
|
||||||
|
}
|
||||||
|
|
||||||
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||||
|
textContent += getTextContent(child)
|
||||||
|
}
|
||||||
|
|
||||||
|
return textContent
|
||||||
|
}
|
||||||
|
|
||||||
|
func splitEventsByWeek(events []model.Event) []model.Event {
|
||||||
|
var newEvents []model.Event
|
||||||
|
|
||||||
|
for _, event := range events {
|
||||||
|
weeks := strings.Split(event.Week, ",")
|
||||||
|
for _, week := range weeks {
|
||||||
|
newEvent := event
|
||||||
|
newEvent.Week = strings.TrimSpace(week)
|
||||||
|
newEvents = append(newEvents, newEvent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return newEvents
|
||||||
|
}
|
||||||
|
|
||||||
|
func splitEventsBySingleWeek(events []model.Event) []model.Event {
|
||||||
|
var newEvents []model.Event
|
||||||
|
|
||||||
|
for _, event := range events {
|
||||||
|
if strings.Contains(event.Week, "-") {
|
||||||
|
weeks := splitWeekRange(event.Week)
|
||||||
|
for _, week := range weeks {
|
||||||
|
newEvent := event
|
||||||
|
newEvent.Week = week
|
||||||
|
newEvents = append(newEvents, newEvent)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
newEvents = append(newEvents, event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return newEvents
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitWeekRange expands a range written as "start-end" into the decimal
// strings of every week number from start to end inclusive. Whitespace
// around both numbers is ignored.
//
// It returns nil when weekRange does not consist of exactly two parts
// separated by "-", when either part is not an integer, or when start is
// greater than end.
func splitWeekRange(weekRange string) []string {
	bounds := strings.Split(weekRange, "-")
	if len(bounds) != 2 {
		return nil // not of the form "a-b"
	}

	first, err := strconv.Atoi(strings.TrimSpace(bounds[0]))
	if err != nil {
		return nil // start is not a number
	}
	last, err := strconv.Atoi(strings.TrimSpace(bounds[1]))
	if err != nil {
		return nil // end is not a number
	}

	var weeks []string
	for week := first; week <= last; week++ {
		weeks = append(weeks, strconv.Itoa(week))
	}
	return weeks
}
|
||||||
|
|
||||||
|
// extractSemesterAndYear reads the semester kind and the year from a header
// text such as "Wintersemester 2023".
//
// The first return value is "ws" when the text contains "Wintersemester",
// otherwise "ss" when it contains "Sommersemester". The second is the
// four-digit year that follows the semester word after exactly one
// whitespace character, or "" when no such year is present. When neither
// semester word occurs both results are "".
func extractSemesterAndYear(semesterString string) (string, string) {
	var name, shortcut string
	switch {
	case strings.Contains(semesterString, "Wintersemester"):
		name, shortcut = "Wintersemester", "ws"
	case strings.Contains(semesterString, "Sommersemester"):
		name, shortcut = "Sommersemester", "ss"
	default:
		return "", ""
	}

	// The capture group holds the digits of the first "<name> <year>" match.
	groups := regexp.MustCompile(name + `\s(\d{4})`).FindStringSubmatch(semesterString)
	if groups == nil {
		return shortcut, ""
	}
	return shortcut, groups[1]
}
|
||||||
|
|
||||||
|
func convertWeeksToDates(events []model.Event, semester string, year string) []model.Event {
|
||||||
|
var newEvents []model.Event
|
||||||
|
eventYear, _ := strconv.Atoi(year)
|
||||||
|
|
||||||
|
// for each event we need to calculate the start and end date based on the week and the year
|
||||||
|
for _, event := range events {
|
||||||
|
eventWeek, _ := strconv.Atoi(event.Week)
|
||||||
|
eventDay, _ := date.GetDateFromWeekNumber(eventYear, eventWeek, event.Day)
|
||||||
|
start := replaceTimeForDate(eventDay, event.Start.Time())
|
||||||
|
end := replaceTimeForDate(eventDay, event.End.Time())
|
||||||
|
|
||||||
|
//Check if end is before start
|
||||||
|
if end.Before(start) {
|
||||||
|
end = end.AddDate(0, 0, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
newEvent := event
|
||||||
|
newEvent.Start, _ = types.ParseDateTime(start.In(time.UTC))
|
||||||
|
newEvent.End, _ = types.ParseDateTime(end.In(time.UTC))
|
||||||
|
newEvent.Semester = semester
|
||||||
|
newEvents = append(newEvents, newEvent)
|
||||||
|
}
|
||||||
|
return newEvents
|
||||||
|
}
|
||||||
|
|
||||||
|
// replaceTimeForDate returns a time with the year, month, day and location
// of date and the hour, minute, second and nanosecond of replacementTime.
func replaceTimeForDate(date time.Time, replacementTime time.Time) time.Time {
	year, month, day := date.Date()
	hour, minute, second := replacementTime.Clock()
	return time.Date(year, month, day, hour, minute, second, replacementTime.Nanosecond(), date.Location())
}
|
Reference in New Issue
Block a user