From 921a2c7a281a77afec733143bc0b728efc2c7e63 Mon Sep 17 00:00:00 2001 From: masterElmar <18119527+masterElmar@users.noreply.github.com> Date: Thu, 30 Nov 2023 00:00:38 +0100 Subject: [PATCH 1/6] feat:#52 added new fetcher from new endpoint --- backend/service/addRoute.go | 26 +- backend/service/date/dateFormat.go | 16 +- backend/service/db/dbEvents.go | 36 +- backend/service/events/eventService.go | 12 +- backend/service/fetch/htmlDownloader.go | 44 +++ .../{ => v1}/fetchSeminarEventService.go | 53 +-- .../{ => v1}/fetchSeminarEventService_test.go | 2 +- .../{ => v1}/fetchSeminarGroupService.go | 2 +- .../fetch/{ => v1}/htmlParsingFunctions.go | 3 +- backend/service/fetch/v2/eventParser.go | 60 ++++ backend/service/fetch/v2/fetcher.go | 95 +++++ .../service/fetch/v2/htmlParsingFunctions.go | 324 ++++++++++++++++++ 12 files changed, 610 insertions(+), 63 deletions(-) create mode 100644 backend/service/fetch/htmlDownloader.go rename backend/service/fetch/{ => v1}/fetchSeminarEventService.go (88%) rename backend/service/fetch/{ => v1}/fetchSeminarEventService_test.go (99%) rename backend/service/fetch/{ => v1}/fetchSeminarGroupService.go (99%) rename backend/service/fetch/{ => v1}/htmlParsingFunctions.go (99%) create mode 100644 backend/service/fetch/v2/eventParser.go create mode 100644 backend/service/fetch/v2/fetcher.go create mode 100644 backend/service/fetch/v2/htmlParsingFunctions.go diff --git a/backend/service/addRoute.go b/backend/service/addRoute.go index b6f2e7d..b662273 100644 --- a/backend/service/addRoute.go +++ b/backend/service/addRoute.go @@ -2,7 +2,8 @@ package service import ( "htwkalender/service/events" - "htwkalender/service/fetch" + "htwkalender/service/fetch/v1" + v2 "htwkalender/service/fetch/v2" "htwkalender/service/ical" "htwkalender/service/room" "io" @@ -21,11 +22,28 @@ func AddRoutes(app *pocketbase.PocketBase) { Method: http.MethodGet, Path: "/api/fetchPlans", Handler: func(c echo.Context) error { - return fetch.GetSeminarEvents(c, app) 
+ return v1.GetSeminarEvents(c, app) + }, + Middlewares: []echo.MiddlewareFunc{ + apis.ActivityLogger(app), + //apis.RequireAdminAuth(), + }, + }) + if err != nil { + return err + } + return nil + }) + + app.OnBeforeServe().Add(func(e *core.ServeEvent) error { + _, err := e.Router.AddRoute(echo.Route{ + Method: http.MethodGet, + Path: "/api/v2/fetch", + Handler: func(c echo.Context) error { + return v2.ParseEventsFromRemote(c, app) }, Middlewares: []echo.MiddlewareFunc{ apis.ActivityLogger(app), - apis.RequireAdminAuth(), }, }) if err != nil { @@ -57,7 +75,7 @@ func AddRoutes(app *pocketbase.PocketBase) { Method: http.MethodGet, Path: "/api/fetchGroups", Handler: func(c echo.Context) error { - return fetch.SeminarGroups(c, app) + return v1.SeminarGroups(c, app) }, Middlewares: []echo.MiddlewareFunc{ apis.ActivityLogger(app), diff --git a/backend/service/date/dateFormat.go b/backend/service/date/dateFormat.go index 0fa9ee8..989c99b 100644 --- a/backend/service/date/dateFormat.go +++ b/backend/service/date/dateFormat.go @@ -1,6 +1,10 @@ package date -import "time" +import ( + "strconv" + "strings" + "time" +) func GetDateFromWeekNumber(year int, weekNumber int, dayName string) (time.Time, error) { // Create a time.Date for the first day of the year @@ -46,3 +50,13 @@ func GetDateFromWeekNumber(year int, weekNumber int, dayName string) (time.Time, return desiredDate, nil } + +// createEventFromTableData should create an event from the table data +// tableTime represents Hour and Minute like HH:MM +// tableDate returns a Time +func CreateTimeFromHourAndMinuteString(tableTime string) time.Time { + timeParts := strings.Split(tableTime, ":") + hour, _ := strconv.Atoi(timeParts[0]) + minute, _ := strconv.Atoi(timeParts[1]) + return time.Date(0, 0, 0, hour, minute, 0, 0, time.UTC) +} diff --git a/backend/service/db/dbEvents.go b/backend/service/db/dbEvents.go index e052b2e..3253525 100644 --- a/backend/service/db/dbEvents.go +++ b/backend/service/db/dbEvents.go @@ -7,7 +7,7 
@@ import ( "github.com/pocketbase/pocketbase" ) -func SaveEvents(seminarGroup []model.SeminarGroup, app *pocketbase.PocketBase) ([]model.Event, error) { +func SaveSeminarGroupEvents(seminarGroup []model.SeminarGroup, app *pocketbase.PocketBase) ([]model.Event, error) { var toBeSavedEvents model.Events var savedRecords model.Events @@ -43,6 +43,38 @@ func SaveEvents(seminarGroup []model.SeminarGroup, app *pocketbase.PocketBase) ( return savedRecords, nil } +func SaveEvents(events []model.Event, app *pocketbase.PocketBase) ([]model.Event, error) { + var toBeSavedEvents model.Events + var savedRecords model.Events + + // check if event is already in database and add to toBeSavedEvents if not + for _, event := range events { + existsInDatabase, err := findEventByDayWeekStartEndNameCourse(event, event.Course, app) + alreadyAddedToSave := toBeSavedEvents.Contains(event) + + if err != nil { + return nil, err + } + + if !existsInDatabase && !alreadyAddedToSave { + toBeSavedEvents = append(toBeSavedEvents, event) + } + } + + // create record for each event that's not already in the database + for _, event := range toBeSavedEvents { + event.MarkAsNew() + // auto mapping for event fields to record fields + err := app.Dao().Save(&event) + if err != nil { + return nil, err + } else { + savedRecords = append(savedRecords, event) + } + } + return savedRecords, nil +} + // check if event is already in database and return true if it is and false if it's not func findEventByDayWeekStartEndNameCourse(event model.Event, course string, app *pocketbase.PocketBase) (bool, error) { @@ -165,7 +197,7 @@ func GetAllModulesForCourse(app *pocketbase.PocketBase, course string, semester func GetAllModulesDistinctByNameAndCourse(app *pocketbase.PocketBase) (model.Events, error) { var events model.Events - err := app.Dao().DB().Select("*").From("events").GroupBy("Name", "course").Distinct(true).All(&events) + err := 
app.Dao().DB().Select("*").From("events").GroupBy("Name").Distinct(true).All(&events) if err != nil { print("Error while getting events from database: ", err) return nil, err diff --git a/backend/service/events/eventService.go b/backend/service/events/eventService.go index 43a6c35..d6dc97b 100644 --- a/backend/service/events/eventService.go +++ b/backend/service/events/eventService.go @@ -6,7 +6,7 @@ import ( "github.com/pocketbase/pocketbase/apis" "htwkalender/model" "htwkalender/service/db" - "htwkalender/service/fetch" + "htwkalender/service/fetch/v1" "htwkalender/service/functions" ) @@ -95,11 +95,11 @@ func UpdateModulesForCourse(app *pocketbase.PocketBase, course string) error { var courses []string courses = append(courses, course) - seminarGroups := fetch.GetSeminarGroupsEventsFromHTML(courses) + seminarGroups := v1.GetSeminarGroupsEventsFromHTML(courses) - seminarGroups = fetch.ClearEmptySeminarGroups(seminarGroups) + seminarGroups = v1.ClearEmptySeminarGroups(seminarGroups) - seminarGroups = fetch.ReplaceEmptyEventNames(seminarGroups) + seminarGroups = v1.ReplaceEmptyEventNames(seminarGroups) //check if events in the seminarGroups Events are already in the database //if yes, keep the database as it is @@ -122,7 +122,7 @@ func UpdateModulesForCourse(app *pocketbase.PocketBase, course string) error { //if there are no events in the database, save the new events if len(events) == 0 { - _, dbError := db.SaveEvents(seminarGroups, app) + _, dbError := db.SaveSeminarGroupEvents(seminarGroups, app) if dbError != nil { return apis.NewNotFoundError("Events could not be saved", dbError) } @@ -148,7 +148,7 @@ func UpdateModulesForCourse(app *pocketbase.PocketBase, course string) error { } //save the new events - _, dbError := db.SaveEvents(seminarGroups, app) + _, dbError := db.SaveSeminarGroupEvents(seminarGroups, app) if dbError != nil { return apis.NewNotFoundError("Events could not be saved", dbError) } diff --git a/backend/service/fetch/htmlDownloader.go 
b/backend/service/fetch/htmlDownloader.go new file mode 100644 index 0000000..8bd4d42 --- /dev/null +++ b/backend/service/fetch/htmlDownloader.go @@ -0,0 +1,44 @@ +package fetch + +import ( + "fmt" + "io" + "net/http" +) + +// getPlanHTML Get the HTML document from the specified URL + +func GetHTML(url string) (string, error) { + + // Send GET request + response, err := http.Get(url) + if err != nil { + fmt.Printf("Error occurred while making the request: %s\n", err.Error()) + return "", err + } + defer func(Body io.ReadCloser) { + err := Body.Close() + if err != nil { + return + } + }(response.Body) + + // Read the response body + body, err := io.ReadAll(response.Body) + + if err != nil { + fmt.Printf("Error occurred while reading the response: %s\n", err.Error()) + return "", err + } + + return toUtf8(body), err + +} + +func toUtf8(iso88591Buf []byte) string { + buf := make([]rune, len(iso88591Buf)) + for i, b := range iso88591Buf { + buf[i] = rune(b) + } + return string(buf) +} diff --git a/backend/service/fetch/fetchSeminarEventService.go b/backend/service/fetch/v1/fetchSeminarEventService.go similarity index 88% rename from backend/service/fetch/fetchSeminarEventService.go rename to backend/service/fetch/v1/fetchSeminarEventService.go index bf18258..bc1af4c 100644 --- a/backend/service/fetch/fetchSeminarEventService.go +++ b/backend/service/fetch/v1/fetchSeminarEventService.go @@ -1,11 +1,11 @@ -package fetch +package v1 import ( "fmt" "htwkalender/model" "htwkalender/service/date" "htwkalender/service/db" - "io" + "htwkalender/service/fetch" "net/http" "regexp" "strconv" @@ -30,7 +30,7 @@ func GetSeminarEvents(c echo.Context, app *pocketbase.PocketBase) error { seminarGroups = ReplaceEmptyEventNames(seminarGroups) - savedRecords, dbError := db.SaveEvents(seminarGroups, app) + savedRecords, dbError := db.SaveSeminarGroupEvents(seminarGroups, app) if dbError != nil { return apis.NewNotFoundError("Events could not be saved", dbError.Error()) @@ -64,13 +64,15 @@ 
func GetSeminarGroupsEventsFromHTML(seminarGroupsLabel []string) []model.Seminar var seminarGroups []model.SeminarGroup for _, seminarGroupLabel := range seminarGroupsLabel { - result, getError := getPlanHTML("ss", seminarGroupLabel) + ssUrl := "https://stundenplan.htwk-leipzig.de/" + string("ss") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65" + result, getError := fetch.GetHTML(ssUrl) if getError == nil { seminarGroup := parseSeminarGroup(result) seminarGroups = append(seminarGroups, seminarGroup) } - result, getError = getPlanHTML("ws", seminarGroupLabel) + wsUrl := "https://stundenplan.htwk-leipzig.de/" + string("ws") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65" + result, getError = fetch.GetHTML(wsUrl) if getError == nil { seminarGroup := parseSeminarGroup(result) seminarGroups = append(seminarGroups, seminarGroup) @@ -80,7 +82,6 @@ func GetSeminarGroupsEventsFromHTML(seminarGroupsLabel []string) []model.Seminar } func splitEventType(events []model.Event) []model.Event { - for i, event := range events { matched, _ := regexp.Match("^([VPS])([wp])$", []byte(event.EventType)) if matched { @@ -90,7 +91,6 @@ func splitEventType(events []model.Event) []model.Event { events[i] = event } } - return events } @@ -133,7 +133,6 @@ func generateUUIDs(events []model.Event, course string) []model.Event { events[i].UUID = hash.String() } return events - } // convertWeeksToDates converts the week and year to a date @@ -230,7 +229,6 @@ func toEvents(tables [][]*html.Node, days []string) []model.Event { } } - return events } @@ -296,40 +294,3 @@ func splitWeekRange(weekRange string) []string { return weeks } - -func toUtf8(iso88591Buf []byte) string { - buf := make([]rune, len(iso88591Buf)) - for i, b := range iso88591Buf { - buf[i] = rune(b) - } - return string(buf) -} - -// getPlanHTML Get the HTML document from the specified URL -func getPlanHTML(semester string, 
matrikel string) (string, error) { - url := "https://stundenplan.htwk-leipzig.de/" + string(semester) + "/Berichte/Text-Listen;Studenten-Sets;name;" + matrikel + "?template=sws_semgrp&weeks=1-65" - - // Send GET request - response, err := http.Get(url) - if err != nil { - fmt.Printf("Error occurred while making the request: %s\n", err.Error()) - return "", err - } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - return - } - }(response.Body) - - // Read the response body - body, err := io.ReadAll(response.Body) - - if err != nil { - fmt.Printf("Error occurred while reading the response: %s\n", err.Error()) - return "", err - } - - return toUtf8(body), err - -} diff --git a/backend/service/fetch/fetchSeminarEventService_test.go b/backend/service/fetch/v1/fetchSeminarEventService_test.go similarity index 99% rename from backend/service/fetch/fetchSeminarEventService_test.go rename to backend/service/fetch/v1/fetchSeminarEventService_test.go index e1b6948..3811b85 100644 --- a/backend/service/fetch/fetchSeminarEventService_test.go +++ b/backend/service/fetch/v1/fetchSeminarEventService_test.go @@ -1,4 +1,4 @@ -package fetch +package v1 import ( "fmt" diff --git a/backend/service/fetch/fetchSeminarGroupService.go b/backend/service/fetch/v1/fetchSeminarGroupService.go similarity index 99% rename from backend/service/fetch/fetchSeminarGroupService.go rename to backend/service/fetch/v1/fetchSeminarGroupService.go index 89c9b72..0d39595 100644 --- a/backend/service/fetch/fetchSeminarGroupService.go +++ b/backend/service/fetch/v1/fetchSeminarGroupService.go @@ -1,4 +1,4 @@ -package fetch +package v1 import ( "encoding/xml" diff --git a/backend/service/fetch/htmlParsingFunctions.go b/backend/service/fetch/v1/htmlParsingFunctions.go similarity index 99% rename from backend/service/fetch/htmlParsingFunctions.go rename to backend/service/fetch/v1/htmlParsingFunctions.go index dd3b8b2..8513359 100644 --- a/backend/service/fetch/htmlParsingFunctions.go 
+++ b/backend/service/fetch/v1/htmlParsingFunctions.go @@ -1,4 +1,4 @@ -package fetch +package v1 import ( "golang.org/x/net/html" @@ -182,7 +182,6 @@ func findTableData(node *html.Node) []*html.Node { child = child.NextSibling } } - return tableData } diff --git a/backend/service/fetch/v2/eventParser.go b/backend/service/fetch/v2/eventParser.go new file mode 100644 index 0000000..752bb24 --- /dev/null +++ b/backend/service/fetch/v2/eventParser.go @@ -0,0 +1,60 @@ +package v2 + +import ( + "github.com/pocketbase/pocketbase/tools/types" + "golang.org/x/net/html" + "htwkalender/model" + "htwkalender/service/date" + "regexp" + "strings" +) + +func toEvents(tables [][]*html.Node, days []string) []model.Event { + var events []model.Event + + for table := range tables { + for row := range tables[table] { + + tableData := findTableData(tables[table][row]) + if len(tableData) > 0 { + start, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[1]))) + end, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[2]))) + + courses := getTextContent(tableData[7]) + + if len(courses) > 0 { + for _, course := range strings.Split(courses, " ") { + events = append(events, model.Event{ + Day: days[table], + Week: getTextContent(tableData[0]), + Start: start, + End: end, + Name: getTextContent(tableData[3]), + EventType: getTextContent(tableData[4]), + Notes: getTextContent(tableData[5]), + Prof: getTextContent(tableData[6]), + Rooms: getTextContent(tableData[8]), + BookedAt: getTextContent(tableData[10]), + Course: course, + }) + } + } + } + } + + } + return events +} + +func splitEventType(events []model.Event) []model.Event { + for i, event := range events { + matched, _ := regexp.Match("^([VPS])([wp])$", []byte(event.EventType)) + if matched { + eventType := event.EventType + event.EventType = eventType[0:1] + event.Compulsory = eventType[1:2] + events[i] = event + } + } + return events +} diff --git 
a/backend/service/fetch/v2/fetcher.go b/backend/service/fetch/v2/fetcher.go new file mode 100644 index 0000000..6205927 --- /dev/null +++ b/backend/service/fetch/v2/fetcher.go @@ -0,0 +1,95 @@ +package v2 + +import ( + "github.com/google/uuid" + "github.com/labstack/echo/v5" + "github.com/pocketbase/pocketbase" + "golang.org/x/net/html" + "htwkalender/model" + "htwkalender/service/db" + "htwkalender/service/fetch" + "strconv" + "strings" +) + +func ParseEventsFromRemote(c echo.Context, app *pocketbase.PocketBase) error { + + url := "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes" + + // Fetch Webpage from URL + webpage, err := fetch.GetHTML(url) + if err != nil { + return err + } + + // Parse HTML to Node Tree + doc, err2 := parseHTML(err, webpage) + if err2 != nil { + return err2 + } + + // Get all event tables and all day labels + eventTables := getEventTables(doc) + allDayLabels := getAllDayLabels(doc) + + if eventTables == nil || allDayLabels == nil { + return err + } + + eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels) + + if eventsWithCombinedWeeks == nil { + return err + } + + splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks) + events := splitEventsBySingleWeek(splitEventsByWeekVal) + + if events == nil { + return err + } + + // Search Semester String in Page Head + table := findFirstTable(doc) + semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data + + semester, year := extractSemesterAndYear(semesterString) + events = convertWeeksToDates(events, semester, year) + events = generateUUIDs(events) + events = splitEventType(events) + + var seminarGroup = model.SeminarGroup{ + University: findFirstSpanWithClass(table, "header-1-0-0").FirstChild.Data, + Events: events, + } + + if seminarGroup.Events == nil && seminarGroup.University == "" { + 
return err + } + + savedRecords, dbError := db.SaveEvents(events, app) + + if dbError != nil { + return dbError + } else { + savedRecordsLength := strconv.FormatInt(int64(len(savedRecords)), 10) + return c.JSON(200, "Successfully saved "+savedRecordsLength+" events") + } +} + +func parseHTML(err error, webpage string) (*html.Node, error) { + doc, err := html.Parse(strings.NewReader(webpage)) + if err != nil { + return nil, err + } + return doc, nil +} + +func generateUUIDs(events []model.Event) []model.Event { + for i, event := range events { + // generate a hash value from the event name, course and semester + hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+event.Course)) + events[i].UUID = hash.String() + } + return events +} diff --git a/backend/service/fetch/v2/htmlParsingFunctions.go b/backend/service/fetch/v2/htmlParsingFunctions.go new file mode 100644 index 0000000..44be4cd --- /dev/null +++ b/backend/service/fetch/v2/htmlParsingFunctions.go @@ -0,0 +1,324 @@ +package v2 + +import ( + "github.com/pocketbase/pocketbase/tools/types" + "golang.org/x/net/html" + "htwkalender/model" + "htwkalender/service/date" + "regexp" + "strconv" + "strings" + "time" +) + +// Find the first element in the HTML document +func findFirstTable(node *html.Node) *html.Node { + if node.Type == html.ElementNode && node.Data == "table" { + return node + } + // Traverse child nodes recursively + for child := node.FirstChild; child != nil; child = child.NextSibling { + found := findFirstTable(child) + if found != nil { + return found + } + } + return nil +} + +// Find the first element with the specified class attribute value +func findFirstSpanWithClass(node *html.Node, classValue string) *html.Node { + + // Check if the current node is a element with the specified class attribute value + if node.Type == html.ElementNode && node.Data == "span" { + if hasClassAttribute(node, classValue) { + return node + } + } + + // Traverse child nodes recursively + for child := 
node.FirstChild; child != nil; child = child.NextSibling { + found := findFirstSpanWithClass(child, classValue) + if found != nil { + return found + } + } + return nil +} + +// Check if the specified element has the specified class attribute value +func hasClassAttribute(node *html.Node, classValue string) bool { + for _, attr := range node.Attr { + if attr.Key == "class" && strings.Contains(attr.Val, classValue) { + return true + } + } + return false +} + +// Get Tables with days +func getEventTables(node *html.Node) [][]*html.Node { + var eventTables [][]*html.Node + tables := findTables(node) + // get all tables with events + for events := range tables { + rows := findTableRows(tables[events]) + // check that a first row exists + if len(rows) > 0 { + rows = rows[1:] + eventTables = append(eventTables, rows) + } + } + return eventTables +} + +// Get Tables with days +func getAllDayLabels(node *html.Node) []string { + paragraphs := findParagraphs(node) + var dayArray []string + + for _, p := range paragraphs { + label := getDayLabel(p) + if label != "" { + dayArray = append(dayArray, label) + } + } + return dayArray +} + +// Find all

elements in the HTML document +func findParagraphs(node *html.Node) []*html.Node { + var paragraphs []*html.Node + + if node.Type == html.ElementNode && node.Data == "p" { + paragraphs = append(paragraphs, node) + } + + for child := node.FirstChild; child != nil; child = child.NextSibling { + paragraphs = append(paragraphs, findParagraphs(child)...) + } + + return paragraphs +} + +// Find all

elements in , excluding the first one +func findTableRows(node *html.Node) []*html.Node { + var tableRows []*html.Node + + if node.Type == html.ElementNode && node.Data == "tbody" { + child := node.FirstChild + for child != nil { + if child.Type == html.ElementNode && child.Data == "tr" { + tableRows = append(tableRows, child) + } + child = child.NextSibling + } + } + + // Traverse child nodes recursively + for child := node.FirstChild; child != nil; child = child.NextSibling { + var tableRowElement = findTableRows(child) + if tableRowElement != nil { + tableRows = append(tableRows, tableRowElement...) + } + } + + // check if tableRows is nil + if tableRows == nil { + return []*html.Node{} + } else { + return tableRows + } +} + +// Find all

elements in the HTML document +func findTables(node *html.Node) []*html.Node { + var tables []*html.Node + + if node.Type == html.ElementNode && node.Data == "table" { + tables = append(tables, node) + } + + for child := node.FirstChild; child != nil; child = child.NextSibling { + tables = append(tables, findDayTables(child)...) + } + + return tables +} + +// Find all

elements in the HTML document +func findDayTables(node *html.Node) []*html.Node { + var tables []*html.Node + + for child := node.FirstChild; child != nil; child = child.NextSibling { + tables = append(tables, findDayTables(child)...) + } + + if node.Type == html.ElementNode && node.Data == "table" && hasClassAttribute(node, "spreadsheet") { + tables = append(tables, node) + } + + return tables +} + +// Get the text content of the specified node and its descendants +func getDayLabel(node *html.Node) string { + + child := node.FirstChild + if child != nil { + if child.Type == html.ElementNode && child.Data == "span" { + if child.FirstChild != nil { + return child.FirstChild.Data + } + } + } + return "" +} + +// Find all

+func findTableData(node *html.Node) []*html.Node { + var tableData []*html.Node + + if node.Type == html.ElementNode && node.Data == "tr" { + child := node.FirstChild + for child != nil { + if child.Type == html.ElementNode && child.Data == "td" { + tableData = append(tableData, child) + } + child = child.NextSibling + } + } + + return tableData +} + +// Get the text content of the specified node and its descendants +func getTextContent(node *html.Node) string { + var textContent string + + if node.Type == html.TextNode { + textContent = node.Data + } + + for child := node.FirstChild; child != nil; child = child.NextSibling { + textContent += getTextContent(child) + } + + return textContent +} + +func splitEventsByWeek(events []model.Event) []model.Event { + var newEvents []model.Event + + for _, event := range events { + weeks := strings.Split(event.Week, ",") + for _, week := range weeks { + newEvent := event + newEvent.Week = strings.TrimSpace(week) + newEvents = append(newEvents, newEvent) + } + } + return newEvents +} + +func splitEventsBySingleWeek(events []model.Event) []model.Event { + var newEvents []model.Event + + for _, event := range events { + if strings.Contains(event.Week, "-") { + weeks := splitWeekRange(event.Week) + for _, week := range weeks { + newEvent := event + newEvent.Week = week + newEvents = append(newEvents, newEvent) + } + } else { + newEvents = append(newEvents, event) + } + } + return newEvents +} + +func splitWeekRange(weekRange string) []string { + parts := strings.Split(weekRange, "-") + if len(parts) != 2 { + return nil // Invalid format + } + + start, errStart := strconv.Atoi(strings.TrimSpace(parts[0])) + end, errEnd := strconv.Atoi(strings.TrimSpace(parts[1])) + + if errStart != nil || errEnd != nil { + return nil // Error converting to integers + } + + var weeks []string + for i := start; i <= end; i++ { + weeks = append(weeks, strconv.Itoa(i)) + } + + return weeks +} + +func extractSemesterAndYear(semesterString string) 
(string, string) { + winterPattern := "Wintersemester" + summerPattern := "Sommersemester" + + winterMatch := strings.Contains(semesterString, winterPattern) + summerMatch := strings.Contains(semesterString, summerPattern) + + semester := "" + semesterShortcut := "" + + if winterMatch { + semester = "Wintersemester" + semesterShortcut = "ws" + } else if summerMatch { + semester = "Sommersemester" + semesterShortcut = "ss" + } else { + return "", "" + } + + yearPattern := `\d{4}` + combinedPattern := semester + `\s` + yearPattern + re := regexp.MustCompile(combinedPattern) + match := re.FindString(semesterString) + year := "" + + if match != "" { + reYear := regexp.MustCompile(yearPattern) + year = reYear.FindString(match) + } + return semesterShortcut, year +} + +func convertWeeksToDates(events []model.Event, semester string, year string) []model.Event { + var newEvents []model.Event + eventYear, _ := strconv.Atoi(year) + + // for each event we need to calculate the start and end date based on the week and the year + for _, event := range events { + eventWeek, _ := strconv.Atoi(event.Week) + eventDay, _ := date.GetDateFromWeekNumber(eventYear, eventWeek, event.Day) + start := replaceTimeForDate(eventDay, event.Start.Time()) + end := replaceTimeForDate(eventDay, event.End.Time()) + + //Check if end is before start + if end.Before(start) { + end = end.AddDate(0, 0, 1) + } + + newEvent := event + newEvent.Start, _ = types.ParseDateTime(start.In(time.UTC)) + newEvent.End, _ = types.ParseDateTime(end.In(time.UTC)) + newEvent.Semester = semester + newEvents = append(newEvents, newEvent) + } + return newEvents +} + +// replaceTimeForDate replaces hour, minute, second, nsec for the selected date +func replaceTimeForDate(date time.Time, replacementTime time.Time) time.Time { + return time.Date(date.Year(), date.Month(), date.Day(), replacementTime.Hour(), replacementTime.Minute(), replacementTime.Second(), replacementTime.Nanosecond(), date.Location()) +} From 
cd663ac69df4856a0c0742509b6ea0d8ba6cc44a Mon Sep 17 00:00:00 2001 From: masterElmar <18119527+masterElmar@users.noreply.github.com> Date: Thu, 30 Nov 2023 00:01:34 +0100 Subject: [PATCH 2/6] bug:#52 volume mount raises db savings upto 2min more --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8e105d5..8c1f01b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,8 +9,8 @@ services: ports: - "8090:8090" command: "/htwkalender serve --http=0.0.0.0:8090 --dir=/pb_data" - volumes: - - ./backend/pb_data:/pb_data +# volumes: +# - ./backend/pb_data:/pb_data htwkalender-frontend: volumes: - ./frontend/src:/app/src From c9caa2aedc29f7cfc3c29fb64613e26a0af2f858 Mon Sep 17 00:00:00 2001 From: masterElmar <18119527+masterElmar@users.noreply.github.com> Date: Thu, 30 Nov 2023 23:31:04 +0100 Subject: [PATCH 3/6] feat:#52 added ss and ws month fix for fetching --- README.md | 6 +-- backend/service/addRoute.go | 59 ++++++++++------------------- backend/service/addSchedule.go | 2 +- backend/service/fetch/v2/fetcher.go | 58 +++++++++++++++++++--------- 4 files changed, 66 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 7d59d51..67fa014 100644 --- a/README.md +++ b/README.md @@ -48,15 +48,15 @@ This should be done quick in a few seconds (0-5s). When you execute the command again, it will update the groups in the database and only return new added groups. -http://127.0.0.1/api/fetchGroups +http://127.0.0.1/api/fetch/groups For fetching the plans, you can use the following command. This will fetch all plans for all groups and store the events in the database. It's done for all current existing events (ws/ss). -The whole process takes a while (1-5min), depending on the amount of groups and events. +The whole process takes a while (30s-2min), depending on the amount of groups and events. Stay for this time on the page and wait for the response. 
-http://127.0.0.1/api/fetchPlans +http://127.0.0.1/api/fetch/events ### View/Filter/Search in Admin UI diff --git a/backend/service/addRoute.go b/backend/service/addRoute.go index b662273..82c33de 100644 --- a/backend/service/addRoute.go +++ b/backend/service/addRoute.go @@ -2,7 +2,7 @@ package service import ( "htwkalender/service/events" - "htwkalender/service/fetch/v1" + v1 "htwkalender/service/fetch/v1" v2 "htwkalender/service/fetch/v2" "htwkalender/service/ical" "htwkalender/service/room" @@ -20,45 +20,10 @@ func AddRoutes(app *pocketbase.PocketBase) { app.OnBeforeServe().Add(func(e *core.ServeEvent) error { _, err := e.Router.AddRoute(echo.Route{ Method: http.MethodGet, - Path: "/api/fetchPlans", - Handler: func(c echo.Context) error { - return v1.GetSeminarEvents(c, app) - }, - Middlewares: []echo.MiddlewareFunc{ - apis.ActivityLogger(app), - //apis.RequireAdminAuth(), - }, - }) - if err != nil { - return err - } - return nil - }) - - app.OnBeforeServe().Add(func(e *core.ServeEvent) error { - _, err := e.Router.AddRoute(echo.Route{ - Method: http.MethodGet, - Path: "/api/v2/fetch", + Path: "/api/fetch/events", Handler: func(c echo.Context) error { return v2.ParseEventsFromRemote(c, app) }, - Middlewares: []echo.MiddlewareFunc{ - apis.ActivityLogger(app), - }, - }) - if err != nil { - return err - } - return nil - }) - - app.OnBeforeServe().Add(func(e *core.ServeEvent) error { - _, err := e.Router.AddRoute(echo.Route{ - Method: http.MethodDelete, - Path: "/api/modules", - Handler: func(c echo.Context) error { - return events.DeleteAllEvents(app) - }, Middlewares: []echo.MiddlewareFunc{ apis.ActivityLogger(app), apis.RequireAdminAuth(), @@ -73,7 +38,7 @@ func AddRoutes(app *pocketbase.PocketBase) { app.OnBeforeServe().Add(func(e *core.ServeEvent) error { _, err := e.Router.AddRoute(echo.Route{ Method: http.MethodGet, - Path: "/api/fetchGroups", + Path: "/api/fetch/groups", Handler: func(c echo.Context) error { return v1.SeminarGroups(c, app) }, @@ -88,6 +53,24 
@@ func AddRoutes(app *pocketbase.PocketBase) { return nil }) + app.OnBeforeServe().Add(func(e *core.ServeEvent) error { + _, err := e.Router.AddRoute(echo.Route{ + Method: http.MethodDelete, + Path: "/api/modules", + Handler: func(c echo.Context) error { + return events.DeleteAllEvents(app) + }, + Middlewares: []echo.MiddlewareFunc{ + apis.ActivityLogger(app), + apis.RequireAdminAuth(), + }, + }) + if err != nil { + return err + } + return nil + }) + app.OnBeforeServe().Add(func(e *core.ServeEvent) error { _, err := e.Router.AddRoute(echo.Route{ Method: http.MethodGet, diff --git a/backend/service/addSchedule.go b/backend/service/addSchedule.go index bd0cd96..e3df317 100644 --- a/backend/service/addSchedule.go +++ b/backend/service/addSchedule.go @@ -17,7 +17,7 @@ func AddSchedules(app *pocketbase.PocketBase) { // Every hour update all courses (5 segments - minute, hour, day, month, weekday) "0 * * * *" // Every three hours update all courses (5 segments - minute, hour, day, month, weekday) "0 */3 * * *" // Every 10 minutes update all courses (5 segments - minute, hour, day, month, weekday) "*/10 * * * *" - scheduler.MustAdd("updateCourse", "0 */3 * * *", func() { + scheduler.MustAdd("updateCourse", "*/10 * * * *", func() { course.UpdateCourse(app) }) diff --git a/backend/service/fetch/v2/fetcher.go b/backend/service/fetch/v2/fetcher.go index 6205927..b163d54 100644 --- a/backend/service/fetch/v2/fetcher.go +++ b/backend/service/fetch/v2/fetcher.go @@ -10,22 +10,55 @@ import ( "htwkalender/service/fetch" "strconv" "strings" + "time" ) func ParseEventsFromRemote(c echo.Context, app *pocketbase.PocketBase) error { - url := "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes" + err, savedRecords := FetchAllEventsAndSave(app) + if err != nil { + return err + } else { + savedRecordsLength := 
strconv.FormatInt(int64(len(savedRecords)), 10) + return c.JSON(200, "Successfully saved "+savedRecordsLength+" events") + } +} + +func FetchAllEventsAndSave(app *pocketbase.PocketBase) (error, []model.Event) { + var err error + var savedRecords []model.Event + var events []model.Event + + if (time.Now().Month() >= 3) && (time.Now().Month() <= 10) { + url := "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes" + events, err = parseEventForOneSemester(url) + savedEvents, dbError := db.SaveEvents(events, app) + err = dbError + savedRecords = append(savedEvents, events...) + } + + if (time.Now().Month() >= 9) || (time.Now().Month() <= 4) { + url := "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes" + events, err = parseEventForOneSemester(url) + savedEvents, dbError := db.SaveEvents(events, app) + err = dbError + savedRecords = append(savedEvents, events...) 
+ } + return err, savedRecords +} + +func parseEventForOneSemester(url string) ([]model.Event, error) { // Fetch Webpage from URL webpage, err := fetch.GetHTML(url) if err != nil { - return err + return nil, err } // Parse HTML to Node Tree doc, err2 := parseHTML(err, webpage) if err2 != nil { - return err2 + return nil, err2 } // Get all event tables and all day labels @@ -33,26 +66,24 @@ func ParseEventsFromRemote(c echo.Context, app *pocketbase.PocketBase) error { allDayLabels := getAllDayLabels(doc) if eventTables == nil || allDayLabels == nil { - return err + return nil, err } eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels) if eventsWithCombinedWeeks == nil { - return err + return nil, err } splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks) events := splitEventsBySingleWeek(splitEventsByWeekVal) if events == nil { - return err + return nil, err } - // Search Semester String in Page Head table := findFirstTable(doc) semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data - semester, year := extractSemesterAndYear(semesterString) events = convertWeeksToDates(events, semester, year) events = generateUUIDs(events) @@ -64,17 +95,10 @@ func ParseEventsFromRemote(c echo.Context, app *pocketbase.PocketBase) error { } if seminarGroup.Events == nil && seminarGroup.University == "" { - return err + return nil, err } - savedRecords, dbError := db.SaveEvents(events, app) - - if dbError != nil { - return dbError - } else { - savedRecordsLength := strconv.FormatInt(int64(len(savedRecords)), 10) - return c.JSON(200, "Successfully saved "+savedRecordsLength+" events") - } + return events, nil } func parseHTML(err error, webpage string) (*html.Node, error) { From 03d393c7d9653b1cf6c901bd77ed1dd7f7005d50 Mon Sep 17 00:00:00 2001 From: masterElmar <18119527+masterElmar@users.noreply.github.com> Date: Thu, 30 Nov 2023 23:38:18 +0100 Subject: [PATCH 4/6] feat:#52 added lower timeout for http request --- 
backend/service/fetch/htmlDownloader.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/backend/service/fetch/htmlDownloader.go b/backend/service/fetch/htmlDownloader.go index 8bd4d42..c69817e 100644 --- a/backend/service/fetch/htmlDownloader.go +++ b/backend/service/fetch/htmlDownloader.go @@ -4,14 +4,20 @@ import ( "fmt" "io" "net/http" + "time" ) // getPlanHTML Get the HTML document from the specified URL func GetHTML(url string) (string, error) { + // Create HTTP client with timeout of 30 seconds + client := http.Client{ + Timeout: 30 * time.Second, + } + // Send GET request - response, err := http.Get(url) + response, err := client.Get(url) if err != nil { fmt.Printf("Error occurred while making the request: %s\n", err.Error()) return "", err From b1e253f8e69ce013db554a13b833719df2693716 Mon Sep 17 00:00:00 2001 From: masterElmar <18119527+masterElmar@users.noreply.github.com> Date: Thu, 30 Nov 2023 23:45:46 +0100 Subject: [PATCH 5/6] fix:#52 changed docker volume mount --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8c1f01b..8e105d5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,8 +9,8 @@ services: ports: - "8090:8090" command: "/htwkalender serve --http=0.0.0.0:8090 --dir=/pb_data" -# volumes: -# - ./backend/pb_data:/pb_data + volumes: + - ./backend/pb_data:/pb_data htwkalender-frontend: volumes: - ./frontend/src:/app/src From dccb279df370f66a69fb53de4a3126e04cbd6e35 Mon Sep 17 00:00:00 2001 From: masterElmar <18119527+masterElmar@users.noreply.github.com> Date: Thu, 30 Nov 2023 23:46:09 +0100 Subject: [PATCH 6/6] fix:#52 schedule and ss html link --- backend/service/addSchedule.go | 2 +- backend/service/fetch/v2/fetcher.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/service/addSchedule.go b/backend/service/addSchedule.go index e3df317..bd0cd96 100644 --- a/backend/service/addSchedule.go +++
b/backend/service/addSchedule.go @@ -17,7 +17,7 @@ func AddSchedules(app *pocketbase.PocketBase) { // Every hour update all courses (5 segments - minute, hour, day, month, weekday) "0 * * * *" // Every three hours update all courses (5 segments - minute, hour, day, month, weekday) "0 */3 * * *" // Every 10 minutes update all courses (5 segments - minute, hour, day, month, weekday) "*/10 * * * *" - scheduler.MustAdd("updateCourse", "*/10 * * * *", func() { + scheduler.MustAdd("updateCourse", "0 */3 * * *", func() { course.UpdateCourse(app) }) diff --git a/backend/service/fetch/v2/fetcher.go b/backend/service/fetch/v2/fetcher.go index b163d54..6cafd63 100644 --- a/backend/service/fetch/v2/fetcher.go +++ b/backend/service/fetch/v2/fetcher.go @@ -31,7 +31,7 @@ func FetchAllEventsAndSave(app *pocketbase.PocketBase) (error, []model.Event) { var events []model.Event if (time.Now().Month() >= 3) && (time.Now().Month() <= 10) { - url := "https://stundenplan.htwk-leipzig.de/ws/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes" + url := "https://stundenplan.htwk-leipzig.de/ss/Berichte/Text-Listen;Veranstaltungsarten;name;Vp%0AVw%0AV%0ASp%0ASw%0AS%0APp%0APw%0AP%0AZV%0ATut%0ASperr%0Apf%0Awpf%0Afak%0A%0A?&template=sws_modul&weeks=1-65&combined=yes" events, err = parseEventForOneSemester(url) savedEvents, dbError := db.SaveEvents(events, app) err = dbError
elements in the current