mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2025-08-03 18:29:14 +02:00
feat:#34 refactored function to intended service, fixed docker files
This commit is contained in:
66
services/data-manager/service/fetch/htmlDownloader.go
Normal file
66
services/data-manager/service/fetch/htmlDownloader.go
Normal file
@@ -0,0 +1,66 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package fetch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// getPlanHTML Get the HTML document from the specified URL
|
||||
|
||||
func GetHTML(url string) (string, error) {
|
||||
|
||||
// Create HTTP client with timeout of 5 seconds
|
||||
client := http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
}
|
||||
|
||||
// Send GET request
|
||||
response, err := client.Get(url)
|
||||
if err != nil {
|
||||
fmt.Printf("Error occurred while making the request: %s\n", err.Error())
|
||||
return "", err
|
||||
}
|
||||
defer func(Body io.ReadCloser) {
|
||||
err := Body.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}(response.Body)
|
||||
|
||||
// Read the response body
|
||||
body, err := io.ReadAll(response.Body)
|
||||
|
||||
if err != nil {
|
||||
fmt.Printf("Error occurred while reading the response: %s\n", err.Error())
|
||||
return "", err
|
||||
}
|
||||
|
||||
return toUtf8(body), err
|
||||
|
||||
}
|
||||
|
||||
// toUtf8 converts a buffer of single-byte characters into a Go string by
// turning every byte into the rune with the same numeric value. As the
// parameter name says, the input is expected to be ISO 8859-1 encoded.
func toUtf8(iso88591Buf []byte) string {
	runes := make([]rune, 0, len(iso88591Buf))
	for idx := 0; idx < len(iso88591Buf); idx++ {
		runes = append(runes, rune(iso88591Buf[idx]))
	}
	return string(runes)
}
|
572
services/data-manager/service/fetch/sport/sportFetcher.go
Normal file
572
services/data-manager/service/fetch/sport/sportFetcher.go
Normal file
@@ -0,0 +1,572 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package sport
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/google/uuid"
|
||||
"github.com/pocketbase/pocketbase"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/db"
|
||||
"htwkalender/service/functions"
|
||||
clock "htwkalender/service/functions/time"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// FetchAndUpdateSportEvents fetches all sport events from the HTWK sport website
|
||||
// it deletes them first and then saves them to the database
|
||||
// It returns all saved events
|
||||
func FetchAndUpdateSportEvents(app *pocketbase.PocketBase) ([]model.Event, error) {
|
||||
|
||||
sportCourseLinks, err := fetchAllAvailableSportCourses()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sportEntries := fetchHTWKSportCourses(sportCourseLinks)
|
||||
events := formatEntriesToEvents(sportEntries)
|
||||
|
||||
var earliestDate time.Time
|
||||
var latestDate time.Time
|
||||
|
||||
// find earliest and latest date in events
|
||||
for _, event := range events {
|
||||
if event.Start.Time().Before(earliestDate) {
|
||||
earliestDate = event.Start.Time()
|
||||
}
|
||||
if event.End.Time().After(latestDate) {
|
||||
latestDate = event.End.Time()
|
||||
}
|
||||
}
|
||||
|
||||
// get all events from database where name = Feiertage und lehrveranstaltungsfreie Tage
|
||||
holidays, err := db.GetAllModulesByNameAndDateRange(app, "Feiertage und lehrveranstaltungsfreie Tage", earliestDate, latestDate)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// remove all events that have same year, month and day as items in holidays
|
||||
for _, holiday := range holidays {
|
||||
for i, event := range events {
|
||||
if event.Start.Time().Year() == holiday.Start.Time().Year() &&
|
||||
event.Start.Time().Month() == holiday.Start.Time().Month() &&
|
||||
event.Start.Time().Day() == holiday.Start.Time().Day() {
|
||||
events = append(events[:i], events[i+1:]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// @TODO: delete and save events in one transaction and it only should delete events that are not in the new events list and save events that are not in the database
|
||||
err = db.DeleteAllEventsByCourse(app, "Sport", functions.GetCurrentSemesterString(clock.RealClock{}))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// save events to database
|
||||
savedEvents, err := db.SaveEvents(events, app)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return savedEvents, nil
|
||||
|
||||
}
|
||||
|
||||
func formatEntriesToEvents(entries []model.SportEntry) []model.Event {
|
||||
|
||||
var events []model.Event
|
||||
|
||||
for _, entry := range entries {
|
||||
eventStarts, eventEnds := getWeekEvents(entry.Details.DateRange.Start, entry.Details.DateRange.End, entry.Details.Cycle)
|
||||
for j := range eventStarts {
|
||||
|
||||
start, _ := types.ParseDateTime(eventStarts[j].In(time.UTC))
|
||||
end, _ := types.ParseDateTime(eventEnds[j].In(time.UTC))
|
||||
|
||||
var event = model.Event{
|
||||
UUID: uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+entry.ID+entry.Details.Type)).String(),
|
||||
Day: toGermanWeekdayString(start.Time().Weekday()),
|
||||
Week: strconv.Itoa(23),
|
||||
Start: start,
|
||||
End: end,
|
||||
Name: entry.Title + " (" + entry.ID + ")",
|
||||
EventType: entry.Details.Type,
|
||||
Prof: entry.Details.CourseLead.Name,
|
||||
Rooms: entry.Details.Location.Name,
|
||||
Notes: entry.AdditionalNote,
|
||||
BookedAt: "",
|
||||
Course: "Sport",
|
||||
Semester: checkSemester(entry.Details.DateRange.Start),
|
||||
}
|
||||
events = append(events, event)
|
||||
}
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
// getDayInt maps a two-letter German weekday abbreviation to its weekday
// number, with "So" (Sunday) as 0 and "Sa" (Saturday) as 6.
// Any other input yields -1 and the error "no day found".
func getDayInt(weekDay string) (int, error) {
	switch weekDay {
	case "So":
		return 0, nil
	case "Mo":
		return 1, nil
	case "Di":
		return 2, nil
	case "Mi":
		return 3, nil
	case "Do":
		return 4, nil
	case "Fr":
		return 5, nil
	case "Sa":
		return 6, nil
	}
	return -1, errors.New("no day found")
}
|
||||
|
||||
// toGermanWeekdayString returns the German name of weekday, or an empty
// string when weekday is not one of time.Sunday through time.Saturday.
func toGermanWeekdayString(weekday time.Weekday) string {
	germanNames := [...]string{
		time.Sunday:    "Sonntag",
		time.Monday:    "Montag",
		time.Tuesday:   "Dienstag",
		time.Wednesday: "Mittwoch",
		time.Thursday:  "Donnerstag",
		time.Friday:    "Freitag",
		time.Saturday:  "Samstag",
	}

	if weekday < time.Sunday || weekday > time.Saturday {
		return ""
	}
	return germanNames[weekday]
}
|
||||
|
||||
// clockTimePattern matches a clock time written as two digits, a colon and
// two digits (HH:MM). It is compiled once instead of on every call.
var clockTimePattern = regexp.MustCompile("[0-9]{2}:[0-9]{2}")

// extractStartAndEndTime reads the first two HH:MM values from cycle and
// returns them as start hour, start minute, end hour and end minute.
//
// When cycle contains fewer than two HH:MM values the problem is logged and
// four zeros are returned instead of panicking on a missing match.
func extractStartAndEndTime(cycle string) (int, int, int, int) {
	times := clockTimePattern.FindAllString(cycle, 2)
	if len(times) < 2 {
		slog.Error("Error while extracting start and end time from cycle", "cycle", cycle)
		return 0, 0, 0, 0
	}

	// Every match is exactly "HH:MM" made of ASCII digits, so Atoi cannot fail.
	startHour, _ := strconv.Atoi(times[0][0:2])
	startMinute, _ := strconv.Atoi(times[0][3:5])

	endHour, _ := strconv.Atoi(times[1][0:2])
	endMinute, _ := strconv.Atoi(times[1][3:5])

	return startHour, startMinute, endHour, endMinute
}
|
||||
|
||||
func getWeekEvents(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) {
|
||||
var weekEvents []model.SportDayStartEnd
|
||||
// split by regexp to get the cycle parts
|
||||
var cycleParts = splitByCommaWithTime(cycle)
|
||||
for _, cyclePart := range cycleParts {
|
||||
|
||||
//cut string at the first integer/number
|
||||
cyclePartWithDaysOnly := cyclePart[0:strings.IndexFunc(cyclePart, func(r rune) bool { return r >= '0' && r <= '9' })]
|
||||
|
||||
// check if cycle has multiple days by checking if it has a plus sign
|
||||
if strings.Contains(cyclePartWithDaysOnly, "+") {
|
||||
// find all days in cycle part by regexp
|
||||
dayRegExp, _ := regexp.Compile("[A-Z][a-z]")
|
||||
days := dayRegExp.FindAllString(cyclePart, -1)
|
||||
startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart)
|
||||
|
||||
// creating a SportDayStartEnd for each day in the cycle
|
||||
for _, day := range days {
|
||||
weekDay, err := getDayInt(day)
|
||||
if err != nil {
|
||||
slog.Error("Error while getting day int: "+day+" ", "error", err)
|
||||
} else {
|
||||
weekEvents = append(weekEvents, model.SportDayStartEnd{
|
||||
Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()),
|
||||
End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()),
|
||||
Day: time.Weekday(weekDay),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// check if cycle has multiple days by checking if it has a minus sign
|
||||
if strings.Contains(cyclePartWithDaysOnly, "-") {
|
||||
// find all days in cycle part by regexp
|
||||
dayRegExp, _ := regexp.Compile("[A-Z][a-z]")
|
||||
days := dayRegExp.FindAllString(cyclePart, 2)
|
||||
startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart)
|
||||
|
||||
var startI, endI int
|
||||
var endIErr, startIErr error
|
||||
startI, startIErr = getDayInt(days[0])
|
||||
endI, endIErr = getDayInt(days[1])
|
||||
|
||||
if endIErr != nil || startIErr != nil {
|
||||
slog.Error("StartError while getting day int: "+days[0]+" - "+days[1]+" :", "error", startIErr)
|
||||
slog.Error("EndError while getting day int: "+days[0]+" - "+days[1]+" :", "error", endIErr)
|
||||
} else {
|
||||
//create a int array with all days from start to end day
|
||||
var daysBetween []int
|
||||
for i := startI; i <= endI; i++ {
|
||||
daysBetween = append(daysBetween, i)
|
||||
}
|
||||
|
||||
// creating a SportDayStartEnd for each day in the cycle
|
||||
weekEvents = createEventListFromStartToEndMatchingDay23(daysBetween, start, startHour, startMinute, end, endHour, endMinute)
|
||||
}
|
||||
}
|
||||
|
||||
// check if cycle has only one day
|
||||
if !strings.Contains(cyclePartWithDaysOnly, "-") && !strings.Contains(cyclePartWithDaysOnly, "+") {
|
||||
// find all days in cycle part by regexp
|
||||
dayRegExp, _ := regexp.Compile("[A-Z][a-z]")
|
||||
days := dayRegExp.FindAllString(cyclePart, -1)
|
||||
startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart)
|
||||
|
||||
var dayNumbers []int
|
||||
for _, day := range days {
|
||||
|
||||
dayInt, err := getDayInt(day)
|
||||
if err != nil {
|
||||
slog.Error("Error while getting day int: "+day+" ", "error", err)
|
||||
} else {
|
||||
dayNumbers = append(dayNumbers, dayInt)
|
||||
}
|
||||
}
|
||||
|
||||
// creating a SportDayStartEnd for each day in the cycle
|
||||
weekEvents = append(weekEvents, createEventListFromStartToEndMatchingDay23(dayNumbers, start, startHour, startMinute, end, endHour, endMinute)...)
|
||||
for _, day := range days {
|
||||
|
||||
weekDay, err := getDayInt(day)
|
||||
if err != nil {
|
||||
slog.Error("Error while getting day int: "+day+" ", "error", err)
|
||||
} else {
|
||||
weekEvents = append(weekEvents, model.SportDayStartEnd{
|
||||
Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()),
|
||||
End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()),
|
||||
Day: time.Weekday(weekDay),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var startDatesList []time.Time
|
||||
var endDatesList []time.Time
|
||||
|
||||
for _, weekEvent := range weekEvents {
|
||||
startDates, endDates := createEventListFromStartToEndMatchingDay(weekEvent)
|
||||
startDatesList = append(startDatesList, startDates...)
|
||||
endDatesList = append(endDatesList, endDates...)
|
||||
}
|
||||
|
||||
return startDatesList, endDatesList
|
||||
}
|
||||
|
||||
// creating a SportDayStartEnd for each day in the cycle
|
||||
func createEventListFromStartToEndMatchingDay23(days []int, start time.Time, startHour int, startMinute int, end time.Time, endHour int, endMinute int) []model.SportDayStartEnd {
|
||||
|
||||
var weekEvents []model.SportDayStartEnd
|
||||
|
||||
for _, day := range days {
|
||||
weekEvents = append(weekEvents, model.SportDayStartEnd{
|
||||
Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()),
|
||||
End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()),
|
||||
Day: time.Weekday(day),
|
||||
})
|
||||
}
|
||||
return weekEvents
|
||||
}
|
||||
|
||||
func createEventListFromStartToEndMatchingDay(weekEvent model.SportDayStartEnd) ([]time.Time, []time.Time) {
|
||||
var startDates []time.Time
|
||||
var endDates []time.Time
|
||||
for d := weekEvent.Start; d.Before(weekEvent.End); d = d.AddDate(0, 0, 1) {
|
||||
if d.Weekday() == weekEvent.Day {
|
||||
startDates = append(startDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.Start.Hour(), weekEvent.Start.Minute(), 0, 0, d.Location()))
|
||||
endDates = append(endDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.End.Hour(), weekEvent.End.Minute(), 0, 0, d.Location()))
|
||||
}
|
||||
}
|
||||
return startDates, endDates
|
||||
}
|
||||
|
||||
// splitByCommaWithTime splits input at ", " and regroups the pieces so that
// every result element ends with a piece that carries a time span.
//
// Pieces without a time span (e.g. "Mo" in "Mo, Mi 18:00-20:00") are collected
// and joined with ", " to the next piece that has one. Pieces left over at the
// end are returned as one final element.
func splitByCommaWithTime(input string) []string {
	// A day (or "+"-joined days) followed by a space and HH:MM-HH:MM.
	dayWithTime := regexp.MustCompile(`([A-Za-z]{2,}(\+[A-Za-z]{2,})* \d{2}:\d{2}-\d{2}:\d{2})`)

	var result []string
	pending := ""

	for _, piece := range strings.Split(input, ", ") {
		// Attach the piece to whatever has been collected so far.
		combined := piece
		if pending != "" {
			combined = pending + ", " + piece
		}

		if dayWithTime.MatchString(piece) {
			result = append(result, combined)
			pending = ""
			continue
		}
		pending = combined
	}

	// Flush pieces that never met a time span.
	if pending != "" {
		result = append(result, pending)
	}

	return result
}
|
||||
|
||||
// checkSemester returns "ss" when the month of date lies between April and
// September (both included) and "ws" for every other month.
func checkSemester(date time.Time) string {
	month := date.Month()
	if month < time.April || month > time.September {
		return "ws"
	}
	return "ss"
}
|
||||
|
||||
// fetch the main page where all sport courses are listed and extract all links to the sport courses
|
||||
func fetchAllAvailableSportCourses() ([]string, error) {
|
||||
var url = "https://sport.htwk-leipzig.de/sportangebote"
|
||||
|
||||
var doc, err = htmlRequest(url)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Error while fetching sport courses from webpage", "error", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// link list of all sport courses
|
||||
var links []string
|
||||
|
||||
// find all links to sport courses with regex https://sport.htwk-leipzig.de/sportangebote/detail/sport/ + [0-9]{1,4}
|
||||
doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
|
||||
link, _ := s.Attr("href")
|
||||
if strings.HasPrefix(link, "/sportangebote/detail/sport/") {
|
||||
links = append(links, link)
|
||||
}
|
||||
})
|
||||
|
||||
return links, nil
|
||||
}
|
||||
|
||||
// fetchAllHTWKSportCourses fetches all sport courses from the given links.
|
||||
// to speed up the process, it uses multithreading.
|
||||
|
||||
func fetchHTWKSportCourses(links []string) []model.SportEntry {
|
||||
|
||||
//multithreaded webpage requests to speed up the process
|
||||
|
||||
var maxThreads = 10
|
||||
var htmlPageArray = make([]*goquery.Document, len(links))
|
||||
var hostUrl = "https://sport.htwk-leipzig.de"
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(maxThreads)
|
||||
for i := 0; i < maxThreads; i++ {
|
||||
go func(i int) {
|
||||
for j := i; j < len(links); j += maxThreads {
|
||||
doc, err := htmlRequest(hostUrl + links[j])
|
||||
if err == nil {
|
||||
htmlPageArray[j] = doc
|
||||
}
|
||||
}
|
||||
wg.Done()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
var events []model.SportEntry
|
||||
|
||||
for _, doc := range htmlPageArray {
|
||||
if doc != nil {
|
||||
event, err := fetchHtwkSportCourse(doc)
|
||||
if err == nil {
|
||||
events = append(events, event...)
|
||||
}
|
||||
}
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
func htmlRequest(url string) (*goquery.Document, error) {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func(Body io.ReadCloser) {
|
||||
readErr := Body.Close()
|
||||
if readErr != nil {
|
||||
slog.Error("Error while closing response body from html request", "error", readErr)
|
||||
return
|
||||
}
|
||||
}(resp.Body)
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
// fetchHtwkSportCourse fetches the sport course from the given url and id.
|
||||
// If the sport course does not exist, it will return an error.
|
||||
// If the sport course exists, it will return the sport course.
|
||||
// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse.
|
||||
// May be improved in the future.
|
||||
func fetchHtwkSportCourse(doc *goquery.Document) ([]model.SportEntry, error) {
|
||||
var events []model.SportEntry
|
||||
germanTime, _ := time.LoadLocation("Europe/Berlin")
|
||||
|
||||
if doc.Find("h1").Text() == "Aktuelle Sportangebote" {
|
||||
return nil, errors.New("not a sport course page")
|
||||
}
|
||||
|
||||
doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) {
|
||||
var event model.SportEntry
|
||||
var details model.EventDetails
|
||||
|
||||
fullTitle := strings.TrimSpace(s.Find("h3").Text())
|
||||
titleParts := strings.Split(fullTitle, "-")
|
||||
if len(titleParts) > 0 {
|
||||
event.Title = strings.TrimSpace(titleParts[0])
|
||||
}
|
||||
|
||||
if len(titleParts) > 2 {
|
||||
details.Type = strings.TrimSpace(titleParts[len(titleParts)-1])
|
||||
}
|
||||
|
||||
event.ID = parseEventID(fullTitle)
|
||||
|
||||
s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) {
|
||||
key := strings.TrimSpace(s.Find("td").First().Text())
|
||||
value := strings.TrimSpace(s.Find("td").Last().Text())
|
||||
|
||||
switch key {
|
||||
case "Zeitraum":
|
||||
dates := strings.Split(value, "-")
|
||||
if len(dates) == 2 {
|
||||
startDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[0]), germanTime)
|
||||
endDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[1]), germanTime)
|
||||
details.DateRange = model.DateRange{Start: startDate, End: endDate}
|
||||
}
|
||||
case "Zyklus":
|
||||
details.Cycle = value
|
||||
case "Geschlecht":
|
||||
details.Gender = value
|
||||
case "Leiter":
|
||||
leaderName := strings.TrimSpace(s.Find("td a").Text())
|
||||
leadersSlice := strings.Split(leaderName, "\n")
|
||||
for i, leader := range leadersSlice {
|
||||
leadersSlice[i] = strings.TrimSpace(leader)
|
||||
}
|
||||
formattedLeaders := strings.Join(leadersSlice, ", ")
|
||||
leaderLink, _ := s.Find("td a").Attr("href")
|
||||
details.CourseLead = model.CourseLead{Name: formattedLeaders, Link: leaderLink}
|
||||
case "Ort":
|
||||
locationDetails := strings.Split(value, "(")
|
||||
if len(locationDetails) == 2 {
|
||||
details.Location = model.Location{
|
||||
Name: strings.TrimSpace(locationDetails[0]),
|
||||
Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"),
|
||||
}
|
||||
}
|
||||
case "Teilnehmer":
|
||||
parts := strings.Split(value, "/")
|
||||
if len(parts) >= 3 {
|
||||
bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
||||
totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1]))
|
||||
waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2]))
|
||||
details.Participants = model.Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList}
|
||||
}
|
||||
case "Kosten":
|
||||
details.Cost = value // makes no sense since you need to be logged in to see the price
|
||||
case "Hinweis":
|
||||
var allNotes []string
|
||||
|
||||
s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) {
|
||||
if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" {
|
||||
note := strings.TrimSpace(s.Text())
|
||||
if note != "" {
|
||||
allNotes = append(allNotes, note)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
event.AdditionalNote = strings.Join(allNotes, " ")
|
||||
}
|
||||
})
|
||||
|
||||
event.Details = details
|
||||
events = append(events, event)
|
||||
})
|
||||
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// eventNumberPattern matches a run of one to four digits. It is compiled once
// instead of on every call.
var eventNumberPattern = regexp.MustCompile("[0-9]{1,4}")

// parseEventID extracts the event ID from the full title of a course.
//
// The ID is the space-separated token that follows the first "Nr." or "Nr:".
// When there is no such marker, or nothing follows it, the first run of one to
// four digits in the title is returned; the result is empty when the title
// contains no digit either.
func parseEventID(fulltitle string) string {
	var eventID string

	fulltitleParts := strings.Split(fulltitle, " ")
	for i, part := range fulltitleParts {
		if part == "Nr." || part == "Nr:" {
			// The marker may be the last token; indexing past the end would
			// panic.
			if i+1 < len(fulltitleParts) {
				eventID = fulltitleParts[i+1]
			}
			break
		}
	}

	if eventID == "" {
		eventID = eventNumberPattern.FindString(fulltitle)
	}
	return eventID
}
|
@@ -0,0 +1,56 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package sport
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Test_splitByCommaWithTime(t *testing.T) {
|
||||
type args struct {
|
||||
input string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want []string
|
||||
}{
|
||||
{"one string", args{"one"}, []string{"one"}},
|
||||
{"two strings", args{"one,two"}, []string{"one,two"}},
|
||||
{"three strings", args{"one,two,three"}, []string{"one,two,three"}},
|
||||
// e.g. "Mo 18:00-20:00, Di 18:00-20:00" -> ["Mo 18:00-20:00", "Di 18:00-20:00"]
|
||||
// e.g. "Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00" -> ["Mo 18:00-20:00", "Di 18:00-20:00", "Mi 18:00-20:00"]
|
||||
// e.g. "Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00" -> ["Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00"]
|
||||
// e.g. "Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00" -> ["Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"]
|
||||
// e.g. "Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00" -> ["Mo+Mi+Fr 18:00-20:00", "Sa 20:00-21:00"]
|
||||
// e.g. "Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00" -> ["Mo+Mi 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"]
|
||||
{"Mo 18:00-20:00, Di 18:00-20:00", args{"Mo 18:00-20:00, Di 18:00-20:00"}, []string{"Mo 18:00-20:00", "Di 18:00-20:00"}},
|
||||
{"Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00", args{"Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00"}, []string{"Mo 18:00-20:00", "Di 18:00-20:00", "Mi 18:00-20:00"}},
|
||||
{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00", args{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00"}, []string{"Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00"}},
|
||||
{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00", args{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00"}, []string{"Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"}},
|
||||
{"Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00", args{"Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00"}, []string{"Mo+Mi+Fr 18:00-20:00", "Sa 20:00-21:00"}},
|
||||
{"Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00", args{"Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00"}, []string{"Mo+Mi 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := splitByCommaWithTime(tt.args.input); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("splitByCommaWithTime() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@@ -0,0 +1,297 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"golang.org/x/net/html"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/date"
|
||||
"htwkalender/service/fetch"
|
||||
"htwkalender/service/functions"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
func ReplaceEmptyEventNames(group model.SeminarGroup) model.SeminarGroup {
|
||||
for j, event := range group.Events {
|
||||
if functions.OnlyWhitespace(event.Name) {
|
||||
group.Events[j].Name = "Sonderveranstaltungen"
|
||||
}
|
||||
}
|
||||
return group
|
||||
}
|
||||
|
||||
func ClearEmptySeminarGroups(seminarGroup model.SeminarGroup) model.SeminarGroup {
|
||||
var newSeminarGroup = model.SeminarGroup{}
|
||||
|
||||
if len(seminarGroup.Events) > 0 && seminarGroup.Course != "" {
|
||||
newSeminarGroup = seminarGroup
|
||||
}
|
||||
return newSeminarGroup
|
||||
}
|
||||
|
||||
func GetSeminarGroupEventsFromHTML(seminarGroupLabel string) model.SeminarGroup {
|
||||
var seminarGroup model.SeminarGroup
|
||||
|
||||
if (time.Now().Month() >= 3) && (time.Now().Month() <= 10) {
|
||||
ssUrl := "https://stundenplan.htwk-leipzig.de/" + string("ss") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65"
|
||||
result, getError := fetch.GetHTML(ssUrl)
|
||||
if getError == nil {
|
||||
seminarGroup = parseSeminarGroup(result)
|
||||
}
|
||||
}
|
||||
|
||||
if (time.Now().Month() >= 9) || (time.Now().Month() <= 4) {
|
||||
wsUrl := "https://stundenplan.htwk-leipzig.de/" + string("ws") + "/Berichte/Text-Listen;Studenten-Sets;name;" + seminarGroupLabel + "?template=sws_semgrp&weeks=1-65"
|
||||
result, getError := fetch.GetHTML(wsUrl)
|
||||
if getError == nil {
|
||||
seminarGroup = parseSeminarGroup(result)
|
||||
}
|
||||
}
|
||||
return seminarGroup
|
||||
}
|
||||
|
||||
func SplitEventType(events []model.Event) ([]model.Event, error) {
|
||||
re, err := regexp.Compile("^([VPS])([wp])$")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for i, event := range events {
|
||||
matched := re.Match([]byte(event.EventType))
|
||||
if matched {
|
||||
eventType := event.EventType
|
||||
event.EventType = eventType[0:1]
|
||||
event.Compulsory = eventType[1:2]
|
||||
events[i] = event
|
||||
}
|
||||
}
|
||||
return events, nil
|
||||
}
|
||||
|
||||
func parseSeminarGroup(result string) model.SeminarGroup {
|
||||
doc, err := html.Parse(strings.NewReader(result))
|
||||
if err != nil {
|
||||
fmt.Printf("Error occurred while parsing the HTML document: %s\n", err.Error())
|
||||
return model.SeminarGroup{}
|
||||
}
|
||||
|
||||
table := findFirstTable(doc)
|
||||
eventTables := getEventTables(doc)
|
||||
allDayLabels := getAllDayLabels(doc)
|
||||
|
||||
if eventTables == nil || allDayLabels == nil {
|
||||
return model.SeminarGroup{}
|
||||
}
|
||||
course := findFirstSpanWithClass(table, "header-2-0-1").FirstChild.Data
|
||||
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels, course)
|
||||
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
|
||||
events := splitEventsBySingleWeek(splitEventsByWeekVal)
|
||||
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
|
||||
semester, year := extractSemesterAndYear(semesterString)
|
||||
events = convertWeeksToDates(events, semester, year)
|
||||
events = generateUUIDs(events, course)
|
||||
events, err = SplitEventType(events)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Error occurred while splitting event types:", "error", err)
|
||||
return model.SeminarGroup{}
|
||||
}
|
||||
|
||||
var seminarGroup = model.SeminarGroup{
|
||||
University: findFirstSpanWithClass(table, "header-1-0-0").FirstChild.Data,
|
||||
Course: course,
|
||||
Events: events,
|
||||
}
|
||||
return seminarGroup
|
||||
}
|
||||
|
||||
func generateUUIDs(events []model.Event, course string) []model.Event {
|
||||
for i, event := range events {
|
||||
// generate a hash value from the event name, course and semester
|
||||
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+course))
|
||||
events[i].UUID = hash.String()
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
// convertWeeksToDates converts the week and year to a date
|
||||
// The date is calculated based on the week and the year
|
||||
// The time is unset and 23:00 is used as default
|
||||
// Additionally the semester is added to the event
|
||||
|
||||
func convertWeeksToDates(events []model.Event, semester string, year string) []model.Event {
|
||||
var newEvents []model.Event
|
||||
eventYear, _ := strconv.Atoi(year)
|
||||
|
||||
// for each event we need to calculate the start and end date based on the week and the year
|
||||
for _, event := range events {
|
||||
eventWeek, _ := strconv.Atoi(event.Week)
|
||||
eventDay, _ := date.GetDateFromWeekNumber(eventYear, eventWeek, event.Day)
|
||||
start := replaceTimeForDate(eventDay, event.Start.Time())
|
||||
end := replaceTimeForDate(eventDay, event.End.Time())
|
||||
|
||||
//Check if end is before start
|
||||
if end.Before(start) {
|
||||
end = end.AddDate(0, 0, 1)
|
||||
}
|
||||
|
||||
newEvent := event
|
||||
newEvent.Start, _ = types.ParseDateTime(start.In(time.UTC))
|
||||
newEvent.End, _ = types.ParseDateTime(end.In(time.UTC))
|
||||
newEvent.Semester = semester
|
||||
newEvents = append(newEvents, newEvent)
|
||||
}
|
||||
return newEvents
|
||||
}
|
||||
|
||||
// replaceTimeForDate returns a time that has the year, month, day and location
// of date and the hour, minute, second and nanosecond of replacementTime.
// The date part of replacementTime is ignored.
func replaceTimeForDate(date time.Time, replacementTime time.Time) time.Time {
	year, month, day := date.Date()
	hour, minute, second := replacementTime.Clock()
	return time.Date(year, month, day, hour, minute, second, replacementTime.Nanosecond(), date.Location())
}
|
||||
|
||||
// Patterns for a semester name followed by exactly one whitespace character
// and a four-digit year. The year is the first capture group. They are
// compiled once instead of on every call of extractSemesterAndYear.
var (
	winterSemesterYearPattern = regexp.MustCompile(`Wintersemester\s(\d{4})`)
	summerSemesterYearPattern = regexp.MustCompile(`Sommersemester\s(\d{4})`)
)

// extractSemesterAndYear extracts the semester shortcut and the year from a
// semester headline such as "Wintersemester 2023/24 (...)".
//
// The first result is "ws" when the text contains "Wintersemester", otherwise
// "ss" when it contains "Sommersemester"; if it contains neither, both results
// are empty. The second result is the four-digit year directly following the
// semester name (separated by one whitespace character), or "" when there is
// no such year.
func extractSemesterAndYear(semesterString string) (string, string) {
	var (
		semesterShortcut string
		yearPattern      *regexp.Regexp
	)

	// Winter is checked first, so it wins when both names occur in the text.
	switch {
	case strings.Contains(semesterString, "Wintersemester"):
		semesterShortcut, yearPattern = "ws", winterSemesterYearPattern
	case strings.Contains(semesterString, "Sommersemester"):
		semesterShortcut, yearPattern = "ss", summerSemesterYearPattern
	default:
		return "", ""
	}

	year := ""
	if groups := yearPattern.FindStringSubmatch(semesterString); groups != nil {
		year = groups[1]
	}
	return semesterShortcut, year
}
|
||||
|
||||
func toEvents(tables [][]*html.Node, days []string, course string) []model.Event {
|
||||
var events []model.Event
|
||||
|
||||
for table := range tables {
|
||||
for row := range tables[table] {
|
||||
|
||||
tableData := findTableData(tables[table][row])
|
||||
if len(tableData) > 0 {
|
||||
start, _ := types.ParseDateTime(createTimeFromHourAndMinuteString(getTextContent(tableData[1])))
|
||||
end, _ := types.ParseDateTime(createTimeFromHourAndMinuteString(getTextContent(tableData[2])))
|
||||
events = append(events, model.Event{
|
||||
Day: days[table],
|
||||
Week: getTextContent(tableData[0]),
|
||||
Start: start,
|
||||
End: end,
|
||||
Name: getTextContent(tableData[3]),
|
||||
EventType: getTextContent(tableData[4]),
|
||||
Prof: getTextContent(tableData[5]),
|
||||
Rooms: getTextContent(tableData[6]),
|
||||
Notes: getTextContent(tableData[7]),
|
||||
BookedAt: getTextContent(tableData[8]),
|
||||
Course: course,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
// createTimeFromHourAndMinuteString converts a time of day in the form "HH:MM"
// into a time.Time in UTC.
//
// Only the hour and minute are meaningful; year, month and day are all passed
// as 0 to time.Date. A part that is missing or not numeric counts as 0, so an
// input without ":" yields the full hour instead of panicking.
func createTimeFromHourAndMinuteString(tableTime string) time.Time {
	// strings.Split always returns at least one element, so index 0 is safe.
	timeParts := strings.Split(tableTime, ":")
	hour, _ := strconv.Atoi(timeParts[0])

	minute := 0
	if len(timeParts) > 1 {
		minute, _ = strconv.Atoi(timeParts[1])
	}
	return time.Date(0, 0, 0, hour, minute, 0, 0, time.UTC)
}
|
||||
|
||||
func splitEventsByWeek(events []model.Event) []model.Event {
|
||||
var newEvents []model.Event
|
||||
|
||||
for _, event := range events {
|
||||
weeks := strings.Split(event.Week, ",")
|
||||
for _, week := range weeks {
|
||||
newEvent := event
|
||||
newEvent.Week = strings.TrimSpace(week)
|
||||
newEvents = append(newEvents, newEvent)
|
||||
}
|
||||
}
|
||||
return newEvents
|
||||
}
|
||||
|
||||
func splitEventsBySingleWeek(events []model.Event) []model.Event {
|
||||
var newEvents []model.Event
|
||||
|
||||
for _, event := range events {
|
||||
if strings.Contains(event.Week, "-") {
|
||||
weeks := splitWeekRange(event.Week)
|
||||
for _, week := range weeks {
|
||||
newEvent := event
|
||||
newEvent.Week = week
|
||||
newEvents = append(newEvents, newEvent)
|
||||
}
|
||||
} else {
|
||||
newEvents = append(newEvents, event)
|
||||
}
|
||||
}
|
||||
return newEvents
|
||||
}
|
||||
|
||||
// splitWeekRange expands a week range of the form "start-end" into the list of
// all week numbers from start to end (inclusive) as decimal strings.
//
// It returns nil when the input does not consist of exactly two parts
// separated by "-", when a part is not an integer, or when start is greater
// than end. Surrounding whitespace of both parts is ignored.
func splitWeekRange(weekRange string) []string {
	bounds := strings.Split(weekRange, "-")
	if len(bounds) != 2 {
		return nil
	}

	first, err := strconv.Atoi(strings.TrimSpace(bounds[0]))
	if err != nil {
		return nil
	}
	last, err := strconv.Atoi(strings.TrimSpace(bounds[1]))
	if err != nil {
		return nil
	}

	var weeks []string
	for week := first; week <= last; week++ {
		weeks = append(weeks, strconv.Itoa(week))
	}
	return weeks
}
|
@@ -0,0 +1,503 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"htwkalender/model"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func Test_extractSemesterAndYear(t *testing.T) {
|
||||
type args struct {
|
||||
semesterString string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want string
|
||||
want1 string
|
||||
}{
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
semesterString: "Wintersemester 2023/24 (Planungszeitraum 01.09.2023 bis 03.03.2024)",
|
||||
},
|
||||
want: "ws",
|
||||
want1: "2023",
|
||||
},
|
||||
{
|
||||
name: "Test 2",
|
||||
args: args{
|
||||
semesterString: "Sommersemester 2023 (Planungszeitraum 06.03. bis 31.08.2023)",
|
||||
},
|
||||
want: "ss",
|
||||
want1: "2023",
|
||||
},
|
||||
{
|
||||
name: "Test 3",
|
||||
args: args{
|
||||
semesterString: "Sommersemester 2010 (Planungszeitraum 06.03. bis 31.08.2023)",
|
||||
},
|
||||
want: "ss",
|
||||
want1: "2010",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, got1 := extractSemesterAndYear(tt.args.semesterString)
|
||||
if got != tt.want {
|
||||
t.Errorf("extractSemesterAndYear() got = %v, want %v", got, tt.want)
|
||||
}
|
||||
if got1 != tt.want1 {
|
||||
t.Errorf("extractSemesterAndYear() got1 = %v, want %v", got1, tt.want1)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_replaceEmptyEventNames(t *testing.T) {
|
||||
type args struct {
|
||||
group model.SeminarGroup
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want model.SeminarGroup
|
||||
}{
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
group: model.SeminarGroup{
|
||||
Events: []model.Event{
|
||||
{
|
||||
Name: "Test",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
want: model.SeminarGroup{
|
||||
Events: []model.Event{
|
||||
{
|
||||
Name: "Test",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
group: model.SeminarGroup{
|
||||
Events: []model.Event{
|
||||
{
|
||||
Name: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
want: model.SeminarGroup{
|
||||
Events: []model.Event{
|
||||
{
|
||||
Name: "Sonderveranstaltungen",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := ReplaceEmptyEventNames(tt.args.group); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("ReplaceEmptyEventNames() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_splitEventType(t *testing.T) {
|
||||
type args struct {
|
||||
events []model.Event
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want []model.Event
|
||||
}{
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
EventType: "V",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
EventType: "V",
|
||||
Compulsory: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test 2",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
EventType: "Vw",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
EventType: "V",
|
||||
Compulsory: "w",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test 3",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
EventType: "Sperr",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
EventType: "Sperr",
|
||||
Compulsory: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test 4",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
EventType: "Sperr",
|
||||
},
|
||||
{
|
||||
EventType: "Vw",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
EventType: "Sperr",
|
||||
Compulsory: "",
|
||||
},
|
||||
{
|
||||
EventType: "V",
|
||||
Compulsory: "w",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got, _ := SplitEventType(tt.args.events); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("SplitEventType() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_generateUUIDs(t *testing.T) {
|
||||
type args struct {
|
||||
events []model.Event
|
||||
course string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want []model.Event
|
||||
}{
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
|
||||
},
|
||||
},
|
||||
course: "21BIB-2a",
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
|
||||
UUID: "3720afdc-10c7-5b72-9489-cffb70cb0c13",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test 2",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
|
||||
},
|
||||
},
|
||||
course: "21BIB-2b",
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
Name: " Arbeitssicherheit / Rechtsformen von Unternehmen B435 SBB (wpf) & B348 BIB (pf) 5. FS",
|
||||
UUID: "81083480-bcf1-5452-af84-bb27d79282d8",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := generateUUIDs(tt.args.events, tt.args.course); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("generateUUIDs() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_createTimeFromHourAndMinuteString(t *testing.T) {
|
||||
type args struct {
|
||||
tableTime string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want time.Time
|
||||
}{
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
tableTime: "08:00",
|
||||
},
|
||||
want: time.Date(0, 0, 0, 8, 0, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
name: "Test 2",
|
||||
args: args{
|
||||
tableTime: "08:15",
|
||||
},
|
||||
want: time.Date(0, 0, 0, 8, 15, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
name: "Test 3",
|
||||
args: args{
|
||||
tableTime: "08:30",
|
||||
},
|
||||
want: time.Date(0, 0, 0, 8, 30, 0, 0, time.UTC),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := createTimeFromHourAndMinuteString(tt.args.tableTime); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("createTimeFromHourAndMinuteString() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_replaceTimeInDate(t *testing.T) {
|
||||
type args struct {
|
||||
date time.Time
|
||||
time time.Time
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want time.Time
|
||||
}{
|
||||
{
|
||||
name: "Test 1",
|
||||
args: args{
|
||||
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
time: time.Date(0, 0, 0, 8, 0, 0, 0, time.UTC),
|
||||
},
|
||||
want: time.Date(2021, 1, 1, 8, 0, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
name: "Test 2",
|
||||
args: args{
|
||||
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
time: time.Date(0, 0, 0, 8, 15, 0, 0, time.UTC),
|
||||
},
|
||||
want: time.Date(2021, 1, 1, 8, 15, 0, 0, time.UTC),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := replaceTimeForDate(tt.args.date, tt.args.time); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("addTimeToDate() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_convertWeeksToDates(t *testing.T) {
|
||||
type args struct {
|
||||
events []model.Event
|
||||
semester string
|
||||
year string
|
||||
}
|
||||
returnDateTime := func(date time.Time) types.DateTime {
|
||||
dateTime, err := types.ParseDateTime(date)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
return dateTime
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want []model.Event
|
||||
}{
|
||||
{
|
||||
name: "Test Wintertime",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
Week: "1",
|
||||
Day: "Montag",
|
||||
Start: returnDateTime(time.Date(0, 0, 0, 7, 30, 0, 0, time.UTC)),
|
||||
End: returnDateTime(time.Date(0, 0, 0, 9, 0, 0, 0, time.UTC)),
|
||||
},
|
||||
},
|
||||
semester: "ws",
|
||||
year: "2021",
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
Week: "1",
|
||||
Day: "Montag",
|
||||
Start: returnDateTime(time.Date(2021, 1, 4, 6, 30, 0, 0, time.UTC)),
|
||||
End: returnDateTime(time.Date(2021, 1, 4, 8, 0, 0, 0, time.UTC)),
|
||||
Semester: "ws",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test Summertime",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
Week: "30",
|
||||
Day: "Donnerstag",
|
||||
Start: returnDateTime(time.Date(0, 0, 0, 7, 30, 0, 0, time.UTC)),
|
||||
End: returnDateTime(time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC)),
|
||||
},
|
||||
},
|
||||
semester: "ws",
|
||||
year: "2023",
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
Week: "30",
|
||||
Day: "Donnerstag",
|
||||
Start: returnDateTime(time.Date(2023, 7, 27, 5, 30, 0, 0, time.UTC)),
|
||||
End: returnDateTime(time.Date(2023, 7, 27, 22, 0, 0, 0, time.UTC)),
|
||||
Semester: "ws",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test NextDay",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
Week: "45",
|
||||
Day: "Donnerstag",
|
||||
Start: returnDateTime(time.Date(0, 0, 0, 7, 30, 0, 0, time.UTC)),
|
||||
End: returnDateTime(time.Date(0, 0, 0, 4, 0, 0, 0, time.UTC)),
|
||||
},
|
||||
},
|
||||
semester: "ws",
|
||||
year: "2023",
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
Week: "45",
|
||||
Day: "Donnerstag",
|
||||
Start: returnDateTime(time.Date(2023, 11, 9, 6, 30, 0, 0, time.UTC)),
|
||||
End: returnDateTime(time.Date(2023, 11, 10, 3, 0, 0, 0, time.UTC)),
|
||||
Semester: "ws",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := convertWeeksToDates(tt.args.events, tt.args.semester, tt.args.year); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("convertWeeksToDates() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_replaceTimeForDate(t *testing.T) {
|
||||
type args struct {
|
||||
date time.Time
|
||||
replacementTime time.Time
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want time.Time
|
||||
}{
|
||||
{
|
||||
name: "Replace Hour",
|
||||
args: args{
|
||||
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
replacementTime: time.Date(0, 0, 0, 8, 0, 0, 0, time.UTC),
|
||||
},
|
||||
want: time.Date(2021, 1, 1, 8, 0, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
name: "Replace Hour and Minute",
|
||||
args: args{
|
||||
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
replacementTime: time.Date(0, 0, 0, 8, 15, 0, 0, time.UTC),
|
||||
},
|
||||
want: time.Date(2021, 1, 1, 8, 15, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
name: "Replace Hour and Minute",
|
||||
args: args{
|
||||
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
replacementTime: time.Date(0, 0, 0, 8, 30, 0, 0, time.UTC),
|
||||
},
|
||||
want: time.Date(2021, 1, 1, 8, 30, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
name: "Replace Hour and Minute without Year, Month, Day",
|
||||
args: args{
|
||||
date: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
replacementTime: time.Date(2023, 10, 3, 8, 30, 0, 0, time.UTC),
|
||||
},
|
||||
want: time.Date(2021, 1, 1, 8, 30, 0, 0, time.UTC),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := replaceTimeForDate(tt.args.date, tt.args.replacementTime); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("replaceTimeForDate() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@@ -0,0 +1,141 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"github.com/pocketbase/pocketbase"
|
||||
"github.com/pocketbase/pocketbase/models"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/db"
|
||||
"htwkalender/service/functions"
|
||||
"htwkalender/service/functions/time"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// getSeminarHTML downloads the seminar group XML document of the given
// semester from the HTWK timetable server and returns the response body as a
// string.
//
// Request and read errors are printed and returned. The HTTP status code is
// not inspected, so the body of an error response is returned like any other.
// NOTE(review): http.Get uses the default client, which has no timeout.
func getSeminarHTML(semester string) (string, error) {
	requestURL := "https://stundenplan.htwk-leipzig.de/stundenplan/xml/public/semgrp_" + semester + ".xml"

	response, err := http.Get(requestURL)
	if err != nil {
		fmt.Printf("Error occurred while making the request: %s\n", err.Error())
		return "", err
	}
	// An error while closing the body is deliberately ignored.
	defer func() { _ = response.Body.Close() }()

	body, readErr := io.ReadAll(response.Body)
	if readErr != nil {
		fmt.Printf("Error occurred while reading the response: %s\n", readErr.Error())
		return "", readErr
	}
	return string(body), nil
}
|
||||
|
||||
func FetchSeminarGroups(app *pocketbase.PocketBase) ([]*models.Record, error) {
|
||||
var groups []model.SeminarGroup
|
||||
|
||||
semesterString := functions.CalculateSemesterList(time.RealClock{})
|
||||
var results [2]string
|
||||
var err error
|
||||
|
||||
for i, semester := range semesterString {
|
||||
results[i], err = getSeminarHTML(semester)
|
||||
if err != nil {
|
||||
slog.Error("Error while fetching seminar groups for: "+semester, "error", err)
|
||||
return nil, err
|
||||
}
|
||||
groups = append(groups, parseSeminarGroups(results[i], semester)...)
|
||||
}
|
||||
|
||||
// filter duplicates
|
||||
groups = removeDuplicates(groups)
|
||||
|
||||
collection, dbError := db.FindCollection(app, "groups")
|
||||
if dbError != nil {
|
||||
slog.Error("Error while searching collection groups", "error", dbError)
|
||||
return nil, err
|
||||
}
|
||||
var insertedGroups []*models.Record
|
||||
|
||||
insertedGroups, dbError = db.SaveGroups(groups, collection, app)
|
||||
if dbError != nil {
|
||||
slog.Error("Error while saving groups", "error", dbError)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return insertedGroups, nil
|
||||
}
|
||||
|
||||
func removeDuplicates(groups []model.SeminarGroup) []model.SeminarGroup {
|
||||
var uniqueGroups []model.SeminarGroup
|
||||
for _, group := range groups {
|
||||
if !contains(uniqueGroups, group) {
|
||||
uniqueGroups = append(uniqueGroups, group)
|
||||
}
|
||||
}
|
||||
return uniqueGroups
|
||||
}
|
||||
|
||||
func contains(groups []model.SeminarGroup, group model.SeminarGroup) bool {
|
||||
for _, a := range groups {
|
||||
if (a.Course == group.Course) && (a.Semester == group.Semester) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func parseSeminarGroups(result string, semester string) []model.SeminarGroup {
|
||||
|
||||
var studium model.Studium
|
||||
err := xml.Unmarshal([]byte(result), &studium)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var seminarGroups []model.SeminarGroup
|
||||
for _, faculty := range studium.Faculty {
|
||||
for _, Studiengang := range faculty.Studiengang {
|
||||
for _, Studienrichtung := range Studiengang.Semgrp {
|
||||
seminarGroup := model.SeminarGroup{
|
||||
University: "HTWK-Leipzig",
|
||||
GroupShortcut: Studiengang.Name,
|
||||
GroupId: Studiengang.ID,
|
||||
Course: Studienrichtung.Name,
|
||||
Faculty: faculty.Name,
|
||||
FacultyId: faculty.ID,
|
||||
Semester: semester,
|
||||
}
|
||||
seminarGroups = append(seminarGroups, seminarGroup)
|
||||
}
|
||||
}
|
||||
}
|
||||
return seminarGroups
|
||||
}
|
@@ -0,0 +1,91 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"htwkalender/model"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Test_contains(t *testing.T) {
|
||||
type args struct {
|
||||
groups []model.SeminarGroup
|
||||
group model.SeminarGroup
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "should return true if group is in groups",
|
||||
args: args{
|
||||
groups: []model.SeminarGroup{
|
||||
{
|
||||
Course: "test",
|
||||
Semester: "test",
|
||||
},
|
||||
},
|
||||
group: model.SeminarGroup{
|
||||
Course: "test",
|
||||
Semester: "test",
|
||||
},
|
||||
},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "should return false if group is not in groups",
|
||||
args: args{
|
||||
groups: []model.SeminarGroup{
|
||||
{
|
||||
Course: "test",
|
||||
Semester: "test",
|
||||
},
|
||||
},
|
||||
group: model.SeminarGroup{
|
||||
Course: "test",
|
||||
Semester: "test2",
|
||||
},
|
||||
},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "should return false if group is not in courses",
|
||||
args: args{
|
||||
groups: []model.SeminarGroup{
|
||||
{
|
||||
Course: "test3",
|
||||
Semester: "test",
|
||||
},
|
||||
},
|
||||
group: model.SeminarGroup{
|
||||
Course: "test",
|
||||
Semester: "test",
|
||||
},
|
||||
},
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := contains(tt.args.groups, tt.args.group); got != tt.want {
|
||||
t.Errorf("contains() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
217
services/data-manager/service/fetch/v1/htmlParsingFunctions.go
Normal file
217
services/data-manager/service/fetch/v1/htmlParsingFunctions.go
Normal file
@@ -0,0 +1,217 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"golang.org/x/net/html"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Find the first <table> element in the HTML document
|
||||
func findFirstTable(node *html.Node) *html.Node {
|
||||
if node.Type == html.ElementNode && node.Data == "table" {
|
||||
return node
|
||||
}
|
||||
// Traverse child nodes recursively
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
found := findFirstTable(child)
|
||||
if found != nil {
|
||||
return found
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Find the first <span> element with the specified class attribute value
|
||||
func findFirstSpanWithClass(node *html.Node, classValue string) *html.Node {
|
||||
|
||||
// Check if the current node is a <span> element with the specified class attribute value
|
||||
if node.Type == html.ElementNode && node.Data == "span" {
|
||||
if hasClassAttribute(node, classValue) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
|
||||
// Traverse child nodes recursively
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
found := findFirstSpanWithClass(child, classValue)
|
||||
if found != nil {
|
||||
return found
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the specified element has the specified class attribute value
|
||||
func hasClassAttribute(node *html.Node, classValue string) bool {
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == "class" && strings.Contains(attr.Val, classValue) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Get Tables with days
|
||||
func getEventTables(node *html.Node) [][]*html.Node {
|
||||
var eventTables [][]*html.Node
|
||||
tables := findTables(node)
|
||||
// get all tables with events
|
||||
for events := range tables {
|
||||
rows := findTableRows(tables[events])
|
||||
// check that a first row exists
|
||||
if len(rows) > 0 {
|
||||
rows = rows[1:]
|
||||
eventTables = append(eventTables, rows)
|
||||
}
|
||||
}
|
||||
return eventTables
|
||||
}
|
||||
|
||||
// Get Tables with days
|
||||
func getAllDayLabels(node *html.Node) []string {
|
||||
paragraphs := findParagraphs(node)
|
||||
var dayArray []string
|
||||
|
||||
for _, p := range paragraphs {
|
||||
label := getDayLabel(p)
|
||||
if label != "" {
|
||||
dayArray = append(dayArray, label)
|
||||
}
|
||||
}
|
||||
return dayArray
|
||||
}
|
||||
|
||||
// Find all <p> elements in the HTML document
|
||||
func findParagraphs(node *html.Node) []*html.Node {
|
||||
var paragraphs []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "p" {
|
||||
paragraphs = append(paragraphs, node)
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
paragraphs = append(paragraphs, findParagraphs(child)...)
|
||||
}
|
||||
|
||||
return paragraphs
|
||||
}
|
||||
|
||||
// Find all <tr> elements in <tbody>, excluding the first one
|
||||
func findTableRows(node *html.Node) []*html.Node {
|
||||
var tableRows []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "tbody" {
|
||||
child := node.FirstChild
|
||||
for child != nil {
|
||||
if child.Type == html.ElementNode && child.Data == "tr" {
|
||||
tableRows = append(tableRows, child)
|
||||
}
|
||||
child = child.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// Traverse child nodes recursively
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
var tableRowElement = findTableRows(child)
|
||||
if tableRowElement != nil {
|
||||
tableRows = append(tableRows, tableRowElement...)
|
||||
}
|
||||
}
|
||||
|
||||
// check if tableRows is nil
|
||||
if tableRows == nil {
|
||||
return []*html.Node{}
|
||||
} else {
|
||||
return tableRows
|
||||
}
|
||||
}
|
||||
|
||||
// Find all <p> elements in the HTML document
|
||||
func findTables(node *html.Node) []*html.Node {
|
||||
var tables []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "table" {
|
||||
tables = append(tables, node)
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
tables = append(tables, findDayTables(child)...)
|
||||
}
|
||||
|
||||
return tables
|
||||
}
|
||||
|
||||
// Find all <p> elements in the HTML document
|
||||
func findDayTables(node *html.Node) []*html.Node {
|
||||
var tables []*html.Node
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
tables = append(tables, findDayTables(child)...)
|
||||
}
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "table" && hasClassAttribute(node, "spreadsheet") {
|
||||
tables = append(tables, node)
|
||||
}
|
||||
|
||||
return tables
|
||||
}
|
||||
|
||||
// Get the text content of the specified node and its descendants
|
||||
func getDayLabel(node *html.Node) string {
|
||||
|
||||
child := node.FirstChild
|
||||
if child != nil {
|
||||
if child.Type == html.ElementNode && child.Data == "span" {
|
||||
if child.FirstChild != nil {
|
||||
return child.FirstChild.Data
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Find all <td> elements in the current <tr>
|
||||
func findTableData(node *html.Node) []*html.Node {
|
||||
var tableData []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "tr" {
|
||||
child := node.FirstChild
|
||||
for child != nil {
|
||||
if child.Type == html.ElementNode && child.Data == "td" {
|
||||
tableData = append(tableData, child)
|
||||
}
|
||||
child = child.NextSibling
|
||||
}
|
||||
}
|
||||
return tableData
|
||||
}
|
||||
|
||||
// Get the text content of the specified node and its descendants
|
||||
func getTextContent(node *html.Node) string {
|
||||
var textContent string
|
||||
|
||||
if node.Type == html.TextNode {
|
||||
textContent = node.Data
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
textContent += getTextContent(child)
|
||||
}
|
||||
|
||||
return textContent
|
||||
}
|
67
services/data-manager/service/fetch/v2/eventParser.go
Normal file
67
services/data-manager/service/fetch/v2/eventParser.go
Normal file
@@ -0,0 +1,67 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"golang.org/x/net/html"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/date"
|
||||
"htwkalender/service/functions"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func toEvents(tables [][]*html.Node, days []string) []model.Event {
|
||||
var events []model.Event
|
||||
|
||||
for table := range tables {
|
||||
for row := range tables[table] {
|
||||
|
||||
tableData := findTableData(tables[table][row])
|
||||
if len(tableData) > 0 {
|
||||
start, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[1])))
|
||||
end, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[2])))
|
||||
|
||||
courses := getTextContent(tableData[7])
|
||||
name := getTextContent(tableData[3])
|
||||
if functions.OnlyWhitespace(name) {
|
||||
name = "Sonderveranstaltung"
|
||||
}
|
||||
|
||||
if len(courses) > 0 {
|
||||
for _, course := range strings.Split(courses, " ") {
|
||||
events = append(events, model.Event{
|
||||
Day: days[table],
|
||||
Week: getTextContent(tableData[0]),
|
||||
Start: start,
|
||||
End: end,
|
||||
Name: name,
|
||||
EventType: getTextContent(tableData[4]),
|
||||
Notes: getTextContent(tableData[5]),
|
||||
Prof: getTextContent(tableData[6]),
|
||||
Rooms: getTextContent(tableData[8]),
|
||||
BookedAt: getTextContent(tableData[10]),
|
||||
Course: strings.TrimSpace(course),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return events
|
||||
}
|
181
services/data-manager/service/fetch/v2/fetcher.go
Normal file
181
services/data-manager/service/fetch/v2/fetcher.go
Normal file
@@ -0,0 +1,181 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/pocketbase/pocketbase"
|
||||
"golang.org/x/net/html"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/db"
|
||||
"htwkalender/service/fetch"
|
||||
v1 "htwkalender/service/fetch/v1"
|
||||
"htwkalender/service/functions"
|
||||
localTime "htwkalender/service/functions/time"
|
||||
"log/slog"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func ParseEventsFromRemote(app *pocketbase.PocketBase) (model.Events, error) {
|
||||
savedRecords, err := FetchAllEventsAndSave(app, localTime.RealClock{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return savedRecords, nil
|
||||
}
|
||||
|
||||
func FetchAllEventsAndSave(app *pocketbase.PocketBase, clock localTime.Clock) ([]model.Event, error) {
|
||||
var savedRecords []model.Event
|
||||
var err error = nil
|
||||
|
||||
var stubUrl = [2]string{
|
||||
"https://stundenplan.htwk-leipzig.de/",
|
||||
"/Berichte/Text-Listen;Veranstaltungsarten;name;" +
|
||||
"Vp%0A" +
|
||||
"Vw%0A" +
|
||||
"V%0A" +
|
||||
"Sp%0A" +
|
||||
"Sw%0A" +
|
||||
"S%0A" +
|
||||
"Pp%0A" +
|
||||
"Pw%0A" +
|
||||
"P%0A" +
|
||||
"ZV%0A" +
|
||||
"Tut%0A" +
|
||||
"Sperr%0A" +
|
||||
"pf%0A" +
|
||||
"wpf%0A" +
|
||||
"fak%0A" +
|
||||
"Pruefung%0A" +
|
||||
"gebucht%0A" +
|
||||
"Vertretung%0A" +
|
||||
"Fremdveranst.%0A" +
|
||||
"Buchen%0A" +
|
||||
"%0A?&template=sws_modul&weeks=1-65&combined=yes",
|
||||
}
|
||||
|
||||
// Fetch and save events for all semesters
|
||||
for _, semester := range functions.CalculateSemesterList(clock) {
|
||||
events, fetchErr := fetchAndSaveAllEventsForSemester(app, semester, stubUrl)
|
||||
if fetchErr != nil {
|
||||
return nil, fmt.Errorf("failed to fetch and save events for "+semester+": %w", err)
|
||||
}
|
||||
savedRecords = append(savedRecords, events...)
|
||||
}
|
||||
|
||||
return savedRecords, err
|
||||
}
|
||||
|
||||
func fetchAndSaveAllEventsForSemester(
|
||||
app *pocketbase.PocketBase,
|
||||
semester string,
|
||||
stubUrl [2]string,
|
||||
) ([]model.Event, error) {
|
||||
var savedRecords []model.Event
|
||||
url := stubUrl[0] + semester + stubUrl[1]
|
||||
events, err := parseEventForOneSemester(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse events for "+semester+": %w", err)
|
||||
}
|
||||
err = db.DeleteAllEventsBySemesterWithoutCourse(app, "Sport", semester)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to delete all events for "+semester+": %w", err)
|
||||
}
|
||||
savedEvents, dbError := db.SaveEvents(events, app)
|
||||
if dbError != nil {
|
||||
return nil, fmt.Errorf("failed to save events for "+semester+": %w", dbError)
|
||||
}
|
||||
savedRecords = append(savedRecords, savedEvents...)
|
||||
return savedRecords, err
|
||||
}
|
||||
|
||||
func parseEventForOneSemester(url string) ([]model.Event, error) {
|
||||
// Fetch Webpage from URL
|
||||
webpage, err := fetch.GetHTML(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse HTML to Node Tree
|
||||
var doc *html.Node
|
||||
doc, err = parseHTML(webpage)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get all event tables and all day labels
|
||||
eventTables := getEventTables(doc)
|
||||
allDayLabels := getAllDayLabels(doc)
|
||||
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
|
||||
|
||||
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
|
||||
events := splitEventsBySingleWeek(splitEventsByWeekVal)
|
||||
|
||||
if events == nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
table := findFirstTable(doc)
|
||||
|
||||
if table == nil {
|
||||
return nil, fmt.Errorf("failed to find first table")
|
||||
}
|
||||
|
||||
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
|
||||
semester, year := extractSemesterAndYear(semesterString)
|
||||
events = convertWeeksToDates(events, semester, year)
|
||||
events, err = v1.SplitEventType(events)
|
||||
if err != nil {
|
||||
slog.Error("Error occurred while splitting event types: ", "error", err)
|
||||
return nil, err
|
||||
}
|
||||
events = switchNameAndNotesForExam(events)
|
||||
events = generateUUIDs(events)
|
||||
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// switch name and notes for Pruefung events when Note is not empty and Name starts with "Prüfungen" and contains email
|
||||
func switchNameAndNotesForExam(events []model.Event) []model.Event {
|
||||
for i, event := range events {
|
||||
if event.EventType == "Pruefung" {
|
||||
if event.Notes != "" && strings.HasPrefix(event.Name, "Prüfungen") && strings.Contains(event.Name, "@") {
|
||||
events[i].Name = event.Notes
|
||||
events[i].Notes = event.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
func parseHTML(webpage string) (*html.Node, error) {
|
||||
doc, err := html.Parse(strings.NewReader(webpage))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
func generateUUIDs(events []model.Event) []model.Event {
|
||||
for i, event := range events {
|
||||
// generate a hash value from the event name, course and semester
|
||||
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+event.Course))
|
||||
events[i].UUID = hash.String()
|
||||
}
|
||||
return events
|
||||
}
|
99
services/data-manager/service/fetch/v2/fetcher_test.go
Normal file
99
services/data-manager/service/fetch/v2/fetcher_test.go
Normal file
@@ -0,0 +1,99 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"htwkalender/model"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Test_switchNameAndNotesForExam(t *testing.T) {
|
||||
type args struct {
|
||||
events []model.Event
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want []model.Event
|
||||
}{
|
||||
{
|
||||
name: "switch name and notes for exam",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
EventType: "Pruefung",
|
||||
Name: "Prüfungen FING/EIT WiSe (pruefungsamt.fing-eit@htwk-leipzig.de)",
|
||||
Notes: "Computer Vision II - Räume/Zeit unter Vorbehalt- (Raum W111.1)",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
EventType: "Pruefung",
|
||||
Name: "Computer Vision II - Räume/Zeit unter Vorbehalt- (Raum W111.1)",
|
||||
Notes: "Prüfungen FING/EIT WiSe (pruefungsamt.fing-eit@htwk-leipzig.de)",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "dont switch name and notes for exam",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
EventType: "Pruefung",
|
||||
Name: "i054 Umweltschutz und Recycling DPB & VNB 7.FS (wpf)",
|
||||
Notes: "Prüfung",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
EventType: "Pruefung",
|
||||
Notes: "Prüfung",
|
||||
Name: "i054 Umweltschutz und Recycling DPB & VNB 7.FS (wpf)",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "dont switch name and notes for exam",
|
||||
args: args{
|
||||
events: []model.Event{
|
||||
{
|
||||
EventType: "Pruefung",
|
||||
Name: "Prüfungen FING/ME WiSe (pruefungsamt.fing-me@htwk-leipzig.de)",
|
||||
Notes: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []model.Event{
|
||||
{
|
||||
EventType: "Pruefung",
|
||||
Notes: "",
|
||||
Name: "Prüfungen FING/ME WiSe (pruefungsamt.fing-me@htwk-leipzig.de)",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := switchNameAndNotesForExam(tt.args.events); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("switchNameAndNotesForExam() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
340
services/data-manager/service/fetch/v2/htmlParsingFunctions.go
Normal file
340
services/data-manager/service/fetch/v2/htmlParsingFunctions.go
Normal file
@@ -0,0 +1,340 @@
|
||||
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
|
||||
//Copyright (C) 2024 HTWKalender support@htwkalender.de
|
||||
|
||||
//This program is free software: you can redistribute it and/or modify
|
||||
//it under the terms of the GNU Affero General Public License as published by
|
||||
//the Free Software Foundation, either version 3 of the License, or
|
||||
//(at your option) any later version.
|
||||
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU Affero General Public License for more details.
|
||||
|
||||
//You should have received a copy of the GNU Affero General Public License
|
||||
//along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"golang.org/x/net/html"
|
||||
"htwkalender/model"
|
||||
"htwkalender/service/date"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Find the first <table> element in the HTML document
|
||||
func findFirstTable(node *html.Node) *html.Node {
|
||||
if node.Type == html.ElementNode && node.Data == "table" {
|
||||
return node
|
||||
}
|
||||
// Traverse child nodes recursively
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
found := findFirstTable(child)
|
||||
if found != nil {
|
||||
return found
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Find the first <span> element with the specified class attribute value
|
||||
func findFirstSpanWithClass(node *html.Node, classValue string) *html.Node {
|
||||
|
||||
// Check if the current node is a <span> element with the specified class attribute value
|
||||
if node.Type == html.ElementNode && node.Data == "span" {
|
||||
if hasClassAttribute(node, classValue) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
|
||||
// Traverse child nodes recursively
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
found := findFirstSpanWithClass(child, classValue)
|
||||
if found != nil {
|
||||
return found
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the specified element has the specified class attribute value
|
||||
func hasClassAttribute(node *html.Node, classValue string) bool {
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == "class" && strings.Contains(attr.Val, classValue) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Get Tables with days
|
||||
func getEventTables(node *html.Node) [][]*html.Node {
|
||||
var eventTables [][]*html.Node
|
||||
tables := findTables(node)
|
||||
// get all tables with events
|
||||
for events := range tables {
|
||||
rows := findTableRows(tables[events])
|
||||
// check that a first row exists
|
||||
if len(rows) > 0 {
|
||||
rows = rows[1:]
|
||||
eventTables = append(eventTables, rows)
|
||||
}
|
||||
}
|
||||
return eventTables
|
||||
}
|
||||
|
||||
// Get Tables with days
|
||||
func getAllDayLabels(node *html.Node) []string {
|
||||
paragraphs := findParagraphs(node)
|
||||
var dayArray []string
|
||||
|
||||
for _, p := range paragraphs {
|
||||
label := getDayLabel(p)
|
||||
if label != "" {
|
||||
dayArray = append(dayArray, label)
|
||||
}
|
||||
}
|
||||
return dayArray
|
||||
}
|
||||
|
||||
// Find all <p> elements in the HTML document
|
||||
func findParagraphs(node *html.Node) []*html.Node {
|
||||
var paragraphs []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "p" {
|
||||
paragraphs = append(paragraphs, node)
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
paragraphs = append(paragraphs, findParagraphs(child)...)
|
||||
}
|
||||
|
||||
return paragraphs
|
||||
}
|
||||
|
||||
// Find all <tr> elements in <tbody>, excluding the first one
|
||||
func findTableRows(node *html.Node) []*html.Node {
|
||||
var tableRows []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "tbody" {
|
||||
child := node.FirstChild
|
||||
for child != nil {
|
||||
if child.Type == html.ElementNode && child.Data == "tr" {
|
||||
tableRows = append(tableRows, child)
|
||||
}
|
||||
child = child.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
// Traverse child nodes recursively
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
var tableRowElement = findTableRows(child)
|
||||
if tableRowElement != nil {
|
||||
tableRows = append(tableRows, tableRowElement...)
|
||||
}
|
||||
}
|
||||
|
||||
// check if tableRows is nil
|
||||
if tableRows == nil {
|
||||
return []*html.Node{}
|
||||
} else {
|
||||
return tableRows
|
||||
}
|
||||
}
|
||||
|
||||
// Find all <p> elements in the HTML document
|
||||
func findTables(node *html.Node) []*html.Node {
|
||||
var tables []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "table" {
|
||||
tables = append(tables, node)
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
tables = append(tables, findDayTables(child)...)
|
||||
}
|
||||
|
||||
return tables
|
||||
}
|
||||
|
||||
// Find all <p> elements in the HTML document
|
||||
func findDayTables(node *html.Node) []*html.Node {
|
||||
var tables []*html.Node
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
tables = append(tables, findDayTables(child)...)
|
||||
}
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "table" && hasClassAttribute(node, "spreadsheet") {
|
||||
tables = append(tables, node)
|
||||
}
|
||||
|
||||
return tables
|
||||
}
|
||||
|
||||
// Get the text content of the specified node and its descendants
|
||||
func getDayLabel(node *html.Node) string {
|
||||
|
||||
child := node.FirstChild
|
||||
if child != nil {
|
||||
if child.Type == html.ElementNode && child.Data == "span" {
|
||||
if child.FirstChild != nil {
|
||||
return child.FirstChild.Data
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Find all <td> elements in the current <tr>
|
||||
func findTableData(node *html.Node) []*html.Node {
|
||||
var tableData []*html.Node
|
||||
|
||||
if node.Type == html.ElementNode && node.Data == "tr" {
|
||||
child := node.FirstChild
|
||||
for child != nil {
|
||||
if child.Type == html.ElementNode && child.Data == "td" {
|
||||
tableData = append(tableData, child)
|
||||
}
|
||||
child = child.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
return tableData
|
||||
}
|
||||
|
||||
// Get the text content of the specified node and its descendants
|
||||
func getTextContent(node *html.Node) string {
|
||||
var textContent string
|
||||
|
||||
if node.Type == html.TextNode {
|
||||
textContent = node.Data
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
textContent += getTextContent(child)
|
||||
}
|
||||
|
||||
return textContent
|
||||
}
|
||||
|
||||
func splitEventsByWeek(events []model.Event) []model.Event {
|
||||
var newEvents []model.Event
|
||||
|
||||
for _, event := range events {
|
||||
weeks := strings.Split(event.Week, ",")
|
||||
for _, week := range weeks {
|
||||
newEvent := event
|
||||
newEvent.Week = strings.TrimSpace(week)
|
||||
newEvents = append(newEvents, newEvent)
|
||||
}
|
||||
}
|
||||
return newEvents
|
||||
}
|
||||
|
||||
func splitEventsBySingleWeek(events []model.Event) []model.Event {
|
||||
var newEvents []model.Event
|
||||
|
||||
for _, event := range events {
|
||||
if strings.Contains(event.Week, "-") {
|
||||
weeks := splitWeekRange(event.Week)
|
||||
for _, week := range weeks {
|
||||
newEvent := event
|
||||
newEvent.Week = week
|
||||
newEvents = append(newEvents, newEvent)
|
||||
}
|
||||
} else {
|
||||
newEvents = append(newEvents, event)
|
||||
}
|
||||
}
|
||||
return newEvents
|
||||
}
|
||||
|
||||
// splitWeekRange expands a range such as "3-5" into the decimal strings of
// all integers from the first to the last bound, both included. Whitespace
// around the bounds is ignored.
//
// It returns nil when weekRange does not consist of exactly two parts
// separated by "-", when a bound is not an integer, or when the first bound
// is greater than the last.
func splitWeekRange(weekRange string) []string {
	bounds := strings.Split(weekRange, "-")
	if len(bounds) != 2 {
		return nil // Invalid format
	}

	first, err := strconv.Atoi(strings.TrimSpace(bounds[0]))
	if err != nil {
		return nil // Lower bound is not an integer
	}
	last, err := strconv.Atoi(strings.TrimSpace(bounds[1]))
	if err != nil {
		return nil // Upper bound is not an integer
	}

	var weeks []string
	for week := first; week <= last; week++ {
		weeks = append(weeks, strconv.Itoa(week))
	}
	return weeks
}
|
||||
|
||||
// extractSemesterAndYear derives a semester shortcut and a year from
// semesterString.
//
// The shortcut is "ws" when the string contains "Wintersemester" and "ss"
// when it contains "Sommersemester"; "Wintersemester" wins if both occur. The
// year is the four digits that follow the semester name after exactly one
// whitespace character, or "" when no such digits exist. When neither
// semester name occurs, two empty strings are returned.
func extractSemesterAndYear(semesterString string) (string, string) {
	var semesterName, shortcut string
	switch {
	case strings.Contains(semesterString, "Wintersemester"):
		semesterName, shortcut = "Wintersemester", "ws"
	case strings.Contains(semesterString, "Sommersemester"):
		semesterName, shortcut = "Sommersemester", "ss"
	default:
		return "", ""
	}

	// The capture group isolates the four digits behind the semester name.
	yearAfterName := regexp.MustCompile(semesterName + `\s(\d{4})`)
	groups := yearAfterName.FindStringSubmatch(semesterString)
	if groups == nil {
		return shortcut, ""
	}
	return shortcut, groups[1]
}
|
||||
|
||||
func convertWeeksToDates(events []model.Event, semester string, year string) []model.Event {
|
||||
var newEvents []model.Event
|
||||
eventYear, _ := strconv.Atoi(year)
|
||||
|
||||
// for each event we need to calculate the start and end date based on the week and the year
|
||||
for _, event := range events {
|
||||
eventWeek, _ := strconv.Atoi(event.Week)
|
||||
eventDay, _ := date.GetDateFromWeekNumber(eventYear, eventWeek, event.Day)
|
||||
start := replaceTimeForDate(eventDay, event.Start.Time())
|
||||
end := replaceTimeForDate(eventDay, event.End.Time())
|
||||
|
||||
//Check if end is before start
|
||||
if end.Before(start) {
|
||||
end = end.AddDate(0, 0, 1)
|
||||
}
|
||||
|
||||
newEvent := event
|
||||
newEvent.Start, _ = types.ParseDateTime(start.In(time.UTC))
|
||||
newEvent.End, _ = types.ParseDateTime(end.In(time.UTC))
|
||||
newEvent.Semester = semester
|
||||
newEvents = append(newEvents, newEvent)
|
||||
}
|
||||
return newEvents
|
||||
}
|
||||
|
||||
// replaceTimeForDate returns a time with the year, month and day of date and
// the hour, minute, second and nanosecond of replacementTime, located in the
// location of date.
func replaceTimeForDate(date time.Time, replacementTime time.Time) time.Time {
	year, month, day := date.Date()
	hour, minute, second := replacementTime.Clock()
	return time.Date(year, month, day, hour, minute, second, replacementTime.Nanosecond(), date.Location())
}
|
Reference in New Issue
Block a user