mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2025-08-02 17:59:14 +02:00
feat:#82 integrated sport into fetch service
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
package main
|
package model
|
||||||
|
|
||||||
import "time"
|
import "time"
|
||||||
|
|
||||||
@@ -47,3 +47,9 @@ type Participants struct {
|
|||||||
TotalPlaces int
|
TotalPlaces int
|
||||||
WaitList int
|
WaitList int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SportDayStartEnd describes one recurring weekly slot of a sport course:
// a start timestamp, an end timestamp and the weekday the slot falls on.
type SportDayStartEnd struct {
	// Start and End are the two timestamps bounding the slot.
	Start, End time.Time
	// Day is the weekday associated with the slot.
	Day time.Weekday
}
|
@@ -2,6 +2,7 @@ package db
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"htwkalender/model"
|
"htwkalender/model"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/pocketbase/dbx"
|
"github.com/pocketbase/dbx"
|
||||||
"github.com/pocketbase/pocketbase"
|
"github.com/pocketbase/pocketbase"
|
||||||
@@ -251,3 +252,15 @@ func FindAllEventsByModule(app *pocketbase.PocketBase, module model.Module) (mod
|
|||||||
|
|
||||||
return events, nil
|
return events, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetAllModulesByNameAndDateRange(app *pocketbase.PocketBase, name string, startDate time.Time, endDate time.Time) (model.Events, error) {
|
||||||
|
var events model.Events
|
||||||
|
|
||||||
|
err := app.Dao().DB().Select("*").From("events").Where(dbx.NewExp("Name = {:name} AND Start >= {:startDate} AND End <= {:endDate}", dbx.Params{"name": name, "startDate": startDate, "endDate": endDate})).All(&events)
|
||||||
|
if err != nil {
|
||||||
|
print("Error while getting events from database: ", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return events, nil
|
||||||
|
}
|
||||||
|
467
backend/service/fetch/sport/sportFetcher.go
Normal file
467
backend/service/fetch/sport/sportFetcher.go
Normal file
@@ -0,0 +1,467 @@
|
|||||||
|
package sport
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/pocketbase/pocketbase"
|
||||||
|
"github.com/pocketbase/pocketbase/tools/types"
|
||||||
|
"htwkalender/model"
|
||||||
|
"htwkalender/service/db"
|
||||||
|
"net/http"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
)
|
||||||
|
|
||||||
|
// @TODO: add tests
|
||||||
|
// @TODO: make it like a cron job to fetch the sport courses once a week
|
||||||
|
func RetrieveAndFetchAllSportCourses(app *pocketbase.PocketBase) []model.Event {
|
||||||
|
|
||||||
|
var sportCourseLinks = fetchAllAvailableSportCourses()
|
||||||
|
sportEntries := fetchHTWKSportCourses(sportCourseLinks)
|
||||||
|
events := formatEntriesToEvents(sportEntries)
|
||||||
|
|
||||||
|
var earliestDate time.Time
|
||||||
|
var latestDate time.Time
|
||||||
|
|
||||||
|
// find earliest and latest date in events
|
||||||
|
for _, event := range events {
|
||||||
|
if event.Start.Time().Before(earliestDate) {
|
||||||
|
earliestDate = event.Start.Time()
|
||||||
|
}
|
||||||
|
if event.End.Time().After(latestDate) {
|
||||||
|
latestDate = event.End.Time()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// get all events from database where name = Feiertage und lehrveranstaltungsfreie Tage
|
||||||
|
holidays, err := db.GetAllModulesByNameAndDateRange(app, "Feiertage und lehrveranstaltungsfreie Tage", earliestDate, latestDate)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove all events that have same year, month and day as items in holidays
|
||||||
|
for _, holiday := range holidays {
|
||||||
|
for i, event := range events {
|
||||||
|
if event.Start.Time().Year() == holiday.Start.Time().Year() &&
|
||||||
|
event.Start.Time().Month() == holiday.Start.Time().Month() &&
|
||||||
|
event.Start.Time().Day() == holiday.Start.Time().Day() {
|
||||||
|
events = append(events[:i], events[i+1:]...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// save events to database
|
||||||
|
savedEvents, err := db.SaveEvents(events, app)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return savedEvents
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatEntriesToEvents(entries []model.SportEntry) []model.Event {
|
||||||
|
|
||||||
|
var events []model.Event
|
||||||
|
|
||||||
|
for i, entry := range entries {
|
||||||
|
eventStarts, eventEnds := getWeekEvents(entry.Details.DateRange.Start, entry.Details.DateRange.End, entry.Details.Cycle)
|
||||||
|
for j := range eventStarts {
|
||||||
|
|
||||||
|
start, _ := types.ParseDateTime(eventStarts[j].In(time.UTC))
|
||||||
|
end, _ := types.ParseDateTime(eventEnds[j].In(time.UTC))
|
||||||
|
|
||||||
|
var event = model.Event{
|
||||||
|
UUID: uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+strconv.FormatInt(int64(i), 10))).String(),
|
||||||
|
Day: toGermanWeekdayString(entry.Details.DateRange.Start.Weekday()),
|
||||||
|
Week: strconv.Itoa(23),
|
||||||
|
Start: start,
|
||||||
|
End: end,
|
||||||
|
Name: entry.Title,
|
||||||
|
EventType: entry.Details.Type,
|
||||||
|
Prof: entry.Details.CourseLead.Name,
|
||||||
|
Rooms: entry.Details.Location.Name,
|
||||||
|
Notes: entry.AdditionalNote,
|
||||||
|
BookedAt: "",
|
||||||
|
Course: "Sport",
|
||||||
|
Semester: checkSemester(entry.Details.DateRange.Start),
|
||||||
|
}
|
||||||
|
events = append(events, event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return events
|
||||||
|
}
|
||||||
|
|
||||||
|
// getDayInt converts a two-letter German weekday abbreviation ("Mo", "Di",
// "Mi", "Do", "Fr", "Sa", "So") into its weekday number, where Sunday is 0 and
// Saturday is 6. Any other input yields 0 as well.
func getDayInt(weekDay string) int {
	switch weekDay {
	case "Mo":
		return 1
	case "Di":
		return 2
	case "Mi":
		return 3
	case "Do":
		return 4
	case "Fr":
		return 5
	case "Sa":
		return 6
	}
	// "So" and every unrecognised abbreviation.
	return 0
}
|
||||||
|
|
||||||
|
// toGermanWeekdayString returns the German name of weekday ("Montag" ...
// "Sonntag"). Values outside time.Sunday..time.Saturday yield "".
func toGermanWeekdayString(weekday time.Weekday) string {
	germanNames := [...]string{
		time.Sunday:    "Sonntag",
		time.Monday:    "Montag",
		time.Tuesday:   "Dienstag",
		time.Wednesday: "Mittwoch",
		time.Thursday:  "Donnerstag",
		time.Friday:    "Freitag",
		time.Saturday:  "Samstag",
	}

	if weekday < time.Sunday || weekday > time.Saturday {
		return ""
	}
	return germanNames[weekday]
}
|
||||||
|
|
||||||
|
// clockTimePattern matches a zero-padded HH:MM clock time.
var clockTimePattern = regexp.MustCompile("[0-9]{2}:[0-9]{2}")

// extractStartAndEndTime reads the first two HH:MM clock times from cycle
// (e.g. "Mo 18:00-20:00") and returns them as
// (startHour, startMinute, endHour, endMinute).
//
// When cycle contains fewer than two clock times, all four results are 0
// instead of the function panicking on a missing match.
func extractStartAndEndTime(cycle string) (int, int, int, int) {
	times := clockTimePattern.FindAllString(cycle, 2)
	if len(times) < 2 {
		return 0, 0, 0, 0
	}

	startHour, startMinute := splitClockTime(times[0])
	endHour, endMinute := splitClockTime(times[1])
	return startHour, startMinute, endHour, endMinute
}

// splitClockTime converts a string matched by clockTimePattern into its hour
// and minute.
func splitClockTime(hhmm string) (int, int) {
	// Atoi cannot fail here: the pattern guarantees two digits on each side
	// of the colon.
	hour, _ := strconv.Atoi(hhmm[0:2])
	minute, _ := strconv.Atoi(hhmm[3:5])
	return hour, minute
}
|
||||||
|
|
||||||
|
func getWeekEvents(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) {
|
||||||
|
|
||||||
|
var weekEvents []model.SportDayStartEnd
|
||||||
|
|
||||||
|
// split by regexp to get the cycle parts
|
||||||
|
var cycleParts []string
|
||||||
|
cycleParts = splitByCommaWithTime(cycle)
|
||||||
|
|
||||||
|
for _, cyclePart := range cycleParts {
|
||||||
|
|
||||||
|
//cut string at the first integer/number
|
||||||
|
cyclePartWithDaysOnly := cyclePart[0:strings.IndexFunc(cyclePart, func(r rune) bool { return r >= '0' && r <= '9' })]
|
||||||
|
|
||||||
|
// check if cycle has multiple days by checking if it has a plus sign
|
||||||
|
if strings.Contains(cyclePartWithDaysOnly, "+") {
|
||||||
|
// find all days in cycle part by regexp
|
||||||
|
dayRegExp, _ := regexp.Compile("[A-Z][a-z]")
|
||||||
|
days := dayRegExp.FindAllString(cyclePart, -1)
|
||||||
|
startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart)
|
||||||
|
|
||||||
|
// creating a SportDayStartEnd for each day in the cycle
|
||||||
|
for _, day := range days {
|
||||||
|
weekEvents = append(weekEvents, model.SportDayStartEnd{
|
||||||
|
Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()),
|
||||||
|
End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()),
|
||||||
|
Day: time.Weekday(getDayInt(day)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if cycle has multiple days by checking if it has a minus sign
|
||||||
|
if strings.Contains(cyclePartWithDaysOnly, "-") {
|
||||||
|
// find all days in cycle part by regexp
|
||||||
|
dayRegExp, _ := regexp.Compile("[A-Z][a-z]")
|
||||||
|
days := dayRegExp.FindAllString(cyclePart, 2)
|
||||||
|
startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart)
|
||||||
|
|
||||||
|
//create a int array with all days from start to end day
|
||||||
|
var daysBetween []int
|
||||||
|
for i := getDayInt(days[0]); i <= getDayInt(days[1]); i++ {
|
||||||
|
daysBetween = append(daysBetween, i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// creating a SportDayStartEnd for each day in the cycle
|
||||||
|
for _, day := range daysBetween {
|
||||||
|
weekEvents = append(weekEvents, model.SportDayStartEnd{
|
||||||
|
Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()),
|
||||||
|
End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()),
|
||||||
|
Day: time.Weekday(day),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if cycle has only one day
|
||||||
|
if !strings.Contains(cyclePartWithDaysOnly, "-") && !strings.Contains(cyclePartWithDaysOnly, "+") {
|
||||||
|
// find all days in cycle part by regexp
|
||||||
|
dayRegExp, _ := regexp.Compile("[A-Z][a-z]")
|
||||||
|
days := dayRegExp.FindAllString(cyclePart, -1)
|
||||||
|
startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart)
|
||||||
|
|
||||||
|
// creating a SportDayStartEnd for each day in the cycle
|
||||||
|
for _, day := range days {
|
||||||
|
weekEvents = append(weekEvents, model.SportDayStartEnd{
|
||||||
|
Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()),
|
||||||
|
End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()),
|
||||||
|
Day: time.Weekday(getDayInt(day)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var startDatesList []time.Time
|
||||||
|
var endDatesList []time.Time
|
||||||
|
|
||||||
|
for _, weekEvent := range weekEvents {
|
||||||
|
startDates, endDates := createEventListFromStartToEndMatchingDay(weekEvent)
|
||||||
|
startDatesList = append(startDatesList, startDates...)
|
||||||
|
endDatesList = append(endDatesList, endDates...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return startDatesList, endDatesList
|
||||||
|
}
|
||||||
|
|
||||||
|
func createEventListFromStartToEndMatchingDay(weekEvent model.SportDayStartEnd) ([]time.Time, []time.Time) {
|
||||||
|
var startDates []time.Time
|
||||||
|
var endDates []time.Time
|
||||||
|
for d := weekEvent.Start; d.Before(weekEvent.End); d = d.AddDate(0, 0, 1) {
|
||||||
|
if d.Weekday() == weekEvent.Day {
|
||||||
|
startDates = append(startDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.Start.Hour(), weekEvent.Start.Minute(), 0, 0, d.Location()))
|
||||||
|
endDates = append(endDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.End.Hour(), weekEvent.End.Minute(), 0, 0, d.Location()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return startDates, endDates
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitByCommaWithTime splits input at ", " and re-joins the pieces so that
// every result ends with the piece that carries a time span. Pieces without a
// time span (e.g. the "Mo" and "Mi" of "Mo, Mi, Fr 18:00-20:00") are glued,
// again with ", ", in front of the next piece that has one. Trailing pieces
// that are never followed by a time span are returned as one final element.
func splitByCommaWithTime(input string) []string {
	// A weekday group (optionally joined by "+") followed by HH:MM-HH:MM.
	dayWithTime := regexp.MustCompile(`([A-Za-z]{2,}(\+[A-Za-z]{2,})* \d{2}:\d{2}-\d{2}:\d{2})`)

	var result []string
	pending := "" // pieces collected so far that still wait for a time span

	for _, piece := range strings.Split(input, ", ") {
		joined := piece
		if pending != "" {
			joined = pending + ", " + piece
		}

		if dayWithTime.MatchString(piece) {
			// The time span closes the current group.
			result = append(result, joined)
			pending = ""
			continue
		}
		pending = joined
	}

	if pending != "" {
		result = append(result, pending)
	}

	return result
}
|
||||||
|
|
||||||
|
// checkSemester classifies date by its month: April through September is the
// summer semester ("ss"), every other month the winter semester ("ws").
func checkSemester(date time.Time) string {
	month := date.Month()
	if month < time.April || month > time.September {
		return "ws"
	}
	return "ss"
}
|
||||||
|
|
||||||
|
// fetch the main page where all sport courses are listed and extract all links to the sport courses
|
||||||
|
func fetchAllAvailableSportCourses() []string {
|
||||||
|
var url = "https://sport.htwk-leipzig.de/sportangebote"
|
||||||
|
|
||||||
|
var doc, err = htmlRequest(url)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// link list of all sport courses
|
||||||
|
var links []string
|
||||||
|
|
||||||
|
// find all links to sport courses with regex https://sport.htwk-leipzig.de/sportangebote/detail/sport/ + [0-9]{1,4}
|
||||||
|
doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
|
||||||
|
link, _ := s.Attr("href")
|
||||||
|
if strings.HasPrefix(link, "/sportangebote/detail/sport/") {
|
||||||
|
links = append(links, link)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
return links
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchAllHTWKSportCourses fetches all sport courses from the given links.
|
||||||
|
// to speed up the process, it uses multithreading.
|
||||||
|
|
||||||
|
func fetchHTWKSportCourses(links []string) []model.SportEntry {
|
||||||
|
|
||||||
|
//multithreaded webpage requests to speed up the process
|
||||||
|
|
||||||
|
var maxThreads = 10
|
||||||
|
var htmlPageArray = make([]*goquery.Document, len(links))
|
||||||
|
var hostUrl = "https://sport.htwk-leipzig.de"
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(maxThreads)
|
||||||
|
for i := 0; i < maxThreads; i++ {
|
||||||
|
go func(i int) {
|
||||||
|
for j := i; j < len(links); j += maxThreads {
|
||||||
|
doc, err := htmlRequest(hostUrl + links[j])
|
||||||
|
if err == nil {
|
||||||
|
htmlPageArray[j] = doc
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wg.Done()
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
var events []model.SportEntry
|
||||||
|
|
||||||
|
for _, doc := range htmlPageArray {
|
||||||
|
if doc != nil {
|
||||||
|
event, err := fetchHtwkSportCourse(doc)
|
||||||
|
if err == nil {
|
||||||
|
events = append(events, event...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return events
|
||||||
|
}
|
||||||
|
|
||||||
|
func htmlRequest(url string) (*goquery.Document, error) {
|
||||||
|
resp, err := http.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return doc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchHtwkSportCourse fetches the sport course from the given url and id.
|
||||||
|
// If the sport course does not exist, it will return an error.
|
||||||
|
// If the sport course exists, it will return the sport course.
|
||||||
|
// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse.
|
||||||
|
// May be improved in the future.
|
||||||
|
func fetchHtwkSportCourse(doc *goquery.Document) ([]model.SportEntry, error) {
|
||||||
|
var events []model.SportEntry
|
||||||
|
|
||||||
|
if doc.Find("h1").Text() == "Aktuelle Sportangebote" {
|
||||||
|
return nil, errors.New("not a sport course page")
|
||||||
|
}
|
||||||
|
|
||||||
|
doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) {
|
||||||
|
var event model.SportEntry
|
||||||
|
var details model.EventDetails
|
||||||
|
|
||||||
|
fullTitle := strings.TrimSpace(s.Find("h3").Text())
|
||||||
|
titleParts := strings.Split(fullTitle, "-")
|
||||||
|
if len(titleParts) > 0 {
|
||||||
|
event.Title = "Sport: " + strings.TrimSpace(titleParts[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(titleParts) > 2 {
|
||||||
|
details.Type = strings.TrimSpace(titleParts[len(titleParts)-1])
|
||||||
|
}
|
||||||
|
|
||||||
|
s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) {
|
||||||
|
key := strings.TrimSpace(s.Find("td").First().Text())
|
||||||
|
value := strings.TrimSpace(s.Find("td").Last().Text())
|
||||||
|
|
||||||
|
switch key {
|
||||||
|
case "Zeitraum":
|
||||||
|
dates := strings.Split(value, "-")
|
||||||
|
if len(dates) == 2 {
|
||||||
|
startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0]))
|
||||||
|
endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1]))
|
||||||
|
details.DateRange = model.DateRange{Start: startDate, End: endDate}
|
||||||
|
}
|
||||||
|
case "Zyklus":
|
||||||
|
details.Cycle = value
|
||||||
|
case "Geschlecht":
|
||||||
|
details.Gender = value
|
||||||
|
case "Leiter":
|
||||||
|
leaderName := strings.TrimSpace(s.Find("td a").Text())
|
||||||
|
leadersSlice := strings.Split(leaderName, "\n")
|
||||||
|
for i, leader := range leadersSlice {
|
||||||
|
leadersSlice[i] = strings.TrimSpace(leader)
|
||||||
|
}
|
||||||
|
formattedLeaders := strings.Join(leadersSlice, ", ")
|
||||||
|
leaderLink, _ := s.Find("td a").Attr("href")
|
||||||
|
details.CourseLead = model.CourseLead{Name: formattedLeaders, Link: leaderLink}
|
||||||
|
case "Ort":
|
||||||
|
locationDetails := strings.Split(value, "(")
|
||||||
|
if len(locationDetails) == 2 {
|
||||||
|
details.Location = model.Location{
|
||||||
|
Name: strings.TrimSpace(locationDetails[0]),
|
||||||
|
Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case "Teilnehmer":
|
||||||
|
parts := strings.Split(value, "/")
|
||||||
|
if len(parts) >= 3 {
|
||||||
|
bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
||||||
|
totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1]))
|
||||||
|
waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2]))
|
||||||
|
details.Participants = model.Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList}
|
||||||
|
}
|
||||||
|
case "Kosten":
|
||||||
|
details.Cost = value // makes no sense since you need to be logged in to see the price
|
||||||
|
case "Hinweis":
|
||||||
|
var allNotes []string
|
||||||
|
|
||||||
|
s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) {
|
||||||
|
if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" {
|
||||||
|
note := strings.TrimSpace(s.Text())
|
||||||
|
if note != "" {
|
||||||
|
allNotes = append(allNotes, note)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
event.AdditionalNote = strings.Join(allNotes, " ")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
event.Details = details
|
||||||
|
events = append(events, event)
|
||||||
|
})
|
||||||
|
|
||||||
|
return events, nil
|
||||||
|
}
|
40
backend/service/fetch/sport/sportFetcher_test.go
Normal file
40
backend/service/fetch/sport/sportFetcher_test.go
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
package sport
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test_splitByCommaWithTime(t *testing.T) {
|
||||||
|
type args struct {
|
||||||
|
input string
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
args args
|
||||||
|
want []string
|
||||||
|
}{
|
||||||
|
{"one string", args{"one"}, []string{"one"}},
|
||||||
|
{"two strings", args{"one,two"}, []string{"one,two"}},
|
||||||
|
{"three strings", args{"one,two,three"}, []string{"one,two,three"}},
|
||||||
|
// e.g. "Mo 18:00-20:00, Di 18:00-20:00" -> ["Mo 18:00-20:00", "Di 18:00-20:00"]
|
||||||
|
// e.g. "Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00" -> ["Mo 18:00-20:00", "Di 18:00-20:00", "Mi 18:00-20:00"]
|
||||||
|
// e.g. "Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00" -> ["Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00"]
|
||||||
|
// e.g. "Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00" -> ["Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"]
|
||||||
|
// e.g. "Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00" -> ["Mo+Mi+Fr 18:00-20:00", "Sa 20:00-21:00"]
|
||||||
|
// e.g. "Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00" -> ["Mo+Mi 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"]
|
||||||
|
{"Mo 18:00-20:00, Di 18:00-20:00", args{"Mo 18:00-20:00, Di 18:00-20:00"}, []string{"Mo 18:00-20:00", "Di 18:00-20:00"}},
|
||||||
|
{"Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00", args{"Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00"}, []string{"Mo 18:00-20:00", "Di 18:00-20:00", "Mi 18:00-20:00"}},
|
||||||
|
{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00", args{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00"}, []string{"Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00"}},
|
||||||
|
{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00", args{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00"}, []string{"Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"}},
|
||||||
|
{"Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00", args{"Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00"}, []string{"Mo+Mi+Fr 18:00-20:00", "Sa 20:00-21:00"}},
|
||||||
|
{"Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00", args{"Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00"}, []string{"Mo+Mi 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"}},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if got := splitByCommaWithTime(tt.args.input); !reflect.DeepEqual(got, tt.want) {
|
||||||
|
t.Errorf("splitByCommaWithTime() = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@@ -1,353 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"github.com/google/uuid"
|
|
||||||
"github.com/pocketbase/pocketbase/tools/types"
|
|
||||||
"htwkalender/model"
|
|
||||||
"net/http"
|
|
||||||
"regexp"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
|
||||||
)
|
|
||||||
|
|
||||||
// @TODO: fix bug where cycle contains multiple days (e.g. "Mo + Mi 18:00-20:00")
|
|
||||||
// @TODO: add tests
|
|
||||||
// @TODO: add it to the service
|
|
||||||
// @TODO: make it like a cron job to fetch the sport courses once a week
|
|
||||||
func main() {
|
|
||||||
|
|
||||||
var sportCourseLinks = fetchAllAvailableSportCourses()
|
|
||||||
sportEntries := fetchHTWKSportCourses(sportCourseLinks)
|
|
||||||
|
|
||||||
for _, event := range sportEntries {
|
|
||||||
println(event.Title)
|
|
||||||
}
|
|
||||||
|
|
||||||
events := formatEntriesToEvents(sportEntries)
|
|
||||||
|
|
||||||
for _, event := range events {
|
|
||||||
println(event.Name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func formatEntriesToEvents(entries []SportEntry) []model.Event {
|
|
||||||
|
|
||||||
var events []model.Event
|
|
||||||
|
|
||||||
for i, entry := range entries {
|
|
||||||
|
|
||||||
eventStarts, eventEnds := calculateEventStarts(entry.Details.DateRange.Start, entry.Details.DateRange.End, entry.Details.Cycle)
|
|
||||||
|
|
||||||
for j := range eventStarts {
|
|
||||||
|
|
||||||
start, _ := types.ParseDateTime(eventStarts[j].In(time.UTC))
|
|
||||||
end, _ := types.ParseDateTime(eventEnds[j].In(time.UTC))
|
|
||||||
|
|
||||||
var event = model.Event{
|
|
||||||
UUID: uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+strconv.FormatInt(int64(i), 10))).String(),
|
|
||||||
Day: entry.Details.DateRange.Start.Weekday().String(),
|
|
||||||
Week: strconv.Itoa(23),
|
|
||||||
Start: start,
|
|
||||||
End: end,
|
|
||||||
Name: entry.Title,
|
|
||||||
EventType: entry.Details.Type,
|
|
||||||
Prof: entry.Details.CourseLead.Name,
|
|
||||||
Rooms: entry.Details.Location.Name,
|
|
||||||
Notes: entry.AdditionalNote,
|
|
||||||
BookedAt: "",
|
|
||||||
Course: "Sport",
|
|
||||||
Semester: checkSemester(entry.Details.DateRange.Start),
|
|
||||||
}
|
|
||||||
events = append(events, event)
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
return events
|
|
||||||
}
|
|
||||||
|
|
||||||
func calculateEventStarts(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) {
|
|
||||||
|
|
||||||
// start is the begin of the cycle e.g. 01.04.2020
|
|
||||||
// end is the end of the cycle e.g. 30.09.2020
|
|
||||||
// cycle is the day and timespan (e.g. "Mo 18:00-20:00")
|
|
||||||
|
|
||||||
// check if start is before end
|
|
||||||
if start.After(end) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if cycle is valid
|
|
||||||
if !checkCycle(cycle) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var weekDay = cycle[0:2]
|
|
||||||
// match weekday to time.Weekday (e.g. "Mo" -> time.Monday)
|
|
||||||
var weekDayInt int
|
|
||||||
|
|
||||||
switch weekDay {
|
|
||||||
case "Mo":
|
|
||||||
weekDayInt = 1
|
|
||||||
case "Di":
|
|
||||||
weekDayInt = 2
|
|
||||||
case "Mi":
|
|
||||||
weekDayInt = 3
|
|
||||||
case "Do":
|
|
||||||
weekDayInt = 4
|
|
||||||
case "Fr":
|
|
||||||
weekDayInt = 5
|
|
||||||
case "Sa":
|
|
||||||
weekDayInt = 6
|
|
||||||
case "So":
|
|
||||||
weekDayInt = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// get every date matching the weekday in the cycle between start and end
|
|
||||||
var eventDates []time.Time
|
|
||||||
for d := start; d.Before(end); d = d.AddDate(0, 0, 1) {
|
|
||||||
if d.Weekday() == time.Weekday(weekDayInt) {
|
|
||||||
eventDates = append(eventDates, d)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// add hours and minutes to the dates in eventDates
|
|
||||||
// array of tuple of start and end times
|
|
||||||
var eventStartsWithTime []time.Time
|
|
||||||
var eventEndWithTime []time.Time
|
|
||||||
|
|
||||||
for _, eventStart := range eventDates {
|
|
||||||
timeRegExp, _ := regexp.Compile("[0-9]{2}:[0-9]{2}")
|
|
||||||
times := timeRegExp.FindAllString(cycle, 2)
|
|
||||||
startHour, _ := strconv.Atoi(times[0][0:2])
|
|
||||||
startMinute, _ := strconv.Atoi(times[0][3:5])
|
|
||||||
|
|
||||||
endHour, _ := strconv.Atoi(times[1][0:2])
|
|
||||||
endMinute, _ := strconv.Atoi(times[1][3:5])
|
|
||||||
eventStartsWithTime = append(eventStartsWithTime, time.Date(eventStart.Year(), eventStart.Month(), eventStart.Day(), startHour, startMinute, 0, 0, eventStart.Location()))
|
|
||||||
eventEndWithTime = append(eventEndWithTime, time.Date(eventStart.Year(), eventStart.Month(), eventStart.Day(), endHour, endMinute, 0, 0, eventStart.Location()))
|
|
||||||
}
|
|
||||||
|
|
||||||
return eventStartsWithTime, eventEndWithTime
|
|
||||||
}
|
|
||||||
|
|
||||||
func checkCycle(cycle string) bool {
|
|
||||||
|
|
||||||
// check if cycle is valid
|
|
||||||
if len(cycle) < 12 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if cycle has a weekday
|
|
||||||
weekDay := cycle[0:2]
|
|
||||||
if weekDay != "Mo" && weekDay != "Di" && weekDay != "Mi" && weekDay != "Do" && weekDay != "Fr" && weekDay != "Sa" && weekDay != "So" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if cycle has a timespan
|
|
||||||
timeSpan := cycle[3:12]
|
|
||||||
if len(timeSpan) != 9 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if timespan has a start and end time
|
|
||||||
startTime := timeSpan[0:5]
|
|
||||||
endTime := timeSpan[6:9]
|
|
||||||
if len(startTime) != 5 || len(endTime) != 3 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if start time is before end time
|
|
||||||
if startTime > endTime {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if ws or ss
|
|
||||||
func checkSemester(date time.Time) string {
|
|
||||||
if date.Month() >= 4 && date.Month() <= 9 {
|
|
||||||
return "ss"
|
|
||||||
} else {
|
|
||||||
return "ws"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// fetch the main page where all sport courses are listed and extract all links to the sport courses
|
|
||||||
func fetchAllAvailableSportCourses() []string {
|
|
||||||
var url = "https://sport.htwk-leipzig.de/sportangebote"
|
|
||||||
|
|
||||||
var doc, err = htmlRequest(url)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// link list of all sport courses
|
|
||||||
var links []string
|
|
||||||
|
|
||||||
// find all links to sport courses with regex https://sport.htwk-leipzig.de/sportangebote/detail/sport/ + [0-9]{1,4}
|
|
||||||
doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
|
|
||||||
link, _ := s.Attr("href")
|
|
||||||
if strings.HasPrefix(link, "/sportangebote/detail/sport/") {
|
|
||||||
links = append(links, link)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
return links
|
|
||||||
}
|
|
||||||
|
|
||||||
// fetchAllHTWKSportCourses fetches all sport courses from the given links.
|
|
||||||
// to speed up the process, it uses multithreading.
|
|
||||||
|
|
||||||
func fetchHTWKSportCourses(links []string) []SportEntry {
|
|
||||||
|
|
||||||
//multithreaded webpage requests to speed up the process
|
|
||||||
|
|
||||||
var maxThreads = 10
|
|
||||||
var htmlPageArray = make([]*goquery.Document, len(links))
|
|
||||||
var hostUrl = "https://sport.htwk-leipzig.de"
|
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
wg.Add(maxThreads)
|
|
||||||
for i := 0; i < maxThreads; i++ {
|
|
||||||
go func(i int) {
|
|
||||||
for j := i; j < len(links); j += maxThreads {
|
|
||||||
doc, err := htmlRequest(hostUrl + links[j])
|
|
||||||
if err == nil {
|
|
||||||
htmlPageArray[j] = doc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
wg.Done()
|
|
||||||
}(i)
|
|
||||||
}
|
|
||||||
wg.Wait()
|
|
||||||
|
|
||||||
var events []SportEntry
|
|
||||||
|
|
||||||
for _, doc := range htmlPageArray {
|
|
||||||
if doc != nil {
|
|
||||||
event, err := fetchHtwkSportCourse(doc)
|
|
||||||
if err == nil {
|
|
||||||
events = append(events, event...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return events
|
|
||||||
}
|
|
||||||
|
|
||||||
func htmlRequest(url string) (*goquery.Document, error) {
|
|
||||||
println("fetching " + url)
|
|
||||||
|
|
||||||
resp, err := http.Get(url)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
println("finished fetching " + url)
|
|
||||||
return doc, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// fetchHtwkSportCourse fetches the sport course from the given url and id.
|
|
||||||
// If the sport course does not exist, it will return an error.
|
|
||||||
// If the sport course exists, it will return the sport course.
|
|
||||||
// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse.
|
|
||||||
// May be improved in the future.
|
|
||||||
func fetchHtwkSportCourse(doc *goquery.Document) ([]SportEntry, error) {
|
|
||||||
var events []SportEntry
|
|
||||||
|
|
||||||
if doc.Find("h1").Text() == "Aktuelle Sportangebote" {
|
|
||||||
return nil, errors.New("not a sport course page")
|
|
||||||
}
|
|
||||||
|
|
||||||
doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) {
|
|
||||||
var event SportEntry
|
|
||||||
var details EventDetails
|
|
||||||
|
|
||||||
fullTitle := strings.TrimSpace(s.Find("h3").Text())
|
|
||||||
titleParts := strings.Split(fullTitle, "-")
|
|
||||||
if len(titleParts) > 0 {
|
|
||||||
event.Title = strings.TrimSpace(titleParts[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(titleParts) > 2 {
|
|
||||||
details.Type = strings.TrimSpace(titleParts[len(titleParts)-1])
|
|
||||||
}
|
|
||||||
|
|
||||||
s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) {
|
|
||||||
key := strings.TrimSpace(s.Find("td").First().Text())
|
|
||||||
value := strings.TrimSpace(s.Find("td").Last().Text())
|
|
||||||
|
|
||||||
switch key {
|
|
||||||
case "Zeitraum":
|
|
||||||
dates := strings.Split(value, "-")
|
|
||||||
if len(dates) == 2 {
|
|
||||||
startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0]))
|
|
||||||
endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1]))
|
|
||||||
details.DateRange = DateRange{Start: startDate, End: endDate}
|
|
||||||
}
|
|
||||||
case "Zyklus":
|
|
||||||
details.Cycle = value
|
|
||||||
case "Geschlecht":
|
|
||||||
details.Gender = value
|
|
||||||
case "Leiter":
|
|
||||||
leaderName := strings.TrimSpace(s.Find("td a").Text())
|
|
||||||
leadersSlice := strings.Split(leaderName, "\n")
|
|
||||||
for i, leader := range leadersSlice {
|
|
||||||
leadersSlice[i] = strings.TrimSpace(leader)
|
|
||||||
}
|
|
||||||
formattedLeaders := strings.Join(leadersSlice, ", ")
|
|
||||||
leaderLink, _ := s.Find("td a").Attr("href")
|
|
||||||
details.CourseLead = CourseLead{Name: formattedLeaders, Link: leaderLink}
|
|
||||||
case "Ort":
|
|
||||||
locationDetails := strings.Split(value, "(")
|
|
||||||
if len(locationDetails) == 2 {
|
|
||||||
details.Location = Location{
|
|
||||||
Name: strings.TrimSpace(locationDetails[0]),
|
|
||||||
Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "Teilnehmer":
|
|
||||||
parts := strings.Split(value, "/")
|
|
||||||
if len(parts) >= 3 {
|
|
||||||
bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
|
||||||
totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1]))
|
|
||||||
waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2]))
|
|
||||||
details.Participants = Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList}
|
|
||||||
}
|
|
||||||
case "Kosten":
|
|
||||||
details.Cost = value // makes no sense since you need to be logged in to see the price
|
|
||||||
case "Hinweis":
|
|
||||||
var allNotes []string
|
|
||||||
|
|
||||||
s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) {
|
|
||||||
if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" {
|
|
||||||
note := strings.TrimSpace(s.Text())
|
|
||||||
if note != "" {
|
|
||||||
allNotes = append(allNotes, note)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
event.AdditionalNote = strings.Join(allNotes, " ")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
event.Details = details
|
|
||||||
events = append(events, event)
|
|
||||||
})
|
|
||||||
|
|
||||||
return events, nil
|
|
||||||
}
|
|
Reference in New Issue
Block a user