diff --git a/backend/sport/sportFetcherModel.go b/backend/model/sportFetcherModel.go similarity index 90% rename from backend/sport/sportFetcherModel.go rename to backend/model/sportFetcherModel.go index d7b82ce..600f6ba 100644 --- a/backend/sport/sportFetcherModel.go +++ b/backend/model/sportFetcherModel.go @@ -1,4 +1,4 @@ -package main +package model import "time" @@ -47,3 +47,9 @@ type Participants struct { TotalPlaces int WaitList int } + +type SportDayStartEnd struct { + Start time.Time + End time.Time + Day time.Weekday +} diff --git a/backend/service/db/dbEvents.go b/backend/service/db/dbEvents.go index 3253525..e301f00 100644 --- a/backend/service/db/dbEvents.go +++ b/backend/service/db/dbEvents.go @@ -2,6 +2,7 @@ package db import ( "htwkalender/model" + "time" "github.com/pocketbase/dbx" "github.com/pocketbase/pocketbase" @@ -251,3 +252,15 @@ func FindAllEventsByModule(app *pocketbase.PocketBase, module model.Module) (mod return events, nil } + +func GetAllModulesByNameAndDateRange(app *pocketbase.PocketBase, name string, startDate time.Time, endDate time.Time) (model.Events, error) { + var events model.Events + + err := app.Dao().DB().Select("*").From("events").Where(dbx.NewExp("Name = {:name} AND Start >= {:startDate} AND End <= {:endDate}", dbx.Params{"name": name, "startDate": startDate, "endDate": endDate})).All(&events) + if err != nil { + print("Error while getting events from database: ", err) + return nil, err + } + + return events, nil +} diff --git a/backend/service/fetch/sport/sportFetcher.go b/backend/service/fetch/sport/sportFetcher.go new file mode 100644 index 0000000..e71c285 --- /dev/null +++ b/backend/service/fetch/sport/sportFetcher.go @@ -0,0 +1,467 @@ +package sport + +import ( + "errors" + "github.com/google/uuid" + "github.com/pocketbase/pocketbase" + "github.com/pocketbase/pocketbase/tools/types" + "htwkalender/model" + "htwkalender/service/db" + "net/http" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "github.com/PuerkitoBio/goquery" +) + +// @TODO: add tests +// @TODO: make it like a cron job to fetch the sport courses once a week +func RetrieveAndFetchAllSportCourses(app *pocketbase.PocketBase) []model.Event { + + var sportCourseLinks = fetchAllAvailableSportCourses() + sportEntries := fetchHTWKSportCourses(sportCourseLinks) + events := formatEntriesToEvents(sportEntries) + + var earliestDate time.Time + var latestDate time.Time + + // find earliest and latest date in events + for _, event := range events { + if event.Start.Time().Before(earliestDate) { + earliestDate = event.Start.Time() + } + if event.End.Time().After(latestDate) { + latestDate = event.End.Time() + } + } + + // get all events from database where name = Feiertage und lehrveranstaltungsfreie Tage + holidays, err := db.GetAllModulesByNameAndDateRange(app, "Feiertage und lehrveranstaltungsfreie Tage", earliestDate, latestDate) + if err != nil { + return nil + } + + // remove all events that have same year, month and day as items in holidays + for _, holiday := range holidays { + for i, event := range events { + if event.Start.Time().Year() == holiday.Start.Time().Year() && + event.Start.Time().Month() == holiday.Start.Time().Month() && + event.Start.Time().Day() == holiday.Start.Time().Day() { + events = append(events[:i], events[i+1:]...) + } + } + } + + // save events to database + savedEvents, err := db.SaveEvents(events, app) + + if err != nil { + return nil + } + + return savedEvents + +} + +func formatEntriesToEvents(entries []model.SportEntry) []model.Event { + + var events []model.Event + + for i, entry := range entries { + eventStarts, eventEnds := getWeekEvents(entry.Details.DateRange.Start, entry.Details.DateRange.End, entry.Details.Cycle) + for j := range eventStarts { + + start, _ := types.ParseDateTime(eventStarts[j].In(time.UTC)) + end, _ := types.ParseDateTime(eventEnds[j].In(time.UTC)) + + var event = model.Event{ + UUID: uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+strconv.FormatInt(int64(i), 10))).String(), + Day: toGermanWeekdayString(entry.Details.DateRange.Start.Weekday()), + Week: strconv.Itoa(23), + Start: start, + End: end, + Name: entry.Title, + EventType: entry.Details.Type, + Prof: entry.Details.CourseLead.Name, + Rooms: entry.Details.Location.Name, + Notes: entry.AdditionalNote, + BookedAt: "", + Course: "Sport", + Semester: checkSemester(entry.Details.DateRange.Start), + } + events = append(events, event) + } + } + return events +} + +func getDayInt(weekDay string) int { + var weekDayInt int + switch weekDay { + case "Mo": + weekDayInt = 1 + case "Di": + weekDayInt = 2 + case "Mi": + weekDayInt = 3 + case "Do": + weekDayInt = 4 + case "Fr": + weekDayInt = 5 + case "Sa": + weekDayInt = 6 + case "So": + weekDayInt = 0 + } + return weekDayInt +} + +func toGermanWeekdayString(weekday time.Weekday) string { + switch weekday { + case time.Monday: + return "Montag" + case time.Tuesday: + return "Dienstag" + case time.Wednesday: + return "Mittwoch" + case time.Thursday: + return "Donnerstag" + case time.Friday: + return "Freitag" + case time.Saturday: + return "Samstag" + case time.Sunday: + return "Sonntag" + default: + return "" + } +} + +func extractStartAndEndTime(cycle string) (int, int, int, int) { + timeRegExp, _ := regexp.Compile("[0-9]{2}:[0-9]{2}") + times := timeRegExp.FindAllString(cycle, 2) + startHour, _ := strconv.Atoi(times[0][0:2]) + startMinute, _ := strconv.Atoi(times[0][3:5]) + + endHour, _ := strconv.Atoi(times[1][0:2]) + endMinute, _ := strconv.Atoi(times[1][3:5]) + return startHour, startMinute, endHour, endMinute +} + +func getWeekEvents(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) { + + var weekEvents []model.SportDayStartEnd + + // split by regexp to get the cycle parts + var cycleParts []string + cycleParts = splitByCommaWithTime(cycle) + + for _, cyclePart := range cycleParts { + + //cut string at the first integer/number + cyclePartWithDaysOnly := cyclePart[0:strings.IndexFunc(cyclePart, func(r rune) bool { return r >= '0' && r <= '9' })] + + // check if cycle has multiple days by checking if it has a plus sign + if strings.Contains(cyclePartWithDaysOnly, "+") { + // find all days in cycle part by regexp + dayRegExp, _ := regexp.Compile("[A-Z][a-z]") + days := dayRegExp.FindAllString(cyclePart, -1) + startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart) + + // creating a SportDayStartEnd for each day in the cycle + for _, day := range days { + weekEvents = append(weekEvents, model.SportDayStartEnd{ + Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()), + End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()), + Day: time.Weekday(getDayInt(day)), + }) + } + + } + + // check if cycle has multiple days by checking if it has a minus sign + if strings.Contains(cyclePartWithDaysOnly, "-") { + // find all days in cycle part by regexp + dayRegExp, _ := regexp.Compile("[A-Z][a-z]") + days := dayRegExp.FindAllString(cyclePart, 2) + startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart) + + //create a int array with all days from start to end day + var daysBetween []int + for i := getDayInt(days[0]); i <= getDayInt(days[1]); i++ { + daysBetween = append(daysBetween, i) + } + + // creating a SportDayStartEnd for each day in the cycle + for _, day := range daysBetween { + weekEvents = append(weekEvents, model.SportDayStartEnd{ + Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()), + End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()), + Day: time.Weekday(day), + }) + } + } + + // check if cycle has only one day + if !strings.Contains(cyclePartWithDaysOnly, "-") && !strings.Contains(cyclePartWithDaysOnly, "+") { + // find all days in cycle part by regexp + dayRegExp, _ := regexp.Compile("[A-Z][a-z]") + days := dayRegExp.FindAllString(cyclePart, -1) + startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart) + + // creating a SportDayStartEnd for each day in the cycle + for _, day := range days { + weekEvents = append(weekEvents, model.SportDayStartEnd{ + Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()), + End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()), + Day: time.Weekday(getDayInt(day)), + }) + } + } + } + + var startDatesList []time.Time + var endDatesList []time.Time + + for _, weekEvent := range weekEvents { + startDates, endDates := createEventListFromStartToEndMatchingDay(weekEvent) + startDatesList = append(startDatesList, startDates...) + endDatesList = append(endDatesList, endDates...) + } + + return startDatesList, endDatesList +} + +func createEventListFromStartToEndMatchingDay(weekEvent model.SportDayStartEnd) ([]time.Time, []time.Time) { + var startDates []time.Time + var endDates []time.Time + for d := weekEvent.Start; d.Before(weekEvent.End); d = d.AddDate(0, 0, 1) { + if d.Weekday() == weekEvent.Day { + startDates = append(startDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.Start.Hour(), weekEvent.Start.Minute(), 0, 0, d.Location())) + endDates = append(endDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.End.Hour(), weekEvent.End.Minute(), 0, 0, d.Location())) + } + } + return startDates, endDates +} + +func splitByCommaWithTime(input string) []string { + var result []string + + // Split by comma + parts := strings.Split(input, ", ") + + // Regular expression to match a day with time + regex := regexp.MustCompile(`([A-Za-z]{2,}(\+[A-Za-z]{2,})* \d{2}:\d{2}-\d{2}:\d{2})`) + + // Iterate over parts and combine when necessary + var currentPart string + for _, part := range parts { + if regex.MatchString(part) { + if currentPart != "" { + currentPart += ", " + part + result = append(result, currentPart) + currentPart = "" + } else { + result = append(result, part) + } + // If the part contains a day with time, start a new currentPart + + } else { + // If there's no currentPart, start a new one + if currentPart != "" { + currentPart += ", " + part + } else { + currentPart = part + } + } + } + + // Add the last currentPart to the result + if currentPart != "" { + result = append(result, currentPart) + } + + return result +} + +// check if ws or ss +func checkSemester(date time.Time) string { + if date.Month() >= 4 && date.Month() <= 9 { + return "ss" + } else { + return "ws" + } +} + +// fetch the main page where all sport courses are listed and extract all links to the sport courses +func fetchAllAvailableSportCourses() []string { + var url = "https://sport.htwk-leipzig.de/sportangebote" + + var doc, err = htmlRequest(url) + + if err != nil { + return nil + } + + // link list of all sport courses + var links []string + + // find all links to sport courses with regex https://sport.htwk-leipzig.de/sportangebote/detail/sport/ + [0-9]{1,4} + doc.Find("a[href]").Each(func(i int, s *goquery.Selection) { + link, _ := s.Attr("href") + if strings.HasPrefix(link, "/sportangebote/detail/sport/") { + links = append(links, link) + } + }) + + return links +} + +// fetchAllHTWKSportCourses fetches all sport courses from the given links. +// to speed up the process, it uses multithreading. + +func fetchHTWKSportCourses(links []string) []model.SportEntry { + + //multithreaded webpage requests to speed up the process + + var maxThreads = 10 + var htmlPageArray = make([]*goquery.Document, len(links)) + var hostUrl = "https://sport.htwk-leipzig.de" + + var wg sync.WaitGroup + wg.Add(maxThreads) + for i := 0; i < maxThreads; i++ { + go func(i int) { + for j := i; j < len(links); j += maxThreads { + doc, err := htmlRequest(hostUrl + links[j]) + if err == nil { + htmlPageArray[j] = doc + } + } + wg.Done() + }(i) + } + wg.Wait() + + var events []model.SportEntry + + for _, doc := range htmlPageArray { + if doc != nil { + event, err := fetchHtwkSportCourse(doc) + if err == nil { + events = append(events, event...) + } + } + } + return events +} + +func htmlRequest(url string) (*goquery.Document, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, err + } + + return doc, nil +} + +// fetchHtwkSportCourse fetches the sport course from the given url and id. +// If the sport course does not exist, it will return an error. +// If the sport course exists, it will return the sport course. +// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse. +// May be improved in the future. +func fetchHtwkSportCourse(doc *goquery.Document) ([]model.SportEntry, error) { + var events []model.SportEntry + + if doc.Find("h1").Text() == "Aktuelle Sportangebote" { + return nil, errors.New("not a sport course page") + } + + doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) { + var event model.SportEntry + var details model.EventDetails + + fullTitle := strings.TrimSpace(s.Find("h3").Text()) + titleParts := strings.Split(fullTitle, "-") + if len(titleParts) > 0 { + event.Title = "Sport: " + strings.TrimSpace(titleParts[0]) + } + + if len(titleParts) > 2 { + details.Type = strings.TrimSpace(titleParts[len(titleParts)-1]) + } + + s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) { + key := strings.TrimSpace(s.Find("td").First().Text()) + value := strings.TrimSpace(s.Find("td").Last().Text()) + + switch key { + case "Zeitraum": + dates := strings.Split(value, "-") + if len(dates) == 2 { + startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0])) + endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1])) + details.DateRange = model.DateRange{Start: startDate, End: endDate} + } + case "Zyklus": + details.Cycle = value + case "Geschlecht": + details.Gender = value + case "Leiter": + leaderName := strings.TrimSpace(s.Find("td a").Text()) + leadersSlice := strings.Split(leaderName, "\n") + for i, leader := range leadersSlice { + leadersSlice[i] = strings.TrimSpace(leader) + } + formattedLeaders := strings.Join(leadersSlice, ", ") + leaderLink, _ := s.Find("td a").Attr("href") + details.CourseLead = model.CourseLead{Name: formattedLeaders, Link: leaderLink} + case "Ort": + locationDetails := strings.Split(value, "(") + if len(locationDetails) == 2 { + details.Location = model.Location{ + Name: strings.TrimSpace(locationDetails[0]), + Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"), + } + } + case "Teilnehmer": + parts := strings.Split(value, "/") + if len(parts) >= 3 { + bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0])) + totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1])) + waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2])) + details.Participants = model.Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList} + } + case "Kosten": + details.Cost = value // makes no sense since you need to be logged in to see the price + case "Hinweis": + var allNotes []string + + s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) { + if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" { + note := strings.TrimSpace(s.Text()) + if note != "" { + allNotes = append(allNotes, note) + } + } + }) + + event.AdditionalNote = strings.Join(allNotes, " ") + } + }) + + event.Details = details + events = append(events, event) + }) + + return events, nil +} diff --git a/backend/service/fetch/sport/sportFetcher_test.go b/backend/service/fetch/sport/sportFetcher_test.go new file mode 100644 index 0000000..c630e88 --- /dev/null +++ b/backend/service/fetch/sport/sportFetcher_test.go @@ -0,0 +1,40 @@ +package sport + +import ( + "reflect" + "testing" +) + +func Test_splitByCommaWithTime(t *testing.T) { + type args struct { + input string + } + tests := []struct { + name string + args args + want []string + }{ + {"one string", args{"one"}, []string{"one"}}, + {"two strings", args{"one,two"}, []string{"one,two"}}, + {"three strings", args{"one,two,three"}, []string{"one,two,three"}}, + // e.g. "Mo 18:00-20:00, Di 18:00-20:00" -> ["Mo 18:00-20:00", "Di 18:00-20:00"] + // e.g. "Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00" -> ["Mo 18:00-20:00", "Di 18:00-20:00", "Mi 18:00-20:00"] + // e.g. "Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00" -> ["Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00"] + // e.g. "Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00" -> ["Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"] + // e.g. "Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00" -> ["Mo+Mi+Fr 18:00-20:00", "Sa 20:00-21:00"] + // e.g. "Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00" -> ["Mo+Mi 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"] + {"Mo 18:00-20:00, Di 18:00-20:00", args{"Mo 18:00-20:00, Di 18:00-20:00"}, []string{"Mo 18:00-20:00", "Di 18:00-20:00"}}, + {"Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00", args{"Mo 18:00-20:00, Di 18:00-20:00, Mi 18:00-20:00"}, []string{"Mo 18:00-20:00", "Di 18:00-20:00", "Mi 18:00-20:00"}}, + {"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00", args{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00"}, []string{"Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00"}}, + {"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00", args{"Mo, Mi, Fr 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00"}, []string{"Mo, Mi, Fr 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"}}, + {"Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00", args{"Mo+Mi+Fr 18:00-20:00, Sa 20:00-21:00"}, []string{"Mo+Mi+Fr 18:00-20:00", "Sa 20:00-21:00"}}, + {"Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00", args{"Mo+Mi 18:00-20:00, Sa 20:00-21:00, So 20:00-21:00"}, []string{"Mo+Mi 18:00-20:00", "Sa 20:00-21:00", "So 20:00-21:00"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := splitByCommaWithTime(tt.args.input); !reflect.DeepEqual(got, tt.want) { + t.Errorf("splitByCommaWithTime() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/backend/sport/main.go b/backend/sport/main.go deleted file mode 100644 index 5efed87..0000000 --- a/backend/sport/main.go +++ /dev/null @@ -1,353 +0,0 @@ -package main - -import ( - "errors" - "github.com/google/uuid" - "github.com/pocketbase/pocketbase/tools/types" - "htwkalender/model" - "net/http" - "regexp" - "strconv" - "strings" - "sync" - "time" - - "github.com/PuerkitoBio/goquery" -) - -// @TODO: fix bug where cycle contains multiple days (e.g. "Mo + Mi 18:00-20:00") -// @TODO: add tests -// @TODO: add it to the service -// @TODO: make it like a cron job to fetch the sport courses once a week -func main() { - - var sportCourseLinks = fetchAllAvailableSportCourses() - sportEntries := fetchHTWKSportCourses(sportCourseLinks) - - for _, event := range sportEntries { - println(event.Title) - } - - events := formatEntriesToEvents(sportEntries) - - for _, event := range events { - println(event.Name) - } -} - -func formatEntriesToEvents(entries []SportEntry) []model.Event { - - var events []model.Event - - for i, entry := range entries { - - eventStarts, eventEnds := calculateEventStarts(entry.Details.DateRange.Start, entry.Details.DateRange.End, entry.Details.Cycle) - - for j := range eventStarts { - - start, _ := types.ParseDateTime(eventStarts[j].In(time.UTC)) - end, _ := types.ParseDateTime(eventEnds[j].In(time.UTC)) - - var event = model.Event{ - UUID: uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+strconv.FormatInt(int64(i), 10))).String(), - Day: entry.Details.DateRange.Start.Weekday().String(), - Week: strconv.Itoa(23), - Start: start, - End: end, - Name: entry.Title, - EventType: entry.Details.Type, - Prof: entry.Details.CourseLead.Name, - Rooms: entry.Details.Location.Name, - Notes: entry.AdditionalNote, - BookedAt: "", - Course: "Sport", - Semester: checkSemester(entry.Details.DateRange.Start), - } - events = append(events, event) - } - - } - return events -} - -func calculateEventStarts(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) { - - // start is the begin of the cycle e.g. 01.04.2020 - // end is the end of the cycle e.g. 30.09.2020 - // cycle is the day and timespan (e.g. "Mo 18:00-20:00") - - // check if start is before end - if start.After(end) { - return nil, nil - } - - // check if cycle is valid - if !checkCycle(cycle) { - return nil, nil - } - - var weekDay = cycle[0:2] - // match weekday to time.Weekday (e.g. "Mo" -> time.Monday) - var weekDayInt int - - switch weekDay { - case "Mo": - weekDayInt = 1 - case "Di": - weekDayInt = 2 - case "Mi": - weekDayInt = 3 - case "Do": - weekDayInt = 4 - case "Fr": - weekDayInt = 5 - case "Sa": - weekDayInt = 6 - case "So": - weekDayInt = 0 - } - - // get every date matching the weekday in the cycle between start and end - var eventDates []time.Time - for d := start; d.Before(end); d = d.AddDate(0, 0, 1) { - if d.Weekday() == time.Weekday(weekDayInt) { - eventDates = append(eventDates, d) - } - } - - // add hours and minutes to the dates in eventDates - // array of tuple of start and end times - var eventStartsWithTime []time.Time - var eventEndWithTime []time.Time - - for _, eventStart := range eventDates { - timeRegExp, _ := regexp.Compile("[0-9]{2}:[0-9]{2}") - times := timeRegExp.FindAllString(cycle, 2) - startHour, _ := strconv.Atoi(times[0][0:2]) - startMinute, _ := strconv.Atoi(times[0][3:5]) - - endHour, _ := strconv.Atoi(times[1][0:2]) - endMinute, _ := strconv.Atoi(times[1][3:5]) - eventStartsWithTime = append(eventStartsWithTime, time.Date(eventStart.Year(), eventStart.Month(), eventStart.Day(), startHour, startMinute, 0, 0, eventStart.Location())) - eventEndWithTime = append(eventEndWithTime, time.Date(eventStart.Year(), eventStart.Month(), eventStart.Day(), endHour, endMinute, 0, 0, eventStart.Location())) - } - - return eventStartsWithTime, eventEndWithTime -} - -func checkCycle(cycle string) bool { - - // check if cycle is valid - if len(cycle) < 12 { - return false - } - - // check if cycle has a weekday - weekDay := cycle[0:2] - if weekDay != "Mo" && weekDay != "Di" && weekDay != "Mi" && weekDay != "Do" && weekDay != "Fr" && weekDay != "Sa" && weekDay != "So" { - return false - } - - // check if cycle has a timespan - timeSpan := cycle[3:12] - if len(timeSpan) != 9 { - return false - } - - // check if timespan has a start and end time - startTime := timeSpan[0:5] - endTime := timeSpan[6:9] - if len(startTime) != 5 || len(endTime) != 3 { - return false - } - - // check if start time is before end time - if startTime > endTime { - return false - } - - return true - -} - -// check if ws or ss -func checkSemester(date time.Time) string { - if date.Month() >= 4 && date.Month() <= 9 { - return "ss" - } else { - return "ws" - } -} - -// fetch the main page where all sport courses are listed and extract all links to the sport courses -func fetchAllAvailableSportCourses() []string { - var url = "https://sport.htwk-leipzig.de/sportangebote" - - var doc, err = htmlRequest(url) - - if err != nil { - return nil - } - - // link list of all sport courses - var links []string - - // find all links to sport courses with regex https://sport.htwk-leipzig.de/sportangebote/detail/sport/ + [0-9]{1,4} - doc.Find("a[href]").Each(func(i int, s *goquery.Selection) { - link, _ := s.Attr("href") - if strings.HasPrefix(link, "/sportangebote/detail/sport/") { - links = append(links, link) - } - }) - - return links -} - -// fetchAllHTWKSportCourses fetches all sport courses from the given links. -// to speed up the process, it uses multithreading. - -func fetchHTWKSportCourses(links []string) []SportEntry { - - //multithreaded webpage requests to speed up the process - - var maxThreads = 10 - var htmlPageArray = make([]*goquery.Document, len(links)) - var hostUrl = "https://sport.htwk-leipzig.de" - - var wg sync.WaitGroup - wg.Add(maxThreads) - for i := 0; i < maxThreads; i++ { - go func(i int) { - for j := i; j < len(links); j += maxThreads { - doc, err := htmlRequest(hostUrl + links[j]) - if err == nil { - htmlPageArray[j] = doc - } - } - wg.Done() - }(i) - } - wg.Wait() - - var events []SportEntry - - for _, doc := range htmlPageArray { - if doc != nil { - event, err := fetchHtwkSportCourse(doc) - if err == nil { - events = append(events, event...) - } - } - } - return events -} - -func htmlRequest(url string) (*goquery.Document, error) { - println("fetching " + url) - - resp, err := http.Get(url) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - doc, err := goquery.NewDocumentFromReader(resp.Body) - if err != nil { - return nil, err - } - - println("finished fetching " + url) - return doc, nil -} - -// fetchHtwkSportCourse fetches the sport course from the given url and id. -// If the sport course does not exist, it will return an error. -// If the sport course exists, it will return the sport course. -// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse. -// May be improved in the future. -func fetchHtwkSportCourse(doc *goquery.Document) ([]SportEntry, error) { - var events []SportEntry - - if doc.Find("h1").Text() == "Aktuelle Sportangebote" { - return nil, errors.New("not a sport course page") - } - - doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) { - var event SportEntry - var details EventDetails - - fullTitle := strings.TrimSpace(s.Find("h3").Text()) - titleParts := strings.Split(fullTitle, "-") - if len(titleParts) > 0 { - event.Title = strings.TrimSpace(titleParts[0]) - } - - if len(titleParts) > 2 { - details.Type = strings.TrimSpace(titleParts[len(titleParts)-1]) - } - - s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) { - key := strings.TrimSpace(s.Find("td").First().Text()) - value := strings.TrimSpace(s.Find("td").Last().Text()) - - switch key { - case "Zeitraum": - dates := strings.Split(value, "-") - if len(dates) == 2 { - startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0])) - endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1])) - details.DateRange = DateRange{Start: startDate, End: endDate} - } - case "Zyklus": - details.Cycle = value - case "Geschlecht": - details.Gender = value - case "Leiter": - leaderName := strings.TrimSpace(s.Find("td a").Text()) - leadersSlice := strings.Split(leaderName, "\n") - for i, leader := range leadersSlice { - leadersSlice[i] = strings.TrimSpace(leader) - } - formattedLeaders := strings.Join(leadersSlice, ", ") - leaderLink, _ := s.Find("td a").Attr("href") - details.CourseLead = CourseLead{Name: formattedLeaders, Link: leaderLink} - case "Ort": - locationDetails := strings.Split(value, "(") - if len(locationDetails) == 2 { - details.Location = Location{ - Name: strings.TrimSpace(locationDetails[0]), - Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"), - } - } - case "Teilnehmer": - parts := strings.Split(value, "/") - if len(parts) >= 3 { - bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0])) - totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1])) - waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2])) - details.Participants = Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList} - } - case "Kosten": - details.Cost = value // makes no sense since you need to be logged in to see the price - case "Hinweis": - var allNotes []string - - s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) { - if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" { - note := strings.TrimSpace(s.Text()) - if note != "" { - allNotes = append(allNotes, note) - } - } - }) - - event.AdditionalNote = strings.Join(allNotes, " ") - } - }) - - event.Details = details - events = append(events, event) - }) - - return events, nil -}