//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format. //Copyright (C) 2024 HTWKalender support@htwkalender.de //This program is free software: you can redistribute it and/or modify //it under the terms of the GNU Affero General Public License as published by //the Free Software Foundation, either version 3 of the License, or //(at your option) any later version. //This program is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //GNU Affero General Public License for more details. //You should have received a copy of the GNU Affero General Public License //along with this program. If not, see . package sport import ( "errors" "github.com/google/uuid" "github.com/pocketbase/pocketbase" "github.com/pocketbase/pocketbase/tools/types" "htwkalender/model" "htwkalender/service/db" "htwkalender/service/functions" "io" "log/slog" "net/http" "regexp" "strconv" "strings" "sync" "time" "github.com/PuerkitoBio/goquery" ) // FetchAndUpdateSportEvents fetches all sport events from the HTWK sport website // it deletes them first and then saves them to the database // It returns all saved events func FetchAndUpdateSportEvents(app *pocketbase.PocketBase) ([]model.Event, error) { sportCourseLinks, err := fetchAllAvailableSportCourses() if err != nil { return nil, err } sportEntries := fetchHTWKSportCourses(sportCourseLinks) events := formatEntriesToEvents(sportEntries) var earliestDate time.Time var latestDate time.Time // find earliest and latest date in events for _, event := range events { if event.Start.Time().Before(earliestDate) { earliestDate = event.Start.Time() } if event.End.Time().After(latestDate) { latestDate = event.End.Time() } } // get all events from database where name = Feiertage und lehrveranstaltungsfreie Tage holidays, err := db.GetAllModulesByNameAndDateRange(app, "Feiertage und lehrveranstaltungsfreie Tage", earliestDate, latestDate) if err != nil { return nil, err } // remove all events that have same year, month and day as items in holidays for _, holiday := range holidays { for i, event := range events { if event.Start.Time().Year() == holiday.Start.Time().Year() && event.Start.Time().Month() == holiday.Start.Time().Month() && event.Start.Time().Day() == holiday.Start.Time().Day() { events = append(events[:i], events[i+1:]...) } } } // @TODO: delete and save events in one transaction and it only should delete events that are not in the new events list and save events that are not in the database err = db.DeleteAllEventsByCourse(app, "Sport", functions.GetCurrentSemesterString()) if err != nil { return nil, err } // save events to database savedEvents, err := db.SaveEvents(events, app) if err != nil { return nil, err } return savedEvents, nil } func formatEntriesToEvents(entries []model.SportEntry) []model.Event { var events []model.Event for _, entry := range entries { eventStarts, eventEnds := getWeekEvents(entry.Details.DateRange.Start, entry.Details.DateRange.End, entry.Details.Cycle) for j := range eventStarts { start, _ := types.ParseDateTime(eventStarts[j].In(time.UTC)) end, _ := types.ParseDateTime(eventEnds[j].In(time.UTC)) var event = model.Event{ UUID: uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+entry.ID+entry.Details.Type)).String(), Day: toGermanWeekdayString(start.Time().Weekday()), Week: strconv.Itoa(23), Start: start, End: end, Name: entry.Title + " (" + entry.ID + ")", EventType: entry.Details.Type, Prof: entry.Details.CourseLead.Name, Rooms: entry.Details.Location.Name, Notes: entry.AdditionalNote, BookedAt: "", Course: "Sport", Semester: checkSemester(entry.Details.DateRange.Start), } events = append(events, event) } } return events } func getDayInt(weekDay string) (int, error) { var weekDayInt int var err error = nil switch weekDay { case "Mo": weekDayInt = 1 case "Di": weekDayInt = 2 case "Mi": weekDayInt = 3 case "Do": weekDayInt = 4 case "Fr": weekDayInt = 5 case "Sa": weekDayInt = 6 case "So": weekDayInt = 0 default: { err = errors.New("no day found") weekDayInt = -1 } } return weekDayInt, err } func toGermanWeekdayString(weekday time.Weekday) string { switch weekday { case time.Monday: return "Montag" case time.Tuesday: return "Dienstag" case time.Wednesday: return "Mittwoch" case time.Thursday: return "Donnerstag" case time.Friday: return "Freitag" case time.Saturday: return "Samstag" case time.Sunday: return "Sonntag" default: return "" } } func extractStartAndEndTime(cycle string) (int, int, int, int) { timeRegExp, _ := regexp.Compile("[0-9]{2}:[0-9]{2}") times := timeRegExp.FindAllString(cycle, 2) startHour, _ := strconv.Atoi(times[0][0:2]) startMinute, _ := strconv.Atoi(times[0][3:5]) endHour, _ := strconv.Atoi(times[1][0:2]) endMinute, _ := strconv.Atoi(times[1][3:5]) return startHour, startMinute, endHour, endMinute } func getWeekEvents(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) { var weekEvents []model.SportDayStartEnd // split by regexp to get the cycle parts var cycleParts = splitByCommaWithTime(cycle) for _, cyclePart := range cycleParts { //cut string at the first integer/number cyclePartWithDaysOnly := cyclePart[0:strings.IndexFunc(cyclePart, func(r rune) bool { return r >= '0' && r <= '9' })] // check if cycle has multiple days by checking if it has a plus sign if strings.Contains(cyclePartWithDaysOnly, "+") { // find all days in cycle part by regexp dayRegExp, _ := regexp.Compile("[A-Z][a-z]") days := dayRegExp.FindAllString(cyclePart, -1) startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart) // creating a SportDayStartEnd for each day in the cycle for _, day := range days { weekDay, err := getDayInt(day) if err != nil { slog.Error("Error while getting day int: "+day+" ", "error", err) } else { weekEvents = append(weekEvents, model.SportDayStartEnd{ Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()), End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()), Day: time.Weekday(weekDay), }) } } } // check if cycle has multiple days by checking if it has a minus sign if strings.Contains(cyclePartWithDaysOnly, "-") { // find all days in cycle part by regexp dayRegExp, _ := regexp.Compile("[A-Z][a-z]") days := dayRegExp.FindAllString(cyclePart, 2) startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart) var startI, endI int var endIErr, startIErr error startI, startIErr = getDayInt(days[0]) endI, endIErr = getDayInt(days[1]) if endIErr != nil || startIErr != nil { slog.Error("Error while getting day int: "+days[0]+" - "+days[1]+" :", "error", startIErr) slog.Error("Error while getting day int endErr: ", "error", endIErr) } else { //create an int array with all days from start to end day var daysBetween []int for i := startI; i <= endI; i++ { daysBetween = append(daysBetween, i) } // creating a SportDayStartEnd for each day in the cycle weekEvents = createEventListFromStartToEndMatchingDay23(daysBetween, start, startHour, startMinute, end, endHour, endMinute) } } // check if cycle has only one day if !strings.Contains(cyclePartWithDaysOnly, "-") && !strings.Contains(cyclePartWithDaysOnly, "+") { // find all days in cycle part by regexp dayRegExp, _ := regexp.Compile("[A-Z][a-z]") days := dayRegExp.FindAllString(cyclePart, -1) startHour, startMinute, endHour, endMinute := extractStartAndEndTime(cyclePart) var dayNumbers []int for _, day := range days { dayInt, err := getDayInt(day) if err != nil { slog.Error("Error while getting day int: "+day+" ", "error", err) } else { dayNumbers = append(dayNumbers, dayInt) } } // creating a SportDayStartEnd for each day in the cycle weekEvents = append(weekEvents, createEventListFromStartToEndMatchingDay23(dayNumbers, start, startHour, startMinute, end, endHour, endMinute)...) for _, day := range days { weekDay, err := getDayInt(day) if err != nil { slog.Error("Error while getting day int: "+day+" ", "error", err) } else { weekEvents = append(weekEvents, model.SportDayStartEnd{ Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()), End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()), Day: time.Weekday(weekDay), }) } } } } var startDatesList []time.Time var endDatesList []time.Time for _, weekEvent := range weekEvents { startDates, endDates := createEventListFromStartToEndMatchingDay(weekEvent) startDatesList = append(startDatesList, startDates...) endDatesList = append(endDatesList, endDates...) } return startDatesList, endDatesList } // creating a SportDayStartEnd for each day in the cycle func createEventListFromStartToEndMatchingDay23(days []int, start time.Time, startHour int, startMinute int, end time.Time, endHour int, endMinute int) []model.SportDayStartEnd { var weekEvents []model.SportDayStartEnd for _, day := range days { weekEvents = append(weekEvents, model.SportDayStartEnd{ Start: time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location()), End: time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location()), Day: time.Weekday(day), }) } return weekEvents } func createEventListFromStartToEndMatchingDay(weekEvent model.SportDayStartEnd) ([]time.Time, []time.Time) { var startDates []time.Time var endDates []time.Time for d := weekEvent.Start; d.Before(weekEvent.End); d = d.AddDate(0, 0, 1) { if d.Weekday() == weekEvent.Day { startDates = append(startDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.Start.Hour(), weekEvent.Start.Minute(), 0, 0, d.Location())) endDates = append(endDates, time.Date(d.Year(), d.Month(), d.Day(), weekEvent.End.Hour(), weekEvent.End.Minute(), 0, 0, d.Location())) } } return startDates, endDates } func splitByCommaWithTime(input string) []string { var result []string // Split by comma parts := strings.Split(input, ", ") // Regular expression to match a day with time regex := regexp.MustCompile(`([A-Za-z]{2,}(\+[A-Za-z]{2,})* \d{2}:\d{2}-\d{2}:\d{2})`) // Iterate over parts and combine when necessary var currentPart string for _, part := range parts { if regex.MatchString(part) { if currentPart != "" { currentPart += ", " + part result = append(result, currentPart) currentPart = "" } else { result = append(result, part) } // If the part contains a day with time, start a new currentPart } else { // If there's no currentPart, start a new one if currentPart != "" { currentPart += ", " + part } else { currentPart = part } } } // Add the last currentPart to the result if currentPart != "" { result = append(result, currentPart) } return result } // check if ws or ss func checkSemester(date time.Time) string { if date.Month() >= 4 && date.Month() <= 9 { return "ss" } else { return "ws" } } // fetch the main page where all sport courses are listed and extract all links to the sport courses func fetchAllAvailableSportCourses() ([]string, error) { var url = "https://sport.htwk-leipzig.de/sportangebote" var doc, err = htmlRequest(url) if err != nil { slog.Error("Error while fetching sport courses from webpage", "error", err) return nil, err } // link list of all sport courses var links []string // find all links to sport courses with regex https://sport.htwk-leipzig.de/sportangebote/detail/sport/ + [0-9]{1,4} doc.Find("a[href]").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") if strings.HasPrefix(link, "/sportangebote/detail/sport/") { links = append(links, link) } }) return links, nil } // fetchAllHTWKSportCourses fetches all sport courses from the given links. // to speed up the process, it uses multithreading. func fetchHTWKSportCourses(links []string) []model.SportEntry { //multithreaded webpage requests to speed up the process var maxThreads = 10 var htmlPageArray = make([]*goquery.Document, len(links)) var hostUrl = "https://sport.htwk-leipzig.de" var wg sync.WaitGroup wg.Add(maxThreads) for i := 0; i < maxThreads; i++ { go func(i int) { for j := i; j < len(links); j += maxThreads { doc, err := htmlRequest(hostUrl + links[j]) if err == nil { htmlPageArray[j] = doc } } wg.Done() }(i) } wg.Wait() var events []model.SportEntry for _, doc := range htmlPageArray { if doc != nil { event, err := fetchHtwkSportCourse(doc) if err == nil { events = append(events, event...) } } } return events } func htmlRequest(url string) (*goquery.Document, error) { resp, err := http.Get(url) if err != nil { return nil, err } defer func(Body io.ReadCloser) { readErr := Body.Close() if readErr != nil { slog.Error("Error while closing response body from html request", "error", readErr) return } }(resp.Body) doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return nil, err } return doc, nil } // fetchHtwkSportCourse fetches the sport course from the given url and id. // If the sport course does not exist, it will return an error. // If the sport course exists, it will return the sport course. // goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse. // May be improved in the future. func fetchHtwkSportCourse(doc *goquery.Document) ([]model.SportEntry, error) { var events []model.SportEntry germanTime, _ := time.LoadLocation("Europe/Berlin") if doc.Find("h1").Text() == "Aktuelle Sportangebote" { return nil, errors.New("not a sport course page") } doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) { var event model.SportEntry var details model.EventDetails fullTitle := strings.TrimSpace(s.Find("h3").Text()) titleParts := strings.Split(fullTitle, "-") if len(titleParts) > 0 { event.Title = strings.TrimSpace(titleParts[0]) } if len(titleParts) > 2 { details.Type = strings.TrimSpace(titleParts[len(titleParts)-1]) } event.ID = parseEventID(fullTitle) s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) { key := strings.TrimSpace(s.Find("td").First().Text()) value := strings.TrimSpace(s.Find("td").Last().Text()) switch key { case "Zeitraum": dates := strings.Split(value, "-") if len(dates) == 2 { startDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[0]), germanTime) endDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[1]), germanTime) details.DateRange = model.DateRange{Start: startDate, End: endDate} } case "Zyklus": details.Cycle = value case "Geschlecht": details.Gender = value case "Leiter": leaderName := strings.TrimSpace(s.Find("td a").Text()) leadersSlice := strings.Split(leaderName, "\n") for i, leader := range leadersSlice { leadersSlice[i] = strings.TrimSpace(leader) } formattedLeaders := strings.Join(leadersSlice, ", ") leaderLink, _ := s.Find("td a").Attr("href") details.CourseLead = model.CourseLead{Name: formattedLeaders, Link: leaderLink} case "Ort": locationDetails := strings.Split(value, "(") if len(locationDetails) == 2 { details.Location = model.Location{ Name: strings.TrimSpace(locationDetails[0]), Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"), } } case "Teilnehmer": parts := strings.Split(value, "/") if len(parts) >= 3 { bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0])) totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1])) waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2])) details.Participants = model.Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList} } case "Kosten": details.Cost = value // makes no sense since you need to be logged in to see the price case "Hinweis": var allNotes []string s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) { if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" { note := strings.TrimSpace(s.Text()) if note != "" { allNotes = append(allNotes, note) } } }) event.AdditionalNote = strings.Join(allNotes, " ") } }) event.Details = details events = append(events, event) }) return events, nil } // parseEventID from fulltitle // the event id is a number in the fulltitle thats not a time like HH:MM and shoudl be found after Nr. or Nr: func parseEventID(fulltitle string) string { var eventID string var numberRegExp = regexp.MustCompile("[0-9]{1,4}") var fulltitleParts = strings.Split(fulltitle, " ") for i, part := range fulltitleParts { if part == "Nr." || part == "Nr:" { eventID = fulltitleParts[i+1] break } } if eventID == "" { eventID = numberRegExp.FindString(fulltitle) } return eventID }