package sport

import (
	"errors"
	"io"
	"log/slog"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/google/uuid"
	"github.com/pocketbase/pocketbase"
	"github.com/pocketbase/pocketbase/tools/types"

	"htwkalender/model"
	"htwkalender/service/db"
	"htwkalender/service/functions"
)

const (
	// sportHostURL is the scheme and host every relative course link is resolved against.
	sportHostURL = "https://sport.htwk-leipzig.de"
	// sportOverviewURL is the page that lists all sport courses.
	sportOverviewURL = sportHostURL + "/sportangebote"
	// sportCourseLinkPrefix is the path prefix of links that point to a course detail page.
	sportCourseLinkPrefix = "/sportangebote/detail/sport/"
)

var (
	// clockTimePattern matches a clock time written as two digits, a colon and two digits.
	clockTimePattern = regexp.MustCompile("[0-9]{2}:[0-9]{2}")
	// weekdayAbbrevPattern matches an upper-case letter followed by a lower-case letter,
	// the shape of the weekday abbreviations understood by getDayInt ("Mo", "Di", ...).
	weekdayAbbrevPattern = regexp.MustCompile("[A-Z][a-z]")
	// dayWithTimePattern matches one or more "+"-joined day names followed by a time span.
	dayWithTimePattern = regexp.MustCompile(`([A-Za-z]{2,}(\+[A-Za-z]{2,})* \d{2}:\d{2}-\d{2}:\d{2})`)
	// sportHTTPClient is shared by all requests; the timeout keeps a stalled
	// connection from blocking the fetch workers forever.
	sportHTTPClient = &http.Client{Timeout: 30 * time.Second}
)

// FetchAndUpdateSportEvents fetches all sport events from the HTWK sport website,
// drops the events that fall on a holiday, deletes the stored sport events of the
// current semester and saves the freshly fetched ones.
// It returns all saved events.
func FetchAndUpdateSportEvents(app *pocketbase.PocketBase) ([]model.Event, error) {
	sportCourseLinks, err := fetchAllAvailableSportCourses()
	if err != nil {
		return nil, err
	}

	sportEntries := fetchHTWKSportCourses(sportCourseLinks)
	events := formatEntriesToEvents(sportEntries)

	// Find the earliest and latest date in events. The first event seeds both
	// bounds; comparing against the zero time would never find an earlier date.
	var earliestDate, latestDate time.Time
	for i, event := range events {
		start, end := event.Start.Time(), event.End.Time()
		if i == 0 || start.Before(earliestDate) {
			earliestDate = start
		}
		if i == 0 || end.After(latestDate) {
			latestDate = end
		}
	}
	if len(events) > 0 {
		// Holidays are matched by calendar day below, so widen the window by one
		// day on each side: a holiday entry starting earlier on the first event
		// day (or ending later on the last one) must still be returned, however
		// the query treats its bounds.
		earliestDate = earliestDate.AddDate(0, 0, -1)
		latestDate = latestDate.AddDate(0, 0, 1)
	}

	// get all events from database where name = Feiertage und lehrveranstaltungsfreie Tage
	holidays, err := db.GetAllModulesByNameAndDateRange(app, "Feiertage und lehrveranstaltungsfreie Tage", earliestDate, latestDate)
	if err != nil {
		return nil, err
	}

	// Remove all events that share year, month and day with a holiday. The
	// holiday days are collected first and the events filtered in one pass;
	// deleting from the slice while ranging over it skips elements and can
	// index past the shrunken slice.
	type calendarDay struct {
		year  int
		month time.Month
		day   int
	}
	holidayDays := make(map[calendarDay]struct{})
	for _, holiday := range holidays {
		year, month, day := holiday.Start.Time().Date()
		holidayDays[calendarDay{year, month, day}] = struct{}{}
	}
	keptEvents := events[:0]
	for _, event := range events {
		year, month, day := event.Start.Time().Date()
		if _, isHoliday := holidayDays[calendarDay{year, month, day}]; !isHoliday {
			keptEvents = append(keptEvents, event)
		}
	}
	events = keptEvents

	// @TODO: delete and save events in one transaction and it only should delete events that are not in the new events list and save events that are not in the database
	err = db.DeleteAllEventsForCourse(app, "Sport", functions.GetCurrentSemesterString())
	if err != nil {
		return nil, err
	}

	// save events to database
	savedEvents, err := db.SaveEvents(events, app)
	if err != nil {
		return nil, err
	}

	return savedEvents, nil
}

// formatEntriesToEvents expands every sport entry into one event per occurrence
// between the start and end of its date range, following the entry's cycle.
// Entries without a complete date range are skipped because they cannot be
// scheduled.
func formatEntriesToEvents(entries []model.SportEntry) []model.Event {
	var events []model.Event

	for _, entry := range entries {
		dateRange := entry.Details.DateRange
		if dateRange.Start.IsZero() || dateRange.End.IsZero() {
			slog.Warn("Skipping sport course without a valid date range", "title", entry.Title, "id", entry.ID)
			continue
		}

		eventStarts, eventEnds := getWeekEvents(dateRange.Start, dateRange.End, entry.Details.Cycle)

		// These values are identical for every occurrence of the course.
		courseUUID := uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+entry.ID+entry.Details.Type)).String()
		name := entry.Title + " (" + entry.ID + ")"
		semester := checkSemester(dateRange.Start)

		for j := range eventStarts {
			start, err := types.ParseDateTime(eventStarts[j].In(time.UTC))
			if err != nil {
				slog.Warn("Skipping sport event with unparsable start", "title", entry.Title, "error", err)
				continue
			}
			end, err := types.ParseDateTime(eventEnds[j].In(time.UTC))
			if err != nil {
				slog.Warn("Skipping sport event with unparsable end", "title", entry.Title, "error", err)
				continue
			}

			events = append(events, model.Event{
				UUID: courseUUID,
				// Weekday of this occurrence, not of the first day of the course
				// period, so courses running on several weekdays are labelled correctly.
				Day: toGermanWeekdayString(eventStarts[j].Weekday()),
				// NOTE(review): the week is hard-coded to 23 — presumably a placeholder; confirm.
				Week:      strconv.Itoa(23),
				Start:     start,
				End:       end,
				Name:      name,
				EventType: entry.Details.Type,
				Prof:      entry.Details.CourseLead.Name,
				Rooms:     entry.Details.Location.Name,
				Notes:     entry.AdditionalNote,
				BookedAt:  "",
				Course:    "Sport",
				Semester:  semester,
			})
		}
	}
	return events
}

// weekdayFromAbbreviation maps a two-letter German weekday abbreviation to its
// time.Weekday. The second result is false for any other string.
func weekdayFromAbbreviation(abbreviation string) (time.Weekday, bool) {
	switch abbreviation {
	case "Mo":
		return time.Monday, true
	case "Di":
		return time.Tuesday, true
	case "Mi":
		return time.Wednesday, true
	case "Do":
		return time.Thursday, true
	case "Fr":
		return time.Friday, true
	case "Sa":
		return time.Saturday, true
	case "So":
		return time.Sunday, true
	}
	return time.Sunday, false
}

// getDayInt returns the weekday number (Sunday = 0 ... Saturday = 6) for a
// two-letter German weekday abbreviation. Unknown input yields 0.
func getDayInt(weekDay string) int {
	day, _ := weekdayFromAbbreviation(weekDay)
	return int(day)
}

// toGermanWeekdayString returns the German name of the weekday, or an empty
// string for a value outside Sunday..Saturday.
func toGermanWeekdayString(weekday time.Weekday) string {
	switch weekday {
	case time.Monday:
		return "Montag"
	case time.Tuesday:
		return "Dienstag"
	case time.Wednesday:
		return "Mittwoch"
	case time.Thursday:
		return "Donnerstag"
	case time.Friday:
		return "Freitag"
	case time.Saturday:
		return "Samstag"
	case time.Sunday:
		return "Sonntag"
	}
	return ""
}

// parseCycleTimes reads the first two clock times ("HH:MM") found in cycle as
// start and end time. ok is false when cycle holds fewer than two clock times.
func parseCycleTimes(cycle string) (startHour, startMinute, endHour, endMinute int, ok bool) {
	times := clockTimePattern.FindAllString(cycle, 2)
	if len(times) < 2 {
		return 0, 0, 0, 0, false
	}
	// Atoi cannot fail here: the pattern guarantees two ASCII digits on each side of the colon.
	startHour, _ = strconv.Atoi(times[0][0:2])
	startMinute, _ = strconv.Atoi(times[0][3:5])
	endHour, _ = strconv.Atoi(times[1][0:2])
	endMinute, _ = strconv.Atoi(times[1][3:5])
	return startHour, startMinute, endHour, endMinute, true
}

// extractStartAndEndTime returns start hour, start minute, end hour and end
// minute of the first two clock times in cycle. All four values are 0 when
// cycle does not contain two clock times.
func extractStartAndEndTime(cycle string) (int, int, int, int) {
	startHour, startMinute, endHour, endMinute, _ := parseCycleTimes(cycle)
	return startHour, startMinute, endHour, endMinute
}

// weekdaysInCycle returns the weekdays named in the day part of a cycle (the
// text in front of the first clock time), without duplicates.
//   - a single day or days joined with "+" are taken as listed
//   - "X-Y" is expanded to every weekday from X to Y, wrapping over the end of
//     the week (e.g. "Sa-So", "Fr-Mo")
func weekdaysInCycle(dayPrefix string) []time.Weekday {
	var named []time.Weekday
	for _, abbreviation := range weekdayAbbrevPattern.FindAllString(dayPrefix, -1) {
		if day, ok := weekdayFromAbbreviation(abbreviation); ok {
			named = append(named, day)
		}
	}

	var (
		weekdays []time.Weekday
		seen     [7]bool
	)
	add := func(day time.Weekday) {
		if !seen[day] {
			seen[day] = true
			weekdays = append(weekdays, day)
		}
	}

	isRange := strings.Contains(dayPrefix, "-")
	isList := strings.Contains(dayPrefix, "+")
	if !isRange || isList || len(named) < 2 {
		for _, day := range named {
			add(day)
		}
	}
	if isRange && len(named) >= 2 {
		for day := named[0]; ; day = (day + 1) % 7 {
			add(day)
			if day == named[1] {
				break
			}
		}
	}
	return weekdays
}

// getWeekEvents returns the start and end times of every occurrence described
// by cycle between start and end. The two returned slices have the same length
// and belong together index by index. Cycle parts without a clock-time span
// are ignored because no event time can be derived from them.
func getWeekEvents(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) {
	var weekEvents []model.SportDayStartEnd

	for _, cyclePart := range splitByCommaWithTime(cycle) {
		// Everything in front of the first digit is the day part of the cycle.
		firstDigit := strings.IndexFunc(cyclePart, func(r rune) bool { return r >= '0' && r <= '9' })
		if firstDigit < 0 {
			continue
		}
		startHour, startMinute, endHour, endMinute, ok := parseCycleTimes(cyclePart)
		if !ok {
			continue
		}

		// First possible start and last possible end of the course period.
		periodStart := time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location())
		periodEnd := time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location())

		// Only the day part is searched for weekdays, so text behind the time
		// (e.g. "Uhr") is not mistaken for a weekday.
		for _, weekday := range weekdaysInCycle(cyclePart[:firstDigit]) {
			weekEvents = append(weekEvents, model.SportDayStartEnd{
				Start: periodStart,
				End:   periodEnd,
				Day:   weekday,
			})
		}
	}

	var startDatesList, endDatesList []time.Time
	for _, weekEvent := range weekEvents {
		startDates, endDates := createEventListFromStartToEndMatchingDay(weekEvent)
		startDatesList = append(startDatesList, startDates...)
		endDatesList = append(endDatesList, endDates...)
	}
	return startDatesList, endDatesList
}

// createEventListFromStartToEndMatchingDay walks day by day from
// weekEvent.Start up to (excluding) weekEvent.End and returns, for each day
// whose weekday equals weekEvent.Day, the start time and end time on that day.
func createEventListFromStartToEndMatchingDay(weekEvent model.SportDayStartEnd) ([]time.Time, []time.Time) {
	var startDates, endDates []time.Time

	for day := weekEvent.Start; day.Before(weekEvent.End); day = day.AddDate(0, 0, 1) {
		if day.Weekday() != weekEvent.Day {
			continue
		}
		year, month, dayOfMonth := day.Date()
		startDates = append(startDates, time.Date(year, month, dayOfMonth, weekEvent.Start.Hour(), weekEvent.Start.Minute(), 0, 0, day.Location()))
		endDates = append(endDates, time.Date(year, month, dayOfMonth, weekEvent.End.Hour(), weekEvent.End.Minute(), 0, 0, day.Location()))
	}
	return startDates, endDates
}

// splitByCommaWithTime splits input at ", " and re-joins the pieces so that
// every returned part ends with the piece that carries the time span, e.g.
// "Mo, Mi 18:00-19:00, Fr 10:00-11:00" becomes
// ["Mo, Mi 18:00-19:00", "Fr 10:00-11:00"]. Trailing pieces without a time are
// returned as one last part.
func splitByCommaWithTime(input string) []string {
	var result []string

	// Pieces without a time are collected here until a piece with a time closes the part.
	var pending string
	for _, part := range strings.Split(input, ", ") {
		switch {
		case !dayWithTimePattern.MatchString(part):
			if pending != "" {
				pending += ", " + part
			} else {
				pending = part
			}
		case pending != "":
			result = append(result, pending+", "+part)
			pending = ""
		default:
			result = append(result, part)
		}
	}

	// Add whatever is left without a closing time.
	if pending != "" {
		result = append(result, pending)
	}
	return result
}

// checkSemester returns "ss" for dates from April to September and "ws" otherwise.
func checkSemester(date time.Time) string {
	if month := date.Month(); month >= time.April && month <= time.September {
		return "ss"
	}
	return "ws"
}

// fetchAllAvailableSportCourses fetches the main page where all sport courses
// are listed and returns the relative links to the course detail pages, each
// link once and in page order.
func fetchAllAvailableSportCourses() ([]string, error) {
	doc, err := htmlRequest(sportOverviewURL)
	if err != nil {
		slog.Error("Error while fetching sport courses from webpage", "error", err)
		return nil, err
	}

	var links []string
	seen := make(map[string]struct{})

	// Keep every link that starts with /sportangebote/detail/sport/.
	doc.Find("a[href]").Each(func(_ int, anchor *goquery.Selection) {
		link, _ := anchor.Attr("href")
		if !strings.HasPrefix(link, sportCourseLinkPrefix) {
			return
		}
		if _, duplicate := seen[link]; duplicate {
			return
		}
		seen[link] = struct{}{}
		links = append(links, link)
	})

	return links, nil
}

// fetchHTWKSportCourses fetches all sport courses from the given links.
// To speed up the process the pages are requested by a fixed number of
// goroutines. Pages that cannot be fetched or are not course pages are left out.
func fetchHTWKSportCourses(links []string) []model.SportEntry {
	const maxThreads = 10

	// Every worker writes only to its own indices, so no locking is needed.
	pages := make([]*goquery.Document, len(links))

	var wg sync.WaitGroup
	wg.Add(maxThreads)
	for worker := 0; worker < maxThreads; worker++ {
		go func(worker int) {
			defer wg.Done()
			// Worker k handles the links k, k+maxThreads, k+2*maxThreads, ...
			for j := worker; j < len(links); j += maxThreads {
				doc, err := htmlRequest(sportHostURL + links[j])
				if err != nil {
					slog.Warn("Could not fetch sport course page", "link", links[j], "error", err)
					continue
				}
				pages[j] = doc
			}
		}(worker)
	}
	wg.Wait()

	var entries []model.SportEntry
	for _, doc := range pages {
		if doc == nil {
			continue
		}
		if pageEntries, err := fetchHtwkSportCourse(doc); err == nil {
			entries = append(entries, pageEntries...)
		}
	}
	return entries
}

// htmlRequest performs a GET request for url and parses the response body as
// an HTML document. It returns an error when the request fails, the server
// answers with a non-2xx status, or the body cannot be parsed.
func htmlRequest(url string) (*goquery.Document, error) {
	resp, err := sportHTTPClient.Get(url)
	if err != nil {
		return nil, err
	}
	defer func(body io.ReadCloser) {
		if closeErr := body.Close(); closeErr != nil {
			slog.Error("Error while closing response body from html request", "error", closeErr)
		}
	}(resp.Body)

	// An error page must not be parsed as if it were course content.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, errors.New("unexpected HTTP status " + resp.Status + " for " + url)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	return doc, nil
}

// fetchHtwkSportCourse extracts the sport entries from an already fetched course page.
// If the headline of the page is "Aktuelle Sportangebote" the page is not a
// course page and an error is returned.
// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse.
// May be improved in the future.
func fetchHtwkSportCourse(doc *goquery.Document) ([]model.SportEntry, error) { var events []model.SportEntry germanTime, _ := time.LoadLocation("Europe/Berlin") if doc.Find("h1").Text() == "Aktuelle Sportangebote" { return nil, errors.New("not a sport course page") } doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) { var event model.SportEntry var details model.EventDetails fullTitle := strings.TrimSpace(s.Find("h3").Text()) titleParts := strings.Split(fullTitle, "-") if len(titleParts) > 0 { event.Title = strings.TrimSpace(titleParts[0]) } if len(titleParts) > 2 { details.Type = strings.TrimSpace(titleParts[len(titleParts)-1]) } event.ID = parseEventID(fullTitle) s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) { key := strings.TrimSpace(s.Find("td").First().Text()) value := strings.TrimSpace(s.Find("td").Last().Text()) switch key { case "Zeitraum": dates := strings.Split(value, "-") if len(dates) == 2 { startDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[0]), germanTime) endDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[1]), germanTime) details.DateRange = model.DateRange{Start: startDate, End: endDate} } case "Zyklus": details.Cycle = value case "Geschlecht": details.Gender = value case "Leiter": leaderName := strings.TrimSpace(s.Find("td a").Text()) leadersSlice := strings.Split(leaderName, "\n") for i, leader := range leadersSlice { leadersSlice[i] = strings.TrimSpace(leader) } formattedLeaders := strings.Join(leadersSlice, ", ") leaderLink, _ := s.Find("td a").Attr("href") details.CourseLead = model.CourseLead{Name: formattedLeaders, Link: leaderLink} case "Ort": locationDetails := strings.Split(value, "(") if len(locationDetails) == 2 { details.Location = model.Location{ Name: strings.TrimSpace(locationDetails[0]), Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"), } } case "Teilnehmer": parts := strings.Split(value, "/") if len(parts) >= 3 { 
bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0])) totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1])) waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2])) details.Participants = model.Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList} } case "Kosten": details.Cost = value // makes no sense since you need to be logged in to see the price case "Hinweis": var allNotes []string s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) { if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" { note := strings.TrimSpace(s.Text()) if note != "" { allNotes = append(allNotes, note) } } }) event.AdditionalNote = strings.Join(allNotes, " ") } }) event.Details = details events = append(events, event) }) return events, nil } // parseEventID from fulltitle // the event id is a number in the fulltitle thats not a time like HH:MM and shoudl be found after Nr. or Nr: func parseEventID(fulltitle string) string { var eventID string var numberRegExp = regexp.MustCompile("[0-9]{1,4}") var fulltitleParts = strings.Split(fulltitle, " ") for i, part := range fulltitleParts { if part == "Nr." || part == "Nr:" { eventID = fulltitleParts[i+1] break } } if eventID == "" { eventID = numberRegExp.FindString(fulltitle) } return eventID }