package sport

import (
	"errors"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/google/uuid"
	"github.com/pocketbase/pocketbase"
	"github.com/pocketbase/pocketbase/tools/types"

	"htwkalender/model"
	"htwkalender/service/db"
	"htwkalender/service/functions"
)

const (
	// sportHostURL is the host all relative course links are resolved against.
	sportHostURL = "https://sport.htwk-leipzig.de"
	// sportDetailLinkPrefix is the path prefix shared by all course detail links.
	sportDetailLinkPrefix = "/sportangebote/detail/sport/"
	// sportFetchWorkers is the number of goroutines used to download detail pages.
	sportFetchWorkers = 10
)

var (
	// clockTimePattern matches a wall-clock time such as "18:30".
	clockTimePattern = regexp.MustCompile("[0-9]{2}:[0-9]{2}")
	// weekdayAbbreviationPattern matches two-letter tokens such as "Mo" or "Di".
	weekdayAbbreviationPattern = regexp.MustCompile("[A-Z][a-z]")
	// dayWithTimePattern matches a day (or "+"-joined days) followed by a time span.
	dayWithTimePattern = regexp.MustCompile(`([A-Za-z]{2,}(\+[A-Za-z]{2,})* \d{2}:\d{2}-\d{2}:\d{2})`)
	// sportHTTPClient is shared by all requests so that a hanging server cannot
	// block the scraper forever.
	sportHTTPClient = &http.Client{Timeout: 30 * time.Second}
)

// @TODO: add tests
// @TODO: make it like a cron job to fetch the sport courses once a week

// FetchAndUpdateSportEvents scrapes all sport courses, converts them to events,
// drops every event that starts on a holiday, replaces the stored "Sport"
// events of the current semester with the result and returns the saved events.
// It returns nil when any database operation fails.
func FetchAndUpdateSportEvents(app *pocketbase.PocketBase) []model.Event {
	links := fetchAllAvailableSportCourses()
	entries := fetchHTWKSportCourses(links)
	events := formatEntriesToEvents(entries)

	// Find the time window covered by the events. The first event seeds both
	// bounds; comparing against the zero time would never move the lower bound.
	var earliestDate, latestDate time.Time
	for i, event := range events {
		start, end := event.Start.Time(), event.End.Time()
		if i == 0 || start.Before(earliestDate) {
			earliestDate = start
		}
		if i == 0 || end.After(latestDate) {
			latestDate = end
		}
	}

	// get all events from database where name = Feiertage und lehrveranstaltungsfreie Tage
	holidays, err := db.GetAllModulesByNameAndDateRange(app, "Feiertage und lehrveranstaltungsfreie Tage", earliestDate, latestDate)
	if err != nil {
		return nil
	}

	// Collect the calendar days of all holidays so each event needs one lookup.
	type calendarDay struct {
		year  int
		month time.Month
		day   int
	}
	toCalendarDay := func(t time.Time) calendarDay {
		year, month, day := t.Date()
		return calendarDay{year: year, month: month, day: day}
	}
	holidayDays := make(map[calendarDay]struct{})
	for _, holiday := range holidays {
		holidayDays[toCalendarDay(holiday.Start.Time())] = struct{}{}
	}

	// Filter in place instead of deleting while ranging: deleting inside a
	// range loop skips elements and can index past the shrunken slice.
	remaining := events[:0]
	for _, event := range events {
		if _, isHoliday := holidayDays[toCalendarDay(event.Start.Time())]; !isHoliday {
			remaining = append(remaining, event)
		}
	}
	events = remaining

	if err = db.DeleteAllEventsForCourse(app, "Sport", functions.GetCurrentSemesterString()); err != nil {
		return nil
	}

	// save events to database
	savedEvents, err := db.SaveEvents(events, app)
	if err != nil {
		return nil
	}
	return savedEvents
}

// formatEntriesToEvents expands every sport entry into one event per
// occurrence of its weekly cycle within the entry's date range. Entries
// without a parsed date range and occurrences whose timestamps cannot be
// converted are skipped.
func formatEntriesToEvents(entries []model.SportEntry) []model.Event {
	var events []model.Event

	for _, entry := range entries {
		dateRange := entry.Details.DateRange
		// A zero date means the period could not be parsed; expanding such a
		// range would produce events in year 1.
		if dateRange.Start.IsZero() || dateRange.End.IsZero() {
			continue
		}

		eventStarts, eventEnds := getWeekEvents(dateRange.Start, dateRange.End, entry.Details.Cycle)

		// All occurrences of one entry share the same UUID.
		entryUUID := uuid.NewSHA1(uuid.NameSpaceDNS, []byte(entry.Title+entry.ID+entry.Details.Type)).String()
		semester := checkSemester(dateRange.Start)

		for j := range eventStarts {
			start, err := types.ParseDateTime(eventStarts[j].In(time.UTC))
			if err != nil {
				continue
			}
			end, err := types.ParseDateTime(eventEnds[j].In(time.UTC))
			if err != nil {
				continue
			}

			events = append(events, model.Event{
				UUID: entryUUID,
				// Use the weekday of the occurrence itself; the first day of
				// the period is wrong for cycles with several weekdays.
				Day: toGermanWeekdayString(eventStarts[j].Weekday()),
				// NOTE(review): the week is a fixed placeholder value.
				Week:      strconv.Itoa(23),
				Start:     start,
				End:       end,
				Name:      entry.Title + " (" + entry.ID + ")",
				EventType: entry.Details.Type,
				Prof:      entry.Details.CourseLead.Name,
				Rooms:     entry.Details.Location.Name,
				Notes:     entry.AdditionalNote,
				BookedAt:  "",
				Course:    "Sport",
				Semester:  semester,
			})
		}
	}
	return events
}

// weekdayFromAbbreviation maps a German two-letter weekday abbreviation to its
// time.Weekday. The boolean is false for any other input.
func weekdayFromAbbreviation(abbreviation string) (time.Weekday, bool) {
	switch abbreviation {
	case "Mo":
		return time.Monday, true
	case "Di":
		return time.Tuesday, true
	case "Mi":
		return time.Wednesday, true
	case "Do":
		return time.Thursday, true
	case "Fr":
		return time.Friday, true
	case "Sa":
		return time.Saturday, true
	case "So":
		return time.Sunday, true
	default:
		return time.Sunday, false
	}
}

// getDayInt returns the time.Weekday number (Sunday = 0) for a German
// two-letter weekday abbreviation. Unknown input yields 0.
func getDayInt(weekDay string) int {
	weekday, _ := weekdayFromAbbreviation(weekDay)
	return int(weekday)
}

// toGermanWeekdayString returns the German name of the given weekday, or an
// empty string for a value outside Sunday..Saturday.
func toGermanWeekdayString(weekday time.Weekday) string {
	switch weekday {
	case time.Monday:
		return "Montag"
	case time.Tuesday:
		return "Dienstag"
	case time.Wednesday:
		return "Mittwoch"
	case time.Thursday:
		return "Donnerstag"
	case time.Friday:
		return "Freitag"
	case time.Saturday:
		return "Samstag"
	case time.Sunday:
		return "Sonntag"
	default:
		return ""
	}
}

// splitClockTime splits a string of the form "HH:MM" into hour and minute.
// Callers pass matches of clockTimePattern, so both parts are plain digits
// and the conversions cannot fail.
func splitClockTime(clock string) (int, int) {
	hour, _ := strconv.Atoi(clock[0:2])
	minute, _ := strconv.Atoi(clock[3:5])
	return hour, minute
}

// parseCycleTimes extracts the first two "HH:MM" times from cycle as start
// and end time. ok is false (and all numbers are 0) when fewer than two times
// are present.
func parseCycleTimes(cycle string) (startHour, startMinute, endHour, endMinute int, ok bool) {
	times := clockTimePattern.FindAllString(cycle, 2)
	if len(times) < 2 {
		return 0, 0, 0, 0, false
	}
	startHour, startMinute = splitClockTime(times[0])
	endHour, endMinute = splitClockTime(times[1])
	return startHour, startMinute, endHour, endMinute, true
}

// extractStartAndEndTime returns start hour, start minute, end hour and end
// minute of the first two "HH:MM" times in cycle. All values are 0 when cycle
// contains fewer than two times.
func extractStartAndEndTime(cycle string) (int, int, int, int) {
	startHour, startMinute, endHour, endMinute, _ := parseCycleTimes(cycle)
	return startHour, startMinute, endHour, endMinute
}

// cycleWeekdays returns the weekdays named in the day portion of a cycle part.
// Days may be listed ("Mo+Mi", "Mo, Mi") or given as an inclusive range
// ("Mo-Fr"); a range may wrap around the end of the week ("Sa-So"). Tokens
// that are not weekday abbreviations are ignored and no weekday is returned
// twice.
func cycleWeekdays(dayPart string) []time.Weekday {
	var named []time.Weekday
	for _, abbreviation := range weekdayAbbreviationPattern.FindAllString(dayPart, -1) {
		if weekday, ok := weekdayFromAbbreviation(abbreviation); ok {
			named = append(named, weekday)
		}
	}

	var (
		weekdays []time.Weekday
		seen     [7]bool
	)
	add := func(weekday time.Weekday) {
		if !seen[weekday] {
			seen[weekday] = true
			weekdays = append(weekdays, weekday)
		}
	}

	isRange := strings.Contains(dayPart, "-") && len(named) >= 2
	if !isRange || strings.Contains(dayPart, "+") {
		for _, weekday := range named {
			add(weekday)
		}
	}
	if isRange {
		// Walk forward from the first to the second day, wrapping after Saturday.
		for weekday := named[0]; ; weekday = (weekday + 1) % 7 {
			add(weekday)
			if weekday == named[1] {
				break
			}
		}
	}
	return weekdays
}

// getWeekEvents expands a textual cycle (for example "Mo+Mi 18:00-19:30") into
// the concrete start and end times of every occurrence between the dates of
// start and end. Both returned slices have the same length and matching
// indices. Cycle parts without a time span are skipped.
func getWeekEvents(start time.Time, end time.Time, cycle string) ([]time.Time, []time.Time) {
	var weekEvents []model.SportDayStartEnd

	for _, cyclePart := range splitByCommaWithTime(cycle) {
		// Everything before the first digit names the weekdays.
		firstDigit := strings.IndexFunc(cyclePart, func(r rune) bool { return r >= '0' && r <= '9' })
		if firstDigit < 0 {
			continue
		}
		startHour, startMinute, endHour, endMinute, ok := parseCycleTimes(cyclePart)
		if !ok {
			continue
		}

		// Start carries the first date with the start time of day, End the
		// last date with the end time of day.
		periodStart := time.Date(start.Year(), start.Month(), start.Day(), startHour, startMinute, 0, 0, start.Location())
		periodEnd := time.Date(end.Year(), end.Month(), end.Day(), endHour, endMinute, 0, 0, end.Location())

		for _, weekday := range cycleWeekdays(cyclePart[:firstDigit]) {
			weekEvents = append(weekEvents, model.SportDayStartEnd{
				Start: periodStart,
				End:   periodEnd,
				Day:   weekday,
			})
		}
	}

	var startDatesList []time.Time
	var endDatesList []time.Time
	for _, weekEvent := range weekEvents {
		startDates, endDates := createEventListFromStartToEndMatchingDay(weekEvent)
		startDatesList = append(startDatesList, startDates...)
		endDatesList = append(endDatesList, endDates...)
	}
	return startDatesList, endDatesList
}

// createEventListFromStartToEndMatchingDay walks day by day from
// weekEvent.Start while the current day is before weekEvent.End and returns,
// for every day whose weekday equals weekEvent.Day, a start time (time of day
// taken from Start) and an end time (time of day taken from End).
func createEventListFromStartToEndMatchingDay(weekEvent model.SportDayStartEnd) ([]time.Time, []time.Time) {
	var startDates []time.Time
	var endDates []time.Time

	for d := weekEvent.Start; d.Before(weekEvent.End); d = d.AddDate(0, 0, 1) {
		if d.Weekday() != weekEvent.Day {
			continue
		}
		year, month, day := d.Date()
		startDates = append(startDates, time.Date(year, month, day, weekEvent.Start.Hour(), weekEvent.Start.Minute(), 0, 0, d.Location()))
		endDates = append(endDates, time.Date(year, month, day, weekEvent.End.Hour(), weekEvent.End.Minute(), 0, 0, d.Location()))
	}
	return startDates, endDates
}

// splitByCommaWithTime splits input at ", " and re-joins pieces so that a
// piece without a time span is merged with the pieces that follow it, up to
// and including the next piece that has one ("Mo, Mi 18:00-19:00" stays
// together). Trailing pieces without a time span are returned as one final
// element.
func splitByCommaWithTime(input string) []string {
	var result []string

	// pending collects pieces that have not been closed by a time span yet.
	var pending string
	for _, part := range strings.Split(input, ", ") {
		hasTime := dayWithTimePattern.MatchString(part)
		switch {
		case hasTime && pending == "":
			result = append(result, part)
		case hasTime:
			result = append(result, pending+", "+part)
			pending = ""
		case pending == "":
			pending = part
		default:
			pending += ", " + part
		}
	}

	// Keep whatever is left over without a closing time span.
	if pending != "" {
		result = append(result, pending)
	}
	return result
}

// checkSemester returns "ss" for dates from April through September and "ws"
// otherwise.
func checkSemester(date time.Time) string {
	if month := date.Month(); month >= time.April && month <= time.September {
		return "ss"
	}
	return "ws"
}

// fetchAllAvailableSportCourses fetches the overview page that lists all sport
// courses and returns the relative links to the course detail pages. It
// returns nil when the overview page cannot be fetched.
func fetchAllAvailableSportCourses() []string {
	doc, err := htmlRequest(sportHostURL + "/sportangebote")
	if err != nil {
		return nil
	}

	// Detail links are recognised by their path prefix.
	var links []string
	doc.Find("a[href]").Each(func(_ int, anchor *goquery.Selection) {
		link, _ := anchor.Attr("href")
		if strings.HasPrefix(link, sportDetailLinkPrefix) {
			links = append(links, link)
		}
	})
	return links
}

// fetchHTWKSportCourses downloads the detail pages behind the given relative
// links with a fixed number of goroutines and returns the sport entries parsed
// from them. Pages that cannot be fetched or parsed are skipped.
func fetchHTWKSportCourses(links []string) []model.SportEntry {
	pages := make([]*goquery.Document, len(links))

	var wg sync.WaitGroup
	for worker := 0; worker < sportFetchWorkers; worker++ {
		wg.Add(1)
		go func(offset int) {
			defer wg.Done()
			// Each worker handles every sportFetchWorkers-th link and writes
			// only to its own indices, so no locking is needed.
			for j := offset; j < len(links); j += sportFetchWorkers {
				doc, err := htmlRequest(sportHostURL + links[j])
				if err != nil {
					continue
				}
				pages[j] = doc
			}
		}(worker)
	}
	wg.Wait()

	var entries []model.SportEntry
	for _, page := range pages {
		if page == nil {
			continue
		}
		pageEntries, err := fetchHtwkSportCourse(page)
		if err != nil {
			continue
		}
		entries = append(entries, pageEntries...)
	}
	return entries
}

// htmlRequest performs a GET request and parses the response body as HTML.
// It returns an error when the request fails, the server does not answer with
// status 200, or the body cannot be parsed.
func htmlRequest(url string) (*goquery.Document, error) {
	resp, err := sportHTTPClient.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("unexpected status " + resp.Status + " for " + url)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	return doc, nil
}

// fetchHtwkSportCourse extracts all sport entries from an already parsed
// course detail page.
// It returns an error if the document is the course overview page instead of
// a detail page.
// goquery is used to read the html. The html structure is not very consistent,
// so parsing is best effort.
// May be improved in the future.
func fetchHtwkSportCourse(doc *goquery.Document) ([]model.SportEntry, error) { var events []model.SportEntry germanTime, _ := time.LoadLocation("Europe/Berlin") if doc.Find("h1").Text() == "Aktuelle Sportangebote" { return nil, errors.New("not a sport course page") } doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) { var event model.SportEntry var details model.EventDetails fullTitle := strings.TrimSpace(s.Find("h3").Text()) titleParts := strings.Split(fullTitle, "-") if len(titleParts) > 0 { event.Title = strings.TrimSpace(titleParts[0]) } if len(titleParts) > 2 { details.Type = strings.TrimSpace(titleParts[len(titleParts)-1]) } event.ID = parseEventID(fullTitle) s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) { key := strings.TrimSpace(s.Find("td").First().Text()) value := strings.TrimSpace(s.Find("td").Last().Text()) switch key { case "Zeitraum": dates := strings.Split(value, "-") if len(dates) == 2 { startDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[0]), germanTime) endDate, _ := time.ParseInLocation("02.01.2006", strings.TrimSpace(dates[1]), germanTime) details.DateRange = model.DateRange{Start: startDate, End: endDate} } case "Zyklus": details.Cycle = value case "Geschlecht": details.Gender = value case "Leiter": leaderName := strings.TrimSpace(s.Find("td a").Text()) leadersSlice := strings.Split(leaderName, "\n") for i, leader := range leadersSlice { leadersSlice[i] = strings.TrimSpace(leader) } formattedLeaders := strings.Join(leadersSlice, ", ") leaderLink, _ := s.Find("td a").Attr("href") details.CourseLead = model.CourseLead{Name: formattedLeaders, Link: leaderLink} case "Ort": locationDetails := strings.Split(value, "(") if len(locationDetails) == 2 { details.Location = model.Location{ Name: strings.TrimSpace(locationDetails[0]), Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"), } } case "Teilnehmer": parts := strings.Split(value, "/") if len(parts) >= 3 { 
bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0])) totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1])) waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2])) details.Participants = model.Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList} } case "Kosten": details.Cost = value // makes no sense since you need to be logged in to see the price case "Hinweis": var allNotes []string s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) { if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" { note := strings.TrimSpace(s.Text()) if note != "" { allNotes = append(allNotes, note) } } }) event.AdditionalNote = strings.Join(allNotes, " ") } }) event.Details = details events = append(events, event) }) return events, nil } // parseEventID from fulltitle // the event id is a number in the fulltitle thats not a time like HH:MM and shoudl be found after Nr. or Nr: func parseEventID(fulltitle string) string { var eventID string var numberRegExp = regexp.MustCompile("[0-9]{1,4}") var fulltitleParts = strings.Split(fulltitle, " ") for i, part := range fulltitleParts { if part == "Nr." || part == "Nr:" { eventID = fulltitleParts[i+1] break } } if eventID == "" { eventID = numberRegExp.FindString(fulltitle) } return eventID }