package main import ( "errors" "net/http" "strconv" "strings" "time" "github.com/PuerkitoBio/goquery" ) // just to test the code // @TODO: remove this // @TODO: add tests // @TODO: add it to the service // @TODO: make it like a cron job to fetch the sport courses once a week func main() { events := fetchAllHtwkSportCourses() for _, event := range events { print(event.Title) } } // fetchAllHtwkSportCourses fetches all sport courses from the htwk sport website. // It iterates over all ids from 0 to 9999 and tries to fetch the sport course. // If the sport course does not exist, it will continue with the next id. // If the sport course exists, it will be added to the events slice. // Since the ids are not consecutive, it will take a while to fetch all sport courses. // @TODO: find the highest id and iterate over all ids from 0 to highest id func fetchAllHtwkSportCourses() []Event { var events []Event for i := 0; i <= 9999; i++ { newEvent, err := fetchHtwkSportCourse("https://sport.htwk-leipzig.de/sportangebote/detail/sport/", i) if err != nil { continue } events = append(events, newEvent...) } return events } // fetchHtwkSportCourse fetches the sport course from the given url and id. // If the sport course does not exist, it will return an error. // If the sport course exists, it will return the sport course. // goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse. // May be improved in the future. func fetchHtwkSportCourse(url string, id int) ([]Event, error) { var events []Event resp, err := http.Get(url + strconv.Itoa(id)) if err != nil { return nil, err } defer resp.Body.Close() doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return nil, err } if doc.Find("h1").Text() == "Aktuelle Sportangebote" { return nil, errors.New("not a sport course page") } doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) { var event Event var details EventDetails fullTitle := strings.TrimSpace(s.Find("h3").Text()) titleParts := strings.Split(fullTitle, "-") if len(titleParts) > 0 { event.Title = strings.TrimSpace(titleParts[0]) } s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) { key := strings.TrimSpace(s.Find("td").First().Text()) value := strings.TrimSpace(s.Find("td").Last().Text()) switch key { case "Zeitraum": dates := strings.Split(value, "-") if len(dates) == 2 { startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0])) endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1])) details.DateRange = DateRange{Start: startDate, End: endDate} } case "Zyklus": details.Cycle = value case "Geschlecht": details.Gender = value case "Leiter": leaderName := strings.TrimSpace(s.Find("td a").Text()) leadersSlice := strings.Split(leaderName, "\n") for i, leader := range leadersSlice { leadersSlice[i] = strings.TrimSpace(leader) } formattedLeaders := strings.Join(leadersSlice, ", ") leaderLink, _ := s.Find("td a").Attr("href") details.CourseLead = CourseLead{Name: formattedLeaders, Link: leaderLink} case "Ort": locationDetails := strings.Split(value, "(") if len(locationDetails) == 2 { details.Location = Location{ Name: strings.TrimSpace(locationDetails[0]), Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"), } } case "Teilnehmer": parts := strings.Split(value, "/") if len(parts) >= 3 { bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0])) totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1])) waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2])) details.Participants = Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList} } case "Kosten": details.Cost = value // makes no sense since you need to be logged in to see the price case "Hinweis": var allNotes []string s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) { if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" { note := strings.TrimSpace(s.Text()) if note != "" { allNotes = append(allNotes, note) } } }) event.AdditionalNote = strings.Join(allNotes, " ") } }) event.Details = details events = append(events, event) }) return events, nil } // MODELS // Event represents the overall event details. type Event struct { Title string Details EventDetails AdditionalNote string } // EventDetails represents detailed information about the event. type EventDetails struct { DateRange DateRange Cycle string Gender string CourseLead CourseLead Location Location Participants Participants Cost string } // DateRange represents a start and end date. type DateRange struct { Start time.Time End time.Time } // CourseLead represents a person with a name and a contact link. type CourseLead struct { Name string Link string } // Location represents the location of the event. type Location struct { Name string Address string } // Participants represents the participants' details. type Participants struct { Bookings int TotalPlaces int WaitList int }