diff --git a/backend/go.mod b/backend/go.mod index 3a2ca8b..a9ed30a 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -3,6 +3,7 @@ module htwkalender go 1.21 require ( + github.com/PuerkitoBio/goquery v1.8.1 github.com/google/uuid v1.3.1 github.com/jordic/goics v0.0.0-20210404174824-5a0337b716a0 github.com/labstack/echo/v5 v5.0.0-20230722203903-ec5b858dab61 @@ -13,6 +14,7 @@ require ( require ( github.com/AlecAivazis/survey/v2 v2.3.7 // indirect + github.com/andybalholm/cascadia v1.3.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/aws/aws-sdk-go v1.46.3 // indirect github.com/aws/aws-sdk-go-v2 v1.21.2 // indirect diff --git a/backend/go.sum b/backend/go.sum index cdbecb5..70f7586 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -14,6 +14,10 @@ github.com/AlecAivazis/survey/v2 v2.3.7/go.mod h1:xUTIdE4KCOIjsBAE1JYsUPoCqYdZ1r github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63nhn5WAunQHLTznkw5W8b1Xc0dNjp83s= github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2/go.mod h1:HBCaDeC1lPdgDeDbhX8XFpy1jqjK0IBG8W5K+xYqA0w= +github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= @@ -239,8 +243,10 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -258,25 +264,30 @@ golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= diff --git a/backend/sport/main.go b/backend/sport/main.go new file mode 100644 index 0000000..b62e2c2 --- /dev/null +++ b/backend/sport/main.go @@ -0,0 +1,185 @@ +package main + +import ( + "errors" + "net/http" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" +) + +// just to test the code +// @TODO: remove this +// @TODO: add tests +// @TODO: add it to the service +// @TODO: make it like a cron job to fetch the sport courses once a week +func main() { + events := fetchAllHtwkSportCourses() + for _, event := range events { + print(event.Title) + } +} + +// fetchAllHtwkSportCourses fetches all sport courses from the htwk sport website. +// It iterates over all ids from 0 to 9999 and tries to fetch the sport course. +// If the sport course does not exist, it will continue with the next id. +// If the sport course exists, it will be added to the events slice. +// Since the ids are not consecutive, it will take a while to fetch all sport courses. +// @TODO: find the highest id and iterate over all ids from 0 to highest id +func fetchAllHtwkSportCourses() []Event { + var events []Event + for i := 0; i <= 9999; i++ { + newEvent, err := fetchHtwkSportCourse("https://sport.htwk-leipzig.de/sportangebote/detail/sport/", i) + if err != nil { + continue + } + events = append(events, newEvent...) + + } + return events +} + +// fetchHtwkSportCourse fetches the sport course from the given url and id. +// If the sport course does not exist, it will return an error. +// If the sport course exists, it will return the sport course. +// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse. +// May be improved in the future. +func fetchHtwkSportCourse(url string, id int) ([]Event, error) { + var events []Event + + resp, err := http.Get(url + strconv.Itoa(id)) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, err + } + + if doc.Find("h1").Text() == "Aktuelle Sportangebote" { + return nil, errors.New("not a sport course page") + } + + doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) { + var event Event + var details EventDetails + + fullTitle := strings.TrimSpace(s.Find("h3").Text()) + titleParts := strings.Split(fullTitle, "-") + if len(titleParts) > 0 { + event.Title = strings.TrimSpace(titleParts[0]) + } + s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) { + key := strings.TrimSpace(s.Find("td").First().Text()) + value := strings.TrimSpace(s.Find("td").Last().Text()) + + switch key { + case "Zeitraum": + dates := strings.Split(value, "-") + if len(dates) == 2 { + startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0])) + endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1])) + details.DateRange = DateRange{Start: startDate, End: endDate} + } + case "Zyklus": + details.Cycle = value + case "Geschlecht": + details.Gender = value + case "Leiter": + leaderName := strings.TrimSpace(s.Find("td a").Text()) + leadersSlice := strings.Split(leaderName, "\n") + for i, leader := range leadersSlice { + leadersSlice[i] = strings.TrimSpace(leader) + } + formattedLeaders := strings.Join(leadersSlice, ", ") + leaderLink, _ := s.Find("td a").Attr("href") + details.CourseLead = CourseLead{Name: formattedLeaders, Link: leaderLink} + case "Ort": + locationDetails := strings.Split(value, "(") + if len(locationDetails) == 2 { + details.Location = Location{ + Name: strings.TrimSpace(locationDetails[0]), + Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"), + } + } + case "Teilnehmer": + parts := strings.Split(value, "/") + if len(parts) >= 3 { + bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0])) + totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1])) + waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2])) + details.Participants = Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList} + } + case "Kosten": + details.Cost = value // makes no sense since you need to be logged in to see the price + case "Hinweis": + var allNotes []string + + s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) { + if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" { + note := strings.TrimSpace(s.Text()) + if note != "" { + allNotes = append(allNotes, note) + } + } + }) + + event.AdditionalNote = strings.Join(allNotes, " ") + } + }) + + event.Details = details + events = append(events, event) + }) + + return events, nil +} + +// MODELS + +// Event represents the overall event details. +type Event struct { + Title string + Details EventDetails + AdditionalNote string +} + +// EventDetails represents detailed information about the event. +type EventDetails struct { + DateRange DateRange + Cycle string + Gender string + CourseLead CourseLead + Location Location + Participants Participants + Cost string +} + +// DateRange represents a start and end date. +type DateRange struct { + Start time.Time + End time.Time +} + +// CourseLead represents a person with a name and a contact link. +type CourseLead struct { + Name string + Link string +} + +// Location represents the location of the event. +type Location struct { + Name string + Address string +} + +// Participants represents the participants' details. +type Participants struct { + Bookings int + TotalPlaces int + WaitList int +}