Since im not that deep into the project i seperated it from the rest, its in "backend/sport/main.go". I added the task which are still needed to do.

Added Package "github.com/PuerkitoBio/goquery"
This commit is contained in:
joossm
2023-12-08 17:38:37 +01:00
parent c6eeb7d81c
commit 57e3e41a9a
3 changed files with 198 additions and 0 deletions

View File

@@ -3,6 +3,7 @@ module htwkalender
go 1.21
require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/google/uuid v1.3.1
github.com/jordic/goics v0.0.0-20210404174824-5a0337b716a0
github.com/labstack/echo/v5 v5.0.0-20230722203903-ec5b858dab61
@@ -13,6 +14,7 @@ require (
require (
github.com/AlecAivazis/survey/v2 v2.3.7 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/aws/aws-sdk-go v1.46.3 // indirect
github.com/aws/aws-sdk-go-v2 v1.21.2 // indirect

View File

@@ -14,6 +14,10 @@ github.com/AlecAivazis/survey/v2 v2.3.7/go.mod h1:xUTIdE4KCOIjsBAE1JYsUPoCqYdZ1r
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63nhn5WAunQHLTznkw5W8b1Xc0dNjp83s=
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2/go.mod h1:HBCaDeC1lPdgDeDbhX8XFpy1jqjK0IBG8W5K+xYqA0w=
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
@@ -239,8 +243,10 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
@@ -258,25 +264,30 @@ golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=

185
backend/sport/main.go Normal file
View File

@@ -0,0 +1,185 @@
package main
import (
"errors"
"net/http"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// just to test the code
// @TODO: remove this
// @TODO: add tests
// @TODO: add it to the service
// @TODO: make it like a cron job to fetch the sport courses once a week
func main() {
events := fetchAllHtwkSportCourses()
for _, event := range events {
print(event.Title)
}
}
// fetchAllHtwkSportCourses fetches all sport courses from the htwk sport website.
// It iterates over all ids from 0 to 9999 and tries to fetch the sport course.
// If the sport course does not exist, it will continue with the next id.
// If the sport course exists, it will be added to the events slice.
// Since the ids are not consecutive, it will take a while to fetch all sport courses.
// @TODO: find the highest id and iterate over all ids from 0 to highest id
func fetchAllHtwkSportCourses() []Event {
var events []Event
for i := 0; i <= 9999; i++ {
newEvent, err := fetchHtwkSportCourse("https://sport.htwk-leipzig.de/sportangebote/detail/sport/", i)
if err != nil {
continue
}
events = append(events, newEvent...)
}
return events
}
// fetchHtwkSportCourse fetches the sport course from the given url and id.
// If the sport course does not exist, it will return an error.
// If the sport course exists, it will return the sport course.
// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse.
// May be improved in the future.
func fetchHtwkSportCourse(url string, id int) ([]Event, error) {
var events []Event
resp, err := http.Get(url + strconv.Itoa(id))
if err != nil {
return nil, err
}
defer resp.Body.Close()
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, err
}
if doc.Find("h1").Text() == "Aktuelle Sportangebote" {
return nil, errors.New("not a sport course page")
}
doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) {
var event Event
var details EventDetails
fullTitle := strings.TrimSpace(s.Find("h3").Text())
titleParts := strings.Split(fullTitle, "-")
if len(titleParts) > 0 {
event.Title = strings.TrimSpace(titleParts[0])
}
s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) {
key := strings.TrimSpace(s.Find("td").First().Text())
value := strings.TrimSpace(s.Find("td").Last().Text())
switch key {
case "Zeitraum":
dates := strings.Split(value, "-")
if len(dates) == 2 {
startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0]))
endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1]))
details.DateRange = DateRange{Start: startDate, End: endDate}
}
case "Zyklus":
details.Cycle = value
case "Geschlecht":
details.Gender = value
case "Leiter":
leaderName := strings.TrimSpace(s.Find("td a").Text())
leadersSlice := strings.Split(leaderName, "\n")
for i, leader := range leadersSlice {
leadersSlice[i] = strings.TrimSpace(leader)
}
formattedLeaders := strings.Join(leadersSlice, ", ")
leaderLink, _ := s.Find("td a").Attr("href")
details.CourseLead = CourseLead{Name: formattedLeaders, Link: leaderLink}
case "Ort":
locationDetails := strings.Split(value, "(")
if len(locationDetails) == 2 {
details.Location = Location{
Name: strings.TrimSpace(locationDetails[0]),
Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"),
}
}
case "Teilnehmer":
parts := strings.Split(value, "/")
if len(parts) >= 3 {
bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1]))
waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2]))
details.Participants = Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList}
}
case "Kosten":
details.Cost = value // makes no sense since you need to be logged in to see the price
case "Hinweis":
var allNotes []string
s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) {
if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" {
note := strings.TrimSpace(s.Text())
if note != "" {
allNotes = append(allNotes, note)
}
}
})
event.AdditionalNote = strings.Join(allNotes, " ")
}
})
event.Details = details
events = append(events, event)
})
return events, nil
}
// MODELS
// Event represents the overall event details.
type Event struct {
Title string
Details EventDetails
AdditionalNote string
}
// EventDetails represents detailed information about the event.
type EventDetails struct {
DateRange DateRange
Cycle string
Gender string
CourseLead CourseLead
Location Location
Participants Participants
Cost string
}
// DateRange represents a start and end date.
type DateRange struct {
Start time.Time
End time.Time
}
// CourseLead represents a person with a name and a contact link.
type CourseLead struct {
Name string
Link string
}
// Location represents the location of the event.
type Location struct {
Name string
Address string
}
// Participants represents the participants' details.
type Participants struct {
Bookings int
TotalPlaces int
WaitList int
}