mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2025-08-03 18:29:14 +02:00
Since I'm not that deep into the project, I separated it from the rest; it's in "backend/sport/main.go". I added the tasks that still need to be done.
Added Package "github.com/PuerkitoBio/goquery"
This commit is contained in:
185
backend/sport/main.go
Normal file
185
backend/sport/main.go
Normal file
@@ -0,0 +1,185 @@
|
||||
package main
|
||||
|
||||
import (
	"errors"
	"fmt"
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
// just to test the code
|
||||
// @TODO: remove this
|
||||
// @TODO: add tests
|
||||
// @TODO: add it to the service
|
||||
// @TODO: make it like a cron job to fetch the sport courses once a week
|
||||
func main() {
|
||||
events := fetchAllHtwkSportCourses()
|
||||
for _, event := range events {
|
||||
print(event.Title)
|
||||
}
|
||||
}
|
||||
|
||||
// fetchAllHtwkSportCourses fetches all sport courses from the htwk sport website.
|
||||
// It iterates over all ids from 0 to 9999 and tries to fetch the sport course.
|
||||
// If the sport course does not exist, it will continue with the next id.
|
||||
// If the sport course exists, it will be added to the events slice.
|
||||
// Since the ids are not consecutive, it will take a while to fetch all sport courses.
|
||||
// @TODO: find the highest id and iterate over all ids from 0 to highest id
|
||||
func fetchAllHtwkSportCourses() []Event {
|
||||
var events []Event
|
||||
for i := 0; i <= 9999; i++ {
|
||||
newEvent, err := fetchHtwkSportCourse("https://sport.htwk-leipzig.de/sportangebote/detail/sport/", i)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
events = append(events, newEvent...)
|
||||
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
// fetchHtwkSportCourse fetches the sport course from the given url and id.
|
||||
// If the sport course does not exist, it will return an error.
|
||||
// If the sport course exists, it will return the sport course.
|
||||
// goquery is used to parse the html. The html structure is not very consistent, so it is hard to parse.
|
||||
// May be improved in the future.
|
||||
func fetchHtwkSportCourse(url string, id int) ([]Event, error) {
|
||||
var events []Event
|
||||
|
||||
resp, err := http.Get(url + strconv.Itoa(id))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if doc.Find("h1").Text() == "Aktuelle Sportangebote" {
|
||||
return nil, errors.New("not a sport course page")
|
||||
}
|
||||
|
||||
doc.Find(".eventHead").Each(func(i int, s *goquery.Selection) {
|
||||
var event Event
|
||||
var details EventDetails
|
||||
|
||||
fullTitle := strings.TrimSpace(s.Find("h3").Text())
|
||||
titleParts := strings.Split(fullTitle, "-")
|
||||
if len(titleParts) > 0 {
|
||||
event.Title = strings.TrimSpace(titleParts[0])
|
||||
}
|
||||
s.NextFiltered("table.eventDetails").Find("tr").Each(func(i int, s *goquery.Selection) {
|
||||
key := strings.TrimSpace(s.Find("td").First().Text())
|
||||
value := strings.TrimSpace(s.Find("td").Last().Text())
|
||||
|
||||
switch key {
|
||||
case "Zeitraum":
|
||||
dates := strings.Split(value, "-")
|
||||
if len(dates) == 2 {
|
||||
startDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[0]))
|
||||
endDate, _ := time.Parse("02.01.2006", strings.TrimSpace(dates[1]))
|
||||
details.DateRange = DateRange{Start: startDate, End: endDate}
|
||||
}
|
||||
case "Zyklus":
|
||||
details.Cycle = value
|
||||
case "Geschlecht":
|
||||
details.Gender = value
|
||||
case "Leiter":
|
||||
leaderName := strings.TrimSpace(s.Find("td a").Text())
|
||||
leadersSlice := strings.Split(leaderName, "\n")
|
||||
for i, leader := range leadersSlice {
|
||||
leadersSlice[i] = strings.TrimSpace(leader)
|
||||
}
|
||||
formattedLeaders := strings.Join(leadersSlice, ", ")
|
||||
leaderLink, _ := s.Find("td a").Attr("href")
|
||||
details.CourseLead = CourseLead{Name: formattedLeaders, Link: leaderLink}
|
||||
case "Ort":
|
||||
locationDetails := strings.Split(value, "(")
|
||||
if len(locationDetails) == 2 {
|
||||
details.Location = Location{
|
||||
Name: strings.TrimSpace(locationDetails[0]),
|
||||
Address: strings.TrimRight(strings.TrimSpace(locationDetails[1]), ")"),
|
||||
}
|
||||
}
|
||||
case "Teilnehmer":
|
||||
parts := strings.Split(value, "/")
|
||||
if len(parts) >= 3 {
|
||||
bookings, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
||||
totalPlaces, _ := strconv.Atoi(strings.TrimSpace(parts[1]))
|
||||
waitList, _ := strconv.Atoi(strings.TrimSpace(parts[2]))
|
||||
details.Participants = Participants{Bookings: bookings, TotalPlaces: totalPlaces, WaitList: waitList}
|
||||
}
|
||||
case "Kosten":
|
||||
details.Cost = value // makes no sense since you need to be logged in to see the price
|
||||
case "Hinweis":
|
||||
var allNotes []string
|
||||
|
||||
s.Find("td").Last().Contents().Each(func(i int, s *goquery.Selection) {
|
||||
if s.Is("h4.eventAdvice") || goquery.NodeName(s) == "#text" {
|
||||
note := strings.TrimSpace(s.Text())
|
||||
if note != "" {
|
||||
allNotes = append(allNotes, note)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
event.AdditionalNote = strings.Join(allNotes, " ")
|
||||
}
|
||||
})
|
||||
|
||||
event.Details = details
|
||||
events = append(events, event)
|
||||
})
|
||||
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// MODELS
|
||||
|
||||
// Event represents the overall event details.
|
||||
type Event struct {
|
||||
Title string
|
||||
Details EventDetails
|
||||
AdditionalNote string
|
||||
}
|
||||
|
||||
// EventDetails represents detailed information about the event.
|
||||
type EventDetails struct {
|
||||
DateRange DateRange
|
||||
Cycle string
|
||||
Gender string
|
||||
CourseLead CourseLead
|
||||
Location Location
|
||||
Participants Participants
|
||||
Cost string
|
||||
}
|
||||
|
||||
// DateRange represents a start and end date. The zero value (both times zero)
// means that no range is known.
type DateRange struct {
	// Start is the first day of the range.
	Start time.Time
	// End is the last day of the range.
	End time.Time
}
|
||||
|
||||
// CourseLead represents a person with a name and a contact link.
type CourseLead struct {
	// Name is the leader's name; several leaders are joined with ", ".
	Name string
	// Link is the href of the leader's link as found on the page, or empty
	// when there is none.
	Link string
}
|
||||
|
||||
// Location represents the location of the event.
type Location struct {
	// Name is the name of the place.
	Name string
	// Address is the address given in parentheses after the name on the
	// page, without the parentheses.
	Address string
}
|
||||
|
||||
// Participants represents the participants' details, taken from the three
// "/"-separated numbers of the "Teilnehmer" row in this order.
type Participants struct {
	// Bookings is the first number of the row.
	Bookings int
	// TotalPlaces is the second number of the row.
	TotalPlaces int
	// WaitList is the third number of the row.
	WaitList int
}
|
Reference in New Issue
Block a user