Files
htwkalender/services/data-manager/service/fetch/v1/fetchSeminarEventService.go
2024-06-24 10:50:18 +02:00

333 lines
9.9 KiB
Go

//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
//Copyright (C) 2024 HTWKalender support@htwkalender.de
//This program is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Affero General Public License for more details.
//You should have received a copy of the GNU Affero General Public License
//along with this program. If not, see <https://www.gnu.org/licenses/>.
package v1
import (
"fmt"
"github.com/google/uuid"
"github.com/pocketbase/pocketbase/tools/types"
"golang.org/x/net/html"
"htwkalender/data-manager/model"
"htwkalender/data-manager/service/date"
"htwkalender/data-manager/service/fetch"
"htwkalender/data-manager/service/functions"
"log/slog"
"regexp"
"strconv"
"strings"
"time"
)
// ReplaceEmptyEventNames gives every event of the group whose name is blank
// (as decided by functions.OnlyWhitespace) the placeholder name
// "Sonderveranstaltungen". The names are overwritten directly in the group's
// Events slice, and the group is returned.
func ReplaceEmptyEventNames(group model.SeminarGroup) model.SeminarGroup {
	const placeholderName = "Sonderveranstaltungen"
	for idx := range group.Events {
		if !functions.OnlyWhitespace(group.Events[idx].Name) {
			continue
		}
		group.Events[idx].Name = placeholderName
	}
	return group
}
// ClearEmptySeminarGroups returns seminarGroup unchanged when it has at least
// one event and a non-empty Course; otherwise it returns the zero-value
// SeminarGroup.
func ClearEmptySeminarGroups(seminarGroup model.SeminarGroup) model.SeminarGroup {
	if len(seminarGroup.Events) == 0 || seminarGroup.Course == "" {
		return model.SeminarGroup{}
	}
	return seminarGroup
}
// fetchHTMLFromURL downloads the timetable report page of one seminar group.
//
// semester is inserted as the first path segment of the URL (callers in this
// file pass "ss" or "ws") and seminarGroupLabel is appended to the report
// path. The page is requested for weeks 1-65 via fetch.GetHTML.
//
// On failure the error is logged and returned together with an empty string.
func fetchHTMLFromURL(semester, seminarGroupLabel string) (string, error) {
	const (
		baseURL    = "https://stundenplan.htwk-leipzig.de/"
		reportPath = "/Berichte/Text-Listen;Studenten-Sets;name;"
		query      = "?template=sws_semgrp&weeks=1-65"
	)
	target := baseURL + semester + reportPath + seminarGroupLabel + query
	body, fetchErr := fetch.GetHTML(target)
	if fetchErr == nil {
		return body, nil
	}
	slog.Error("Error occurred while fetching the HTML document:", "error", fetchErr)
	return "", fetchErr
}
// GetSeminarGroupEventsFromHTML fetches and parses the timetable of the given
// seminar group for every semester that is relevant in the current month.
//
// Slot 0 holds the summer-semester ("ss") result and slot 1 the
// winter-semester ("ws") result. A semester that is not relevant this month
// is not fetched; its slot stays a zero value and its error stays nil.
// The two results are combined by checkForSuccessfulFetch.
func GetSeminarGroupEventsFromHTML(seminarGroupLabel string) (model.SeminarGroup, error) {
	var groups [2]model.SeminarGroup
	var summerErr, winterErr error

	month := time.Now().Month()
	if isSummerSemester(month) {
		groups[0], summerErr = fetchAndParse("ss", seminarGroupLabel)
	}
	if isWinterSemester(month) {
		groups[1], winterErr = fetchAndParse("ws", seminarGroupLabel)
	}
	return checkForSuccessfulFetch(summerErr, winterErr, groups)
}
// isSummerSemester reports whether month lies in the range March through
// October (both inclusive).
func isSummerSemester(month time.Month) bool {
	if month < time.March {
		return false
	}
	return month <= time.October
}
// isWinterSemester reports whether month is September or later, or April or
// earlier; only May through August yield false.
func isWinterSemester(month time.Month) bool {
	betweenSemesters := month > time.April && month < time.September
	return !betweenSemesters
}
// fetchAndParse downloads the timetable page for the given season and seminar
// group label and parses it with parseSeminarGroup.
// If the download fails, the zero-value group and the download error are
// returned; otherwise the parsed group and a nil error.
func fetchAndParse(season, label string) (model.SeminarGroup, error) {
	var group model.SeminarGroup
	page, err := fetchHTMLFromURL(season, label)
	if err == nil {
		group = parseSeminarGroup(page)
	}
	return group, err
}
// checkForSuccessfulFetch combines the summer (index 0) and winter (index 1)
// fetch results into a single seminar group.
//
//   - Both fetches failed: the zero-value group and the winter error are
//     returned.
//   - Exactly one fetch failed: the group of the other semester is returned.
//   - No fetch failed: the winter events are appended to the summer events.
//
// A semester that was skipped by the caller also arrives here with a nil
// error and a zero-value group. In that case the summer slot carries no
// University or Course, so those fields are taken from the winter group;
// otherwise the merged result would hold events but no course metadata.
func checkForSuccessfulFetch(errSS error, errWS error, seminarGroup [2]model.SeminarGroup) (model.SeminarGroup, error) {
	switch {
	case errSS != nil && errWS != nil:
		return model.SeminarGroup{}, errWS
	case errSS != nil:
		return seminarGroup[1], nil
	case errWS != nil:
		return seminarGroup[0], nil
	}

	merged := seminarGroup[0]
	// Fall back to the winter metadata when the summer slot has none.
	if merged.University == "" {
		merged.University = seminarGroup[1].University
	}
	if merged.Course == "" {
		merged.Course = seminarGroup[1].Course
	}
	merged.Events = append(merged.Events, seminarGroup[1].Events...)
	return merged, nil
}
// combinedEventTypeRe matches an event type that consists of exactly two
// characters: one of V, P, S followed by one of w, p.
// It is compiled once at package initialisation instead of on every call.
var combinedEventTypeRe = regexp.MustCompile("^([VPS])([wp])$")

// SplitEventType splits combined two-character event types in place.
//
// For every event whose EventType matches combinedEventTypeRe, the first
// character is kept as EventType and the second character is stored in
// Compulsory. Events with any other EventType are left untouched.
//
// The events slice is modified in place and returned. The returned error is
// always nil; it is kept so that existing callers continue to compile.
func SplitEventType(events []model.Event) ([]model.Event, error) {
	for i := range events {
		combined := events[i].EventType
		if !combinedEventTypeRe.MatchString(combined) {
			continue
		}
		events[i].EventType = combined[0:1]
		events[i].Compulsory = combined[1:2]
	}
	return events, nil
}
// parseSeminarGroup converts the HTML of a timetable report page into a
// SeminarGroup.
//
// The header spans of the first table supply the university
// ("header-1-0-0"), the course ("header-2-0-1") and the semester string
// ("header-0-2-0"). The event tables are turned into events, expanded to one
// event per week, converted to concrete dates, given UUIDs and finally have
// their combined event types split.
//
// The zero-value SeminarGroup is returned when the HTML cannot be parsed,
// when no event tables or day labels are found, when one of the header spans
// (or its text node) is missing, or when splitting the event types fails.
func parseSeminarGroup(result string) model.SeminarGroup {
	doc, err := html.Parse(strings.NewReader(result))
	if err != nil {
		fmt.Printf("Error occurred while parsing the HTML document: %s\n", err.Error())
		return model.SeminarGroup{}
	}

	table := findFirstTable(doc)
	eventTables := getEventTables(doc)
	allDayLabels := getAllDayLabels(doc)
	if eventTables == nil || allDayLabels == nil {
		return model.SeminarGroup{}
	}

	// headerText returns the text of the first span with the given class.
	// It reports false instead of panicking when the table, the span or the
	// span's first child is missing.
	headerText := func(class string) (string, bool) {
		if table == nil {
			return "", false
		}
		span := findFirstSpanWithClass(table, class)
		if span == nil || span.FirstChild == nil {
			return "", false
		}
		return span.FirstChild.Data, true
	}

	course, hasCourse := headerText("header-2-0-1")
	semesterString, hasSemester := headerText("header-0-2-0")
	university, hasUniversity := headerText("header-1-0-0")
	if !hasCourse || !hasSemester || !hasUniversity {
		slog.Error("Error occurred while parsing the HTML document: header span missing",
			"course", hasCourse, "semester", hasSemester, "university", hasUniversity)
		return model.SeminarGroup{}
	}

	eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels, course)
	splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
	events := splitEventsBySingleWeek(splitEventsByWeekVal)

	semester, year := extractSemesterAndYear(semesterString)
	events = convertWeeksToDates(events, semester, year)
	events = generateUUIDs(events, course)

	events, err = SplitEventType(events)
	if err != nil {
		slog.Error("Error occurred while splitting event types:", "error", err)
		return model.SeminarGroup{}
	}

	return model.SeminarGroup{
		University: university,
		Course:     course,
		Events:     events,
	}
}
// generateUUIDs assigns every event a SHA-1 name-based UUID in the OID
// namespace. The hashed name is the event name concatenated with the course,
// so events with the same name and course receive the same UUID.
// The events slice is modified in place and returned.
func generateUUIDs(events []model.Event, course string) []model.Event {
	for idx := range events {
		seed := []byte(events[idx].Name + course)
		events[idx].UUID = uuid.NewSHA1(uuid.NameSpaceOID, seed).String()
	}
	return events
}
// convertWeeksToDates turns week-based events into events with concrete
// start and end timestamps.
//
// For each event the calendar day is obtained from
// date.GetDateFromWeekNumber using the numeric year, the event's week and its
// day. The clock times of the event's existing Start and End are then placed
// on that day. If the resulting end lies before the start, the end is moved
// to the following day. Start and End are stored converted to UTC, and
// semester is written to every event.
//
// All conversion errors are discarded; whatever value the failing call
// returned alongside its error is used as is.
// The input slice is not modified; a new slice is returned.
func convertWeeksToDates(events []model.Event, semester string, year string) []model.Event {
	var dated []model.Event
	yearNumber, _ := strconv.Atoi(year)

	for _, current := range events {
		weekNumber, _ := strconv.Atoi(current.Week)
		day, _ := date.GetDateFromWeekNumber(yearNumber, weekNumber, current.Day)

		begin := replaceTimeForDate(day, current.Start.Time())
		finish := replaceTimeForDate(day, current.End.Time())
		// An end before the start means the event runs past midnight.
		if finish.Before(begin) {
			finish = finish.AddDate(0, 0, 1)
		}

		// current is the loop's own copy of the event, so it can be
		// completed and appended without touching the input slice.
		current.Start, _ = types.ParseDateTime(begin.In(time.UTC))
		current.End, _ = types.ParseDateTime(finish.In(time.UTC))
		current.Semester = semester
		dated = append(dated, current)
	}
	return dated
}
// replaceTimeForDate returns a time that has the year, month, day and
// location of date, and the hour, minute, second and nanosecond of
// replacementTime.
func replaceTimeForDate(date time.Time, replacementTime time.Time) time.Time {
	year, month, day := date.Date()
	hour, minute, second := replacementTime.Clock()
	return time.Date(year, month, day, hour, minute, second, replacementTime.Nanosecond(), date.Location())
}
// extractSemesterAndYear derives the semester shortcut and the year from a
// semester header string.
//
// The shortcut is "ws" when the string contains "Wintersemester" and "ss"
// when it contains "Sommersemester"; the winter check takes precedence.
// The year is the four-digit number that follows the semester name after
// exactly one whitespace character. If no such number exists, the year is
// empty. If neither semester name occurs, both results are empty.
func extractSemesterAndYear(semesterString string) (string, string) {
	var name, shortcut string
	switch {
	case strings.Contains(semesterString, "Wintersemester"):
		name, shortcut = "Wintersemester", "ws"
	case strings.Contains(semesterString, "Sommersemester"):
		name, shortcut = "Sommersemester", "ss"
	default:
		return "", ""
	}

	const yearPattern = `\d{4}`
	// First locate "<semester name> <year>", then pull the year out of it.
	labelled := regexp.MustCompile(name + `\s` + yearPattern).FindString(semesterString)
	if labelled == "" {
		return shortcut, ""
	}
	return shortcut, regexp.MustCompile(yearPattern).FindString(labelled)
}
// toEvents converts the rows of the per-day event tables into events.
//
// tables holds one list of row nodes per day and days holds the day label for
// the table at the same index. Each row is reduced to its data cells with
// findTableData; rows without data cells are skipped. The cells are read in
// this order: week(s), start time, end time, name, event type, prof, rooms,
// notes, booked-at. course is copied into every event.
//
// Rows with fewer than nine data cells are skipped with a warning, and
// processing stops with a warning once there is no day label left for a
// table, instead of panicking on an out-of-range index.
func toEvents(tables [][]*html.Node, days []string, course string) []model.Event {
	// The highest cell index read below is 8.
	const requiredCells = 9

	var events []model.Event
	for tableIdx, rows := range tables {
		if tableIdx >= len(days) {
			slog.Warn("More event tables than day labels, ignoring remaining tables",
				"tables", len(tables), "days", len(days))
			break
		}
		for _, row := range rows {
			cells := findTableData(row)
			if len(cells) == 0 {
				continue
			}
			if len(cells) < requiredCells {
				slog.Warn("Skipping event row with too few cells",
					"cells", len(cells), "required", requiredCells)
				continue
			}
			start, _ := types.ParseDateTime(createTimeFromHourAndMinuteString(getTextContent(cells[1])))
			end, _ := types.ParseDateTime(createTimeFromHourAndMinuteString(getTextContent(cells[2])))
			events = append(events, model.Event{
				Day:       days[tableIdx],
				Week:      getTextContent(cells[0]),
				Start:     start,
				End:       end,
				Name:      getTextContent(cells[3]),
				EventType: getTextContent(cells[4]),
				Prof:      getTextContent(cells[5]),
				Rooms:     getTextContent(cells[6]),
				Notes:     getTextContent(cells[7]),
				BookedAt:  getTextContent(cells[8]),
				Course:    course,
			})
		}
	}
	return events
}
// createTimeFromHourAndMinuteString builds a UTC time from a clock string of
// the form "HH:MM".
//
// tableTime is split at ':'; the first part is used as the hour and the
// second part as the minute. A part that is missing or not a number counts
// as 0, so an empty or colon-less string no longer causes a panic.
//
// Only the clock part of the result is meaningful: year, month and day are
// all passed to time.Date as 0.
func createTimeFromHourAndMinuteString(tableTime string) time.Time {
	timeParts := strings.Split(tableTime, ":")

	// strings.Split always yields at least one element for a non-empty
	// separator, so index 0 is safe. A failed conversion leaves 0.
	hour, _ := strconv.Atoi(timeParts[0])

	minute := 0
	if len(timeParts) > 1 {
		minute, _ = strconv.Atoi(timeParts[1])
	}
	return time.Date(0, 0, 0, hour, minute, 0, 0, time.UTC)
}
// splitEventsByWeek expands events whose Week field is a comma-separated
// list. For each list entry one copy of the event is produced, with Week set
// to that entry stripped of surrounding whitespace. An event without a comma
// yields exactly one copy.
func splitEventsByWeek(events []model.Event) []model.Event {
	var expanded []model.Event
	for _, original := range events {
		for _, part := range strings.Split(original.Week, ",") {
			single := original
			single.Week = strings.TrimSpace(part)
			expanded = append(expanded, single)
		}
	}
	return expanded
}
// splitEventsBySingleWeek expands events whose Week field contains a '-'
// into one event per week of that range, as computed by splitWeekRange.
// Events without a '-' are passed through unchanged. An event whose range
// yields no weeks produces no output events.
func splitEventsBySingleWeek(events []model.Event) []model.Event {
	var expanded []model.Event
	for _, original := range events {
		if !strings.Contains(original.Week, "-") {
			expanded = append(expanded, original)
			continue
		}
		for _, week := range splitWeekRange(original.Week) {
			single := original
			single.Week = week
			expanded = append(expanded, single)
		}
	}
	return expanded
}
// splitWeekRange expands a range such as "3-6" into the decimal strings of
// every number from the lower to the upper bound, both inclusive.
// Whitespace around each bound is ignored.
//
// nil is returned when weekRange does not consist of exactly two parts
// separated by '-', when a bound is not an integer, or when the lower bound
// is greater than the upper bound.
func splitWeekRange(weekRange string) []string {
	bounds := strings.Split(weekRange, "-")
	if len(bounds) != 2 {
		return nil
	}

	first, err := strconv.Atoi(strings.TrimSpace(bounds[0]))
	if err != nil {
		return nil
	}
	last, err := strconv.Atoi(strings.TrimSpace(bounds[1]))
	if err != nil {
		return nil
	}

	var weeks []string
	for week := first; week <= last; week++ {
		weeks = append(weeks, strconv.Itoa(week))
	}
	return weeks
}