feat:#34 refactored function to intended service, fixed docker files

This commit is contained in:
Elmar Kresse
2024-06-10 16:57:40 +02:00
parent cb76b5c188
commit 2d7701b0c9
96 changed files with 212 additions and 79 deletions

View File

@@ -0,0 +1,67 @@
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
//Copyright (C) 2024 HTWKalender support@htwkalender.de
//This program is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Affero General Public License for more details.
//You should have received a copy of the GNU Affero General Public License
//along with this program. If not, see <https://www.gnu.org/licenses/>.
package v2
import (
"github.com/pocketbase/pocketbase/tools/types"
"golang.org/x/net/html"
"htwkalender/model"
"htwkalender/service/date"
"htwkalender/service/functions"
"strings"
)
func toEvents(tables [][]*html.Node, days []string) []model.Event {
var events []model.Event
for table := range tables {
for row := range tables[table] {
tableData := findTableData(tables[table][row])
if len(tableData) > 0 {
start, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[1])))
end, _ := types.ParseDateTime(date.CreateTimeFromHourAndMinuteString(getTextContent(tableData[2])))
courses := getTextContent(tableData[7])
name := getTextContent(tableData[3])
if functions.OnlyWhitespace(name) {
name = "Sonderveranstaltung"
}
if len(courses) > 0 {
for _, course := range strings.Split(courses, " ") {
events = append(events, model.Event{
Day: days[table],
Week: getTextContent(tableData[0]),
Start: start,
End: end,
Name: name,
EventType: getTextContent(tableData[4]),
Notes: getTextContent(tableData[5]),
Prof: getTextContent(tableData[6]),
Rooms: getTextContent(tableData[8]),
BookedAt: getTextContent(tableData[10]),
Course: strings.TrimSpace(course),
})
}
}
}
}
}
return events
}

View File

@@ -0,0 +1,181 @@
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
//Copyright (C) 2024 HTWKalender support@htwkalender.de
//This program is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Affero General Public License for more details.
//You should have received a copy of the GNU Affero General Public License
//along with this program. If not, see <https://www.gnu.org/licenses/>.
package v2
import (
"fmt"
"github.com/google/uuid"
"github.com/pocketbase/pocketbase"
"golang.org/x/net/html"
"htwkalender/model"
"htwkalender/service/db"
"htwkalender/service/fetch"
v1 "htwkalender/service/fetch/v1"
"htwkalender/service/functions"
localTime "htwkalender/service/functions/time"
"log/slog"
"strings"
)
func ParseEventsFromRemote(app *pocketbase.PocketBase) (model.Events, error) {
savedRecords, err := FetchAllEventsAndSave(app, localTime.RealClock{})
if err != nil {
return nil, err
}
return savedRecords, nil
}
func FetchAllEventsAndSave(app *pocketbase.PocketBase, clock localTime.Clock) ([]model.Event, error) {
var savedRecords []model.Event
var err error = nil
var stubUrl = [2]string{
"https://stundenplan.htwk-leipzig.de/",
"/Berichte/Text-Listen;Veranstaltungsarten;name;" +
"Vp%0A" +
"Vw%0A" +
"V%0A" +
"Sp%0A" +
"Sw%0A" +
"S%0A" +
"Pp%0A" +
"Pw%0A" +
"P%0A" +
"ZV%0A" +
"Tut%0A" +
"Sperr%0A" +
"pf%0A" +
"wpf%0A" +
"fak%0A" +
"Pruefung%0A" +
"gebucht%0A" +
"Vertretung%0A" +
"Fremdveranst.%0A" +
"Buchen%0A" +
"%0A?&template=sws_modul&weeks=1-65&combined=yes",
}
// Fetch and save events for all semesters
for _, semester := range functions.CalculateSemesterList(clock) {
events, fetchErr := fetchAndSaveAllEventsForSemester(app, semester, stubUrl)
if fetchErr != nil {
return nil, fmt.Errorf("failed to fetch and save events for "+semester+": %w", err)
}
savedRecords = append(savedRecords, events...)
}
return savedRecords, err
}
func fetchAndSaveAllEventsForSemester(
app *pocketbase.PocketBase,
semester string,
stubUrl [2]string,
) ([]model.Event, error) {
var savedRecords []model.Event
url := stubUrl[0] + semester + stubUrl[1]
events, err := parseEventForOneSemester(url)
if err != nil {
return nil, fmt.Errorf("failed to parse events for "+semester+": %w", err)
}
err = db.DeleteAllEventsBySemesterWithoutCourse(app, "Sport", semester)
if err != nil {
return nil, fmt.Errorf("failed to delete all events for "+semester+": %w", err)
}
savedEvents, dbError := db.SaveEvents(events, app)
if dbError != nil {
return nil, fmt.Errorf("failed to save events for "+semester+": %w", dbError)
}
savedRecords = append(savedRecords, savedEvents...)
return savedRecords, err
}
func parseEventForOneSemester(url string) ([]model.Event, error) {
// Fetch Webpage from URL
webpage, err := fetch.GetHTML(url)
if err != nil {
return nil, err
}
// Parse HTML to Node Tree
var doc *html.Node
doc, err = parseHTML(webpage)
if err != nil {
return nil, err
}
// Get all event tables and all day labels
eventTables := getEventTables(doc)
allDayLabels := getAllDayLabels(doc)
eventsWithCombinedWeeks := toEvents(eventTables, allDayLabels)
splitEventsByWeekVal := splitEventsByWeek(eventsWithCombinedWeeks)
events := splitEventsBySingleWeek(splitEventsByWeekVal)
if events == nil {
return nil, err
}
table := findFirstTable(doc)
if table == nil {
return nil, fmt.Errorf("failed to find first table")
}
semesterString := findFirstSpanWithClass(table, "header-0-2-0").FirstChild.Data
semester, year := extractSemesterAndYear(semesterString)
events = convertWeeksToDates(events, semester, year)
events, err = v1.SplitEventType(events)
if err != nil {
slog.Error("Error occurred while splitting event types: ", "error", err)
return nil, err
}
events = switchNameAndNotesForExam(events)
events = generateUUIDs(events)
return events, nil
}
// switch name and notes for Pruefung events when Note is not empty and Name starts with "Prüfungen" and contains email
func switchNameAndNotesForExam(events []model.Event) []model.Event {
for i, event := range events {
if event.EventType == "Pruefung" {
if event.Notes != "" && strings.HasPrefix(event.Name, "Prüfungen") && strings.Contains(event.Name, "@") {
events[i].Name = event.Notes
events[i].Notes = event.Name
}
}
}
return events
}
func parseHTML(webpage string) (*html.Node, error) {
doc, err := html.Parse(strings.NewReader(webpage))
if err != nil {
return nil, err
}
return doc, nil
}
func generateUUIDs(events []model.Event) []model.Event {
for i, event := range events {
// generate a hash value from the event name, course and semester
hash := uuid.NewSHA1(uuid.NameSpaceOID, []byte(event.Name+event.Course))
events[i].UUID = hash.String()
}
return events
}

View File

@@ -0,0 +1,99 @@
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
//Copyright (C) 2024 HTWKalender support@htwkalender.de
//This program is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Affero General Public License for more details.
//You should have received a copy of the GNU Affero General Public License
//along with this program. If not, see <https://www.gnu.org/licenses/>.
package v2
import (
"htwkalender/model"
"reflect"
"testing"
)
func Test_switchNameAndNotesForExam(t *testing.T) {
type args struct {
events []model.Event
}
tests := []struct {
name string
args args
want []model.Event
}{
{
name: "switch name and notes for exam",
args: args{
events: []model.Event{
{
EventType: "Pruefung",
Name: "Prüfungen FING/EIT WiSe (pruefungsamt.fing-eit@htwk-leipzig.de)",
Notes: "Computer Vision II - Räume/Zeit unter Vorbehalt- (Raum W111.1)",
},
},
},
want: []model.Event{
{
EventType: "Pruefung",
Name: "Computer Vision II - Räume/Zeit unter Vorbehalt- (Raum W111.1)",
Notes: "Prüfungen FING/EIT WiSe (pruefungsamt.fing-eit@htwk-leipzig.de)",
},
},
},
{
name: "dont switch name and notes for exam",
args: args{
events: []model.Event{
{
EventType: "Pruefung",
Name: "i054 Umweltschutz und Recycling DPB & VNB 7.FS (wpf)",
Notes: "Prüfung",
},
},
},
want: []model.Event{
{
EventType: "Pruefung",
Notes: "Prüfung",
Name: "i054 Umweltschutz und Recycling DPB & VNB 7.FS (wpf)",
},
},
},
{
name: "dont switch name and notes for exam",
args: args{
events: []model.Event{
{
EventType: "Pruefung",
Name: "Prüfungen FING/ME WiSe (pruefungsamt.fing-me@htwk-leipzig.de)",
Notes: "",
},
},
},
want: []model.Event{
{
EventType: "Pruefung",
Notes: "",
Name: "Prüfungen FING/ME WiSe (pruefungsamt.fing-me@htwk-leipzig.de)",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := switchNameAndNotesForExam(tt.args.events); !reflect.DeepEqual(got, tt.want) {
t.Errorf("switchNameAndNotesForExam() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -0,0 +1,340 @@
//Calendar implementation for the HTWK Leipzig timetable. Evaluation and display of the individual dates in iCal format.
//Copyright (C) 2024 HTWKalender support@htwkalender.de
//This program is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Affero General Public License for more details.
//You should have received a copy of the GNU Affero General Public License
//along with this program. If not, see <https://www.gnu.org/licenses/>.
package v2
import (
"github.com/pocketbase/pocketbase/tools/types"
"golang.org/x/net/html"
"htwkalender/model"
"htwkalender/service/date"
"regexp"
"strconv"
"strings"
"time"
)
// Find the first <table> element in the HTML document
func findFirstTable(node *html.Node) *html.Node {
if node.Type == html.ElementNode && node.Data == "table" {
return node
}
// Traverse child nodes recursively
for child := node.FirstChild; child != nil; child = child.NextSibling {
found := findFirstTable(child)
if found != nil {
return found
}
}
return nil
}
// Find the first <span> element with the specified class attribute value
func findFirstSpanWithClass(node *html.Node, classValue string) *html.Node {
// Check if the current node is a <span> element with the specified class attribute value
if node.Type == html.ElementNode && node.Data == "span" {
if hasClassAttribute(node, classValue) {
return node
}
}
// Traverse child nodes recursively
for child := node.FirstChild; child != nil; child = child.NextSibling {
found := findFirstSpanWithClass(child, classValue)
if found != nil {
return found
}
}
return nil
}
// Check if the specified element has the specified class attribute value
func hasClassAttribute(node *html.Node, classValue string) bool {
for _, attr := range node.Attr {
if attr.Key == "class" && strings.Contains(attr.Val, classValue) {
return true
}
}
return false
}
// Get Tables with days
func getEventTables(node *html.Node) [][]*html.Node {
var eventTables [][]*html.Node
tables := findTables(node)
// get all tables with events
for events := range tables {
rows := findTableRows(tables[events])
// check that a first row exists
if len(rows) > 0 {
rows = rows[1:]
eventTables = append(eventTables, rows)
}
}
return eventTables
}
// Get Tables with days
func getAllDayLabels(node *html.Node) []string {
paragraphs := findParagraphs(node)
var dayArray []string
for _, p := range paragraphs {
label := getDayLabel(p)
if label != "" {
dayArray = append(dayArray, label)
}
}
return dayArray
}
// Find all <p> elements in the HTML document
func findParagraphs(node *html.Node) []*html.Node {
var paragraphs []*html.Node
if node.Type == html.ElementNode && node.Data == "p" {
paragraphs = append(paragraphs, node)
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
paragraphs = append(paragraphs, findParagraphs(child)...)
}
return paragraphs
}
// Find all <tr> elements in <tbody>, excluding the first one
func findTableRows(node *html.Node) []*html.Node {
var tableRows []*html.Node
if node.Type == html.ElementNode && node.Data == "tbody" {
child := node.FirstChild
for child != nil {
if child.Type == html.ElementNode && child.Data == "tr" {
tableRows = append(tableRows, child)
}
child = child.NextSibling
}
}
// Traverse child nodes recursively
for child := node.FirstChild; child != nil; child = child.NextSibling {
var tableRowElement = findTableRows(child)
if tableRowElement != nil {
tableRows = append(tableRows, tableRowElement...)
}
}
// check if tableRows is nil
if tableRows == nil {
return []*html.Node{}
} else {
return tableRows
}
}
// Find all <p> elements in the HTML document
func findTables(node *html.Node) []*html.Node {
var tables []*html.Node
if node.Type == html.ElementNode && node.Data == "table" {
tables = append(tables, node)
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
tables = append(tables, findDayTables(child)...)
}
return tables
}
// Find all <p> elements in the HTML document
func findDayTables(node *html.Node) []*html.Node {
var tables []*html.Node
for child := node.FirstChild; child != nil; child = child.NextSibling {
tables = append(tables, findDayTables(child)...)
}
if node.Type == html.ElementNode && node.Data == "table" && hasClassAttribute(node, "spreadsheet") {
tables = append(tables, node)
}
return tables
}
// Get the text content of the specified node and its descendants
func getDayLabel(node *html.Node) string {
child := node.FirstChild
if child != nil {
if child.Type == html.ElementNode && child.Data == "span" {
if child.FirstChild != nil {
return child.FirstChild.Data
}
}
}
return ""
}
// Find all <td> elements in the current <tr>
func findTableData(node *html.Node) []*html.Node {
var tableData []*html.Node
if node.Type == html.ElementNode && node.Data == "tr" {
child := node.FirstChild
for child != nil {
if child.Type == html.ElementNode && child.Data == "td" {
tableData = append(tableData, child)
}
child = child.NextSibling
}
}
return tableData
}
// Get the text content of the specified node and its descendants
func getTextContent(node *html.Node) string {
var textContent string
if node.Type == html.TextNode {
textContent = node.Data
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
textContent += getTextContent(child)
}
return textContent
}
func splitEventsByWeek(events []model.Event) []model.Event {
var newEvents []model.Event
for _, event := range events {
weeks := strings.Split(event.Week, ",")
for _, week := range weeks {
newEvent := event
newEvent.Week = strings.TrimSpace(week)
newEvents = append(newEvents, newEvent)
}
}
return newEvents
}
func splitEventsBySingleWeek(events []model.Event) []model.Event {
var newEvents []model.Event
for _, event := range events {
if strings.Contains(event.Week, "-") {
weeks := splitWeekRange(event.Week)
for _, week := range weeks {
newEvent := event
newEvent.Week = week
newEvents = append(newEvents, newEvent)
}
} else {
newEvents = append(newEvents, event)
}
}
return newEvents
}
func splitWeekRange(weekRange string) []string {
parts := strings.Split(weekRange, "-")
if len(parts) != 2 {
return nil // Invalid format
}
start, errStart := strconv.Atoi(strings.TrimSpace(parts[0]))
end, errEnd := strconv.Atoi(strings.TrimSpace(parts[1]))
if errStart != nil || errEnd != nil {
return nil // Error converting to integers
}
var weeks []string
for i := start; i <= end; i++ {
weeks = append(weeks, strconv.Itoa(i))
}
return weeks
}
func extractSemesterAndYear(semesterString string) (string, string) {
winterPattern := "Wintersemester"
summerPattern := "Sommersemester"
winterMatch := strings.Contains(semesterString, winterPattern)
summerMatch := strings.Contains(semesterString, summerPattern)
semester := ""
semesterShortcut := ""
if winterMatch {
semester = "Wintersemester"
semesterShortcut = "ws"
} else if summerMatch {
semester = "Sommersemester"
semesterShortcut = "ss"
} else {
return "", ""
}
yearPattern := `\d{4}`
combinedPattern := semester + `\s` + yearPattern
re := regexp.MustCompile(combinedPattern)
match := re.FindString(semesterString)
year := ""
if match != "" {
reYear := regexp.MustCompile(yearPattern)
year = reYear.FindString(match)
}
return semesterShortcut, year
}
func convertWeeksToDates(events []model.Event, semester string, year string) []model.Event {
var newEvents []model.Event
eventYear, _ := strconv.Atoi(year)
// for each event we need to calculate the start and end date based on the week and the year
for _, event := range events {
eventWeek, _ := strconv.Atoi(event.Week)
eventDay, _ := date.GetDateFromWeekNumber(eventYear, eventWeek, event.Day)
start := replaceTimeForDate(eventDay, event.Start.Time())
end := replaceTimeForDate(eventDay, event.End.Time())
//Check if end is before start
if end.Before(start) {
end = end.AddDate(0, 0, 1)
}
newEvent := event
newEvent.Start, _ = types.ParseDateTime(start.In(time.UTC))
newEvent.End, _ = types.ParseDateTime(end.In(time.UTC))
newEvent.Semester = semester
newEvents = append(newEvents, newEvent)
}
return newEvents
}
// replaceTimeForDate replaces hour, minute, second, nsec for the selected date
func replaceTimeForDate(date time.Time, replacementTime time.Time) time.Time {
return time.Date(date.Year(), date.Month(), date.Day(), replacementTime.Hour(), replacementTime.Minute(), replacementTime.Second(), replacementTime.Nanosecond(), date.Location())
}