mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2026-01-17 03:52:26 +01:00
285 lines
7.4 KiB
Go
285 lines
7.4 KiB
Go
package professor
|
|
|
|
import (
|
|
"fmt"
|
|
"htwkalender/data-manager/model"
|
|
"log/slog"
|
|
"strings"
|
|
|
|
"github.com/pocketbase/dbx"
|
|
"github.com/pocketbase/pocketbase"
|
|
)
|
|
|
|
type ProfessorService struct {
|
|
app *pocketbase.PocketBase
|
|
}
|
|
|
|
func NewProfessorService(app *pocketbase.PocketBase) *ProfessorService {
|
|
return &ProfessorService{app: app}
|
|
}
|
|
|
|
func (s *ProfessorService) GetModulesForProfessor(email string) ([]model.ModuleDTO, error) {
|
|
// Extract name from email
|
|
// Format: firstname.lastname@htwk-leipzig.de
|
|
parts := strings.Split(email, "@")
|
|
if len(parts) != 2 {
|
|
return nil, fmt.Errorf("invalid email format")
|
|
}
|
|
|
|
nameParts := strings.Split(parts[0], ".")
|
|
if len(nameParts) < 2 {
|
|
slog.Warn("Email does not contain dot separator", "email", email)
|
|
return []model.ModuleDTO{}, nil
|
|
}
|
|
|
|
// Extract first and last name
|
|
firstName := nameParts[0]
|
|
lastName := nameParts[len(nameParts)-1]
|
|
|
|
// Capitalize first letter
|
|
if len(firstName) > 0 {
|
|
firstName = strings.ToUpper(firstName[:1]) + firstName[1:]
|
|
}
|
|
if len(lastName) > 0 {
|
|
lastName = strings.ToUpper(lastName[:1]) + lastName[1:]
|
|
}
|
|
|
|
slog.Info("Searching for modules for professor", "firstName", firstName, "lastName", lastName, "email", email)
|
|
|
|
// First, get all distinct modules with their professors
|
|
type EventProf struct {
|
|
Name string `db:"Name" json:"name"`
|
|
EventType string `db:"EventType" json:"eventType"`
|
|
Prof string `db:"Prof" json:"prof"`
|
|
Course string `db:"course" json:"course"`
|
|
Semester string `db:"semester" json:"semester"`
|
|
UUID string `db:"uuid" json:"uuid"`
|
|
}
|
|
|
|
var allEvents []EventProf
|
|
err := s.app.DB().
|
|
Select("Name", "EventType", "Prof", "course", "semester", "uuid").
|
|
From("events").
|
|
Where(dbx.NewExp("Prof != ''")).
|
|
GroupBy("Name", "course", "Prof").
|
|
Distinct(true).
|
|
All(&allEvents)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Filter events by matching professor name and calculate confidence scores
|
|
var modules []model.ModuleDTO
|
|
seenModules := make(map[string]bool) // key: Name+Course to avoid duplicates
|
|
|
|
for _, event := range allEvents {
|
|
score := calculateConfidenceScore(event.Prof, firstName, lastName)
|
|
if score > 0 { // Include all modules with any match
|
|
key := event.Name + "|" + event.Course
|
|
if !seenModules[key] {
|
|
modules = append(modules, model.ModuleDTO{
|
|
Name: event.Name,
|
|
EventType: event.EventType,
|
|
Prof: event.Prof,
|
|
Course: event.Course,
|
|
Semester: event.Semester,
|
|
UUID: event.UUID,
|
|
ConfidenceScore: score,
|
|
})
|
|
seenModules[key] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
slog.Info("Found modules for professor", "count", len(modules), "lastName", lastName)
|
|
return modules, nil
|
|
}
|
|
|
|
// calculateConfidenceScore returns a score from 0.0 to 1.0 indicating how confident we are
|
|
// that this professor string matches the given first and last name
|
|
// 1.0 = perfect match (both first and last name exact)
|
|
// 0.7-0.9 = good match (last name exact, first name fuzzy or present)
|
|
// 0.4-0.6 = possible match (last name fuzzy or partial)
|
|
// 0.1-0.3 = weak match (last name substring)
|
|
// 0.0 = no match
|
|
func calculateConfidenceScore(profString, firstName, lastName string) float64 {
|
|
// Normalize the professor string: remove common titles and split into words
|
|
profString = strings.ToLower(profString)
|
|
|
|
// Remove common titles
|
|
titles := []string{"prof.", "dr.", "arch.", "ing.", "dipl.", "m.sc.", "b.sc.", "ph.d."}
|
|
for _, title := range titles {
|
|
profString = strings.ReplaceAll(profString, title, "")
|
|
}
|
|
|
|
// Split by spaces, hyphens, and other separators
|
|
words := strings.FieldsFunc(profString, func(r rune) bool {
|
|
return r == ' ' || r == '-' || r == ',' || r == '.'
|
|
})
|
|
|
|
// Normalize firstName and lastName
|
|
firstNameLower := strings.ToLower(firstName)
|
|
lastNameLower := strings.ToLower(lastName)
|
|
|
|
lastNameExact := false
|
|
lastNameFuzzy := false
|
|
lastNameSubstring := false
|
|
firstNameExact := false
|
|
firstNameFuzzy := false
|
|
|
|
for _, word := range words {
|
|
word = strings.TrimSpace(word)
|
|
if word == "" {
|
|
continue
|
|
}
|
|
|
|
// Check last name
|
|
if word == lastNameLower {
|
|
lastNameExact = true
|
|
} else if levenshteinDistance(word, lastNameLower) <= 1 && len(lastNameLower) > 3 {
|
|
lastNameFuzzy = true
|
|
} else if strings.Contains(word, lastNameLower) || strings.Contains(lastNameLower, word) {
|
|
lastNameSubstring = true
|
|
}
|
|
|
|
// Check first name
|
|
if word == firstNameLower {
|
|
firstNameExact = true
|
|
} else if levenshteinDistance(word, firstNameLower) <= 1 && len(firstNameLower) > 3 {
|
|
firstNameFuzzy = true
|
|
}
|
|
}
|
|
|
|
// Calculate confidence score based on matches
|
|
score := 0.0
|
|
|
|
if lastNameExact {
|
|
if firstNameExact {
|
|
score = 1.0 // Perfect match
|
|
} else if firstNameFuzzy {
|
|
score = 0.9 // Excellent match
|
|
} else {
|
|
score = 0.8 // Good match (last name exact, no first name match)
|
|
}
|
|
} else if lastNameFuzzy {
|
|
if firstNameExact || firstNameFuzzy {
|
|
score = 0.6 // Decent match (fuzzy last name but first name matches)
|
|
} else {
|
|
score = 0.5 // Medium match (fuzzy last name, no first name)
|
|
}
|
|
} else if lastNameSubstring {
|
|
score = 0.2 // Weak match (substring only)
|
|
}
|
|
|
|
return score
|
|
}
|
|
|
|
// matchesProfessor is deprecated, use calculateConfidenceScore instead
|
|
func matchesProfessor(profString, firstName, lastName string) bool {
|
|
// Normalize the professor string: remove common titles and split into words
|
|
profString = strings.ToLower(profString)
|
|
|
|
// Remove common titles
|
|
titles := []string{"prof.", "dr.", "arch.", "ing.", "dipl.", "m.sc.", "b.sc.", "ph.d."}
|
|
for _, title := range titles {
|
|
profString = strings.ReplaceAll(profString, title, "")
|
|
}
|
|
|
|
// Split by spaces, hyphens, and other separators
|
|
words := strings.FieldsFunc(profString, func(r rune) bool {
|
|
return r == ' ' || r == '-' || r == ',' || r == '.'
|
|
})
|
|
|
|
// Normalize firstName and lastName
|
|
firstNameLower := strings.ToLower(firstName)
|
|
lastNameLower := strings.ToLower(lastName)
|
|
|
|
lastNameFound := false
|
|
firstNameFound := false
|
|
|
|
for _, word := range words {
|
|
word = strings.TrimSpace(word)
|
|
if word == "" {
|
|
continue
|
|
}
|
|
|
|
// Exact match for last name
|
|
if word == lastNameLower {
|
|
lastNameFound = true
|
|
}
|
|
|
|
// Exact match for first name (optional, but increases confidence)
|
|
if word == firstNameLower {
|
|
firstNameFound = true
|
|
}
|
|
|
|
// Also check Levenshtein distance for typos
|
|
if !lastNameFound && levenshteinDistance(word, lastNameLower) <= 1 {
|
|
lastNameFound = true
|
|
}
|
|
if !firstNameFound && levenshteinDistance(word, firstNameLower) <= 1 {
|
|
firstNameFound = true
|
|
}
|
|
}
|
|
|
|
// Match if last name is found (first name is optional for additional confidence)
|
|
// We require at least the last name to match
|
|
return lastNameFound
|
|
}
|
|
|
|
// levenshteinDistance returns the Levenshtein (edit) distance between s1 and
// s2: the minimum number of single-character insertions, deletions and
// substitutions needed to turn one into the other.
//
// The strings are compared character by character (as runes), not byte by
// byte, so a multi-byte character such as "ü" counts as one edit.
func levenshteinDistance(s1, s2 string) int {
	a, b := []rune(s1), []rune(s2)
	if len(a) == 0 {
		return len(b)
	}
	if len(b) == 0 {
		return len(a)
	}

	// Only the previous and the current row of the DP matrix are needed.
	prev := make([]int, len(b)+1)
	curr := make([]int, len(b)+1)
	for j := range prev {
		prev[j] = j // distance from the empty prefix of a to b[:j]
	}

	for i := 1; i <= len(a); i++ {
		curr[0] = i // distance from a[:i] to the empty prefix of b
		for j := 1; j <= len(b); j++ {
			cost := 0
			if a[i-1] != b[j-1] {
				cost = 1
			}

			best := prev[j] + 1 // deletion
			if ins := curr[j-1] + 1; ins < best {
				best = ins // insertion
			}
			if sub := prev[j-1] + cost; sub < best {
				best = sub // substitution (or match when cost is 0)
			}
			curr[j] = best
		}
		prev, curr = curr, prev
	}

	// After the final swap, prev holds the last computed row.
	return prev[len(b)]
}
|
|
|
|
// min returns the smallest of the three given integers.
func min(a, b, c int) int {
	smallest := c
	if b < smallest {
		smallest = b
	}
	if a < smallest {
		smallest = a
	}
	return smallest
}
|