mirror of
https://gitlab.dit.htwk-leipzig.de/htwk-software/htwkalender.git
synced 2026-01-17 03:52:26 +01:00
feat: introduce feed management for individual and professor modules across frontend and backend services.
This commit is contained in:
175
services/common/professor/matching.go
Normal file
175
services/common/professor/matching.go
Normal file
@@ -0,0 +1,175 @@
|
||||
package professor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ExtractNameFromEmail extracts the first and last name from a professor's email.
// Expected format: firstname.lastname@htwk-leipzig.de
//
// The local part (the text before the single "@") is split on "."; the first
// segment becomes the first name and the last segment the last name, so any
// middle segments ("a.b.c@...") are ignored. The first character of each name
// is upper-cased and the remainder is returned unchanged. The domain is not
// validated, and an empty segment (".x@...") yields an empty name without error.
//
// An error is returned when the address does not contain exactly one "@" or
// when the local part contains no "." separator.
func ExtractNameFromEmail(email string) (firstName, lastName string, err error) {
	parts := strings.Split(email, "@")
	if len(parts) != 2 {
		return "", "", fmt.Errorf("invalid email format")
	}

	nameParts := strings.Split(parts[0], ".")
	if len(nameParts) < 2 {
		return "", "", fmt.Errorf("email does not contain dot separator")
	}

	// capitalize upper-cases the first character of name. It works on whole
	// runes: slicing the first byte would cut a multi-byte character such as
	// "é" or "ö" in half and produce invalid UTF-8.
	capitalize := func(name string) string {
		// Ranging over a string yields the byte offset of every rune, so the
		// first non-zero offset is exactly the width of the first rune.
		for i := range name {
			if i > 0 {
				return strings.ToUpper(name[:i]) + name[i:]
			}
		}
		// Empty string or a single rune: upper-case the whole thing.
		return strings.ToUpper(name)
	}

	firstName = capitalize(nameParts[0])
	lastName = capitalize(nameParts[len(nameParts)-1])
	return firstName, lastName, nil
}
|
||||
|
||||
// CalculateConfidenceScore returns a score from 0.0 to 1.0 indicating how confident we are
|
||||
// that this professor string matches the given first and last name
|
||||
// 1.0 = perfect match (both first and last name exact)
|
||||
// 0.7-0.9 = good match (last name exact, first name fuzzy or present)
|
||||
// 0.4-0.6 = possible match (last name fuzzy or partial)
|
||||
// 0.1-0.3 = weak match (last name substring)
|
||||
// 0.0 = no match
|
||||
func CalculateConfidenceScore(profString, firstName, lastName string) float64 {
|
||||
// Normalize the professor string: remove common titles and split into words
|
||||
profString = strings.ToLower(profString)
|
||||
|
||||
// Remove common titles
|
||||
titles := []string{"prof.", "dr.", "arch.", "ing.", "dipl.", "m.sc.", "b.sc.", "ph.d."}
|
||||
for _, title := range titles {
|
||||
profString = strings.ReplaceAll(profString, title, "")
|
||||
}
|
||||
|
||||
// Split by spaces, hyphens, and other separators
|
||||
words := strings.FieldsFunc(profString, func(r rune) bool {
|
||||
return r == ' ' || r == '-' || r == ',' || r == '.'
|
||||
})
|
||||
|
||||
// Normalize firstName and lastName
|
||||
firstNameLower := strings.ToLower(firstName)
|
||||
lastNameLower := strings.ToLower(lastName)
|
||||
|
||||
lastNameExact := false
|
||||
lastNameFuzzy := false
|
||||
lastNameSubstring := false
|
||||
firstNameExact := false
|
||||
firstNameFuzzy := false
|
||||
|
||||
for _, word := range words {
|
||||
word = strings.TrimSpace(word)
|
||||
if word == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check last name
|
||||
if word == lastNameLower {
|
||||
lastNameExact = true
|
||||
} else if levenshteinDistance(word, lastNameLower) <= 1 && len(lastNameLower) > 3 {
|
||||
lastNameFuzzy = true
|
||||
} else if strings.Contains(word, lastNameLower) || strings.Contains(lastNameLower, word) {
|
||||
lastNameSubstring = true
|
||||
}
|
||||
|
||||
// Check first name
|
||||
if word == firstNameLower {
|
||||
firstNameExact = true
|
||||
} else if levenshteinDistance(word, firstNameLower) <= 1 && len(firstNameLower) > 3 {
|
||||
firstNameFuzzy = true
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate confidence score based on matches
|
||||
score := 0.0
|
||||
|
||||
if lastNameExact {
|
||||
if firstNameExact {
|
||||
score = 1.0 // Perfect match
|
||||
} else if firstNameFuzzy {
|
||||
score = 0.9 // Excellent match
|
||||
} else {
|
||||
score = 0.8 // Good match (last name exact, no first name match)
|
||||
}
|
||||
} else if lastNameFuzzy {
|
||||
if firstNameExact || firstNameFuzzy {
|
||||
score = 0.6 // Decent match (fuzzy last name but first name matches)
|
||||
} else {
|
||||
score = 0.5 // Medium match (fuzzy last name, no first name)
|
||||
}
|
||||
} else if lastNameSubstring {
|
||||
score = 0.2 // Weak match (substring only)
|
||||
}
|
||||
|
||||
return score
|
||||
}
|
||||
|
||||
// MatchesProfessor checks if the professor string matches the given last name (and optional first name)
|
||||
// It uses a simplified check suitable for filtering events where we want high recall but reasonable precision.
|
||||
// It returns true if the confidence score is > 0.
|
||||
func MatchesProfessor(profString, firstName, lastName string) bool {
|
||||
return CalculateConfidenceScore(profString, firstName, lastName) > 0
|
||||
}
|
||||
|
||||
// levenshteinDistance calculates the Levenshtein distance between two strings:
// the minimum number of single-character insertions, deletions and
// substitutions needed to turn s1 into s2.
//
// The strings are compared rune by rune rather than byte by byte, so a
// difference in one multi-byte character (for example "ü" versus "u") counts
// as a single edit.
func levenshteinDistance(s1, s2 string) int {
	a, b := []rune(s1), []rune(s2)
	if len(a) == 0 {
		return len(b)
	}
	if len(b) == 0 {
		return len(a)
	}

	// Only the previous and the current row of the DP table are ever read,
	// so two rows are kept instead of the full matrix.
	prev := make([]int, len(b)+1)
	curr := make([]int, len(b)+1)

	// Row 0: turning the empty prefix of a into b[:j] takes j insertions.
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= len(a); i++ {
		// Column 0: turning a[:i] into the empty string takes i deletions.
		curr[0] = i
		for j := 1; j <= len(b); j++ {
			cost := 0
			if a[i-1] != b[j-1] {
				cost = 1
			}
			curr[j] = min(
				prev[j]+1,      // deletion
				curr[j-1]+1,    // insertion
				prev[j-1]+cost, // substitution
			)
		}
		prev, curr = curr, prev
	}

	// After the final swap the last computed row lives in prev.
	return prev[len(b)]
}

// min returns the smallest of three ints.
//
// NOTE(review): since Go 1.21 there is a builtin min; this package-level
// definition takes precedence over it inside this package. It is kept because
// the module's Go version is not visible from this file.
func min(a, b, c int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	if c < smallest {
		smallest = c
	}
	return smallest
}
|
||||
Reference in New Issue
Block a user