// Package professor provides helpers for deriving a professor's name from an
// email address and for scoring how well a free-form professor string matches
// a given first and last name.
package professor

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

// ExtractNameFromEmail extracts the first and last name from a professor's email.
// Expected format: firstname.lastname@htwk-leipzig.de
//
// The address must contain exactly one "@" and the part before it must contain
// at least one ".". The first dot-separated segment becomes the first name and
// the last segment becomes the last name; segments in between are ignored.
// Both names are returned with their first letter upper-cased. An empty
// segment (e.g. ".smith@...") yields an empty name rather than an error.
func ExtractNameFromEmail(email string) (firstName, lastName string, err error) {
	if strings.Count(email, "@") != 1 {
		return "", "", fmt.Errorf("invalid email format")
	}
	local := email[:strings.IndexByte(email, '@')]

	nameParts := strings.Split(local, ".")
	if len(nameParts) < 2 {
		return "", "", fmt.Errorf("email does not contain dot separator")
	}

	firstName = capitalizeFirst(nameParts[0])
	lastName = capitalizeFirst(nameParts[len(nameParts)-1])
	return firstName, lastName, nil
}

// capitalizeFirst upper-cases the first rune of s and leaves the rest as is.
// It slices on a rune boundary so that a leading multi-byte character is not
// split in half. An empty string is returned unchanged.
func capitalizeFirst(s string) string {
	if s == "" {
		return s
	}
	_, size := utf8.DecodeRuneInString(s)
	return strings.ToUpper(s[:size]) + s[size:]
}

// CalculateConfidenceScore returns a score from 0.0 to 1.0 indicating how confident we are
// that this professor string matches the given first and last name.
//
//	1.0 = last name exact, first name exact
//	0.9 = last name exact, first name within edit distance 1
//	0.8 = last name exact, no first name match
//	0.6 = last name within edit distance 1, first name exact or fuzzy
//	0.5 = last name within edit distance 1, no first name match
//	0.2 = last name only matches as a substring
//	0.0 = no match
//
// The comparison is case-insensitive. Common academic titles are stripped from
// profString before it is split into words on spaces, hyphens, commas and dots.
// Fuzzy matching is only attempted for names longer than three characters.
// An empty lastName never matches.
func CalculateConfidenceScore(profString, firstName, lastName string) float64 {
	// Substring matches are only accepted when the contained string has at
	// least this many characters; otherwise a bare initial such as "A."
	// would "match" every last name that contains the letter a.
	const minPartialLen = 3

	firstNameLower := strings.ToLower(firstName)
	lastNameLower := strings.ToLower(lastName)
	if lastNameLower == "" {
		// Every string contains the empty string, so without this guard an
		// empty last name would be reported as a substring match of anything.
		return 0.0
	}

	// Normalize the professor string: lower-case it and remove common titles.
	profString = strings.ToLower(profString)
	titles := []string{"prof.", "dr.", "arch.", "ing.", "dipl.", "m.sc.", "b.sc.", "ph.d."}
	for _, title := range titles {
		profString = strings.ReplaceAll(profString, title, "")
	}

	// Split by spaces, hyphens, and other separators.
	words := strings.FieldsFunc(profString, func(r rune) bool {
		return r == ' ' || r == '-' || r == ',' || r == '.'
	})

	// Lengths are measured in runes so that umlauts count as one character.
	lastLen := utf8.RuneCountInString(lastNameLower)
	lastFuzzyAllowed := lastLen > 3
	firstFuzzyAllowed := utf8.RuneCountInString(firstNameLower) > 3

	var (
		lastNameExact     bool
		lastNameFuzzy     bool
		lastNameSubstring bool
		firstNameExact    bool
		firstNameFuzzy    bool
	)

	for _, word := range words {
		word = strings.TrimSpace(word)
		if word == "" {
			continue
		}
		wordLen := utf8.RuneCountInString(word)

		// Check last name: the strongest applicable category wins per word.
		switch {
		case word == lastNameLower:
			lastNameExact = true
		case lastFuzzyAllowed && levenshteinDistance(word, lastNameLower) <= 1:
			lastNameFuzzy = true
		case (lastLen >= minPartialLen && strings.Contains(word, lastNameLower)) ||
			(wordLen >= minPartialLen && strings.Contains(lastNameLower, word)):
			lastNameSubstring = true
		}

		// Check first name.
		switch {
		case word == firstNameLower:
			firstNameExact = true
		case firstFuzzyAllowed && levenshteinDistance(word, firstNameLower) <= 1:
			firstNameFuzzy = true
		}
	}

	// Translate the collected match flags into a score, best category first.
	switch {
	case lastNameExact && firstNameExact:
		return 1.0 // Perfect match
	case lastNameExact && firstNameFuzzy:
		return 0.9 // Excellent match
	case lastNameExact:
		return 0.8 // Good match (last name exact, no first name match)
	case lastNameFuzzy && (firstNameExact || firstNameFuzzy):
		return 0.6 // Decent match (fuzzy last name but first name matches)
	case lastNameFuzzy:
		return 0.5 // Medium match (fuzzy last name, no first name)
	case lastNameSubstring:
		return 0.2 // Weak match (substring only)
	default:
		return 0.0
	}
}

// MatchesProfessor checks if the professor string matches the given last name (and optional first name).
// It uses a simplified check suitable for filtering events where we want high recall but reasonable precision.
// It returns true if the confidence score from CalculateConfidenceScore is > 0.
func MatchesProfessor(profString, firstName, lastName string) bool {
	return CalculateConfidenceScore(profString, firstName, lastName) > 0
}

// levenshteinDistance calculates the Levenshtein distance between two strings,
// i.e. the minimum number of single-character insertions, deletions and
// substitutions needed to turn s1 into s2. Characters are compared as runes,
// so a multi-byte character such as "ü" counts as one edit.
func levenshteinDistance(s1, s2 string) int {
	a, b := []rune(s1), []rune(s2)
	if len(a) == 0 {
		return len(b)
	}
	if len(b) == 0 {
		return len(a)
	}

	// Only the previous and the current row of the DP matrix are needed.
	prev := make([]int, len(b)+1)
	curr := make([]int, len(b)+1)
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= len(a); i++ {
		curr[0] = i
		for j := 1; j <= len(b); j++ {
			cost := 0
			if a[i-1] != b[j-1] {
				cost = 1
			}
			curr[j] = min(
				prev[j]+1,      // deletion
				curr[j-1]+1,    // insertion
				prev[j-1]+cost, // substitution
			)
		}
		prev, curr = curr, prev
	}

	// After the final swap, prev holds the last completed row.
	return prev[len(b)]
}

// min returns the smallest of three ints. Within this package it shadows the
// predeclared min function available since Go 1.21.
func min(a, b, c int) int {
	if a < b {
		if a < c {
			return a
		}
		return c
	}
	if b < c {
		return b
	}
	return c
}