Files
hitstar/src/server-deno/application/AnswerCheckService.ts

318 lines
8.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Answer checking service for fuzzy matching of title, artist, and year guesses
* Based on the original answerCheck.js logic
*/
/**
* Outcome of scoring a single guess against the correct answer.
*/
export interface ScoreResult {
/** Similarity score; the scoring methods in this file produce values from 0 (no similarity) to 1 (exact match). */
score: number;
/** Whether the guess is accepted as correct. */
match: boolean;
}
/**
 * Fuzzy answer checker for title, artist and year guesses.
 *
 * Text guesses are normalized (case, diacritics, punctuation, common
 * contractions, connector words) and then compared exactly; if that fails,
 * a blend of token-set Jaccard similarity and Levenshtein similarity decides.
 */
export class AnswerCheckService {
  /** Minimum blended similarity at which a fuzzy title/artist guess is accepted. */
  private static readonly ACCEPT_THRESHOLD = 0.6;

  /**
   * Apostrophe look-alikes that users and metadata providers use in place of
   * the ASCII apostrophe: curly quotes, modifier apostrophe, prime, backtick
   * and acute accent.
   */
  private static readonly APOSTROPHE_VARIANTS = /[\u2018\u2019\u201b\u02bc\u2032\u0060\u00b4]/g;

  /**
   * Contractions whose apostrophe is dropped (instead of becoming a space)
   * so that "don't" and "dont" normalize to the same token.
   */
  private static readonly CONTRACTIONS = new RegExp(
    '\\b(?:' +
      [
        "can't", "won't", "don't", "didn't", "isn't", "aren't", "wasn't",
        "weren't", "hasn't", "haven't", "hadn't", "shouldn't", "wouldn't",
        "couldn't", "mustn't", "i'm", "you're", "he's", "she's", "it's",
        "we're", "they're", "i've", "you've", "we've", "they've", "i'll",
        "you'll", "he'll", "she'll", "we'll", "they'll", "that's", "who's",
        "what's", "where's", "when's", "why's", "how's",
      ].join('|') +
      ')\\b',
    'g',
  );

  /**
   * Connector words, matched only as whole words so that e.g. "sandstorm",
   * "taxi" or "without" are left intact. "&" and the multiplication sign
   * need no entry here: the punctuation pass turns them into spaces.
   */
  private static readonly CONNECTORS =
    /(?<![\p{L}\p{N}])(?:and|x|with|vs|feat|featuring)(?![\p{L}\p{N}])\.?/gu;

  /** Parenthetical annotations that describe a release variant rather than the title. */
  private static readonly PAREN_NOISE =
    /\([^)]*(?:remaster|edit|version|live|mono|stereo)[^)]*\)/gi;

  /** Free-standing release-variant words. */
  private static readonly WORD_NOISE =
    /\b(remaster(?:ed)?(?: \d{2,4})?|radio edit|single version|original mix|version|live)\b/gi;

  /**
   * Remove combining diacritical marks after compatibility decomposition,
   * e.g. "é" becomes "e".
   */
  private stripDiacritics(str: string): string {
    return str.normalize('NFKD').replace(/[\u0300-\u036f]/g, '');
  }

  /** Collapse whitespace runs to single spaces and trim both ends. */
  private collapseWhitespace(str: string): string {
    return str.replace(/\s+/g, ' ').trim();
  }

  /** Replace everything that is not a letter, digit or whitespace with a space. */
  private stripPunctuation(str: string): string {
    return str.replace(/[^\p{L}\p{N}\s]/gu, ' ');
  }

  /**
   * Shared normalization for titles and artists: lower-case, no diacritics,
   * contractions joined, connector words and punctuation removed.
   *
   * If that would leave nothing (e.g. "X", "And", "!!!"), progressively less
   * aggressive forms are returned instead, so distinct inputs do not all
   * collapse to the same empty string.
   */
  private normalizeCommon(str: string): string {
    // Apostrophe variants are unified before NFKD, which would otherwise
    // decompose the acute accent into a space plus a combining mark.
    const base = this.stripDiacritics(
      str.replace(AnswerCheckService.APOSTROPHE_VARIANTS, "'"),
    )
      .toLowerCase()
      .replace(AnswerCheckService.CONTRACTIONS, (hit) => hit.replace("'", ''));

    const full = this.collapseWhitespace(
      this.stripPunctuation(base.replace(AnswerCheckService.CONNECTORS, ' ')),
    );
    if (full) return full;

    // Input consisted only of connector words: keep them.
    const keepConnectors = this.collapseWhitespace(this.stripPunctuation(base));
    if (keepConnectors) return keepConnectors;

    // Input consisted only of punctuation/symbols: keep it verbatim.
    return this.collapseWhitespace(base);
  }

  /**
   * Remove title-specific noise such as "(Remastered 2011)", "(Radio Edit)",
   * "(Live at ...)" and free-standing words like "version" or "live".
   */
  private cleanTitleNoise(raw: string): string {
    return raw
      .replace(AnswerCheckService.PAREN_NOISE, '')
      .replace(AnswerCheckService.WORD_NOISE, '');
  }

  /** Remove quoted, parenthesized and bracketed segments. */
  private stripOptionalSegments(raw: string): string {
    return raw
      .replace(/"[^"]*"/g, ' ')
      .replace(/\([^)]*\)/g, ' ')
      .replace(/\[[^\]]*\]/g, ' ');
  }

  /**
   * Normalize a title for comparison. Falls back to the un-cleaned title
   * when noise removal would erase it entirely (e.g. a song called "Live").
   */
  private normalizeTitle(str: string): string {
    return this.normalizeCommon(this.cleanTitleNoise(str)) || this.normalizeCommon(str);
  }

  /**
   * Normalize a title with optional segments removed. May return an empty
   * string when the whole title is optional; callers must check for that.
   */
  private normalizeTitleBaseOptional(str: string): string {
    return this.normalizeCommon(this.stripOptionalSegments(this.cleanTitleNoise(str)));
  }

  /**
   * Normalize an artist name for comparison, dropping the article "the"
   * unless nothing else would remain (e.g. "The The").
   */
  private normalizeArtist(str: string): string {
    const common = this.normalizeCommon(str);
    const withoutArticle = this.collapseWhitespace(common.replace(/\bthe\b/g, ' '));
    return withoutArticle || common;
  }

  /** Split a normalized string into its non-empty space-separated tokens. */
  private tokenize(str: string): string[] {
    return str ? str.split(' ').filter(Boolean) : [];
  }

  /** Distinct tokens of a normalized string. */
  private tokenSet(str: string): Set<string> {
    return new Set(this.tokenize(str));
  }

  /**
   * Jaccard similarity of the token sets of `a` and `b`
   * (|intersection| / |union|); two empty sets count as identical.
   */
  private jaccard(a: string, b: string): number {
    const setA = this.tokenSet(a);
    const setB = this.tokenSet(b);
    if (setA.size === 0 && setB.size === 0) return 1;

    let intersection = 0;
    for (const token of setA) {
      if (setB.has(token)) intersection++;
    }
    const union = setA.size + setB.size - intersection;
    return union ? intersection / union : 0;
  }

  /**
   * Levenshtein edit distance between `a` and `b`, computed over UTF-16
   * code units with a single rolling row.
   */
  private levenshtein(a: string, b: string): number {
    const m = a.length;
    const n = b.length;
    if (!m) return n;
    if (!n) return m;

    // row[j] holds the distance between the processed prefix of `a` and b[0..j).
    const row: number[] = Array.from({ length: n + 1 }, (_, j) => j);
    for (let i = 1; i <= m; i++) {
      let diagonal = row[0]; // value of the previous row at column j - 1
      row[0] = i;
      for (let j = 1; j <= n; j++) {
        const above = row[j];
        const cost = a[i - 1] === b[j - 1] ? 0 : 1;
        row[j] = Math.min(above + 1, row[j - 1] + 1, diagonal + cost);
        diagonal = above;
      }
    }
    return row[n];
  }

  /**
   * Similarity in [0, 1] derived from the Levenshtein distance:
   * 1 for identical strings, 0 when exactly one side is empty.
   */
  private simRatio(a: string, b: string): number {
    if (!a && !b) return 1;
    if (!a || !b) return 0;
    return 1 - this.levenshtein(a, b) / Math.max(a.length, b.length);
  }

  /** Blend of token overlap (60%) and character-level similarity (40%). */
  private fuzzyScore(a: string, b: string): number {
    return 0.6 * this.jaccard(a, b) + 0.4 * this.simRatio(a, b);
  }

  /**
   * Split a credit string into individual artist names on ",", "&", "+"
   * and on the whitespace-delimited words feat/featuring/with/vs/and/x.
   *
   * @param raw Artist credit, e.g. "A feat. B & C".
   * @returns Trimmed, non-empty artist names in their original order.
   */
  splitArtists(raw: string): string[] {
    return raw
      .split(/[,&+]|(?:\s+(?:feat\.?|featuring|with|vs\.?|and|x)\s+)/i)
      .map((s) => s.trim())
      .filter(Boolean);
  }

  /**
   * Score a title guess.
   *
   * @param guess   Title entered by the player.
   * @param correct Reference title.
   * @returns Score 1 for a normalized exact match, 0.98 when the titles agree
   *          after dropping optional (quoted/parenthesized/bracketed) parts,
   *          otherwise the fuzzy score, which matches from 0.6 upwards.
   */
  scoreTitle(guess: string, correct: string): ScoreResult {
    if (!guess || !correct) {
      return { score: 0, match: false };
    }

    const g = this.normalizeTitle(guess);
    const c = this.normalizeTitle(correct);
    // Whitespace-only input normalizes to nothing and must never match.
    if (!g || !c) {
      return { score: 0, match: false };
    }
    if (g === c) {
      return { score: 1.0, match: true };
    }

    const gOpt = this.normalizeTitleBaseOptional(guess);
    const cOpt = this.normalizeTitleBaseOptional(correct);
    if (gOpt.length > 0 && gOpt === cOpt) {
      return { score: 0.98, match: true };
    }

    const score = this.fuzzyScore(g, c);
    return { score, match: score >= AnswerCheckService.ACCEPT_THRESHOLD };
  }

  /**
   * Score an artist guess. Both strings are split into individual artists;
   * the best-scoring pair of one guessed and one credited artist decides.
   *
   * @param guess   Artist(s) entered by the player.
   * @param correct Reference artist credit.
   * @returns Score 1 when any pair matches exactly after normalization,
   *          otherwise the best fuzzy score, which matches from 0.6 upwards.
   */
  scoreArtist(guess: string, correct: string): ScoreResult {
    if (!guess || !correct) {
      return { score: 0, match: false };
    }

    // Parts that normalize to nothing are dropped so that two empty strings
    // can never be mistaken for an exact match.
    const guessParts = this.splitArtists(guess)
      .map((p) => this.normalizeArtist(p))
      .filter(Boolean);
    const correctParts = this.splitArtists(correct)
      .map((p) => this.normalizeArtist(p))
      .filter(Boolean);

    let bestScore = 0;
    for (const gPart of guessParts) {
      for (const cPart of correctParts) {
        if (gPart === cPart) {
          return { score: 1.0, match: true };
        }
        bestScore = Math.max(bestScore, this.fuzzyScore(gPart, cPart));
      }
    }
    return { score: bestScore, match: bestScore >= AnswerCheckService.ACCEPT_THRESHOLD };
  }

  /**
   * Score a year guess.
   *
   * @param guess   Guessed year; strings are parsed as base-10 integers.
   * @param correct Reference year, or `null` when unknown.
   * @returns Score 1 for the exact year, 0.9 (match) within one year,
   *          0.7 (no match) within two years, otherwise 0. An unknown
   *          reference year or an unparsable guess scores 0.
   */
  scoreYear(guess: number | string, correct: number | null): ScoreResult {
    if (correct == null || !Number.isFinite(correct)) {
      return { score: 0, match: false };
    }

    const guessNum = typeof guess === 'string' ? Number.parseInt(guess, 10) : guess;
    if (!Number.isFinite(guessNum)) {
      return { score: 0, match: false };
    }

    const diff = Math.abs(guessNum - correct);
    if (diff === 0) {
      return { score: 1.0, match: true };
    }
    if (diff <= 1) {
      return { score: 0.9, match: true };
    }
    if (diff <= 2) {
      // Close, but only partial credit.
      return { score: 0.7, match: false };
    }
    return { score: 0, match: false };
  }
}