// Port of tmdbintegral/ambiguity.php // Detects pairs of distinct TMDb ids whose filtered titles collide and whose // years are within YEARTOLERANCE. import { readFileSync } from 'node:fs'; import { writeFile } from 'node:fs/promises'; import { join } from 'node:path'; import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js'; const TMDB = 0; const FILTEREDTITLE = 4; const FILTEREDENGLISHTITLE = 5; const FILTEREDORIGINALTITLE = 6; const YEAR = 7; export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) { const yearTolerance = type === 'tv' ? 200 : 1; const out = join(TMDBINTEGRAL_DIR, `ambiguity${type}.csv`); const database = []; for (let p = 0; p < nbParts; p++) { const file = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`); const chunk = JSON.parse(readFileSync(file, 'utf8')); for (const e of chunk) database.push(e); } const tmdbs = []; const filteredTitles = []; const languages = []; const years = []; for (const db of database) { const fr = db[FILTEREDTITLE]; const en = db[FILTEREDENGLISHTITLE]; const vo = db[FILTEREDORIGINALTITLE]; if (fr) { tmdbs.push(db[TMDB]); filteredTitles.push(fr); years.push(db[YEAR][0]); languages.push('FR'); } if (en) { tmdbs.push(db[TMDB]); filteredTitles.push(en); years.push(db[YEAR][0]); languages.push('EN'); } if (vo) { tmdbs.push(db[TMDB]); filteredTitles.push(vo); years.push(db[YEAR][0]); languages.push('VO'); } } // PHP: array_multisort(filteredtitles, years, tmdbs, languages) // Sort indices by (filteredTitle ASC, year ASC, tmdb ASC, language ASC). const idx = filteredTitles.map((_, i) => i); idx.sort((a, b) => { if (filteredTitles[a] < filteredTitles[b]) return -1; if (filteredTitles[a] > filteredTitles[b]) return 1; if (years[a] !== years[b]) return years[a] - years[b]; if (tmdbs[a] !== tmdbs[b]) return tmdbs[a] - tmdbs[b]; if (languages[a] < languages[b]) return -1; if (languages[a] > languages[b]) return 1; return 0; }); const sortedTmdbs = idx.map((i) => tmdbs[i]); const sortedFiltered = idx.map((i) => filteredTitles[i]); const sortedYears = idx.map((i) => years[i]); const sortedLanguages = idx.map((i) => languages[i]); let oldTmdb = 0; let nbTmdbs = 0; let oldFiltered = ''; let ambiguities = []; const lines = []; const flush = () => { if (nbTmdbs >= 2) { for (const a1 of ambiguities) { for (const a2 of ambiguities) { if (a1[0] !== a2[0] && Math.abs(a1[1] - a2[1]) <= yearTolerance) { lines.push(`${a1[0]};${a1[2]};${a2[0]};${a2[2]}`); } } } } ambiguities = []; nbTmdbs = 0; }; for (let i = 0; i < sortedFiltered.length; i++) { if (sortedTmdbs[i] !== oldTmdb) nbTmdbs++; oldTmdb = sortedTmdbs[i]; if (sortedFiltered[i] !== oldFiltered) { flush(); } oldFiltered = sortedFiltered[i]; ambiguities.push([sortedTmdbs[i], sortedYears[i], sortedLanguages[i]]); } flush(); await writeFile(out, lines.length ? lines.join('\n') + '\n' : ''); } if (import.meta.url === `file://${process.argv[1]}`) { const type = process.argv[2]; const nb = parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10); if (type !== 'movie' && type !== 'tv') { console.error('Usage: node cron/ambiguity.js movie|tv [nbParts]'); process.exit(1); } buildAmbiguity(type, nb).catch((err) => { console.error(err); process.exit(1); }); }