// Port of tmdbintegral/ambiguity.php
// Detects pairs of distinct TMDb ids whose filtered titles collide and whose
// years are within the year tolerance (YEARTOLERANCE in the PHP original).

import { readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { pathToFileURL } from 'node:url';

import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';

// Column indices into one entry (an array) of the search<type><p>.json parts.

/** Column holding the TMDb id of the entry. */
const TMDB = 0;
/** Column holding the filtered title that is tagged 'FR' in the output. */
const FILTEREDTITLE = 4;
/** Column holding the filtered English title, tagged 'EN' in the output. */
const FILTEREDENGLISHTITLE = 5;
/** Column holding the filtered original title, tagged 'VO' in the output. */
const FILTEREDORIGINALTITLE = 6;
/** Column holding an indexable value whose element [0] is used as the entry's year. */
const YEAR = 7;

/**
 * Builds `ambiguity<type>.csv` in TMDBINTEGRAL_DIR from the
 * `search<type><p>.json` part files found in the same directory.
 *
 * Every entry contributes one record per non-empty filtered title (tagged FR,
 * EN or VO). Records sharing the same filtered title form a group. For each
 * ordered pair of records in a group whose TMDb ids differ and whose years are
 * at most the year tolerance apart, one line `id1;lang1;id2;lang2` is written,
 * so every colliding pair appears in both orders. The file is empty when no
 * pair qualifies.
 *
 * @param {string} type - Used in the file names; 'tv' selects a year tolerance
 *   of 200, any other value a tolerance of 1.
 * @param {number} [nbParts=NB_SEARCH_PARTS] - Number of part files to read
 *   (`p` runs from 0 to nbParts - 1).
 * @returns {Promise<void>} Resolves once the CSV has been written; rejects if
 *   a part file cannot be read or parsed, or if the output cannot be written.
 */
export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
  const yearTolerance = type === 'tv' ? 200 : 1;
  const out = join(TMDBINTEGRAL_DIR, `ambiguity${type}.csv`);

  // Title columns in the order they are expanded, with their output tag.
  const titleColumns = [
    [FILTEREDTITLE, 'FR'],
    [FILTEREDENGLISHTITLE, 'EN'],
    [FILTEREDORIGINALTITLE, 'VO'],
  ];

  // One record per (entry, non-empty filtered title).
  const records = [];
  for (let p = 0; p < nbParts; p++) {
    const file = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    for (const entry of JSON.parse(readFileSync(file, 'utf8'))) {
      for (const [column, language] of titleColumns) {
        const title = entry[column];
        if (title) {
          // The year is only read for entries that have a title, so an entry
          // without any title never needs a year column.
          records.push({ title, year: entry[YEAR][0], tmdb: entry[TMDB], language });
        }
      }
    }
  }

  // PHP: array_multisort(filteredtitles, years, tmdbs, languages)
  // i.e. order by (title ASC, year ASC, tmdb ASC, language ASC).
  records.sort((a, b) => {
    if (a.title < b.title) return -1;
    if (a.title > b.title) return 1;
    if (a.year !== b.year) return a.year - b.year;
    if (a.tmdb !== b.tmdb) return a.tmdb - b.tmdb;
    if (a.language < b.language) return -1;
    if (a.language > b.language) return 1;
    return 0;
  });

  // Walk the sorted records one title group at a time. A group is delimited
  // by the title alone; no separate "at least two ids" gate is kept because
  // the id inequality test below already yields nothing for a single-id
  // group, and a running counter across group boundaries can miss real
  // collisions (e.g. in the last group of the list).
  const lines = [];
  let start = 0;
  while (start < records.length) {
    let end = start + 1;
    while (end < records.length && records[end].title === records[start].title) {
      end++;
    }
    const group = records.slice(start, end);
    for (const first of group) {
      for (const second of group) {
        const isOtherId = first.tmdb !== second.tmdb;
        if (isOtherId && Math.abs(first.year - second.year) <= yearTolerance) {
          lines.push(`${first.tmdb};${first.language};${second.tmdb};${second.language}`);
        }
      }
    }
    start = end;
  }

  await writeFile(out, lines.length ? `${lines.join('\n')}\n` : '');
}

// Command-line entry point: node cron/ambiguity.js movie|tv [nbParts]
// The script path is converted with pathToFileURL before comparing it to
// import.meta.url, so the check also holds for paths that need
// percent-encoding (spaces, non-ASCII characters) and for Windows paths.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const type = process.argv[2];
  const nb = Number.parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
  // A non-numeric or negative part count would make the build read no part
  // file and silently write an empty CSV, so it is rejected up front.
  const isValidType = type === 'movie' || type === 'tv';
  if (!isValidType || Number.isNaN(nb) || nb < 0) {
    console.error('Usage: node cron/ambiguity.js movie|tv [nbParts]');
    process.exit(1);
  }
  buildAmbiguity(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}