// File: proxy_tmdb/cron/ambiguity.js (JavaScript; 120 lines, 3.5 KiB as captured)

// Port of tmdbintegral/ambiguity.php
// Detects pairs of distinct TMDb ids whose filtered titles collide and whose
// years are within YEARTOLERANCE.
import { readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { pathToFileURL } from 'node:url';
import { NB_SEARCH_PARTS, TMDBINTEGRAL_DIR } from '../config.js';
// Positional indices into each entry (an array) of the search<type><p>.json
// files that buildAmbiguity reads. Only the slots used by this module are
// named here; the meaning of the other positions is not visible from this file.
const TMDB = 0; // TMDb id of the entry
const FILTEREDTITLE = 4; // filtered title, labelled 'FR' in the output
const FILTEREDENGLISHTITLE = 5; // filtered English title, labelled 'EN'
const FILTEREDORIGINALTITLE = 6; // filtered original title, labelled 'VO'
const YEAR = 7; // indexable year value; only element [0] is read
/**
 * Total order used to sort title records:
 * (title ASC, year ASC, tmdb ASC, language ASC).
 * Mirrors PHP's array_multisort(filteredtitles, years, tmdbs, languages).
 */
function compareAmbiguityRecords(a, b) {
  if (a.title < b.title) return -1;
  if (a.title > b.title) return 1;
  if (a.year !== b.year) return a.year - b.year;
  if (a.tmdb !== b.tmdb) return a.tmdb - b.tmdb;
  if (a.language < b.language) return -1;
  if (a.language > b.language) return 1;
  return 0;
}

/**
 * Build `ambiguity<type>.csv` in TMDBINTEGRAL_DIR.
 *
 * Reads `search<type>0.json` .. `search<type><nbParts-1>.json`, collects every
 * non-empty filtered title (French, English, original) of every entry, and
 * reports each ordered pair of records that share the exact same filtered
 * title, belong to different TMDb ids, and whose years differ by at most the
 * tolerance (200 for 'tv', 1 for any other type).
 *
 * Output format: one line per ordered pair, `tmdbA;langA;tmdbB;langB`, where
 * lang is 'FR', 'EN' or 'VO'. Every collision therefore appears in both
 * directions. The file is written empty when there is no collision.
 *
 * @param {string} type - Suffix used in the input and output file names
 *   (the CLI passes 'movie' or 'tv').
 * @param {number} [nbParts=NB_SEARCH_PARTS] - Number of search part files to read.
 * @returns {Promise<void>} Resolves once the CSV has been written.
 * @throws Rejects if a part file cannot be read or parsed, or if the CSV
 *   cannot be written.
 */
export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
  const yearTolerance = type === 'tv' ? 200 : 1;
  const out = join(TMDBINTEGRAL_DIR, `ambiguity${type}.csv`);

  // One record per (entry, language) whose filtered title is non-empty.
  const records = [];
  for (let p = 0; p < nbParts; p++) {
    const file = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    for (const entry of JSON.parse(readFileSync(file, 'utf8'))) {
      const titlesByLanguage = [
        ['FR', entry[FILTEREDTITLE]],
        ['EN', entry[FILTEREDENGLISHTITLE]],
        ['VO', entry[FILTEREDORIGINALTITLE]],
      ];
      for (const [language, title] of titlesByLanguage) {
        if (title) {
          records.push({
            title,
            year: entry[YEAR][0],
            tmdb: entry[TMDB],
            language,
          });
        }
      }
    }
  }

  // After sorting, records sharing a title are adjacent.
  records.sort(compareAmbiguityRecords);

  const lines = [];
  let start = 0;
  while (start < records.length) {
    // Find the end of the run of records sharing records[start].title.
    let end = start + 1;
    while (end < records.length && records[end].title === records[start].title) {
      end++;
    }

    // Every group is examined on its own. The previous implementation gated
    // this on a running count of distinct ids that was bumped by the first
    // record of the *next* group and reset before the group's own first record
    // was counted, so a title shared by exactly two ids was dropped whenever it
    // was the last group or the next group started with the same id.
    // The `a.tmdb !== b.tmdb` test below already makes single-id groups silent.
    for (let i = start; i < end; i++) {
      const a = records[i];
      for (let j = start; j < end; j++) {
        const b = records[j];
        if (a.tmdb !== b.tmdb && Math.abs(a.year - b.year) <= yearTolerance) {
          lines.push(`${a.tmdb};${a.language};${b.tmdb};${b.language}`);
        }
      }
    }

    start = end;
  }

  await writeFile(out, lines.length ? `${lines.join('\n')}\n` : '');
}
// CLI entry point: node cron/ambiguity.js movie|tv [nbParts]
// Runs only when this module is the script passed to node. The comparison goes
// through pathToFileURL() because import.meta.url is a percent-encoded file URL:
// naive `file://${path}` concatenation never matches when the path contains
// spaces or non-ASCII characters, or on Windows paths.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const type = process.argv[2];
  const nb = Number.parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
  // A non-numeric or non-positive part count would make buildAmbiguity read no
  // input and overwrite the CSV with an empty file, so reject it up front.
  if ((type !== 'movie' && type !== 'tv') || !Number.isInteger(nb) || nb < 1) {
    console.error('Usage: node cron/ambiguity.js movie|tv [nbParts]');
    process.exit(1);
  }
  buildAmbiguity(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}