Portage complet PHP/Bash vers Node.js (Fastify + worker_threads)
This commit is contained in:
104
cron/ambiguity.js
Normal file
104
cron/ambiguity.js
Normal file
@@ -0,0 +1,104 @@
|
||||
// Port of tmdbintegral/ambiguity.php
// Detects pairs of distinct TMDb ids whose filtered titles collide and whose
// years differ by at most the per-type tolerance (`yearTolerance` in
// buildAmbiguity).
|
||||
|
||||
import { readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { pathToFileURL } from 'node:url';
import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
|
||||
|
||||
// Positions of the fields read from each row of the search<type><n>.json
// part files (every row is indexed as an array by buildAmbiguity).
const TMDB = 0; // TMDb id of the entry
const FILTEREDTITLE = 4; // filtered title, emitted with language tag 'FR'
const FILTEREDENGLISHTITLE = 5; // filtered English title, tag 'EN'
const FILTEREDORIGINALTITLE = 6; // filtered original title, tag 'VO'
const YEAR = 7; // array-like field; only its first element is used as the year
|
||||
|
||||
/**
 * Build `ambiguity<type>.csv` inside TMDBINTEGRAL_DIR.
 *
 * Reads the `search<type><p>.json` part files (p = 0 .. nbParts - 1), groups
 * every non-empty filtered title (FR / EN / VO variants) by exact title and,
 * for each group holding at least two distinct TMDb ids, writes one line per
 * ordered pair of distinct ids whose years differ by at most the tolerance:
 *
 *     tmdbA;languageA;tmdbB;languageB
 *
 * Both directions of a pair are emitted (A;B and B;A). When nothing collides
 * the output file is written empty.
 *
 * @param {string} type - Entry kind used in the file names; 'tv' selects the
 *   wide year tolerance, any other value the narrow one.
 * @param {number} [nbParts=NB_SEARCH_PARTS] - Number of part files to read.
 * @returns {Promise<void>} Resolves once the CSV has been written.
 * @throws Rejects if a part file cannot be read or is not valid JSON, or if
 *   the CSV cannot be written.
 */
export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
  // NOTE(review): 200 years presumably disables the year filter for series in
  // practice — confirm against ambiguity.php.
  const yearTolerance = type === 'tv' ? 200 : 1;
  const out = join(TMDBINTEGRAL_DIR, `ambiguity${type}.csv`);

  // One candidate per (entry, non-empty title variant).
  const candidates = [];
  for (let p = 0; p < nbParts; p++) {
    const file = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    const chunk = JSON.parse(readFileSync(file, 'utf8'));
    for (const entry of chunk) {
      const variants = [
        ['FR', entry[FILTEREDTITLE]],
        ['EN', entry[FILTEREDENGLISHTITLE]],
        ['VO', entry[FILTEREDORIGINALTITLE]],
      ];
      for (const [language, title] of variants) {
        if (title) {
          candidates.push({ tmdb: entry[TMDB], title, year: entry[YEAR][0], language });
        }
      }
    }
  }

  // PHP: array_multisort(filteredtitles, years, tmdbs, languages)
  // i.e. order by (title ASC, year ASC, tmdb ASC, language ASC).
  candidates.sort((a, b) => {
    if (a.title < b.title) return -1;
    if (a.title > b.title) return 1;
    if (a.year !== b.year) return a.year - b.year;
    if (a.tmdb !== b.tmdb) return a.tmdb - b.tmdb;
    if (a.language < b.language) return -1;
    if (a.language > b.language) return 1;
    return 0;
  });

  const lines = [];
  let start = 0;
  while (start < candidates.length) {
    // After sorting, equal titles are contiguous: [start, end) is one group.
    let end = start + 1;
    while (end < candidates.length && candidates[end].title === candidates[start].title) {
      end++;
    }
    const group = candidates.slice(start, end);

    // Count distinct ids inside this group only. A running counter carried
    // across group boundaries under-counts the first id of each group and
    // skips two-id groups at the end of the list or followed by a title of
    // the same id.
    const distinctIds = new Set(group.map((candidate) => candidate.tmdb));
    if (distinctIds.size >= 2) {
      for (const first of group) {
        for (const second of group) {
          if (first.tmdb !== second.tmdb && Math.abs(first.year - second.year) <= yearTolerance) {
            lines.push(`${first.tmdb};${first.language};${second.tmdb};${second.language}`);
          }
        }
      }
    }
    start = end;
  }

  await writeFile(out, lines.length ? lines.join('\n') + '\n' : '');
}
|
||||
|
||||
// CLI entry point: run only when this module is the script passed to `node`,
// not when it is imported. pathToFileURL applies the same percent-encoding and
// platform path rules as import.meta.url, which a hand-built `file://` string
// does not (spaces, '#', '%', Windows drive paths).
if (process.argv[1] !== undefined && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const type = process.argv[2];
  const nb = Number.parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
  const isValidType = type === 'movie' || type === 'tv';
  // A NaN or non-positive part count would read no part files and overwrite
  // the existing CSV with an empty one, so refuse it up front.
  const isValidCount = Number.isInteger(nb) && nb >= 1;
  if (!isValidType || !isValidCount) {
    console.error('Usage: node cron/ambiguity.js movie|tv [nbParts]');
    process.exit(1);
  }
  buildAmbiguity(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}
|
||||
Reference in New Issue
Block a user