// Portage complet PHP/Bash vers Node.js (Fastify + worker_threads)
// Port of tmdbintegral/search.php
// Builds the chunked search database files (searchmovieN.json / searchtvN.json).
//
// Each entry has the same positional shape as the PHP version:
// [TMDB, TITLE, ENGLISHTITLE, ORIGINALTITLE,
//  FILTEREDTITLE, FILTEREDENGLISHTITLE, FILTEREDORIGINALTITLE,
//  YEARS[], POPULARITY]
// so the runtime search worker can use the same indices.

import { createReadStream, existsSync, readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import { pathToFileURL } from 'node:url';

import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
import { entryPath } from '../lib/paths.js';
import { filterTitle } from '../lib/titleFilter.js';
import { mbStrlen } from '../lib/mbLevenshtein.js';

/** Lowercase a string using the runtime's default locale rules. */
function lower(text) {
  return text.toLocaleLowerCase();
}
/**
 * Pull the English ("en") title out of a TMDB detail object's translation
 * list. Movies carry it under `data.title`, TV shows under `data.name`.
 *
 * @param {object} detail - TMDB detail payload (may lack translations).
 * @param {'movie'|'tv'} type - Which field of the translation to read.
 * @returns {string} English title, or '' when none is present.
 */
function extractEnglishTitle(detail, type) {
  const translations = detail?.translations?.translations;
  if (!Array.isArray(translations)) return '';
  // First English entry wins, matching the original linear scan.
  const en = translations.find((t) => t.iso_639_1 === 'en');
  if (!en) return '';
  return (type === 'movie' ? en.data?.title : en.data?.name) || '';
}
/**
 * Convert one master-index record plus its detail file into a positional
 * search entry:
 *   [TMDB, TITLE, ENGLISHTITLE, ORIGINALTITLE,
 *    FILTEREDTITLE, FILTEREDENGLISHTITLE, FILTEREDORIGINALTITLE,
 *    YEARS[], POPULARITY]
 * (same indices as the PHP version — see file header).
 *
 * @param {object} masterObj - One line of the master index (`id`, `popularity`).
 * @param {object} detail - Full TMDB detail payload for this id.
 * @param {'movie'|'tv'} type - Which detail fields to read.
 * @returns {Array|null} Positional entry, or null when the record has no
 *   usable primary year or no filterable title at all.
 */
function buildEntry(masterObj, detail, type) {
  const tmdb = masterObj.id;
  const popularity = parseFloat(masterObj.popularity) || 0;
  const isMovie = type === 'movie';

  // Primary year comes from release_date (movie) / first_air_date (tv); 0 when absent.
  const primaryDate = isMovie ? detail.release_date : detail.first_air_date;
  const years = [parseInt(String(primaryDate || '').split('-')[0], 10) || 0];

  const title = (isMovie ? detail.title : detail.name) || '';
  const originalTitle = (isMovie ? detail.original_title : detail.original_name) || '';
  const englishTitle = extractEnglishTitle(detail, isMovie ? 'movie' : 'tv');

  // TV shows additionally index every season's air year.
  if (!isMovie && Array.isArray(detail.seasons)) {
    for (const season of detail.seasons) {
      const seasonYear = parseInt(String(season.air_date || '').split('-')[0], 10);
      if (seasonYear) years.push(seasonYear);
    }
  }

  // Without a primary year the entry is not searchable.
  if (!years[0]) return null;

  let ft = filterTitle(title);
  let fe = filterTitle(englishTitle);
  let fo = filterTitle(originalTitle);
  if (!ft && !fe && !fo) return null;

  // Discard a filtered title when filtering stripped more than half of it.
  if (ft && mbStrlen(ft) / mbStrlen(title) < 0.5) ft = '';
  if (fe && mbStrlen(fe) / mbStrlen(englishTitle) < 0.5) fe = '';
  if (fo && mbStrlen(fo) / mbStrlen(originalTitle) < 0.5) fo = '';

  // Dedupe years keeping first-seen order (Set preserves insertion order),
  // mirroring PHP array_values(array_unique($years)).
  const uniqYears = [...new Set(years)];

  return [
    tmdb,
    title,
    englishTitle,
    originalTitle,
    lower(ft),
    lower(fe),
    lower(fo),
    uniqYears,
    popularity,
  ];
}
/**
 * Build the chunked search database for one media type.
 *
 * Streams the master index (`TMDBINTEGRAL_DIR/<type>.json`, one JSON object
 * per line), loads each record's detail file, converts it with buildEntry,
 * then writes the accumulated entries split into `search<type>0..N-1.json`.
 * Malformed index lines, missing detail files, and corrupt detail JSON are
 * silently skipped (best-effort, as in the PHP original).
 *
 * @param {'movie'|'tv'} type - Media type to index.
 * @param {number} [nbParts=NB_SEARCH_PARTS] - Number of output chunk files.
 * @returns {Promise<void>} Resolves once every chunk file is written.
 */
export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
  const indexFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const entries = [];

  const reader = createInterface({
    input: createReadStream(indexFile, { encoding: 'utf8' }),
    crlfDelay: Infinity,
  });

  for await (const line of reader) {
    if (!line) continue;

    let masterObj;
    try {
      masterObj = JSON.parse(line);
    } catch {
      continue; // malformed index line
    }

    const detailFile = entryPath(type, masterObj.id);
    if (!existsSync(detailFile)) continue;

    let detail;
    try {
      detail = JSON.parse(readFileSync(detailFile, 'utf8'));
    } catch {
      continue; // unreadable or corrupt detail file
    }

    const entry = buildEntry(masterObj, detail, type);
    if (entry !== null) entries.push(entry);
  }

  const partSize = Math.ceil(entries.length / nbParts);
  const pending = [];
  for (let part = 0; part < nbParts; part++) {
    const chunk = entries.slice(part * partSize, (part + 1) * partSize);
    const outFile = join(TMDBINTEGRAL_DIR, `search${type}${part}.json`);
    console.log(`Writing ${chunk.length} entries to search${type}${part}.json`);
    pending.push(writeFile(outFile, JSON.stringify(chunk)));
  }
  await Promise.all(pending);
}
// CLI entry point: `node cron/buildSearch.js movie|tv [nbParts]`.
// Use pathToFileURL for the "run as main module" check: a naive
// `file://${process.argv[1]}` comparison breaks on Windows drive letters
// and on paths containing percent-encodable characters.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const type = process.argv[2];
  const nb = Number.parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
  if (type !== 'movie' && type !== 'tv') {
    console.error('Usage: node cron/buildSearch.js movie|tv [nbParts]');
    process.exit(1);
  }
  // Reject NaN / zero / negative part counts before they corrupt the chunking math.
  if (!Number.isInteger(nb) || nb < 1) {
    console.error('nbParts must be a positive integer');
    process.exit(1);
  }
  buildSearch(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}