// Port of tmdbintegral/search.php
// Builds the chunked search database files (searchmovieN.json / searchtvN.json).
//
// Each entry has the same positional shape as the PHP version:
//   [TMDB, TITLE, ENGLISHTITLE, ORIGINALTITLE,
//    FILTEREDTITLE, FILTEREDENGLISHTITLE, FILTEREDORIGINALTITLE,
//    YEARS[], POPULARITY]
// so the runtime search worker can use the same indices.

import { createReadStream, existsSync, readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { createInterface } from 'node:readline';
import { join } from 'node:path';
import { pathToFileURL } from 'node:url';

import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
import { entryPath } from '../lib/paths.js';
import { filterTitle } from '../lib/titleFilter.js';
import { mbStrlen } from '../lib/mbLevenshtein.js';

/**
 * Lowercases a filtered title before it is stored in the search index.
 *
 * Uses the locale-independent `toLowerCase()` so the generated files do not
 * depend on the locale of the machine running the build: under Turkish,
 * Azeri or Lithuanian locales `toLocaleLowerCase()` maps some letters
 * (notably "I") differently.
 *
 * @param {string} s - Text to lowercase.
 * @returns {string} Lowercased copy of `s`.
 */
function lower(s) {
  return s.toLowerCase();
}
/**
 * Returns the English title stored in a detail object's translations.
 *
 * Reads `detail.translations.translations` and uses the first entry whose
 * `iso_639_1` is `'en'`: its `data.title` for movies, `data.name` otherwise.
 *
 * @param {object|null|undefined} detail - Parsed detail JSON; may lack translations.
 * @param {string} type - `'movie'` selects `data.title`; any other value selects `data.name`.
 * @returns {string} The English title, or `''` when none is available.
 */
function extractEnglishTitle(detail, type) {
  const translations = detail?.translations?.translations;
  if (!Array.isArray(translations)) return '';

  // Optional chaining tolerates null/primitive array entries instead of throwing.
  const english = translations.find((entry) => entry?.iso_639_1 === 'en');
  const value = type === 'movie' ? english?.data?.title : english?.data?.name;

  // Callers treat the result as a string, so never let another type through.
  return typeof value === 'string' ? value : '';
}
/**
 * Builds one positional search-database entry for a movie or TV show.
 *
 * Entry layout:
 *   [tmdb, title, englishTitle, originalTitle,
 *    filteredTitle, filteredEnglishTitle, filteredOriginalTitle,
 *    years[], popularity]
 *
 * @param {object} masterObj - Object parsed from one index line; `id` and `popularity` are read.
 * @param {object} detail - Parsed detail JSON for the same id.
 * @param {string} type - `'movie'` reads the movie fields; any other value reads the TV fields.
 * @returns {Array|null} The entry, or `null` when either input is not an
 *   object, when the release / first-air year is missing or unparsable, or
 *   when all three filtered titles are empty.
 */
function buildEntry(masterObj, detail, type) {
  // A detail file or index line holding JSON `null` (or a primitive) must not
  // abort the whole build; treat it like any other unusable record.
  if (!masterObj || typeof masterObj !== 'object') return null;
  if (!detail || typeof detail !== 'object') return null;

  const isMovie = type === 'movie';

  // Year component of a "YYYY-MM-DD" string; 0 when missing or unparsable.
  const yearOf = (date) => Number.parseInt(String(date || '').split('-')[0], 10) || 0;

  const title = (isMovie ? detail.title : detail.name) || '';
  const originalTitle = (isMovie ? detail.original_title : detail.original_name) || '';
  const englishTitle = extractEnglishTitle(detail, isMovie ? 'movie' : 'tv');

  // years[0] is always the release / first-air year; TV shows then append
  // the air year of every season that has one.
  const years = [yearOf(isMovie ? detail.release_date : detail.first_air_date)];
  if (!isMovie && Array.isArray(detail.seasons)) {
    for (const season of detail.seasons) {
      const seasonYear = yearOf(season?.air_date);
      if (seasonYear) years.push(seasonYear);
    }
  }
  if (!years[0]) return null;

  const filteredTitle = filterTitle(title);
  const filteredEnglish = filterTitle(englishTitle);
  const filteredOriginal = filterTitle(originalTitle);

  // This check runs before the ratio rule below, so an entry whose filtered
  // titles are all blanked by that rule is still emitted.
  if (!filteredTitle && !filteredEnglish && !filteredOriginal) return null;

  // Blank a filtered title that kept less than half of its raw title's length.
  const dropIfMostlyFiltered = (filtered, raw) =>
    (filtered && mbStrlen(filtered) / mbStrlen(raw) < 0.5 ? '' : filtered);

  return [
    masterObj.id,
    title,
    englishTitle,
    originalTitle,
    lower(dropIfMostlyFiltered(filteredTitle, title)),
    lower(dropIfMostlyFiltered(filteredEnglish, englishTitle)),
    lower(dropIfMostlyFiltered(filteredOriginal, originalTitle)),
    // Set keeps insertion order, matching PHP array_values(array_unique($years)).
    [...new Set(years)],
    Number.parseFloat(masterObj.popularity) || 0,
  ];
}
/**
 * Builds the chunked search database for one media type.
 *
 * Streams `<TMDBINTEGRAL_DIR>/<type>.json` line by line (one JSON value per
 * line), loads the detail file at `entryPath(type, id)` for each line,
 * converts the pair with `buildEntry`, then splits the collected entries into
 * `nbParts` consecutive slices written to
 * `<TMDBINTEGRAL_DIR>/search<type><p>.json` (p = 0 .. nbParts - 1).
 *
 * Blank lines, ids without a detail file and records rejected by `buildEntry`
 * are skipped. Unparsable or non-object index lines and detail files are
 * skipped too and reported once in a summary warning.
 *
 * @param {string} type - Media type; used in the index and output file names.
 * @param {number} [nbParts=NB_SEARCH_PARTS] - Number of output files to write.
 * @returns {Promise<void>} Resolves once every part file has been written.
 * @throws {RangeError} When `nbParts` is not a positive integer.
 */
export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
  // Without this check a zero/NaN/negative count would read the whole index
  // and then silently write no files at all.
  if (!Number.isInteger(nbParts) || nbParts < 1) {
    throw new RangeError(`nbParts must be a positive integer, got ${nbParts}`);
  }

  const indexFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const database = [];
  let malformed = 0;

  const stream = createReadStream(indexFile, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });

  for await (const line of rl) {
    if (!line) continue;

    let masterObj;
    try {
      masterObj = JSON.parse(line);
    } catch {
      malformed += 1;
      continue;
    }
    // JSON.parse also accepts `null`, numbers and strings; only objects carry an id.
    if (masterObj === null || typeof masterObj !== 'object') {
      malformed += 1;
      continue;
    }

    const path = entryPath(type, masterObj.id);
    if (!existsSync(path)) continue;

    let detail;
    try {
      detail = JSON.parse(readFileSync(path, 'utf8'));
    } catch {
      malformed += 1;
      continue;
    }
    if (detail === null || typeof detail !== 'object') {
      malformed += 1;
      continue;
    }

    const entry = buildEntry(masterObj, detail, type);
    if (entry) database.push(entry);
  }

  if (malformed > 0) {
    console.warn(`Skipped ${malformed} malformed index lines or detail files for ${type}`);
  }

  // Consecutive slices of equal size; trailing parts may be shorter or empty.
  const partSize = Math.ceil(database.length / nbParts);
  const writes = [];
  for (let p = 0; p < nbParts; p++) {
    const chunk = database.slice(p * partSize, (p + 1) * partSize);
    const out = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    console.log(`Writing ${chunk.length} entries to search${type}${p}.json`);
    writes.push(writeFile(out, JSON.stringify(chunk)));
  }
  await Promise.all(writes);
}
// CLI entry point: `node cron/buildSearch.js movie|tv [nbParts]`.
// Runs only when this module is the script Node was started with.
// pathToFileURL() produces the same percent-encoded, platform-correct file URL
// form as import.meta.url; a hand-built `file://${path}` string does not for
// Windows paths or paths containing spaces and other escaped characters.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const type = process.argv[2];
  const nb = Number.parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
  const isKnownType = type === 'movie' || type === 'tv';

  // A non-numeric or non-positive part count is a usage error, not something
  // to pass on to buildSearch as NaN.
  if (!isKnownType || !Number.isInteger(nb) || nb < 1) {
    console.error('Usage: node cron/buildSearch.js movie|tv [nbParts]');
    process.exit(1);
  }

  buildSearch(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}