Files
proxy_tmdb/cron/buildSearch.js

137 lines
4.3 KiB
JavaScript
Raw Permalink Normal View History

// Port of tmdbintegral/search.php
// Builds the chunked search database files (searchmovieN.json / searchtvN.json).
//
// Each entry has the same positional shape as the PHP version:
// [TMDB, TITLE, ENGLISHTITLE, ORIGINALTITLE,
// FILTEREDTITLE, FILTEREDENGLISHTITLE, FILTEREDORIGINALTITLE,
// YEARS[], POPULARITY]
// so the runtime search worker can use the same indices.
import { createReadStream, existsSync, readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import { pathToFileURL } from 'node:url';
import { NB_SEARCH_PARTS, TMDBINTEGRAL_DIR } from '../config.js';
import { mbStrlen } from '../lib/mbLevenshtein.js';
import { entryPath } from '../lib/paths.js';
import { filterTitle } from '../lib/titleFilter.js';
/**
 * Lower-cases a string for storage in the search database.
 *
 * Uses the locale-independent `toLowerCase()` so the generated files do not
 * depend on the locale of the machine running the build (with a Turkish host
 * locale, `toLocaleLowerCase()` maps 'I' to the dotless 'ı').
 *
 * @param {string} s - Text to lower-case.
 * @returns {string} The lower-cased text.
 */
function lower(s) {
  return s.toLowerCase();
}
/**
 * Reads the English title out of a detail object's translation list.
 *
 * Only the first translation whose `iso_639_1` is 'en' is considered; if that
 * entry has no usable title the result is '' even when a later English entry
 * carries one.
 *
 * @param {object} detail - Detail object; `translations.translations` is read when present.
 * @param {string} type - 'movie' reads `data.title`; any other value reads `data.name`.
 * @returns {string} The English title, or '' when none is available.
 */
function extractEnglishTitle(detail, type) {
  const translations = detail?.translations?.translations;
  if (!Array.isArray(translations)) {
    return '';
  }

  const english = translations.find((candidate) => candidate.iso_639_1 === 'en');
  if (english === undefined) {
    return '';
  }

  const field = type === 'movie' ? 'title' : 'name';
  return english.data?.[field] || '';
}
/**
 * Builds one positional search-database row from a master index record and
 * the matching detail object.
 *
 * Row layout:
 *   [id, title, englishTitle, originalTitle,
 *    filteredTitle, filteredEnglishTitle, filteredOriginalTitle,
 *    years, popularity]
 * The three filtered titles are lower-cased; `years` is de-duplicated in
 * first-seen order.
 *
 * @param {object} masterObj - Parsed index record; `id` and `popularity` are read.
 * @param {object} detail - Parsed detail object for the same id.
 * @param {string} type - 'movie' selects the movie fields; any other value selects the TV fields.
 * @returns {Array|null} The row, or null when the first year is missing/zero
 *   or when all three filtered titles are empty.
 */
function buildEntry(masterObj, detail, type) {
  const tmdb = masterObj.id;
  const popularity = parseFloat(masterObj.popularity) || 0;
  const isMovie = type === 'movie';

  // Leading number of a dash-separated date value; NaN when absent or non-numeric.
  const yearOf = (dateValue) => parseInt(String(dateValue || '').split('-')[0], 10);

  // The first slot always holds the release / first-air year (0 when unknown).
  const years = [yearOf(isMovie ? detail.release_date : detail.first_air_date) || 0];
  const title = (isMovie ? detail.title : detail.name) || '';
  const originalTitle = (isMovie ? detail.original_title : detail.original_name) || '';
  const englishTitle = extractEnglishTitle(detail, isMovie ? 'movie' : 'tv');

  // TV entries also collect every season year that parses to a non-zero number.
  if (!isMovie && Array.isArray(detail.seasons)) {
    for (const season of detail.seasons) {
      const seasonYear = yearOf(season.air_date);
      if (seasonYear) {
        years.push(seasonYear);
      }
    }
  }

  if (!years[0]) {
    return null;
  }

  const filteredTitle = filterTitle(title);
  const filteredEnglish = filterTitle(englishTitle);
  const filteredOriginal = filterTitle(originalTitle);
  if (!(filteredTitle || filteredEnglish || filteredOriginal)) {
    return null;
  }

  // A filtered form shorter than half of its source (per mbStrlen) is blanked.
  // This runs after the all-empty check above, so a row may still end up with
  // three blank filtered titles.
  const keepIfSubstantial = (filtered, source) =>
    filtered && mbStrlen(filtered) / mbStrlen(source) < 0.5 ? '' : filtered;
  const keptTitle = keepIfSubstantial(filteredTitle, title);
  const keptEnglish = keepIfSubstantial(filteredEnglish, englishTitle);
  const keptOriginal = keepIfSubstantial(filteredOriginal, originalTitle);

  // Set iteration follows insertion order, so this de-duplicates while
  // preserving order (PHP array_values(array_unique($years))).
  const uniqYears = [...new Set(years)];

  return [
    tmdb,
    title,
    englishTitle,
    originalTitle,
    lower(keptTitle),
    lower(keptEnglish),
    lower(keptOriginal),
    uniqYears,
    popularity,
  ];
}
/**
 * Builds the chunked search database files for one media type.
 *
 * Reads `<type>.json` in TMDBINTEGRAL_DIR line by line (one JSON record per
 * line), loads the detail file located by `entryPath(type, id)` for each
 * record, turns each pair into a row with `buildEntry`, and writes the rows
 * as `search<type><p>.json` files in TMDBINTEGRAL_DIR, split into consecutive
 * slices of equal size (the last slices may be shorter or empty).
 *
 * Records that cannot be used (unparsable JSON, JSON that is not an object,
 * unreadable detail file) are skipped; a single warning reports how many.
 * Records without a detail file and records rejected by `buildEntry` are
 * skipped without a warning.
 *
 * @param {string} type - Media type used in file names and passed to the helpers ('movie' or 'tv' from the CLI).
 * @param {number} [nbParts=NB_SEARCH_PARTS] - Number of output files to write.
 * @returns {Promise<void>} Resolves once every part file has been written.
 * @throws {RangeError} When `nbParts` is not a finite number greater than 0.
 */
export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
  // Validate before doing any work: 0, NaN or a negative count used to skip
  // the write loop and silently produce no files; Infinity never terminated.
  // Number() keeps numeric strings working as they did via implicit coercion.
  const partCount = Number(nbParts);
  if (!Number.isFinite(partCount) || partCount <= 0) {
    throw new RangeError(`nbParts must be a positive number, got ${String(nbParts)}`);
  }

  const indexFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const database = [];
  let skipped = 0;

  const stream = createReadStream(indexFile, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });
  for await (const line of rl) {
    if (!line) continue;

    let masterObj;
    try {
      masterObj = JSON.parse(line);
    } catch {
      skipped += 1;
      continue;
    }
    // JSON.parse also accepts bare null / numbers / strings; reading `.id`
    // from null would abort the whole build.
    if (masterObj === null || typeof masterObj !== 'object') {
      skipped += 1;
      continue;
    }

    const path = entryPath(type, masterObj.id);
    if (!existsSync(path)) continue;

    let detail;
    try {
      detail = JSON.parse(readFileSync(path, 'utf8'));
    } catch {
      skipped += 1;
      continue;
    }
    if (detail === null || typeof detail !== 'object') {
      skipped += 1;
      continue;
    }

    const entry = buildEntry(masterObj, detail, type);
    if (entry) database.push(entry);
  }

  if (skipped > 0) {
    console.warn(`Skipped ${skipped} unreadable or malformed record(s) while building search${type}`);
  }

  const partSize = Math.ceil(database.length / partCount);
  const writes = [];
  for (let p = 0; p < partCount; p++) {
    const chunk = database.slice(p * partSize, (p + 1) * partSize);
    const out = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    console.log(`Writing ${chunk.length} entries to search${type}${p}.json`);
    writes.push(writeFile(out, JSON.stringify(chunk)));
  }
  await Promise.all(writes);
}
// CLI entry point: run the build only when this module is the script Node was
// started with. pathToFileURL() yields the same encoded file: URL form that
// import.meta.url uses, so the check also holds for paths containing spaces
// or non-ASCII characters and for Windows drive paths, where a hand-built
// `file://${path}` string never matches. argv[1] can be absent (e.g. when
// code is evaluated with -e), hence the guard.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const type = process.argv[2];
  const nb = parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
  const isKnownType = type === 'movie' || type === 'tv';
  // `!(nb > 0)` also rejects NaN from a non-numeric nbParts argument, which
  // would otherwise be passed through to the build.
  if (!isKnownType || !(nb > 0)) {
    console.error('Usage: node cron/buildSearch.js movie|tv [nbParts]');
    process.exit(1);
  }
  buildSearch(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}