// Port of tmdbintegral/search.php
// Builds the chunked search database files (searchmovieN.json / searchtvN.json).
//
// Each entry has the same positional shape as the PHP version:
//   [TMDB, TITLE, ENGLISHTITLE, ORIGINALTITLE,
//    FILTEREDTITLE, FILTEREDENGLISHTITLE, FILTEREDORIGINALTITLE,
//    YEARS[], POPULARITY]
// so the runtime search worker can use the same indices.

import { createReadStream, existsSync, readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import { pathToFileURL } from 'node:url';

import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
import { entryPath } from '../lib/paths.js';
import { filterTitle } from '../lib/titleFilter.js';
import { mbStrlen } from '../lib/mbLevenshtein.js';

/**
 * Lower-cases a filtered title for storage in the search database.
 *
 * Uses the locale-independent `toLowerCase()` rather than
 * `toLocaleLowerCase()`, so the generated files do not vary with the locale
 * of the machine running the build (e.g. Turkish dotless-i casing rules).
 *
 * @param {string} s - Text to lower-case.
 * @returns {string} Lower-cased text.
 */
function lower(s) {
  return s.toLowerCase();
}

/**
 * Returns the English title from a detail record's translations list.
 *
 * Looks at `detail.translations.translations`, picks the first element whose
 * `iso_639_1` is `'en'`, and reads `data.title` (movies) or `data.name`
 * (anything else).
 *
 * @param {object} detail - Parsed detail record.
 * @param {string} type - `'movie'` selects `data.title`; otherwise `data.name`.
 * @returns {string} The English title, or `''` when none is available.
 */
function extractEnglishTitle(detail, type) {
  const translations = detail?.translations?.translations;
  if (!Array.isArray(translations)) return '';

  // `t?.` tolerates null/undefined elements instead of throwing on them.
  const english = translations.find((t) => t?.iso_639_1 === 'en');
  if (!english) return '';

  const field = type === 'movie' ? 'title' : 'name';
  return english.data?.[field] || '';
}

/**
 * Extracts the leading year from a `YYYY-MM-DD`-style date value.
 *
 * @param {*} date - Date value; falsy values are treated as an empty string.
 * @returns {number} The parsed year, or 0 when it is missing or unparsable.
 */
function yearOf(date) {
  const [year] = String(date || '').split('-');
  return parseInt(year, 10) || 0;
}

/**
 * Blanks a filtered title that kept less than half of its raw title's length
 * (lengths measured with `mbStrlen`).
 *
 * @param {string} filtered - Output of `filterTitle(raw)`.
 * @param {string} raw - The unfiltered title.
 * @returns {string} `filtered`, or `''` when the length ratio is below 0.5.
 */
function dropIfMostlyFiltered(filtered, raw) {
  if (filtered && mbStrlen(filtered) / mbStrlen(raw) < 0.5) return '';
  return filtered;
}

/**
 * Builds one positional search entry from an index record and its detail file.
 *
 * @param {object} masterObj - Parsed line of the index file; `id` and
 *   `popularity` are read from it.
 * @param {object} detail - Parsed detail record for the same id.
 * @param {string} type - `'movie'` reads the movie fields; any other value
 *   reads the TV fields (including per-season air dates).
 * @returns {Array|null} `[tmdb, title, englishTitle, originalTitle,
 *   filteredTitle, filteredEnglishTitle, filteredOriginalTitle, years,
 *   popularity]`, or `null` when the record has no usable year, no title
 *   surviving `filterTitle`, or is not an object at all.
 */
function buildEntry(masterObj, detail, type) {
  // A detail file holding `null` or a scalar is valid JSON; skip it rather
  // than throwing on the property reads below.
  if (detail === null || typeof detail !== 'object') return null;

  const isMovie = type === 'movie';
  const tmdb = masterObj.id;
  const popularity = parseFloat(masterObj.popularity) || 0;

  // The release / first-air year is mandatory and always comes first.
  const firstYear = yearOf(isMovie ? detail.release_date : detail.first_air_date);
  if (!firstYear) return null;

  const years = [firstYear];
  if (!isMovie && Array.isArray(detail.seasons)) {
    for (const season of detail.seasons) {
      const seasonYear = yearOf(season?.air_date);
      if (seasonYear) years.push(seasonYear);
    }
  }

  const title = (isMovie ? detail.title : detail.name) || '';
  const originalTitle = (isMovie ? detail.original_title : detail.original_name) || '';
  const englishTitle = extractEnglishTitle(detail, isMovie ? 'movie' : 'tv');

  let ft = filterTitle(title);
  let fe = filterTitle(englishTitle);
  let fo = filterTitle(originalTitle);
  if (!ft && !fe && !fo) return null;

  // Applied after the emptiness check above, matching the original order:
  // an entry whose filtered titles are all blanked here is still emitted.
  ft = dropIfMostlyFiltered(ft, title);
  fe = dropIfMostlyFiltered(fe, englishTitle);
  fo = dropIfMostlyFiltered(fo, originalTitle);

  // Dedupe years preserving order (PHP array_values(array_unique($years))).
  const uniqYears = [...new Set(years)];

  return [
    tmdb,
    title,
    englishTitle,
    originalTitle,
    lower(ft),
    lower(fe),
    lower(fo),
    uniqYears,
    popularity,
  ];
}

/**
 * Builds the search database for one media type and writes it out in chunks.
 *
 * Reads `<TMDBINTEGRAL_DIR>/<type>.json` line by line (one JSON object per
 * line), loads the detail file located by `entryPath(type, id)` for each
 * record, and writes the resulting entries to `search<type>0.json` …
 * `search<type>{nbParts-1}.json` in `TMDBINTEGRAL_DIR`.
 *
 * Records whose detail file does not exist are skipped silently. Unparsable
 * index lines and unreadable/unparsable detail files are skipped as well and
 * reported in a single summary warning.
 *
 * @param {string} type - Media type, used in the input and output file names.
 * @param {number} [nbParts=NB_SEARCH_PARTS] - Number of output files.
 * @returns {Promise<void>} Resolves once every chunk has been written.
 * @throws {RangeError} When `nbParts` is not a positive integer.
 */
export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
  // 0, NaN or a negative count would otherwise write no files at all without
  // reporting anything.
  const parts = Number(nbParts);
  if (!Number.isInteger(parts) || parts < 1) {
    throw new RangeError(`nbParts must be a positive integer, got ${nbParts}`);
  }

  const indexFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const database = [];
  let badIndexLines = 0;
  let badDetailFiles = 0;

  const stream = createReadStream(indexFile, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });

  for await (const line of rl) {
    if (!line) continue;

    let masterObj;
    try {
      masterObj = JSON.parse(line);
    } catch {
      badIndexLines += 1;
      continue;
    }
    // `null` and scalars are valid JSON but carry no id to look up.
    if (masterObj === null || typeof masterObj !== 'object') {
      badIndexLines += 1;
      continue;
    }

    const path = entryPath(type, masterObj.id);
    if (!existsSync(path)) continue;

    let detail;
    try {
      detail = JSON.parse(readFileSync(path, 'utf8'));
    } catch {
      badDetailFiles += 1;
      continue;
    }

    const entry = buildEntry(masterObj, detail, type);
    if (entry) database.push(entry);
  }

  if (badIndexLines > 0 || badDetailFiles > 0) {
    console.warn(
      `Skipped ${badIndexLines} invalid index line(s) and ${badDetailFiles} unreadable detail file(s) for ${type}`,
    );
  }

  const partSize = Math.ceil(database.length / parts);
  const writes = [];
  for (let p = 0; p < parts; p++) {
    const chunk = database.slice(p * partSize, (p + 1) * partSize);
    const out = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    console.log(`Writing ${chunk.length} entries to search${type}${p}.json`);
    writes.push(writeFile(out, JSON.stringify(chunk)));
  }
  await Promise.all(writes);
}

// CLI entry point: node cron/buildSearch.js movie|tv [nbParts]
// `pathToFileURL` yields the same percent-encoded, platform-correct URL form
// as `import.meta.url`; a hand-built `file://` string does not match on
// Windows or for paths containing spaces or non-ASCII characters.
const invokedPath = process.argv[1];
if (invokedPath && import.meta.url === pathToFileURL(invokedPath).href) {
  const type = process.argv[2];
  const nb = parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
  const isValidType = type === 'movie' || type === 'tv';
  const isValidCount = Number.isInteger(nb) && nb >= 1;
  if (!isValidType || !isValidCount) {
    console.error('Usage: node cron/buildSearch.js movie|tv [nbParts]');
    process.exit(1);
  }
  buildSearch(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}