// Port of tmdbintegral/tmdbintegral.php
//
// 1. Fetch /changes for the last CHANGES_DAYS to find recently-modified entries
//    whose local cache file is older than CHANGES_DAYS (so we re-download them).
// 2. Stream <type>.json line-by-line, ensure each id has a local detail file
//    (downloading it if missing or flagged for update).
// 3. Walk every bucket directory under the type's base directory and remove
//    orphan files whose id no longer appears in the master list.

import { createReadStream, createWriteStream, existsSync, statSync, readdirSync, unlinkSync } from 'node:fs';
import { mkdir, stat, writeFile, unlink, rename } from 'node:fs/promises';
import { createInterface } from 'node:readline';
import { join } from 'node:path';
import { pathToFileURL } from 'node:url';

import {
  TMDBINTEGRAL_DIR,
  MOVIE_DIR,
  TV_DIR,
  TMDB_API_KEY,
  TMDB_API_BASE,
  CHANGES_DAYS,
} from '../config.js';
import { fetchJson, Limiter } from '../lib/http.js';
import { entryDir, entryPath, bucket } from '../lib/paths.js';

const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;
const DOWNLOAD_CONCURRENCY = 16;

// Matches cache file names of the exact form "<digits>.json".
const ENTRY_FILE_RE = /^(\d+)\.json$/;

/**
 * Format a date as YYYY-MM-DD using its UTC components.
 *
 * @param {Date} date
 * @returns {string}
 */
function ymd(date) {
  const y = date.getUTCFullYear();
  const m = String(date.getUTCMonth() + 1).padStart(2, '0');
  const d = String(date.getUTCDate()).padStart(2, '0');
  return `${y}-${m}-${d}`;
}

/**
 * Value of the `append_to_response` query parameter for a detail request.
 * 'tv' additionally requests `aggregate_credits`.
 *
 * @param {string} type - 'tv' selects the TV list; anything else gets the other list.
 * @returns {string}
 */
function appendResponse(type) {
  return type === 'tv'
    ? 'credits,aggregate_credits,external_ids,release_dates,translations,images,videos'
    : 'credits,external_ids,release_dates,translations,images,videos';
}

/**
 * Build the detail URL for one entry.
 *
 * @param {string} type - Path segment placed after TMDB_API_BASE.
 * @param {number} id
 * @returns {string}
 */
function detailUrl(type, id) {
  const base = `${TMDB_API_BASE}/${type}`;
  return `${base}/${id}?api_key=${TMDB_API_KEY}&append_to_response=${appendResponse(type)}&include_image_language=fr,null,en&language=fr-FR`;
}

/**
 * Page through `/${type}/changes` for the last CHANGES_DAYS and collect the ids
 * whose local cache file exists and was last modified at least CHANGES_DAYS ago.
 *
 * @param {string} type
 * @returns {Promise<Set<number>>} ids flagged for re-download.
 */
async function findChanges(type) {
  const now = new Date();
  const start = new Date(now.getTime() - CHANGES_SECS * 1000);
  const startdate = ymd(start);
  const enddate = ymd(now);
  const baseUrl = `${TMDB_API_BASE}/${type}/changes?api_key=${TMDB_API_KEY}&start_date=${startdate}&end_date=${enddate}&page=`;

  const updates = new Set();
  // `total` starts at 1 and is raised as soon as a response reports total_pages.
  let total = 1;
  for (let page = 1; page <= total; page++) {
    const url = `${baseUrl}${page}`;
    console.log(`Downloading: "${url}"`);
    const obj = await fetchJson(url);
    if (!obj) {
      // Report the page that actually failed, not just the page-less base URL.
      console.log(`Failed to retrieve TMDb data: "${url}"`);
      continue;
    }
    if (typeof obj.total_pages === 'number') total = obj.total_pages;
    if (!Array.isArray(obj.results)) continue;

    for (const change of obj.results) {
      const id = change.id;
      let st;
      try {
        // A missing file throws here too, so no separate existence check is needed.
        st = statSync(entryPath(type, id));
      } catch {
        continue;
      }
      // PHP uses filectime; on Linux ctime tracks metadata changes too, but the
      // intent is "last time the local file was refreshed". We use mtime which
      // is closer to that intent in JS (writeFile updates mtime).
      const ageSecs = (Date.now() - st.mtimeMs) / 1000;
      if (ageSecs >= CHANGES_SECS) {
        const days = Math.floor(ageSecs / 86400);
        const hours = Math.floor((ageSecs % 86400) / 3600);
        const minutes = Math.floor((ageSecs % 3600) / 60);
        console.log(`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`);
        updates.add(id);
      }
    }
  }
  return updates;
}

/**
 * Read `${type}.json` from TMDBINTEGRAL_DIR line by line (one JSON object per
 * line) and collect every numeric `id`. Blank and malformed lines are skipped.
 *
 * @param {string} type
 * @returns {Promise<number[]>} ids in file order (duplicates are not removed).
 * @throws If the file cannot be opened or a read error occurs part-way, so a
 *   truncated list is never returned to the caller.
 */
async function readMasterIds(type) {
  const file = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const ids = [];
  const stream = createReadStream(file, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });

  // The readline async iterator is documented as not forwarding input-stream
  // errors, so capture them here and end the iteration explicitly.
  let streamError = null;
  stream.once('error', (err) => {
    streamError = err;
    rl.close();
  });

  for await (const line of rl) {
    if (!line) continue;
    try {
      const obj = JSON.parse(line);
      if (typeof obj.id === 'number') ids.push(obj.id);
    } catch {
      /* ignore malformed lines */
    }
  }

  if (streamError) throw streamError;
  return ids;
}

/**
 * Create `dir` (and any missing parents). A no-op when it already exists.
 *
 * @param {string} dir
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
  // recursive mkdir is idempotent, so no prior existence check is required.
  await mkdir(dir, { recursive: true });
}

/**
 * Download the detail document for one entry and store it at its cache path.
 * A non-OK HTTP response is logged and leaves any existing file untouched.
 *
 * @param {string} type
 * @param {number} id
 * @returns {Promise<void>}
 * @throws On network failure or when the file cannot be written.
 */
async function downloadDetail(type, id) {
  const dir = entryDir(type, id);
  await ensureDir(dir);
  const path = entryPath(type, id);
  console.log(`Downloading: "${type}/${bucket(id)}/${id}.json"`);
  const url = detailUrl(type, id);
  const res = await fetch(url);
  if (!res.ok) {
    console.log(`Failed to retrieve TMDb data: "${url}"`);
    return;
  }
  const text = await res.text();

  // Write to a temp file and rename into place: an interrupted write must not
  // leave a truncated "<id>.json" that later runs would treat as already cached.
  const tmpPath = `${path}.${process.pid}.tmp`;
  try {
    await writeFile(tmpPath, text);
    await rename(tmpPath, path);
  } catch (err) {
    // Best-effort cleanup; the original error is the one worth reporting.
    await unlink(tmpPath).catch(() => {});
    throw err;
  }
}

/**
 * Delete cached "<id>.json" files whose id is not in the master list.
 * Only files named exactly "<digits>.json" one level below the type's base
 * directory are considered; everything else is left alone.
 *
 * @param {string} type - 'movie' uses MOVIE_DIR, anything else TV_DIR.
 * @param {number[]} sortedIds - ids from the master list.
 * @returns {void}
 */
function removeOrphans(type, sortedIds) {
  // An empty master list would mark every cached file as an orphan; treat it as
  // a bad input rather than wiping the cache.
  if (sortedIds.length === 0) {
    console.log(`Skipping orphan removal for "${type}": master list is empty`);
    return;
  }

  // Walk every bucket directory once, build a set of expected ids, delete the rest.
  const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
  const expected = new Set(sortedIds);

  let buckets;
  try {
    buckets = readdirSync(baseDir);
  } catch {
    return;
  }

  for (const b of buckets) {
    let entries;
    try {
      entries = readdirSync(join(baseDir, b));
    } catch {
      // Not a readable directory (e.g. a stray file at bucket level): skip it.
      continue;
    }
    for (const fname of entries) {
      const match = ENTRY_FILE_RE.exec(fname);
      if (!match) continue;
      const id = Number(match[1]);
      if (expected.has(id)) continue;

      const p = join(baseDir, b, fname);
      console.log(`Removing: "${type}/${b}/${fname}"`);
      try {
        unlinkSync(p);
      } catch (err) {
        // Removal stays best-effort, but the failure is made visible.
        console.error(`Failed to remove "${p}": ${err.message}`);
      }
    }
  }
}

/**
 * Synchronise the local cache for one type: find changed entries, download
 * every missing or flagged detail file, then remove orphaned files.
 *
 * @param {string} type - 'movie' uses MOVIE_DIR; any other value uses TV_DIR.
 * @returns {Promise<void>}
 * @throws If listing changes or reading the master list fails. Individual
 *   download failures are logged and do not abort the run.
 */
export async function syncType(type) {
  const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
  await mkdir(baseDir, { recursive: true });

  const updates = await findChanges(type);
  const ids = await readMasterIds(type);

  const limiter = new Limiter(DOWNLOAD_CONCURRENCY);
  // Keyed by id so a duplicate line in the master list cannot start two
  // concurrent downloads of the same file.
  const pending = new Map();
  for (const id of ids) {
    if (pending.has(id)) continue;
    if (!updates.has(id) && existsSync(entryPath(type, id))) continue;
    pending.set(id, limiter.run(() => downloadDetail(type, id)));
  }

  const queuedIds = [...pending.keys()];
  const results = await Promise.allSettled(pending.values());
  results.forEach((result, i) => {
    if (result.status === 'rejected') {
      const reason = result.reason?.message ?? result.reason;
      console.error(`Download failed: "${type}/${queuedIds[i]}": ${reason}`);
    }
  });

  ids.sort((a, b) => a - b);
  removeOrphans(type, ids);
}

// Run as a CLI only when this module is the entry script. pathToFileURL applies
// the same encoding as import.meta.url (spaces, Windows drive letters, ...).
const invokedPath = process.argv[1];
if (invokedPath && import.meta.url === pathToFileURL(invokedPath).href) {
  const type = process.argv[2];
  if (type !== 'movie' && type !== 'tv') {
    console.error('Usage: node cron/tmdbSync.js movie|tv');
    process.exit(1);
  }
  syncType(type).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}