// proxy_tmdb/cron/tmdbSync.js
// Port of tmdbintegral/tmdbintegral.php
//
// 1. Fetch /changes for the last CHANGES_DAYS to find recently-modified entries
// whose local cache file is older than CHANGES_DAYS (so we re-download them).
// 2. Stream <type>.json line-by-line, ensure each id has a local detail file
// (downloading it if missing or flagged for update).
// 3. Walk through every numeric id < max(tmdbs) and remove orphan files that
// no longer appear in the master list.
import { createReadStream, createWriteStream, existsSync, statSync, readdirSync, unlinkSync } from 'node:fs';
import { mkdir, stat, writeFile, unlink } from 'node:fs/promises';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import { pathToFileURL } from 'node:url';
import {
TMDBINTEGRAL_DIR, MOVIE_DIR, TV_DIR, TMDB_API_KEY, TMDB_API_BASE, CHANGES_DAYS,
} from '../config.js';
import { fetchJson, Limiter } from '../lib/http.js';
import { entryDir, entryPath, bucket } from '../lib/paths.js';
const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;
const DOWNLOAD_CONCURRENCY = 16;
function ymd(date) {
const y = date.getUTCFullYear();
const m = String(date.getUTCMonth() + 1).padStart(2, '0');
const d = String(date.getUTCDate()).padStart(2, '0');
return `${y}-${m}-${d}`;
}
function appendResponse(type) {
return type === 'tv'
? 'credits,aggregate_credits,external_ids,release_dates,translations,images,videos'
: 'credits,external_ids,release_dates,translations,images,videos';
}
function detailUrl(type, id) {
const base = `${TMDB_API_BASE}/${type}`;
return `${base}/${id}?api_key=${TMDB_API_KEY}&append_to_response=${appendResponse(type)}&include_image_language=fr,null,en&language=fr-FR`;
}
async function findChanges(type) {
const now = new Date();
const start = new Date(now.getTime() - CHANGES_DAYS * 86400 * 1000);
const startdate = ymd(start);
const enddate = ymd(now);
const baseUrl = `${TMDB_API_BASE}/${type}/changes?api_key=${TMDB_API_KEY}&start_date=${startdate}&end_date=${enddate}&page=`;
const updates = new Set();
let total = 1;
for (let page = 1; page <= total; page++) {
const url = `${baseUrl}${page}`;
console.log(`Downloading: "${url}"`);
const obj = await fetchJson(url);
if (!obj) {
console.log(`Failed to retrieve TMDb data: "${baseUrl}"`);
continue;
}
if (typeof obj.total_pages === 'number') total = obj.total_pages;
if (!Array.isArray(obj.results)) continue;
for (const change of obj.results) {
const id = change.id;
const path = entryPath(type, id);
if (!existsSync(path)) continue;
let st;
try { st = statSync(path); } catch { continue; }
// PHP uses filectime; on Linux ctime tracks metadata changes too, but the
// intent is "last time the local file was refreshed". We use mtime which
// is closer to that intent in JS (writeFile updates mtime).
const ageSecs = (Date.now() - st.mtimeMs) / 1000;
if (ageSecs >= CHANGES_SECS) {
const days = Math.floor(ageSecs / 86400);
const hours = Math.floor((ageSecs % 86400) / 3600);
const minutes = Math.floor((ageSecs % 3600) / 60);
console.log(`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`);
updates.add(id);
}
}
}
return updates;
}
async function readMasterIds(type) {
const file = join(TMDBINTEGRAL_DIR, `${type}.json`);
const ids = [];
const stream = createReadStream(file, { encoding: 'utf8' });
const rl = createInterface({ input: stream, crlfDelay: Infinity });
for await (const line of rl) {
if (!line) continue;
try {
const obj = JSON.parse(line);
if (typeof obj.id === 'number') ids.push(obj.id);
} catch { /* ignore malformed lines */ }
}
return ids;
}
async function ensureDir(dir) {
if (!existsSync(dir)) {
await mkdir(dir, { recursive: true });
}
}
async function downloadDetail(type, id) {
const dir = entryDir(type, id);
await ensureDir(dir);
const path = entryPath(type, id);
console.log(`Downloading: "${type}/${bucket(id)}/${id}.json"`);
const url = detailUrl(type, id);
const res = await fetch(url);
if (!res.ok) {
console.log(`Failed to retrieve TMDb data: "${url}"`);
return;
}
const text = await res.text();
await writeFile(path, text);
}
function removeOrphans(type, sortedIds) {
// Walk every bucket directory once, build a set of expected ids, delete the rest.
const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
const expected = new Set(sortedIds);
let buckets;
try { buckets = readdirSync(baseDir); } catch { return; }
for (const b of buckets) {
let entries;
try { entries = readdirSync(join(baseDir, b)); } catch { continue; }
for (const fname of entries) {
if (!fname.endsWith('.json')) continue;
const id = parseInt(fname.slice(0, -5), 10);
if (!Number.isInteger(id)) continue;
if (!expected.has(id)) {
const p = join(baseDir, b, fname);
console.log(`Removing: "${type}/${b}/${fname}"`);
try { unlinkSync(p); } catch { /* ignore */ }
}
}
}
}
export async function syncType(type) {
const updates = await findChanges(type);
const ids = await readMasterIds(type);
const limiter = new Limiter(DOWNLOAD_CONCURRENCY);
const tasks = [];
for (const id of ids) {
const path = entryPath(type, id);
if (!updates.has(id) && existsSync(path)) continue;
tasks.push(limiter.run(() => downloadDetail(type, id)));
}
await Promise.allSettled(tasks);
ids.sort((a, b) => a - b);
removeOrphans(type, ids);
}
if (import.meta.url === `file://${process.argv[1]}`) {
const type = process.argv[2];
if (type !== 'movie' && type !== 'tv') {
console.error('Usage: node cron/tmdbSync.js movie|tv');
process.exit(1);
}
syncType(type).catch((err) => {
console.error(err);
process.exit(1);
});
}