// Port of tmdbintegral/tmdbintegral.php
//
// 1. Fetch /changes for the last CHANGES_DAYS to find recently modified entries
//    whose local cache file is older than CHANGES_DAYS (so we re-download them).
// 2. Stream <type>.json line by line and ensure each id has a local detail file
//    (downloading it if missing or flagged for update).
// 3. Walk through every numeric id < max(tmdbs) and remove orphan files that
//    no longer appear in the master list.
|
|
|
|
import { createReadStream, createWriteStream, existsSync, statSync, readdirSync, unlinkSync } from 'node:fs';
import { mkdir, stat, writeFile, unlink } from 'node:fs/promises';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import { pathToFileURL } from 'node:url';

import {
  TMDBINTEGRAL_DIR, MOVIE_DIR, TV_DIR, TMDB_API_KEY, TMDB_API_BASE, CHANGES_DAYS,
} from '../config.js';
import { fetchJson, Limiter } from '../lib/http.js';
import { entryDir, entryPath, bucket } from '../lib/paths.js';
|
|
|
|
// CHANGES_DAYS expressed in seconds; used as the minimum age a cached file
// must have before a reported change triggers a re-download.
const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;

// Concurrency value handed to the download Limiter.
const DOWNLOAD_CONCURRENCY = 16;
|
|
|
|
/**
 * Format a date as `YYYY-MM-DD` from its UTC calendar fields.
 *
 * @param {Date} date - Date to format.
 * @returns {string} Year, zero-padded month and zero-padded day joined by `-`.
 */
function ymd(date) {
  const twoDigits = (value) => String(value).padStart(2, '0');
  const fields = [
    date.getUTCFullYear(),
    twoDigits(date.getUTCMonth() + 1), // getUTCMonth() is zero-based
    twoDigits(date.getUTCDate()),
  ];
  return fields.join('-');
}
|
|
|
|
/**
 * Build the comma-separated `append_to_response` value for a detail request.
 *
 * @param {string} type - Entry type; only `'tv'` gets the extra
 *   `aggregate_credits` section, every other value gets the base list.
 * @returns {string} Comma-separated list of sub-resources.
 */
function appendResponse(type) {
  const sections = ['credits', 'external_ids', 'release_dates', 'translations', 'images', 'videos'];
  if (type === 'tv') {
    // TV entries request aggregate credits right after the regular credits.
    sections.splice(1, 0, 'aggregate_credits');
  }
  return sections.join(',');
}
|
|
|
|
/**
 * Build the detail URL for one entry.
 *
 * @param {string} type - Entry type, used as the path segment after the API base.
 * @param {number|string} id - Entry id, interpolated into the path as-is.
 * @returns {string} URL carrying the API key, the `append_to_response`
 *   list for `type`, and the fixed image-language and language parameters.
 */
function detailUrl(type, id) {
  const query = [
    `api_key=${TMDB_API_KEY}`,
    `append_to_response=${appendResponse(type)}`,
    'include_image_language=fr,null,en',
    'language=fr-FR',
  ].join('&');
  return `${TMDB_API_BASE}/${type}/${id}?${query}`;
}
|
|
|
|
/**
 * Page through `<type>/changes` for the last CHANGES_DAYS and collect the ids
 * whose local cache file exists and is at least CHANGES_SECS old.
 *
 * A page for which `fetchJson` yields a falsy value is logged and skipped; the
 * remaining pages are still requested.
 *
 * @param {string} type - Entry type used in the API path and the cache path.
 * @returns {Promise<Set<number>>} Ids that should be re-downloaded.
 */
async function findChanges(type) {
  const now = new Date();
  const start = new Date(now.getTime() - CHANGES_DAYS * 86400 * 1000);
  const startdate = ymd(start);
  const enddate = ymd(now);
  const baseUrl = `${TMDB_API_BASE}/${type}/changes?api_key=${TMDB_API_KEY}&start_date=${startdate}&end_date=${enddate}&page=`;

  const updates = new Set();
  // Start with one page; the real count is taken from each response.
  let total = 1;
  for (let page = 1; page <= total; page++) {
    const url = `${baseUrl}${page}`;
    console.log(`Downloading: "${url}"`);
    const obj = await fetchJson(url);
    if (!obj) {
      // Log the full page URL so the failing page can be identified.
      console.log(`Failed to retrieve TMDb data: "${url}"`);
      continue;
    }
    if (typeof obj.total_pages === 'number') total = obj.total_pages;
    if (!Array.isArray(obj.results)) continue;

    for (const change of obj.results) {
      const id = change.id;
      let st;
      // A single stat covers both "not cached" and "unreadable": either way
      // there is nothing to refresh, so the entry is skipped.
      try {
        st = statSync(entryPath(type, id));
      } catch {
        continue;
      }
      // PHP uses filectime; on Linux ctime tracks metadata changes too, but the
      // intent is "last time the local file was refreshed". We use mtime which
      // is closer to that intent in JS (writeFile updates mtime).
      const ageSecs = (Date.now() - st.mtimeMs) / 1000;
      if (ageSecs < CHANGES_SECS) continue;

      const days = Math.floor(ageSecs / 86400);
      const hours = Math.floor((ageSecs % 86400) / 3600);
      const minutes = Math.floor((ageSecs % 3600) / 60);
      console.log(`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`);
      updates.add(id);
    }
  }
  return updates;
}
|
|
|
|
/**
 * Read `<type>.json` from TMDBINTEGRAL_DIR line by line and collect the
 * numeric `id` of every line that parses as JSON.
 *
 * Empty lines, lines that are not valid JSON, and lines without a numeric
 * `id` are skipped.
 *
 * @param {string} type - Base name of the master file (without `.json`).
 * @returns {Promise<number[]>} Ids in file order.
 */
async function readMasterIds(type) {
  // Returns the numeric id carried by one line, or undefined when there is none.
  const idOf = (text) => {
    try {
      const record = JSON.parse(text);
      return typeof record.id === 'number' ? record.id : undefined;
    } catch {
      // Malformed line (or a JSON `null`): nothing to collect.
      return undefined;
    }
  };

  const lines = createInterface({
    input: createReadStream(join(TMDBINTEGRAL_DIR, `${type}.json`), { encoding: 'utf8' }),
    crlfDelay: Infinity,
  });

  const collected = [];
  for await (const text of lines) {
    if (text === '') continue;
    const id = idOf(text);
    if (id !== undefined) collected.push(id);
  }
  return collected;
}
|
|
|
|
/**
 * Create `dir` (including missing parents) unless something already exists at
 * that path.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
  if (existsSync(dir)) return;
  await mkdir(dir, { recursive: true });
}
|
|
|
|
/**
 * Download the detail document for one entry and write the raw response text
 * to its cache path.
 *
 * Download failures (a non-OK response, or an error thrown while fetching or
 * reading the body) are logged and leave any existing cache file untouched.
 * Errors from creating the directory or writing the file still reject.
 *
 * @param {string} type - Entry type.
 * @param {number} id - Entry id.
 * @returns {Promise<void>}
 */
async function downloadDetail(type, id) {
  const dir = entryDir(type, id);
  await ensureDir(dir);
  const path = entryPath(type, id);
  console.log(`Downloading: "${type}/${bucket(id)}/${id}.json"`);
  const url = detailUrl(type, id);

  let text;
  try {
    const res = await fetch(url);
    if (!res.ok) {
      // Release the unread body so the connection is not held until GC.
      await res.body?.cancel();
      console.log(`Failed to retrieve TMDb data: "${url}"`);
      return;
    }
    text = await res.text();
  } catch (err) {
    // fetch() rejects on network-level errors; report them like an HTTP
    // failure instead of letting them disappear in the caller's allSettled.
    const reason = err instanceof Error ? err.message : String(err);
    console.log(`Failed to retrieve TMDb data: "${url}" (${reason})`);
    return;
  }
  await writeFile(path, text);
}
|
|
|
|
/**
 * Delete cached `<id>.json` files whose id is not in the master list.
 *
 * Scans every bucket directory under the type's base directory once. Only
 * files named exactly `<digits>.json` are considered; anything else is left
 * alone. Unreadable directories are skipped.
 *
 * @param {string} type - `'movie'` selects MOVIE_DIR, anything else TV_DIR.
 * @param {number[]} sortedIds - Ids present in the master list.
 * @returns {void}
 */
function removeOrphans(type, sortedIds) {
  // An empty master list (missing or truncated master file) would make every
  // cached file look orphaned; never wipe the cache on that basis.
  if (sortedIds.length === 0) {
    console.log(`Skipping orphan removal for "${type}": master id list is empty`);
    return;
  }

  const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
  const expected = new Set(sortedIds);
  // Strict match: parseInt would also accept names such as "3-backup.json".
  const cacheFile = /^(\d+)\.json$/;

  let buckets;
  try {
    buckets = readdirSync(baseDir);
  } catch {
    return;
  }
  for (const b of buckets) {
    let entries;
    try {
      entries = readdirSync(join(baseDir, b));
    } catch {
      // Not a directory or unreadable: nothing to clean here.
      continue;
    }
    for (const fname of entries) {
      const match = cacheFile.exec(fname);
      if (!match) continue;
      if (expected.has(Number(match[1]))) continue;

      console.log(`Removing: "${type}/${b}/${fname}"`);
      try {
        unlinkSync(join(baseDir, b, fname));
      } catch (err) {
        // Best effort: report and keep going with the remaining files.
        const reason = err instanceof Error ? err.message : String(err);
        console.log(`Failed to remove: "${type}/${b}/${fname}" (${reason})`);
      }
    }
  }
}
|
|
|
|
/**
 * Synchronise the local cache for one entry type.
 *
 * Steps: make sure the base directory exists, collect ids flagged by
 * `findChanges`, read the master id list, download every id that is either
 * flagged or has no cache file (through a Limiter built with
 * DOWNLOAD_CONCURRENCY), then remove orphaned cache files.
 *
 * A failed download does not abort the run; each rejected download is
 * reported on stderr once all downloads have settled.
 *
 * @param {string} type - `'movie'` selects MOVIE_DIR, anything else TV_DIR.
 * @returns {Promise<void>}
 */
export async function syncType(type) {
  const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
  await mkdir(baseDir, { recursive: true });
  const updates = await findChanges(type);
  const ids = await readMasterIds(type);

  const limiter = new Limiter(DOWNLOAD_CONCURRENCY);
  const queuedIds = []; // queuedIds[i] is the id behind tasks[i]
  const tasks = [];
  for (const id of ids) {
    const path = entryPath(type, id);
    if (!updates.has(id) && existsSync(path)) continue;
    queuedIds.push(id);
    tasks.push(limiter.run(() => downloadDetail(type, id)));
  }

  // allSettled keeps one failure from aborting the rest, but the outcomes
  // must still be inspected or rejected downloads vanish without a trace.
  const outcomes = await Promise.allSettled(tasks);
  outcomes.forEach((outcome, index) => {
    if (outcome.status === 'rejected') {
      console.error(`Download failed: "${type}/${queuedIds[index]}.json"`, outcome.reason);
    }
  });

  ids.sort((a, b) => a - b);
  removeOrphans(type, ids);
}
|
|
|
|
// CLI entry point: runs only when this module is the script passed to node.
// The script path is converted with pathToFileURL because import.meta.url is
// percent-encoded (spaces, non-ASCII characters) and a hand-built
// `file://${path}` string would not match it for such paths or on Windows.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const type = process.argv[2];
  if (type !== 'movie' && type !== 'tv') {
    console.error('Usage: node cron/tmdbSync.js movie|tv');
    process.exit(1);
  }
  syncType(type).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}
|