// Port of tmdbintegral/tmdbintegral.php
//
// 1. Fetch /changes for the last CHANGES_DAYS to find recently-modified entries
//    whose local cache file is older than CHANGES_DAYS (so we re-download them).
// 2. Stream <type>.json line-by-line, ensure each id has a local detail file
//    (downloading it if missing or flagged for update).
// 3. Walk through every numeric id < max(tmdbs) and remove orphan files that
//    no longer appear in the master list.
import { createReadStream, existsSync, readdirSync, statSync, unlinkSync } from 'node:fs';
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { createInterface } from 'node:readline';

import { CHANGES_DAYS, MOVIE_DIR, TMDB_API_BASE, TMDB_API_KEY, TMDBINTEGRAL_DIR, TV_DIR } from '../config.js';
import { fetchJson, Limiter } from '../lib/http.js';
import { bucket, entryDir, entryPath } from '../lib/paths.js';

// Size of the /changes lookback window, in seconds.
const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;

// Maximum number of TMDb detail downloads in flight at once.
const DOWNLOAD_CONCURRENCY = 16;
/**
 * Format a Date as "YYYY-MM-DD" using its UTC components.
 *
 * @param {Date} date - the instant to format
 * @returns {string} zero-padded UTC calendar date
 */
function ymd(date) {
  const pad2 = (n) => String(n).padStart(2, '0');
  return `${date.getUTCFullYear()}-${pad2(date.getUTCMonth() + 1)}-${pad2(date.getUTCDate())}`;
}
/**
 * Build the append_to_response value for a TMDb detail request.
 * TV entries additionally receive aggregate_credits.
 *
 * @param {'movie'|'tv'} type
 * @returns {string} comma-separated list of sub-resources to bundle
 */
function appendResponse(type) {
  const parts = ['credits', 'external_ids', 'release_dates', 'translations', 'images', 'videos'];
  if (type === 'tv') {
    parts.splice(1, 0, 'aggregate_credits');
  }
  return parts.join(',');
}
/**
 * Build the full TMDb detail URL for one entry, with French localisation
 * and the appended sub-resources from appendResponse().
 *
 * @param {'movie'|'tv'} type
 * @param {number} id - TMDb entry id
 * @returns {string} absolute request URL
 */
function detailUrl(type, id) {
  const query = [
    `api_key=${TMDB_API_KEY}`,
    `append_to_response=${appendResponse(type)}`,
    'include_image_language=fr,null,en',
    'language=fr-FR',
  ].join('&');
  return `${TMDB_API_BASE}/${type}/${id}?${query}`;
}
/**
 * Query TMDb /changes for the last CHANGES_DAYS and collect the ids whose
 * local cache file exists but is older than the changes window, i.e. entries
 * that must be re-downloaded.
 *
 * @param {'movie'|'tv'} type
 * @returns {Promise<Set<number>>} ids flagged for refresh
 */
async function findChanges(type) {
  const now = new Date();
  const start = new Date(now.getTime() - CHANGES_DAYS * 86400 * 1000);
  const startdate = ymd(start);
  const enddate = ymd(now);
  const baseUrl = `${TMDB_API_BASE}/${type}/changes?api_key=${TMDB_API_KEY}&start_date=${startdate}&end_date=${enddate}&page=`;

  const updates = new Set();
  let total = 1; // refined from total_pages after the first successful page
  for (let page = 1; page <= total; page++) {
    const url = `${baseUrl}${page}`;
    console.log(`Downloading: "${url}"`);
    const obj = await fetchJson(url);
    if (!obj) {
      // Log the exact page URL that failed; the previous code logged the
      // base URL without the page number, which hid which page broke.
      console.log(`Failed to retrieve TMDb data: "${url}"`);
      continue;
    }
    if (typeof obj.total_pages === 'number') total = obj.total_pages;
    if (!Array.isArray(obj.results)) continue;

    for (const change of obj.results) {
      const id = change.id;
      const path = entryPath(type, id);
      if (!existsSync(path)) continue;
      let st;
      try {
        st = statSync(path);
      } catch {
        // File vanished between existsSync and statSync; nothing to refresh.
        continue;
      }
      // PHP uses filectime; on Linux ctime tracks metadata changes too, but the
      // intent is "last time the local file was refreshed". We use mtime which
      // is closer to that intent in JS (writeFile updates mtime).
      const ageSecs = (Date.now() - st.mtimeMs) / 1000;
      if (ageSecs >= CHANGES_SECS) {
        const days = Math.floor(ageSecs / 86400);
        const hours = Math.floor((ageSecs % 86400) / 3600);
        const minutes = Math.floor((ageSecs % 3600) / 60);
        console.log(
          `Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`,
        );
        updates.add(id);
      }
    }
  }
  return updates;
}
/**
 * Stream the master export <type>.json (one JSON object per line) and
 * collect every numeric id, skipping blank or malformed lines.
 *
 * @param {'movie'|'tv'} type
 * @returns {Promise<number[]>} ids in file order
 */
async function readMasterIds(type) {
  const masterFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const reader = createInterface({
    input: createReadStream(masterFile, { encoding: 'utf8' }),
    crlfDelay: Infinity,
  });

  const collected = [];
  for await (const raw of reader) {
    if (!raw) continue;
    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch {
      continue; // ignore malformed lines
    }
    if (typeof parsed.id === 'number') collected.push(parsed.id);
  }
  return collected;
}
/**
 * Make sure a directory exists, creating parents as needed.
 *
 * The previous existsSync() pre-check was removed: mkdir with
 * recursive:true already succeeds when the directory exists, and the
 * check introduced a TOCTOU race with concurrent downloads creating
 * the same bucket directory.
 *
 * @param {string} dir - absolute directory path
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
  await mkdir(dir, { recursive: true });
}
/**
 * Fetch one TMDb detail document and persist it to its bucketed cache path.
 * Logs and returns without writing when the HTTP response is not OK.
 *
 * @param {'movie'|'tv'} type
 * @param {number} id - TMDb entry id
 * @returns {Promise<void>}
 */
async function downloadDetail(type, id) {
  await ensureDir(entryDir(type, id));
  const target = entryPath(type, id);
  console.log(`Downloading: "${type}/${bucket(id)}/${id}.json"`);

  const url = detailUrl(type, id);
  const res = await fetch(url);
  if (!res.ok) {
    console.log(`Failed to retrieve TMDb data: "${url}"`);
    return;
  }
  const body = await res.text();
  await writeFile(target, body);
}
/**
 * Delete every cached <id>.json under the type's bucket tree whose id is no
 * longer present in the master list. The id list is turned into a Set, so
 * its order does not actually matter.
 *
 * @param {'movie'|'tv'} type
 * @param {number[]} sortedIds - ids currently present in the master list
 */
function removeOrphans(type, sortedIds) {
  const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
  const keep = new Set(sortedIds);

  // Directories may vanish while we walk (concurrent cleanup); treat any
  // readdir failure as "nothing to do" at that level.
  const listOrNull = (dir) => {
    try {
      return readdirSync(dir);
    } catch {
      return null;
    }
  };

  const bucketNames = listOrNull(baseDir);
  if (bucketNames === null) return;

  for (const bucketName of bucketNames) {
    const files = listOrNull(join(baseDir, bucketName));
    if (files === null) continue;

    for (const file of files) {
      if (!file.endsWith('.json')) continue;
      const id = Number.parseInt(file.slice(0, -5), 10);
      if (!Number.isInteger(id) || keep.has(id)) continue;

      console.log(`Removing: "${type}/${bucketName}/${file}"`);
      try {
        unlinkSync(join(baseDir, bucketName, file));
      } catch {
        /* ignore */
      }
    }
  }
}
/**
 * Full sync for one media type:
 *  1. find cached entries flagged by TMDb /changes,
 *  2. download every master-list id that is missing locally or flagged,
 *  3. prune cache files whose id left the master list.
 *
 * @param {'movie'|'tv'} type
 * @returns {Promise<void>}
 */
export async function syncType(type) {
  await mkdir(type === 'movie' ? MOVIE_DIR : TV_DIR, { recursive: true });

  const updates = await findChanges(type);
  const ids = await readMasterIds(type);

  // Queue a download for every id that was flagged or has no cache file yet.
  const limiter = new Limiter(DOWNLOAD_CONCURRENCY);
  const pending = [];
  for (const id of ids) {
    const needsFetch = updates.has(id) || !existsSync(entryPath(type, id));
    if (needsFetch) {
      pending.push(limiter.run(() => downloadDetail(type, id)));
    }
  }
  await Promise.allSettled(pending);

  ids.sort((a, b) => a - b);
  removeOrphans(type, ids);
}
// Allow direct CLI invocation: node cron/tmdbSync.js movie|tv
if (import.meta.url === `file://${process.argv[1]}`) {
  const type = process.argv[2];
  const isValidType = type === 'movie' || type === 'tv';
  if (!isValidType) {
    console.error('Usage: node cron/tmdbSync.js movie|tv');
    process.exit(1);
  }
  syncType(type).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}