// proxy_tmdb/cron/tmdbSync.js
// Port of tmdbintegral/tmdbintegral.php
//
// 1. Fetch /changes for the last CHANGES_DAYS to find recently-modified entries
// whose local cache file is older than CHANGES_DAYS (so we re-download them).
// 2. Stream <type>.json line-by-line, ensure each id has a local detail file
// (downloading it if missing or flagged for update).
// 3. Walk through every numeric id < max(tmdbs) and remove orphan files that
// no longer appear in the master list.
import { createReadStream, createWriteStream, existsSync, statSync, readdirSync, unlinkSync } from 'node:fs';
import { mkdir, stat, writeFile, unlink } from 'node:fs/promises';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import { pathToFileURL } from 'node:url';
import {
TMDBINTEGRAL_DIR, MOVIE_DIR, TV_DIR, TMDB_API_KEY, TMDB_API_BASE, CHANGES_DAYS,
} from '../config.js';
import { fetchJson, Limiter } from '../lib/http.js';
import { entryDir, entryPath, bucket } from '../lib/paths.js';
const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;
const DOWNLOAD_CONCURRENCY = 16;
function ymd(date) {
const y = date.getUTCFullYear();
const m = String(date.getUTCMonth() + 1).padStart(2, '0');
const d = String(date.getUTCDate()).padStart(2, '0');
return `${y}-${m}-${d}`;
}
function appendResponse(type) {
return type === 'tv'
? 'credits,aggregate_credits,external_ids,release_dates,translations,images,videos'
: 'credits,external_ids,release_dates,translations,images,videos';
}
function detailUrl(type, id) {
const base = `${TMDB_API_BASE}/${type}`;
return `${base}/${id}?api_key=${TMDB_API_KEY}&append_to_response=${appendResponse(type)}&include_image_language=fr,null,en&language=fr-FR`;
}
async function findChanges(type) {
const now = new Date();
const start = new Date(now.getTime() - CHANGES_DAYS * 86400 * 1000);
const startdate = ymd(start);
const enddate = ymd(now);
const baseUrl = `${TMDB_API_BASE}/${type}/changes?api_key=${TMDB_API_KEY}&start_date=${startdate}&end_date=${enddate}&page=`;
const updates = new Set();
let total = 1;
for (let page = 1; page <= total; page++) {
const url = `${baseUrl}${page}`;
console.log(`Downloading: "${url}"`);
const obj = await fetchJson(url);
if (!obj) {
console.log(`Failed to retrieve TMDb data: "${baseUrl}"`);
continue;
}
if (typeof obj.total_pages === 'number') total = obj.total_pages;
if (!Array.isArray(obj.results)) continue;
for (const change of obj.results) {
const id = change.id;
const path = entryPath(type, id);
if (!existsSync(path)) continue;
let st;
try { st = statSync(path); } catch { continue; }
// PHP uses filectime; on Linux ctime tracks metadata changes too, but the
// intent is "last time the local file was refreshed". We use mtime which
// is closer to that intent in JS (writeFile updates mtime).
const ageSecs = (Date.now() - st.mtimeMs) / 1000;
if (ageSecs >= CHANGES_SECS) {
const days = Math.floor(ageSecs / 86400);
const hours = Math.floor((ageSecs % 86400) / 3600);
const minutes = Math.floor((ageSecs % 3600) / 60);
console.log(`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`);
updates.add(id);
}
}
}
return updates;
}
async function readMasterIds(type) {
const file = join(TMDBINTEGRAL_DIR, `${type}.json`);
const ids = [];
const stream = createReadStream(file, { encoding: 'utf8' });
const rl = createInterface({ input: stream, crlfDelay: Infinity });
for await (const line of rl) {
if (!line) continue;
try {
const obj = JSON.parse(line);
if (typeof obj.id === 'number') ids.push(obj.id);
} catch { /* ignore malformed lines */ }
}
return ids;
}
async function ensureDir(dir) {
if (!existsSync(dir)) {
await mkdir(dir, { recursive: true });
}
}
async function downloadDetail(type, id) {
const dir = entryDir(type, id);
await ensureDir(dir);
const path = entryPath(type, id);
console.log(`Downloading: "${type}/${bucket(id)}/${id}.json"`);
const url = detailUrl(type, id);
const res = await fetch(url);
if (!res.ok) {
console.log(`Failed to retrieve TMDb data: "${url}"`);
return;
}
const text = await res.text();
await writeFile(path, text);
}
function removeOrphans(type, sortedIds) {
// Walk every bucket directory once, build a set of expected ids, delete the rest.
const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
const expected = new Set(sortedIds);
let buckets;
try { buckets = readdirSync(baseDir); } catch { return; }
for (const b of buckets) {
let entries;
try { entries = readdirSync(join(baseDir, b)); } catch { continue; }
for (const fname of entries) {
if (!fname.endsWith('.json')) continue;
const id = parseInt(fname.slice(0, -5), 10);
if (!Number.isInteger(id)) continue;
if (!expected.has(id)) {
const p = join(baseDir, b, fname);
console.log(`Removing: "${type}/${b}/${fname}"`);
try { unlinkSync(p); } catch { /* ignore */ }
}
}
}
}
export async function syncType(type) {
const updates = await findChanges(type);
const ids = await readMasterIds(type);
const limiter = new Limiter(DOWNLOAD_CONCURRENCY);
const tasks = [];
for (const id of ids) {
const path = entryPath(type, id);
if (!updates.has(id) && existsSync(path)) continue;
tasks.push(limiter.run(() => downloadDetail(type, id)));
}
await Promise.allSettled(tasks);
ids.sort((a, b) => a - b);
removeOrphans(type, ids);
}
if (import.meta.url === `file://${process.argv[1]}`) {
const type = process.argv[2];
if (type !== 'movie' && type !== 'tv') {
console.error('Usage: node cron/tmdbSync.js movie|tv');
process.exit(1);
}
syncType(type).catch((err) => {
console.error(err);
process.exit(1);
});
}