Portage complet PHP/Bash vers Node.js (Fastify + worker_threads)
This commit is contained in:
104
cron/ambiguity.js
Normal file
104
cron/ambiguity.js
Normal file
@@ -0,0 +1,104 @@
|
||||
// Port of tmdbintegral/ambiguity.php
|
||||
// Detects pairs of distinct TMDb ids whose filtered titles collide and whose
|
||||
// years are within YEARTOLERANCE.
|
||||
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
|
||||
|
||||
const TMDB = 0;
|
||||
const FILTEREDTITLE = 4;
|
||||
const FILTEREDENGLISHTITLE = 5;
|
||||
const FILTEREDORIGINALTITLE = 6;
|
||||
const YEAR = 7;
|
||||
|
||||
/**
 * Port of tmdbintegral/ambiguity.php.
 *
 * Reads the chunked search databases (search<type>0..N-1.json) and writes
 * ambiguity<type>.csv listing every pair of distinct TMDb ids whose filtered
 * titles collide exactly and whose years are within the tolerance
 * (1 year for movies; 200 for tv, i.e. effectively any year).
 *
 * Output line format: "tmdb1;lang1;tmdb2;lang2".
 *
 * @param {'movie'|'tv'} type
 * @param {number} nbParts number of search chunks to read
 */
export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
  // tv uses a huge tolerance so colliding tv titles match regardless of year.
  const yearTolerance = type === 'tv' ? 200 : 1;
  const out = join(TMDBINTEGRAL_DIR, `ambiguity${type}.csv`);

  // Concatenate all search chunks into one in-memory database.
  const database = [];
  for (let p = 0; p < nbParts; p++) {
    const file = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    const chunk = JSON.parse(readFileSync(file, 'utf8'));
    for (const e of chunk) database.push(e);
  }

  // Flatten each entry into up to three (tmdb, title, year, language) rows,
  // one per non-empty filtered title variant (FR / EN / VO). Only the first
  // year of each entry (YEAR[0]) is used for matching.
  const tmdbs = [];
  const filteredTitles = [];
  const languages = [];
  const years = [];
  for (const db of database) {
    const fr = db[FILTEREDTITLE];
    const en = db[FILTEREDENGLISHTITLE];
    const vo = db[FILTEREDORIGINALTITLE];
    if (fr) { tmdbs.push(db[TMDB]); filteredTitles.push(fr); years.push(db[YEAR][0]); languages.push('FR'); }
    if (en) { tmdbs.push(db[TMDB]); filteredTitles.push(en); years.push(db[YEAR][0]); languages.push('EN'); }
    if (vo) { tmdbs.push(db[TMDB]); filteredTitles.push(vo); years.push(db[YEAR][0]); languages.push('VO'); }
  }

  // PHP: array_multisort(filteredtitles, years, tmdbs, languages)
  // Sort indices by (filteredTitle ASC, year ASC, tmdb ASC, language ASC).
  const idx = filteredTitles.map((_, i) => i);
  idx.sort((a, b) => {
    if (filteredTitles[a] < filteredTitles[b]) return -1;
    if (filteredTitles[a] > filteredTitles[b]) return 1;
    if (years[a] !== years[b]) return years[a] - years[b];
    if (tmdbs[a] !== tmdbs[b]) return tmdbs[a] - tmdbs[b];
    if (languages[a] < languages[b]) return -1;
    if (languages[a] > languages[b]) return 1;
    return 0;
  });

  const sortedTmdbs = idx.map((i) => tmdbs[i]);
  const sortedFiltered = idx.map((i) => filteredTitles[i]);
  const sortedYears = idx.map((i) => years[i]);
  const sortedLanguages = idx.map((i) => languages[i]);

  // Scan the sorted rows, grouping consecutive runs sharing a filtered title.
  let oldTmdb = 0;      // last tmdb seen (carries over across groups, as in PHP)
  let nbTmdbs = 0;      // tmdb transitions counted inside the current group
  let oldFiltered = ''; // filtered title of the current group
  let ambiguities = []; // rows of the current group: [tmdb, year, language]
  const lines = [];     // output CSV lines

  // Emit all cross-id pairs of the finished group whose years are within
  // tolerance. A qualifying pair is emitted in BOTH orders (a1...a2 and
  // a2...a1) because both loops run over the full group.
  const flush = () => {
    if (nbTmdbs >= 2) {
      for (const a1 of ambiguities) {
        for (const a2 of ambiguities) {
          if (a1[0] !== a2[0] && Math.abs(a1[1] - a2[1]) <= yearTolerance) {
            lines.push(`${a1[0]};${a1[2]};${a2[0]};${a2[2]}`);
          }
        }
      }
    }
    ambiguities = [];
    nbTmdbs = 0;
  };

  for (let i = 0; i < sortedFiltered.length; i++) {
    // NOTE(review): oldTmdb is not reset when a new title group starts, so a
    // group whose first tmdb equals the previous group's last tmdb is
    // under-counted by one — presumably inherited from ambiguity.php; confirm
    // against the original before "fixing".
    if (sortedTmdbs[i] !== oldTmdb) nbTmdbs++;
    oldTmdb = sortedTmdbs[i];

    if (sortedFiltered[i] !== oldFiltered) {
      flush();
    }
    oldFiltered = sortedFiltered[i];
    ambiguities.push([sortedTmdbs[i], sortedYears[i], sortedLanguages[i]]);
  }
  flush(); // emit the final group

  // Trailing newline only when there is output; empty file otherwise.
  await writeFile(out, lines.length ? lines.join('\n') + '\n' : '');
}
|
||||
|
||||
// CLI entry point: node cron/ambiguity.js movie|tv [nbParts]
if (import.meta.url === `file://${process.argv[1]}`) {
  const [, , type, nbArg] = process.argv;
  const nb = parseInt(nbArg || String(NB_SEARCH_PARTS), 10);
  const valid = type === 'movie' || type === 'tv';
  if (!valid) {
    console.error('Usage: node cron/ambiguity.js movie|tv [nbParts]');
    process.exit(1);
  }
  buildAmbiguity(type, nb).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}
|
||||
133
cron/buildSearch.js
Normal file
133
cron/buildSearch.js
Normal file
@@ -0,0 +1,133 @@
|
||||
// Port of tmdbintegral/search.php
|
||||
// Builds the chunked search database files (searchmovieN.json / searchtvN.json).
|
||||
//
|
||||
// Each entry has the same positional shape as the PHP version:
|
||||
// [TMDB, TITLE, ENGLISHTITLE, ORIGINALTITLE,
|
||||
// FILTEREDTITLE, FILTEREDENGLISHTITLE, FILTEREDORIGINALTITLE,
|
||||
// YEARS[], POPULARITY]
|
||||
// so the runtime search worker can use the same indices.
|
||||
|
||||
import { createReadStream, existsSync, readFileSync } from 'node:fs';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { join } from 'node:path';
|
||||
import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
|
||||
import { entryPath } from '../lib/paths.js';
|
||||
import { filterTitle } from '../lib/titleFilter.js';
|
||||
import { mbStrlen } from '../lib/mbLevenshtein.js';
|
||||
|
||||
function lower(s) { return s.toLocaleLowerCase(); }
|
||||
|
||||
// Pull the English ('en') translation title out of a TMDb detail document.
// Movies store it under data.title, tv under data.name. Returns '' when the
// translations block is missing or no English entry exists.
function extractEnglishTitle(detail, type) {
  const translations = detail?.translations?.translations;
  if (!Array.isArray(translations)) return '';
  const english = translations.find((entry) => entry.iso_639_1 === 'en');
  if (!english) return '';
  const value = type === 'movie' ? english.data?.title : english.data?.name;
  return value || '';
}
|
||||
|
||||
/**
 * Build one positional search entry from a master-export object and its
 * cached TMDb detail document.
 *
 * Returns
 *   [TMDB, TITLE, ENGLISHTITLE, ORIGINALTITLE,
 *    FILTEREDTITLE, FILTEREDENGLISHTITLE, FILTEREDORIGINALTITLE,
 *    YEARS[], POPULARITY]
 * (same positional shape as the PHP version), or null when the entry has no
 * usable primary year or no usable filtered title.
 *
 * @param {{id: number, popularity?: number|string}} masterObj line from <type>.json
 * @param {object} detail cached TMDb detail JSON
 * @param {'movie'|'tv'} type
 */
function buildEntry(masterObj, detail, type) {
  const tmdb = masterObj.id;
  const popularity = parseFloat(masterObj.popularity) || 0; // NaN -> 0

  let title, originalTitle, englishTitle;
  const years = [];

  if (type === 'movie') {
    // Primary year: the release year; 0 when missing or unparsable.
    const date = String(detail.release_date || '').split('-');
    years.push(parseInt(date[0], 10) || 0);
    title = detail.title || '';
    originalTitle = detail.original_title || '';
    englishTitle = extractEnglishTitle(detail, 'movie');
  } else {
    // tv: primary year is the series premiere year...
    const date = String(detail.first_air_date || '').split('-');
    years.push(parseInt(date[0], 10) || 0);
    title = detail.name || '';
    originalTitle = detail.original_name || '';
    englishTitle = extractEnglishTitle(detail, 'tv');
    // ...followed by the air year of every season that has one.
    if (Array.isArray(detail.seasons)) {
      for (const s of detail.seasons) {
        const sd = String(s.air_date || '').split('-');
        const sy = parseInt(sd[0], 10);
        if (sy) years.push(sy);
      }
    }
  }

  // No primary year -> entry is unusable for year-based matching.
  if (!years[0]) return null;

  let ft = filterTitle(title);
  let fe = filterTitle(englishTitle);
  let fo = filterTitle(originalTitle);

  // Nothing searchable survived filtering.
  if (!ft && !fe && !fo) return null;

  // Drop a variant when less than half of its characters survived filtering:
  // such a title is too mangled to be a reliable search key.
  if (ft && mbStrlen(ft) / mbStrlen(title) < 0.5) ft = '';
  if (fe && mbStrlen(fe) / mbStrlen(englishTitle) < 0.5) fe = '';
  if (fo && mbStrlen(fo) / mbStrlen(originalTitle) < 0.5) fo = '';

  // Dedupe years preserving order (PHP array_values(array_unique($years)))
  const seen = new Set();
  const uniqYears = [];
  for (const y of years) {
    if (!seen.has(y)) { seen.add(y); uniqYears.push(y); }
  }

  return [
    tmdb,
    title,
    englishTitle,
    originalTitle,
    lower(ft),
    lower(fe),
    lower(fo),
    uniqYears,
    popularity,
  ];
}
|
||||
|
||||
/**
 * Build the chunked search database files (search<type>0..N-1.json) by
 * streaming the master export <type>.json line by line and joining each id
 * with its cached detail file.
 *
 * @param {'movie'|'tv'} type
 * @param {number} nbParts number of output chunks
 */
export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
  const masterFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const entries = [];

  const reader = createInterface({
    input: createReadStream(masterFile, { encoding: 'utf8' }),
    crlfDelay: Infinity,
  });

  for await (const raw of reader) {
    if (!raw) continue;
    let master;
    try {
      master = JSON.parse(raw);
    } catch {
      continue; // skip malformed master lines
    }
    const detailFile = entryPath(type, master.id);
    if (!existsSync(detailFile)) continue; // not cached locally yet
    let detail;
    try {
      detail = JSON.parse(readFileSync(detailFile, 'utf8'));
    } catch {
      continue; // skip corrupt detail files
    }
    const built = buildEntry(master, detail, type);
    if (built) entries.push(built);
  }

  // Split into nbParts chunks and write them concurrently.
  const partSize = Math.ceil(entries.length / nbParts);
  const pending = Array.from({ length: nbParts }, (_, p) => {
    const chunk = entries.slice(p * partSize, (p + 1) * partSize);
    const outFile = join(TMDBINTEGRAL_DIR, `search${type}${p}.json`);
    console.log(`Writing ${chunk.length} entries to search${type}${p}.json`);
    return writeFile(outFile, JSON.stringify(chunk));
  });
  await Promise.all(pending);
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
const type = process.argv[2];
|
||||
const nb = parseInt(process.argv[3] || String(NB_SEARCH_PARTS), 10);
|
||||
if (type !== 'movie' && type !== 'tv') {
|
||||
console.error('Usage: node cron/buildSearch.js movie|tv [nbParts]');
|
||||
process.exit(1);
|
||||
}
|
||||
buildSearch(type, nb).catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
36
cron/imdbRatings.js
Normal file
36
cron/imdbRatings.js
Normal file
@@ -0,0 +1,36 @@
|
||||
import { createWriteStream } from 'node:fs';
|
||||
import { rename, unlink } from 'node:fs/promises';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
import { createGunzip } from 'node:zlib';
|
||||
import { Readable } from 'node:stream';
|
||||
import { join } from 'node:path';
|
||||
import { ROOT, IMDB_DATASETS_BASE, IMDB_RATINGS } from '../config.js';
|
||||
|
||||
const FILE = 'title.ratings.tsv';
|
||||
|
||||
/**
 * Download the IMDb title.ratings dataset, gunzip it through a temp file and
 * atomically rename it into place at IMDB_RATINGS.
 *
 * @throws on HTTP failure or any stream error; a partially-written temp file
 *   is removed before rethrowing.
 */
export async function syncImdbRatings() {
  const url = `${IMDB_DATASETS_BASE}/${FILE}.gz`;
  const tmpPath = join(ROOT, `${FILE}.tmp`);

  console.log(`Downloading: "${url}"`);
  const res = await fetch(url);
  if (!res.ok || !res.body) {
    throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
  }

  try {
    await pipeline(
      Readable.fromWeb(res.body),
      createGunzip(),
      createWriteStream(tmpPath),
    );
  } catch (err) {
    // Don't leave a partially-written temp file behind on failure.
    await unlink(tmpPath).catch(() => {});
    throw err;
  }

  // Rename is atomic on the same filesystem, so readers never see a partial file.
  await rename(tmpPath, IMDB_RATINGS);
  console.log(`Wrote ${IMDB_RATINGS}`);
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
syncImdbRatings().catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
93
cron/justwatchSync.js
Normal file
93
cron/justwatchSync.js
Normal file
@@ -0,0 +1,93 @@
|
||||
// Port of tmdbintegral/justwatch.php
|
||||
|
||||
import { createReadStream, existsSync, readdirSync, unlinkSync } from 'node:fs';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
TMDBINTEGRAL_DIR, JUSTWATCH_MOVIE_DIR, JUSTWATCH_TV_DIR, TMDB_API_KEY, TMDB_API_BASE,
|
||||
} from '../config.js';
|
||||
import { Limiter } from '../lib/http.js';
|
||||
import { justwatchDir, justwatchPath, bucket } from '../lib/paths.js';
|
||||
|
||||
const DOWNLOAD_CONCURRENCY = 16;
|
||||
|
||||
// Stream the master export <type>.json (one JSON object per line) and collect
// every numeric id. Malformed lines are skipped silently.
async function readMasterIds(type) {
  const masterFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const reader = createInterface({
    input: createReadStream(masterFile, { encoding: 'utf8' }),
    crlfDelay: Infinity,
  });
  const result = [];
  for await (const raw of reader) {
    if (raw === '') continue;
    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch {
      continue; // malformed line
    }
    if (typeof parsed.id === 'number') result.push(parsed.id);
  }
  return result;
}
|
||||
|
||||
async function ensureDir(dir) {
|
||||
if (!existsSync(dir)) await mkdir(dir, { recursive: true });
|
||||
}
|
||||
|
||||
async function downloadProvider(type, id) {
|
||||
const dir = justwatchDir(type, id);
|
||||
await ensureDir(dir);
|
||||
const path = justwatchPath(type, id);
|
||||
const url = `${TMDB_API_BASE}/${type}/${id}/watch/providers?api_key=${TMDB_API_KEY}`;
|
||||
console.log(`Downloading: "justwatch${type}/${bucket(id)}/${id}.json"`);
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) {
|
||||
console.log(`Failed to retrieve TMDb data: "${url}"`);
|
||||
return;
|
||||
}
|
||||
const text = await res.text();
|
||||
await writeFile(path, text);
|
||||
}
|
||||
|
||||
// Delete every cached provider file whose id is no longer in the master list.
// Walks each bucket directory once; a missing base dir means nothing to clean.
function removeOrphans(type, ids) {
  const root = type === 'movie' ? JUSTWATCH_MOVIE_DIR : JUSTWATCH_TV_DIR;
  const keep = new Set(ids);
  let bucketNames;
  try {
    bucketNames = readdirSync(root);
  } catch {
    return; // base dir absent: nothing to do
  }
  for (const bucketName of bucketNames) {
    let files;
    try {
      files = readdirSync(join(root, bucketName));
    } catch {
      continue;
    }
    for (const file of files) {
      if (!file.endsWith('.json')) continue;
      const id = parseInt(file.slice(0, -5), 10);
      if (!Number.isInteger(id) || keep.has(id)) continue;
      console.log(`Removing: "justwatch${type}/${bucketName}/${file}"`);
      try {
        unlinkSync(join(root, bucketName, file));
      } catch { /* ignore */ }
    }
  }
}
|
||||
|
||||
/**
 * Download the JustWatch provider file for every master-list id that is not
 * cached yet (bounded concurrency), then prune files for ids that left the
 * master list.
 *
 * @param {'movie'|'tv'} type
 */
export async function syncType(type) {
  const ids = await readMasterIds(type);
  const limiter = new Limiter(DOWNLOAD_CONCURRENCY);
  const pending = ids
    .filter((id) => !existsSync(justwatchPath(type, id)))
    .map((id) => limiter.run(() => downloadProvider(type, id)));
  await Promise.allSettled(pending); // individual failures don't abort the sync
  ids.sort((a, b) => a - b);
  removeOrphans(type, ids);
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
const type = process.argv[2];
|
||||
if (type !== 'movie' && type !== 'tv') {
|
||||
console.error('Usage: node cron/justwatchSync.js movie|tv');
|
||||
process.exit(1);
|
||||
}
|
||||
syncType(type).catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
19
cron/run.sh
Executable file
19
cron/run.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
#
# Cron launch wrapper for an nvm-based environment.
# Cron does not inherit nvm's PATH — this script loads nvm, then starts the
# Node cron pipeline.
#
# Crontab usage:
# 13 13 * * * /home/matt/_WEB/proxytmdb/cron/run.sh > /home/matt/_WEB/proxytmdb/lastcron.txt 2>&1

# Abort on the first failing command.
set -e

export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
# shellcheck source=/dev/null
[ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"

# Switch to nvm's "default" version (follows `nvm alias default`)
nvm use default >/dev/null

# Run from the repository root so relative paths in config.js resolve.
cd "$(dirname "$0")/.."
exec node --env-file-if-exists=.env cron/runAll.js
|
||||
59
cron/runAll.js
Normal file
59
cron/runAll.js
Normal file
@@ -0,0 +1,59 @@
|
||||
// Port of cron.sh + tmdbintegral/tmdbintegral.sh
|
||||
//
|
||||
// Pipeline:
|
||||
// 1. Refresh imdbratings.tsv
|
||||
// 2. Download daily TMDb exports (movie.json, tv.json)
|
||||
// 3. In parallel: tmdbSync(movie+tv), justwatchSync(movie+tv)
|
||||
// 4. In parallel: tmdb2imdb(movie+tv), buildSearch(movie+tv)
|
||||
// 5. In parallel: ambiguity(movie+tv)
|
||||
//
|
||||
// Writes cron.txt at start/end (mirrors cron.sh).
|
||||
|
||||
import { writeFileSync, appendFileSync } from 'node:fs';
|
||||
import { CRON_TXT } from '../config.js';
|
||||
import { syncImdbRatings } from './imdbRatings.js';
|
||||
import { syncExports } from './tmdbExports.js';
|
||||
import { syncType as syncTmdb } from './tmdbSync.js';
|
||||
import { syncType as syncJustwatch } from './justwatchSync.js';
|
||||
import { buildMapping } from './tmdb2imdb.js';
|
||||
import { buildSearch } from './buildSearch.js';
|
||||
import { buildAmbiguity } from './ambiguity.js';
|
||||
|
||||
function dateStamp() {
|
||||
return new Date().toString();
|
||||
}
|
||||
|
||||
/**
 * Full cron pipeline (port of cron.sh + tmdbintegral.sh). Writes start/finish
 * timestamps to CRON_TXT. Stages run sequentially because each stage consumes
 * files produced by the previous one; jobs inside a stage run in parallel.
 */
export async function runAll() {
  writeFileSync(CRON_TXT, `Started At ${dateStamp()}\n`);

  // Prerequisites every later stage reads from disk.
  await syncImdbRatings();
  await syncExports();

  const stages = [
    // Stage: refresh local TMDb + JustWatch caches.
    [
      () => syncTmdb('movie'),
      () => syncTmdb('tv'),
      () => syncJustwatch('movie'),
      () => syncJustwatch('tv'),
    ],
    // Stage: derive id mappings and search databases from the caches.
    [
      () => buildMapping('movie'),
      () => buildMapping('tv'),
      () => buildSearch('movie'),
      () => buildSearch('tv'),
    ],
    // Stage: ambiguity detection reads the search databases.
    [
      () => buildAmbiguity('movie'),
      () => buildAmbiguity('tv'),
    ],
  ];
  for (const stage of stages) {
    await Promise.all(stage.map((job) => job()));
  }

  appendFileSync(CRON_TXT, `Finished At ${dateStamp()}\n`);
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
runAll().catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
52
cron/tmdb2imdb.js
Normal file
52
cron/tmdb2imdb.js
Normal file
@@ -0,0 +1,52 @@
|
||||
// Port of tmdbintegral/tmdb2imdb.php
|
||||
// Builds bidirectional TMDb <-> IMDb id mappings from cached detail files.
|
||||
|
||||
import { createReadStream, existsSync, readFileSync } from 'node:fs';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { join } from 'node:path';
|
||||
import { TMDBINTEGRAL_DIR } from '../config.js';
|
||||
import { entryPath } from '../lib/paths.js';
|
||||
|
||||
/**
 * Port of tmdbintegral/tmdb2imdb.php.
 * Builds bidirectional TMDb <-> IMDb id mappings from the cached detail files
 * and writes them to <type>2imdb.json and imdb2<type>.json.
 *
 * @param {'movie'|'tv'} type
 */
export async function buildMapping(type) {
  const inputFile = join(TMDBINTEGRAL_DIR, `${type}.json`);
  const out1 = join(TMDBINTEGRAL_DIR, `${type}2imdb.json`);
  const out2 = join(TMDBINTEGRAL_DIR, `imdb2${type}.json`);

  const data1 = {}; // tmdb -> imdb
  const data2 = {}; // imdb -> tmdb

  const stream = createReadStream(inputFile, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });

  for await (const line of rl) {
    if (!line) continue;
    let obj;
    try { obj = JSON.parse(line); } catch { continue; } // skip malformed lines
    const tmdb = obj.id;
    const path = entryPath(type, tmdb);
    if (!existsSync(path)) continue; // not cached locally yet
    let detail;
    try { detail = JSON.parse(readFileSync(path, 'utf8')); } catch { continue; }
    const imdb = detail?.external_ids?.imdb_id;
    if (imdb) {
      data1[tmdb] = imdb;
      data2[imdb] = tmdb;
    }
  }

  // The two outputs are independent — write them in parallel instead of
  // awaiting them one after the other.
  await Promise.all([
    writeFile(out1, JSON.stringify(data1)),
    writeFile(out2, JSON.stringify(data2)),
  ]);
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
const type = process.argv[2];
|
||||
if (type !== 'movie' && type !== 'tv') {
|
||||
console.error('Usage: node cron/tmdb2imdb.js movie|tv');
|
||||
process.exit(1);
|
||||
}
|
||||
buildMapping(type).catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
56
cron/tmdbExports.js
Normal file
56
cron/tmdbExports.js
Normal file
@@ -0,0 +1,56 @@
|
||||
import { createWriteStream } from 'node:fs';
|
||||
import { rename } from 'node:fs/promises';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
import { createGunzip } from 'node:zlib';
|
||||
import { Readable } from 'node:stream';
|
||||
import { join } from 'node:path';
|
||||
import { TMDBINTEGRAL_DIR, TMDB_EXPORTS_BASE } from '../config.js';
|
||||
|
||||
// Format a Date as MM_DD_YYYY (UTC), the naming scheme of TMDb daily exports.
function formatMMDDYYYY(date) {
  const pad2 = (n) => String(n).padStart(2, '0');
  const month = pad2(date.getUTCMonth() + 1);
  const day = pad2(date.getUTCDate());
  return `${month}_${day}_${date.getUTCFullYear()}`;
}
|
||||
|
||||
/**
 * Download url and gunzip it into outPath, going through a .tmp file so the
 * destination is replaced atomically.
 *
 * @returns {Promise<boolean>} false when the export is not published yet
 *   (HTTP 403/404); true on success.
 * @throws on any other HTTP error or stream failure.
 */
async function tryDownload(url, outPath) {
  console.log(`Downloading: "${url}"`);
  const res = await fetch(url);
  if (res.status === 403 || res.status === 404) {
    console.log(`Not published yet (HTTP ${res.status}): ${url}`);
    // Release the connection: an unread body keeps the underlying socket
    // alive in Node's fetch (undici).
    if (res.body) await res.body.cancel().catch(() => {});
    return false;
  }
  if (!res.ok || !res.body) {
    throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
  }
  const tmp = `${outPath}.tmp`;
  await pipeline(Readable.fromWeb(res.body), createGunzip(), createWriteStream(tmp));
  await rename(tmp, outPath);
  console.log(`Wrote ${outPath}`);
  return true;
}
|
||||
|
||||
// TMDb publishes the daily export around 08:00 UTC. If we run before that, the
|
||||
// current-day file returns 403. Try today, then fall back to yesterday.
|
||||
// TMDb publishes the daily export around 08:00 UTC. If we run before that, the
// current-day file returns 403. Try today, then fall back to yesterday.
async function downloadExport(prefix, outName) {
  const out = join(TMDBINTEGRAL_DIR, outName);
  const today = new Date();
  const yesterday = new Date(today.getTime() - 86400 * 1000);
  for (const candidate of [today, yesterday]) {
    const url = `${TMDB_EXPORTS_BASE}/${prefix}_${formatMMDDYYYY(candidate)}.json.gz`;
    if (await tryDownload(url, out)) return;
  }
  throw new Error(`No TMDb ${prefix} export available for today or yesterday`);
}
|
||||
|
||||
/**
 * Refresh the daily TMDb id exports (movie.json, tv.json).
 * The two downloads are independent, so run them in parallel.
 */
export async function syncExports() {
  await Promise.all([
    downloadExport('movie_ids', 'movie.json'),
    downloadExport('tv_series_ids', 'tv.json'),
  ]);
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
syncExports().catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
168
cron/tmdbSync.js
Normal file
168
cron/tmdbSync.js
Normal file
@@ -0,0 +1,168 @@
|
||||
// Port of tmdbintegral/tmdbintegral.php
|
||||
//
|
||||
// 1. Fetch /changes for the last CHANGES_DAYS to find recently-modified entries
|
||||
// whose local cache file is older than CHANGES_DAYS (so we re-download them).
|
||||
// 2. Stream <type>.json line-by-line, ensure each id has a local detail file
|
||||
// (downloading it if missing or flagged for update).
|
||||
// 3. Walk through every numeric id < max(tmdbs) and remove orphan files that
|
||||
// no longer appear in the master list.
|
||||
|
||||
import { createReadStream, createWriteStream, existsSync, statSync, readdirSync, unlinkSync } from 'node:fs';
|
||||
import { mkdir, stat, writeFile, unlink } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
TMDBINTEGRAL_DIR, MOVIE_DIR, TV_DIR, TMDB_API_KEY, TMDB_API_BASE, CHANGES_DAYS,
|
||||
} from '../config.js';
|
||||
import { fetchJson, Limiter } from '../lib/http.js';
|
||||
import { entryDir, entryPath, bucket } from '../lib/paths.js';
|
||||
|
||||
const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;
|
||||
const DOWNLOAD_CONCURRENCY = 16;
|
||||
|
||||
// Format a Date as YYYY-MM-DD (UTC), the format TMDb's /changes endpoint expects.
function ymd(date) {
  const pieces = [
    String(date.getUTCFullYear()),
    String(date.getUTCMonth() + 1).padStart(2, '0'),
    String(date.getUTCDate()).padStart(2, '0'),
  ];
  return pieces.join('-');
}
|
||||
|
||||
// append_to_response list for TMDb detail fetches.
// tv additionally gets aggregate_credits on top of the movie list.
function appendResponse(type) {
  if (type === 'tv') {
    return 'credits,aggregate_credits,external_ids,release_dates,translations,images,videos';
  }
  return 'credits,external_ids,release_dates,translations,images,videos';
}
|
||||
|
||||
function detailUrl(type, id) {
|
||||
const base = `${TMDB_API_BASE}/${type}`;
|
||||
return `${base}/${id}?api_key=${TMDB_API_KEY}&append_to_response=${appendResponse(type)}&include_image_language=fr,null,en&language=fr-FR`;
|
||||
}
|
||||
|
||||
/**
 * Query TMDb /{type}/changes for the last CHANGES_DAYS and return the set of
 * ids whose local cache file exists but was last refreshed more than
 * CHANGES_SECS ago (i.e. should be re-downloaded).
 *
 * @param {'movie'|'tv'} type
 * @returns {Promise<Set<number>>} ids flagged for re-download
 */
async function findChanges(type) {
  const now = new Date();
  const start = new Date(now.getTime() - CHANGES_DAYS * 86400 * 1000);
  const startdate = ymd(start);
  const enddate = ymd(now);
  const baseUrl = `${TMDB_API_BASE}/${type}/changes?api_key=${TMDB_API_KEY}&start_date=${startdate}&end_date=${enddate}&page=`;

  const updates = new Set();
  let total = 1; // grows once the first page reports total_pages
  for (let page = 1; page <= total; page++) {
    const url = `${baseUrl}${page}`;
    console.log(`Downloading: "${url}"`);
    const obj = await fetchJson(url);
    if (!obj) {
      // Log the exact page URL that failed, not just the base URL.
      console.log(`Failed to retrieve TMDb data: "${url}"`);
      continue;
    }
    if (typeof obj.total_pages === 'number') total = obj.total_pages;
    if (!Array.isArray(obj.results)) continue;

    for (const change of obj.results) {
      const id = change.id;
      const path = entryPath(type, id);
      if (!existsSync(path)) continue; // not cached: the main loop downloads it anyway
      let st;
      try { st = statSync(path); } catch { continue; }
      // PHP uses filectime; on Linux ctime tracks metadata changes too, but the
      // intent is "last time the local file was refreshed". We use mtime which
      // is closer to that intent in JS (writeFile updates mtime).
      const ageSecs = (Date.now() - st.mtimeMs) / 1000;
      if (ageSecs >= CHANGES_SECS) {
        const days = Math.floor(ageSecs / 86400);
        const hours = Math.floor((ageSecs % 86400) / 3600);
        const minutes = Math.floor((ageSecs % 3600) / 60);
        console.log(`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`);
        updates.add(id);
      }
    }
  }
  return updates;
}
|
||||
|
||||
async function readMasterIds(type) {
|
||||
const file = join(TMDBINTEGRAL_DIR, `${type}.json`);
|
||||
const ids = [];
|
||||
const stream = createReadStream(file, { encoding: 'utf8' });
|
||||
const rl = createInterface({ input: stream, crlfDelay: Infinity });
|
||||
for await (const line of rl) {
|
||||
if (!line) continue;
|
||||
try {
|
||||
const obj = JSON.parse(line);
|
||||
if (typeof obj.id === 'number') ids.push(obj.id);
|
||||
} catch { /* ignore malformed lines */ }
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
async function ensureDir(dir) {
|
||||
if (!existsSync(dir)) {
|
||||
await mkdir(dir, { recursive: true });
|
||||
}
|
||||
}
|
||||
|
||||
async function downloadDetail(type, id) {
|
||||
const dir = entryDir(type, id);
|
||||
await ensureDir(dir);
|
||||
const path = entryPath(type, id);
|
||||
console.log(`Downloading: "${type}/${bucket(id)}/${id}.json"`);
|
||||
const url = detailUrl(type, id);
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) {
|
||||
console.log(`Failed to retrieve TMDb data: "${url}"`);
|
||||
return;
|
||||
}
|
||||
const text = await res.text();
|
||||
await writeFile(path, text);
|
||||
}
|
||||
|
||||
function removeOrphans(type, sortedIds) {
|
||||
// Walk every bucket directory once, build a set of expected ids, delete the rest.
|
||||
const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
|
||||
const expected = new Set(sortedIds);
|
||||
let buckets;
|
||||
try { buckets = readdirSync(baseDir); } catch { return; }
|
||||
for (const b of buckets) {
|
||||
let entries;
|
||||
try { entries = readdirSync(join(baseDir, b)); } catch { continue; }
|
||||
for (const fname of entries) {
|
||||
if (!fname.endsWith('.json')) continue;
|
||||
const id = parseInt(fname.slice(0, -5), 10);
|
||||
if (!Number.isInteger(id)) continue;
|
||||
if (!expected.has(id)) {
|
||||
const p = join(baseDir, b, fname);
|
||||
console.log(`Removing: "${type}/${b}/${fname}"`);
|
||||
try { unlinkSync(p); } catch { /* ignore */ }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Bring the local detail cache for one media type in sync with TMDb:
 * download every id that is missing or flagged by /changes (bounded
 * concurrency), then prune files for ids that left the master list.
 *
 * @param {'movie'|'tv'} type
 */
export async function syncType(type) {
  const updates = await findChanges(type);
  const ids = await readMasterIds(type);

  const limiter = new Limiter(DOWNLOAD_CONCURRENCY);
  const pending = [];
  for (const id of ids) {
    const cached = existsSync(entryPath(type, id));
    if (cached && !updates.has(id)) continue; // fresh enough, skip
    pending.push(limiter.run(() => downloadDetail(type, id)));
  }
  await Promise.allSettled(pending); // individual failures don't abort the sync

  ids.sort((a, b) => a - b);
  removeOrphans(type, ids);
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
const type = process.argv[2];
|
||||
if (type !== 'movie' && type !== 'tv') {
|
||||
console.error('Usage: node cron/tmdbSync.js movie|tv');
|
||||
process.exit(1);
|
||||
}
|
||||
syncType(type).catch((err) => {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
Reference in New Issue
Block a user