Phase 1: cron lock, hot reload, argon2, providers, IMDb lookup, LRU cache, /health, /metrics, rate limiting, dark UI, Biome

This commit is contained in:
unfr
2026-04-24 07:35:10 +02:00
parent f9745a2390
commit a184a21f57
36 changed files with 2060 additions and 364 deletions

View File

@@ -5,7 +5,7 @@
import { readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
import { NB_SEARCH_PARTS, TMDBINTEGRAL_DIR } from '../config.js';
const TMDB = 0;
const FILTEREDTITLE = 4;
@@ -32,9 +32,24 @@ export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
const fr = db[FILTEREDTITLE];
const en = db[FILTEREDENGLISHTITLE];
const vo = db[FILTEREDORIGINALTITLE];
if (fr) { tmdbs.push(db[TMDB]); filteredTitles.push(fr); years.push(db[YEAR][0]); languages.push('FR'); }
if (en) { tmdbs.push(db[TMDB]); filteredTitles.push(en); years.push(db[YEAR][0]); languages.push('EN'); }
if (vo) { tmdbs.push(db[TMDB]); filteredTitles.push(vo); years.push(db[YEAR][0]); languages.push('VO'); }
if (fr) {
tmdbs.push(db[TMDB]);
filteredTitles.push(fr);
years.push(db[YEAR][0]);
languages.push('FR');
}
if (en) {
tmdbs.push(db[TMDB]);
filteredTitles.push(en);
years.push(db[YEAR][0]);
languages.push('EN');
}
if (vo) {
tmdbs.push(db[TMDB]);
filteredTitles.push(vo);
years.push(db[YEAR][0]);
languages.push('VO');
}
}
// PHP: array_multisort(filteredtitles, years, tmdbs, languages)
@@ -87,7 +102,7 @@ export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
}
flush();
await writeFile(out, lines.length ? lines.join('\n') + '\n' : '');
await writeFile(out, lines.length ? `${lines.join('\n')}\n` : '');
}
if (import.meta.url === `file://${process.argv[1]}`) {

View File

@@ -9,21 +9,23 @@
import { createReadStream, existsSync, readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { createInterface } from 'node:readline';
import { join } from 'node:path';
import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
import { createInterface } from 'node:readline';
import { NB_SEARCH_PARTS, TMDBINTEGRAL_DIR } from '../config.js';
import { mbStrlen } from '../lib/mbLevenshtein.js';
import { entryPath } from '../lib/paths.js';
import { filterTitle } from '../lib/titleFilter.js';
import { mbStrlen } from '../lib/mbLevenshtein.js';
function lower(s) { return s.toLocaleLowerCase(); }
/**
 * Lower-case a string via `String.prototype.toLocaleLowerCase()`.
 *
 * @param {string} s - Text to convert; must expose `toLocaleLowerCase`.
 * @returns {string} The lower-cased text.
 */
function lower(s) {
  const lowered = s.toLocaleLowerCase();
  return lowered;
}
function extractEnglishTitle(detail, type) {
const tr = detail?.translations?.translations;
if (!Array.isArray(tr)) return '';
for (const t of tr) {
if (t.iso_639_1 === 'en') {
return type === 'movie' ? (t.data?.title || '') : (t.data?.name || '');
return type === 'movie' ? t.data?.title || '' : t.data?.name || '';
}
}
return '';
@@ -73,20 +75,13 @@ function buildEntry(masterObj, detail, type) {
const seen = new Set();
const uniqYears = [];
for (const y of years) {
if (!seen.has(y)) { seen.add(y); uniqYears.push(y); }
if (!seen.has(y)) {
seen.add(y);
uniqYears.push(y);
}
}
return [
tmdb,
title,
englishTitle,
originalTitle,
lower(ft),
lower(fe),
lower(fo),
uniqYears,
popularity,
];
return [tmdb, title, englishTitle, originalTitle, lower(ft), lower(fe), lower(fo), uniqYears, popularity];
}
export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
@@ -99,11 +94,19 @@ export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
for await (const line of rl) {
if (!line) continue;
let masterObj;
try { masterObj = JSON.parse(line); } catch { continue; }
try {
masterObj = JSON.parse(line);
} catch {
continue;
}
const path = entryPath(type, masterObj.id);
if (!existsSync(path)) continue;
let detail;
try { detail = JSON.parse(readFileSync(path, 'utf8')); } catch { continue; }
try {
detail = JSON.parse(readFileSync(path, 'utf8'));
} catch {
continue;
}
const entry = buildEntry(masterObj, detail, type);
if (entry) database.push(entry);
}

View File

@@ -1,10 +1,10 @@
import { createWriteStream } from 'node:fs';
import { rename } from 'node:fs/promises';
import { join } from 'node:path';
import { Readable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import { createGunzip } from 'node:zlib';
import { Readable } from 'node:stream';
import { join } from 'node:path';
import { ROOT, IMDB_DATASETS_BASE, IMDB_RATINGS } from '../config.js';
import { IMDB_DATASETS_BASE, IMDB_RATINGS, ROOT } from '../config.js';
const FILE = 'title.ratings.tsv';
@@ -18,11 +18,7 @@ export async function syncImdbRatings() {
throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
}
await pipeline(
Readable.fromWeb(res.body),
createGunzip(),
createWriteStream(tmpPath),
);
await pipeline(Readable.fromWeb(res.body), createGunzip(), createWriteStream(tmpPath));
await rename(tmpPath, IMDB_RATINGS);
console.log(`Wrote ${IMDB_RATINGS}`);

View File

@@ -2,13 +2,17 @@
import { createReadStream, existsSync, readdirSync, unlinkSync } from 'node:fs';
import { mkdir, writeFile } from 'node:fs/promises';
import { createInterface } from 'node:readline';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import {
TMDBINTEGRAL_DIR, JUSTWATCH_MOVIE_DIR, JUSTWATCH_TV_DIR, TMDB_API_KEY, TMDB_API_BASE,
JUSTWATCH_MOVIE_DIR,
JUSTWATCH_TV_DIR,
TMDB_API_BASE,
TMDB_API_KEY,
TMDBINTEGRAL_DIR,
} from '../config.js';
import { Limiter } from '../lib/http.js';
import { justwatchDir, justwatchPath, bucket } from '../lib/paths.js';
import { bucket, justwatchDir, justwatchPath } from '../lib/paths.js';
const DOWNLOAD_CONCURRENCY = 16;
@@ -22,7 +26,9 @@ async function readMasterIds(type) {
try {
const obj = JSON.parse(line);
if (typeof obj.id === 'number') ids.push(obj.id);
} catch { /* ignore */ }
} catch {
/* ignore */
}
}
return ids;
}
@@ -50,10 +56,18 @@ function removeOrphans(type, ids) {
const baseDir = type === 'movie' ? JUSTWATCH_MOVIE_DIR : JUSTWATCH_TV_DIR;
const expected = new Set(ids);
let buckets;
try { buckets = readdirSync(baseDir); } catch { return; }
try {
buckets = readdirSync(baseDir);
} catch {
return;
}
for (const b of buckets) {
let entries;
try { entries = readdirSync(join(baseDir, b)); } catch { continue; }
try {
entries = readdirSync(join(baseDir, b));
} catch {
continue;
}
for (const fname of entries) {
if (!fname.endsWith('.json')) continue;
const id = parseInt(fname.slice(0, -5), 10);
@@ -61,7 +75,11 @@ function removeOrphans(type, ids) {
if (!expected.has(id)) {
const p = join(baseDir, b, fname);
console.log(`Removing: "justwatch${type}/${b}/${fname}"`);
try { unlinkSync(p); } catch { /* ignore */ }
try {
unlinkSync(p);
} catch {
/* ignore */
}
}
}
}

View File

@@ -9,44 +9,36 @@
//
// Writes cron.txt at start/end (mirrors cron.sh).
import { writeFileSync, appendFileSync } from 'node:fs';
import { CRON_TXT } from '../config.js';
import { appendFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { CRON_TXT, ROOT } from '../config.js';
import { acquireLock } from '../lib/lockFile.js';
import { buildAmbiguity } from './ambiguity.js';
import { buildSearch } from './buildSearch.js';
import { syncImdbRatings } from './imdbRatings.js';
import { syncExports } from './tmdbExports.js';
import { syncType as syncTmdb } from './tmdbSync.js';
import { syncType as syncJustwatch } from './justwatchSync.js';
import { buildMapping } from './tmdb2imdb.js';
import { buildSearch } from './buildSearch.js';
import { buildAmbiguity } from './ambiguity.js';
import { syncExports } from './tmdbExports.js';
import { syncType as syncTmdb } from './tmdbSync.js';
const LOCK_PATH = join(ROOT, '.cron.lock');
/**
 * Build the timestamp text used in the "Started At" / "Finished At" lines
 * that runAll writes to CRON_TXT.
 *
 * @returns {string} The current time as formatted by `Date.prototype.toString()`.
 */
function dateStamp() {
  const now = new Date();
  return now.toString();
}
/**
 * Run the whole data-refresh pipeline once, stage by stage.
 *
 * Calls acquireLock(LOCK_PATH) first, overwrites CRON_TXT with a "Started At"
 * line, awaits each stage in order, and finally appends a "Finished At" line.
 * If any awaited stage rejects, this function rejects and the "Finished At"
 * line is never written.
 *
 * NOTE(review): this text is a rendered diff, so every Promise.all stage below
 * shows up twice (a multi-line form and its single-line reformatting).
 * Presumably only one of each pair exists in the real file — confirm before
 * treating this listing as runnable code.
 * NOTE(review): no release of the lock is visible in this block — confirm that
 * acquireLock (lib/lockFile.js) arranges its own cleanup.
 *
 * @returns {Promise<void>} Resolves once every stage has completed.
 */
export async function runAll() {
// Presumably guards against overlapping runs — verify in lib/lockFile.js.
acquireLock(LOCK_PATH);
// writeFileSync replaces any previous CRON_TXT content with the start marker.
writeFileSync(CRON_TXT, `Started At ${dateStamp()}\n`);
// The two download steps are awaited one after the other, not in parallel.
await syncImdbRatings();
await syncExports();
// TMDB and JustWatch syncs for both media types are started together.
await Promise.all([
syncTmdb('movie'),
syncTmdb('tv'),
syncJustwatch('movie'),
syncJustwatch('tv'),
]);
await Promise.all([syncTmdb('movie'), syncTmdb('tv'), syncJustwatch('movie'), syncJustwatch('tv')]);
// Mapping and search builds start only after the syncs above have settled.
await Promise.all([
buildMapping('movie'),
buildMapping('tv'),
buildSearch('movie'),
buildSearch('tv'),
]);
await Promise.all([buildMapping('movie'), buildMapping('tv'), buildSearch('movie'), buildSearch('tv')]);
// Ambiguity builds are awaited last, after the mapping/search builds.
await Promise.all([
buildAmbiguity('movie'),
buildAmbiguity('tv'),
]);
await Promise.all([buildAmbiguity('movie'), buildAmbiguity('tv')]);
// Reached only when every stage above succeeded.
appendFileSync(CRON_TXT, `Finished At ${dateStamp()}\n`);
}

View File

@@ -3,8 +3,8 @@
import { createReadStream, existsSync, readFileSync } from 'node:fs';
import { writeFile } from 'node:fs/promises';
import { createInterface } from 'node:readline';
import { join } from 'node:path';
import { createInterface } from 'node:readline';
import { TMDBINTEGRAL_DIR } from '../config.js';
import { entryPath } from '../lib/paths.js';
@@ -22,12 +22,20 @@ export async function buildMapping(type) {
for await (const line of rl) {
if (!line) continue;
let obj;
try { obj = JSON.parse(line); } catch { continue; }
try {
obj = JSON.parse(line);
} catch {
continue;
}
const tmdb = obj.id;
const path = entryPath(type, tmdb);
if (!existsSync(path)) continue;
let detail;
try { detail = JSON.parse(readFileSync(path, 'utf8')); } catch { continue; }
try {
detail = JSON.parse(readFileSync(path, 'utf8'));
} catch {
continue;
}
const imdb = detail?.external_ids?.imdb_id;
if (imdb) {
data1[tmdb] = imdb;

View File

@@ -1,10 +1,10 @@
import { createWriteStream } from 'node:fs';
import { rename, mkdir } from 'node:fs/promises';
import { mkdir, rename } from 'node:fs/promises';
import { join } from 'node:path';
import { Readable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import { createGunzip } from 'node:zlib';
import { Readable } from 'node:stream';
import { join } from 'node:path';
import { TMDBINTEGRAL_DIR, TMDB_EXPORTS_BASE } from '../config.js';
import { TMDB_EXPORTS_BASE, TMDBINTEGRAL_DIR } from '../config.js';
function formatMMDDYYYY(date) {
const mm = String(date.getUTCMonth() + 1).padStart(2, '0');

View File

@@ -7,15 +7,13 @@
// 3. Walk through every numeric id < max(tmdbs) and remove orphan files that
// no longer appear in the master list.
import { createReadStream, createWriteStream, existsSync, statSync, readdirSync, unlinkSync } from 'node:fs';
import { mkdir, stat, writeFile, unlink } from 'node:fs/promises';
import { createInterface } from 'node:readline';
import { createReadStream, existsSync, readdirSync, statSync, unlinkSync } from 'node:fs';
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import {
TMDBINTEGRAL_DIR, MOVIE_DIR, TV_DIR, TMDB_API_KEY, TMDB_API_BASE, CHANGES_DAYS,
} from '../config.js';
import { createInterface } from 'node:readline';
import { CHANGES_DAYS, MOVIE_DIR, TMDB_API_BASE, TMDB_API_KEY, TMDBINTEGRAL_DIR, TV_DIR } from '../config.js';
import { fetchJson, Limiter } from '../lib/http.js';
import { entryDir, entryPath, bucket } from '../lib/paths.js';
import { bucket, entryDir, entryPath } from '../lib/paths.js';
const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;
const DOWNLOAD_CONCURRENCY = 16;
@@ -63,7 +61,11 @@ async function findChanges(type) {
const path = entryPath(type, id);
if (!existsSync(path)) continue;
let st;
try { st = statSync(path); } catch { continue; }
try {
st = statSync(path);
} catch {
continue;
}
// PHP uses filectime; on Linux ctime tracks metadata changes too, but the
// intent is "last time the local file was refreshed". We use mtime which
// is closer to that intent in JS (writeFile updates mtime).
@@ -72,7 +74,9 @@ async function findChanges(type) {
const days = Math.floor(ageSecs / 86400);
const hours = Math.floor((ageSecs % 86400) / 3600);
const minutes = Math.floor((ageSecs % 3600) / 60);
console.log(`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`);
console.log(
`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`,
);
updates.add(id);
}
}
@@ -90,7 +94,9 @@ async function readMasterIds(type) {
try {
const obj = JSON.parse(line);
if (typeof obj.id === 'number') ids.push(obj.id);
} catch { /* ignore malformed lines */ }
} catch {
/* ignore malformed lines */
}
}
return ids;
}
@@ -121,10 +127,18 @@ function removeOrphans(type, sortedIds) {
const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
const expected = new Set(sortedIds);
let buckets;
try { buckets = readdirSync(baseDir); } catch { return; }
try {
buckets = readdirSync(baseDir);
} catch {
return;
}
for (const b of buckets) {
let entries;
try { entries = readdirSync(join(baseDir, b)); } catch { continue; }
try {
entries = readdirSync(join(baseDir, b));
} catch {
continue;
}
for (const fname of entries) {
if (!fname.endsWith('.json')) continue;
const id = parseInt(fname.slice(0, -5), 10);
@@ -132,7 +146,11 @@ function removeOrphans(type, sortedIds) {
if (!expected.has(id)) {
const p = join(baseDir, b, fname);
console.log(`Removing: "${type}/${b}/${fname}"`);
try { unlinkSync(p); } catch { /* ignore */ }
try {
unlinkSync(p);
} catch {
/* ignore */
}
}
}
}