Phase 1: lock cron, reload chaud, argon2, providers, IMDb lookup, cache LRU, /health, /metrics, rate limit, UI dark, biome
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
|
||||
import { NB_SEARCH_PARTS, TMDBINTEGRAL_DIR } from '../config.js';
|
||||
|
||||
const TMDB = 0;
|
||||
const FILTEREDTITLE = 4;
|
||||
@@ -32,9 +32,24 @@ export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
|
||||
const fr = db[FILTEREDTITLE];
|
||||
const en = db[FILTEREDENGLISHTITLE];
|
||||
const vo = db[FILTEREDORIGINALTITLE];
|
||||
if (fr) { tmdbs.push(db[TMDB]); filteredTitles.push(fr); years.push(db[YEAR][0]); languages.push('FR'); }
|
||||
if (en) { tmdbs.push(db[TMDB]); filteredTitles.push(en); years.push(db[YEAR][0]); languages.push('EN'); }
|
||||
if (vo) { tmdbs.push(db[TMDB]); filteredTitles.push(vo); years.push(db[YEAR][0]); languages.push('VO'); }
|
||||
if (fr) {
|
||||
tmdbs.push(db[TMDB]);
|
||||
filteredTitles.push(fr);
|
||||
years.push(db[YEAR][0]);
|
||||
languages.push('FR');
|
||||
}
|
||||
if (en) {
|
||||
tmdbs.push(db[TMDB]);
|
||||
filteredTitles.push(en);
|
||||
years.push(db[YEAR][0]);
|
||||
languages.push('EN');
|
||||
}
|
||||
if (vo) {
|
||||
tmdbs.push(db[TMDB]);
|
||||
filteredTitles.push(vo);
|
||||
years.push(db[YEAR][0]);
|
||||
languages.push('VO');
|
||||
}
|
||||
}
|
||||
|
||||
// PHP: array_multisort(filteredtitles, years, tmdbs, languages)
|
||||
@@ -87,7 +102,7 @@ export async function buildAmbiguity(type, nbParts = NB_SEARCH_PARTS) {
|
||||
}
|
||||
flush();
|
||||
|
||||
await writeFile(out, lines.length ? lines.join('\n') + '\n' : '');
|
||||
await writeFile(out, lines.length ? `${lines.join('\n')}\n` : '');
|
||||
}
|
||||
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
|
||||
@@ -9,21 +9,23 @@
|
||||
|
||||
import { createReadStream, existsSync, readFileSync } from 'node:fs';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { join } from 'node:path';
|
||||
import { TMDBINTEGRAL_DIR, NB_SEARCH_PARTS } from '../config.js';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { NB_SEARCH_PARTS, TMDBINTEGRAL_DIR } from '../config.js';
|
||||
import { mbStrlen } from '../lib/mbLevenshtein.js';
|
||||
import { entryPath } from '../lib/paths.js';
|
||||
import { filterTitle } from '../lib/titleFilter.js';
|
||||
import { mbStrlen } from '../lib/mbLevenshtein.js';
|
||||
|
||||
function lower(s) { return s.toLocaleLowerCase(); }
|
||||
function lower(s) {
|
||||
return s.toLocaleLowerCase();
|
||||
}
|
||||
|
||||
function extractEnglishTitle(detail, type) {
|
||||
const tr = detail?.translations?.translations;
|
||||
if (!Array.isArray(tr)) return '';
|
||||
for (const t of tr) {
|
||||
if (t.iso_639_1 === 'en') {
|
||||
return type === 'movie' ? (t.data?.title || '') : (t.data?.name || '');
|
||||
return type === 'movie' ? t.data?.title || '' : t.data?.name || '';
|
||||
}
|
||||
}
|
||||
return '';
|
||||
@@ -73,20 +75,13 @@ function buildEntry(masterObj, detail, type) {
|
||||
const seen = new Set();
|
||||
const uniqYears = [];
|
||||
for (const y of years) {
|
||||
if (!seen.has(y)) { seen.add(y); uniqYears.push(y); }
|
||||
if (!seen.has(y)) {
|
||||
seen.add(y);
|
||||
uniqYears.push(y);
|
||||
}
|
||||
}
|
||||
|
||||
return [
|
||||
tmdb,
|
||||
title,
|
||||
englishTitle,
|
||||
originalTitle,
|
||||
lower(ft),
|
||||
lower(fe),
|
||||
lower(fo),
|
||||
uniqYears,
|
||||
popularity,
|
||||
];
|
||||
return [tmdb, title, englishTitle, originalTitle, lower(ft), lower(fe), lower(fo), uniqYears, popularity];
|
||||
}
|
||||
|
||||
export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
|
||||
@@ -99,11 +94,19 @@ export async function buildSearch(type, nbParts = NB_SEARCH_PARTS) {
|
||||
for await (const line of rl) {
|
||||
if (!line) continue;
|
||||
let masterObj;
|
||||
try { masterObj = JSON.parse(line); } catch { continue; }
|
||||
try {
|
||||
masterObj = JSON.parse(line);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
const path = entryPath(type, masterObj.id);
|
||||
if (!existsSync(path)) continue;
|
||||
let detail;
|
||||
try { detail = JSON.parse(readFileSync(path, 'utf8')); } catch { continue; }
|
||||
try {
|
||||
detail = JSON.parse(readFileSync(path, 'utf8'));
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
const entry = buildEntry(masterObj, detail, type);
|
||||
if (entry) database.push(entry);
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { createWriteStream } from 'node:fs';
|
||||
import { rename } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { Readable } from 'node:stream';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
import { createGunzip } from 'node:zlib';
|
||||
import { Readable } from 'node:stream';
|
||||
import { join } from 'node:path';
|
||||
import { ROOT, IMDB_DATASETS_BASE, IMDB_RATINGS } from '../config.js';
|
||||
import { IMDB_DATASETS_BASE, IMDB_RATINGS, ROOT } from '../config.js';
|
||||
|
||||
const FILE = 'title.ratings.tsv';
|
||||
|
||||
@@ -18,11 +18,7 @@ export async function syncImdbRatings() {
|
||||
throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
|
||||
}
|
||||
|
||||
await pipeline(
|
||||
Readable.fromWeb(res.body),
|
||||
createGunzip(),
|
||||
createWriteStream(tmpPath),
|
||||
);
|
||||
await pipeline(Readable.fromWeb(res.body), createGunzip(), createWriteStream(tmpPath));
|
||||
|
||||
await rename(tmpPath, IMDB_RATINGS);
|
||||
console.log(`Wrote ${IMDB_RATINGS}`);
|
||||
|
||||
@@ -2,13 +2,17 @@
|
||||
|
||||
import { createReadStream, existsSync, readdirSync, unlinkSync } from 'node:fs';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { join } from 'node:path';
|
||||
import { createInterface } from 'node:readline';
|
||||
import {
|
||||
TMDBINTEGRAL_DIR, JUSTWATCH_MOVIE_DIR, JUSTWATCH_TV_DIR, TMDB_API_KEY, TMDB_API_BASE,
|
||||
JUSTWATCH_MOVIE_DIR,
|
||||
JUSTWATCH_TV_DIR,
|
||||
TMDB_API_BASE,
|
||||
TMDB_API_KEY,
|
||||
TMDBINTEGRAL_DIR,
|
||||
} from '../config.js';
|
||||
import { Limiter } from '../lib/http.js';
|
||||
import { justwatchDir, justwatchPath, bucket } from '../lib/paths.js';
|
||||
import { bucket, justwatchDir, justwatchPath } from '../lib/paths.js';
|
||||
|
||||
const DOWNLOAD_CONCURRENCY = 16;
|
||||
|
||||
@@ -22,7 +26,9 @@ async function readMasterIds(type) {
|
||||
try {
|
||||
const obj = JSON.parse(line);
|
||||
if (typeof obj.id === 'number') ids.push(obj.id);
|
||||
} catch { /* ignore */ }
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
@@ -50,10 +56,18 @@ function removeOrphans(type, ids) {
|
||||
const baseDir = type === 'movie' ? JUSTWATCH_MOVIE_DIR : JUSTWATCH_TV_DIR;
|
||||
const expected = new Set(ids);
|
||||
let buckets;
|
||||
try { buckets = readdirSync(baseDir); } catch { return; }
|
||||
try {
|
||||
buckets = readdirSync(baseDir);
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
for (const b of buckets) {
|
||||
let entries;
|
||||
try { entries = readdirSync(join(baseDir, b)); } catch { continue; }
|
||||
try {
|
||||
entries = readdirSync(join(baseDir, b));
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
for (const fname of entries) {
|
||||
if (!fname.endsWith('.json')) continue;
|
||||
const id = parseInt(fname.slice(0, -5), 10);
|
||||
@@ -61,7 +75,11 @@ function removeOrphans(type, ids) {
|
||||
if (!expected.has(id)) {
|
||||
const p = join(baseDir, b, fname);
|
||||
console.log(`Removing: "justwatch${type}/${b}/${fname}"`);
|
||||
try { unlinkSync(p); } catch { /* ignore */ }
|
||||
try {
|
||||
unlinkSync(p);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,44 +9,36 @@
|
||||
//
|
||||
// Writes cron.txt at start/end (mirrors cron.sh).
|
||||
|
||||
import { writeFileSync, appendFileSync } from 'node:fs';
|
||||
import { CRON_TXT } from '../config.js';
|
||||
import { appendFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { CRON_TXT, ROOT } from '../config.js';
|
||||
import { acquireLock } from '../lib/lockFile.js';
|
||||
import { buildAmbiguity } from './ambiguity.js';
|
||||
import { buildSearch } from './buildSearch.js';
|
||||
import { syncImdbRatings } from './imdbRatings.js';
|
||||
import { syncExports } from './tmdbExports.js';
|
||||
import { syncType as syncTmdb } from './tmdbSync.js';
|
||||
import { syncType as syncJustwatch } from './justwatchSync.js';
|
||||
import { buildMapping } from './tmdb2imdb.js';
|
||||
import { buildSearch } from './buildSearch.js';
|
||||
import { buildAmbiguity } from './ambiguity.js';
|
||||
import { syncExports } from './tmdbExports.js';
|
||||
import { syncType as syncTmdb } from './tmdbSync.js';
|
||||
|
||||
const LOCK_PATH = join(ROOT, '.cron.lock');
|
||||
|
||||
function dateStamp() {
|
||||
return new Date().toString();
|
||||
}
|
||||
|
||||
export async function runAll() {
|
||||
acquireLock(LOCK_PATH);
|
||||
writeFileSync(CRON_TXT, `Started At ${dateStamp()}\n`);
|
||||
|
||||
await syncImdbRatings();
|
||||
await syncExports();
|
||||
|
||||
await Promise.all([
|
||||
syncTmdb('movie'),
|
||||
syncTmdb('tv'),
|
||||
syncJustwatch('movie'),
|
||||
syncJustwatch('tv'),
|
||||
]);
|
||||
await Promise.all([syncTmdb('movie'), syncTmdb('tv'), syncJustwatch('movie'), syncJustwatch('tv')]);
|
||||
|
||||
await Promise.all([
|
||||
buildMapping('movie'),
|
||||
buildMapping('tv'),
|
||||
buildSearch('movie'),
|
||||
buildSearch('tv'),
|
||||
]);
|
||||
await Promise.all([buildMapping('movie'), buildMapping('tv'), buildSearch('movie'), buildSearch('tv')]);
|
||||
|
||||
await Promise.all([
|
||||
buildAmbiguity('movie'),
|
||||
buildAmbiguity('tv'),
|
||||
]);
|
||||
await Promise.all([buildAmbiguity('movie'), buildAmbiguity('tv')]);
|
||||
|
||||
appendFileSync(CRON_TXT, `Finished At ${dateStamp()}\n`);
|
||||
}
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
|
||||
import { createReadStream, existsSync, readFileSync } from 'node:fs';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { join } from 'node:path';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { TMDBINTEGRAL_DIR } from '../config.js';
|
||||
import { entryPath } from '../lib/paths.js';
|
||||
|
||||
@@ -22,12 +22,20 @@ export async function buildMapping(type) {
|
||||
for await (const line of rl) {
|
||||
if (!line) continue;
|
||||
let obj;
|
||||
try { obj = JSON.parse(line); } catch { continue; }
|
||||
try {
|
||||
obj = JSON.parse(line);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
const tmdb = obj.id;
|
||||
const path = entryPath(type, tmdb);
|
||||
if (!existsSync(path)) continue;
|
||||
let detail;
|
||||
try { detail = JSON.parse(readFileSync(path, 'utf8')); } catch { continue; }
|
||||
try {
|
||||
detail = JSON.parse(readFileSync(path, 'utf8'));
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
const imdb = detail?.external_ids?.imdb_id;
|
||||
if (imdb) {
|
||||
data1[tmdb] = imdb;
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { createWriteStream } from 'node:fs';
|
||||
import { rename, mkdir } from 'node:fs/promises';
|
||||
import { mkdir, rename } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { Readable } from 'node:stream';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
import { createGunzip } from 'node:zlib';
|
||||
import { Readable } from 'node:stream';
|
||||
import { join } from 'node:path';
|
||||
import { TMDBINTEGRAL_DIR, TMDB_EXPORTS_BASE } from '../config.js';
|
||||
import { TMDB_EXPORTS_BASE, TMDBINTEGRAL_DIR } from '../config.js';
|
||||
|
||||
function formatMMDDYYYY(date) {
|
||||
const mm = String(date.getUTCMonth() + 1).padStart(2, '0');
|
||||
|
||||
@@ -7,15 +7,13 @@
|
||||
// 3. Walk through every numeric id < max(tmdbs) and remove orphan files that
|
||||
// no longer appear in the master list.
|
||||
|
||||
import { createReadStream, createWriteStream, existsSync, statSync, readdirSync, unlinkSync } from 'node:fs';
|
||||
import { mkdir, stat, writeFile, unlink } from 'node:fs/promises';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { createReadStream, existsSync, readdirSync, statSync, unlinkSync } from 'node:fs';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
TMDBINTEGRAL_DIR, MOVIE_DIR, TV_DIR, TMDB_API_KEY, TMDB_API_BASE, CHANGES_DAYS,
|
||||
} from '../config.js';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { CHANGES_DAYS, MOVIE_DIR, TMDB_API_BASE, TMDB_API_KEY, TMDBINTEGRAL_DIR, TV_DIR } from '../config.js';
|
||||
import { fetchJson, Limiter } from '../lib/http.js';
|
||||
import { entryDir, entryPath, bucket } from '../lib/paths.js';
|
||||
import { bucket, entryDir, entryPath } from '../lib/paths.js';
|
||||
|
||||
const CHANGES_SECS = CHANGES_DAYS * 24 * 3600;
|
||||
const DOWNLOAD_CONCURRENCY = 16;
|
||||
@@ -63,7 +61,11 @@ async function findChanges(type) {
|
||||
const path = entryPath(type, id);
|
||||
if (!existsSync(path)) continue;
|
||||
let st;
|
||||
try { st = statSync(path); } catch { continue; }
|
||||
try {
|
||||
st = statSync(path);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
// PHP uses filectime; on Linux ctime tracks metadata changes too, but the
|
||||
// intent is "last time the local file was refreshed". We use mtime which
|
||||
// is closer to that intent in JS (writeFile updates mtime).
|
||||
@@ -72,7 +74,9 @@ async function findChanges(type) {
|
||||
const days = Math.floor(ageSecs / 86400);
|
||||
const hours = Math.floor((ageSecs % 86400) / 3600);
|
||||
const minutes = Math.floor((ageSecs % 3600) / 60);
|
||||
console.log(`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`);
|
||||
console.log(
|
||||
`Updating: "${type}/${bucket(id)}/${id}.json" ${days} days, ${hours} hours, ${minutes} minutes`,
|
||||
);
|
||||
updates.add(id);
|
||||
}
|
||||
}
|
||||
@@ -90,7 +94,9 @@ async function readMasterIds(type) {
|
||||
try {
|
||||
const obj = JSON.parse(line);
|
||||
if (typeof obj.id === 'number') ids.push(obj.id);
|
||||
} catch { /* ignore malformed lines */ }
|
||||
} catch {
|
||||
/* ignore malformed lines */
|
||||
}
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
@@ -121,10 +127,18 @@ function removeOrphans(type, sortedIds) {
|
||||
const baseDir = type === 'movie' ? MOVIE_DIR : TV_DIR;
|
||||
const expected = new Set(sortedIds);
|
||||
let buckets;
|
||||
try { buckets = readdirSync(baseDir); } catch { return; }
|
||||
try {
|
||||
buckets = readdirSync(baseDir);
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
for (const b of buckets) {
|
||||
let entries;
|
||||
try { entries = readdirSync(join(baseDir, b)); } catch { continue; }
|
||||
try {
|
||||
entries = readdirSync(join(baseDir, b));
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
for (const fname of entries) {
|
||||
if (!fname.endsWith('.json')) continue;
|
||||
const id = parseInt(fname.slice(0, -5), 10);
|
||||
@@ -132,7 +146,11 @@ function removeOrphans(type, sortedIds) {
|
||||
if (!expected.has(id)) {
|
||||
const p = join(baseDir, b, fname);
|
||||
console.log(`Removing: "${type}/${b}/${fname}"`);
|
||||
try { unlinkSync(p); } catch { /* ignore */ }
|
||||
try {
|
||||
unlinkSync(p);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user