Portage complet PHP/Bash vers Node.js (Fastify + worker_threads)
This commit is contained in:
21
lib/format.js
Normal file
21
lib/format.js
Normal file
@@ -0,0 +1,21 @@
|
||||
// Money formatting (Intl.NumberFormat replaces PHP's NumberFormatter::CURRENCY).
// Whole-dollar USD output with en-US grouping separators; no cents.
const USD_FORMAT = new Intl.NumberFormat('en-US', {
  style: 'currency',
  currency: 'USD',
  minimumFractionDigits: 0,
  maximumFractionDigits: 0,
});

/**
 * Format a number as a whole-dollar USD string (e.g. 1234567 -> "$1,234,567").
 * Falsy inputs (null/undefined/NaN/0) all render as "$0".
 */
export function formatCurrency(n) {
  return USD_FORMAT.format(n || 0);
}
|
||||
|
||||
/**
 * Zero-pad a number below 10 to two characters ("5" -> "05").
 * Values >= 10 are returned as their plain decimal string.
 */
export function pad2(n) {
  const s = String(n);
  return n < 10 ? `0${s}` : s;
}
|
||||
|
||||
/**
 * Render a runtime given in minutes as "H h MM min" (e.g. 125 -> "2 h 05 min").
 * Falsy runtimes (0/null/undefined) yield an empty string.
 */
export function formatRuntime(runtime) {
  if (!runtime) return '';
  const hours = Math.floor(runtime / 60);
  const minutes = runtime % 60;
  const mm = minutes < 10 ? `0${minutes}` : String(minutes);
  return `${hours} h ${mm} min`;
}
|
||||
66
lib/http.js
Normal file
66
lib/http.js
Normal file
@@ -0,0 +1,66 @@
|
||||
// Tiny fetch wrapper with retry and concurrency limiter.
|
||||
|
||||
/**
 * GET a URL and return its body text.
 * - Retries up to `retries` extra attempts with linear backoff (500ms, 1s, ...).
 * - Each attempt is aborted after `timeoutMs`; the timer covers connect +
 *   headers only (it is cleared before the body is read).
 * - A 404 short-circuits to null ("not found", no retry).
 * - Any other HTTP error or network/abort error is retried; after the final
 *   attempt the failure is logged and null is returned (never throws).
 */
export async function fetchText(url, { retries = 3, timeoutMs = 30000 } = {}) {
  let lastErr;
  for (let attempt = 0; attempt <= retries; attempt++) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const res = await fetch(url, { signal: controller.signal });
      clearTimeout(timer);
      if (res.ok) return await res.text();
      if (res.status === 404) return null;
      throw new Error(`HTTP ${res.status} ${res.statusText}`);
    } catch (err) {
      clearTimeout(timer); // harmless double-clear when fetch succeeded
      lastErr = err;
      if (attempt < retries) {
        const backoffMs = 500 * (attempt + 1);
        await new Promise((resolve) => setTimeout(resolve, backoffMs));
      }
    }
  }
  console.error(`fetchText failed: ${url} :: ${lastErr?.message}`);
  return null;
}
|
||||
|
||||
/**
 * GET a URL via fetchText() and JSON-parse the body.
 * Returns null when the fetch failed, the body was empty, or the body is not
 * valid JSON (parse errors are swallowed, matching fetchText's no-throw style).
 */
export async function fetchJson(url, opts) {
  const body = await fetchText(url, opts);
  if (!body) return null;
  try {
    return JSON.parse(body);
  } catch {
    // Invalid JSON is treated the same as a failed fetch.
    return null;
  }
}
|
||||
|
||||
/**
 * Promise-based concurrency limiter: at most `max` tasks run at once; excess
 * tasks wait in FIFO order. One waiter is released per completed task and
 * re-checks the slot count before running.
 */
export class Limiter {
  constructor(max) {
    this.max = max;      // maximum number of concurrently running tasks
    this.active = 0;     // tasks currently running
    this.queue = [];     // wake-up callbacks of waiting tasks (FIFO)
  }

  /**
   * Run `fn` once a slot is free. Resolves/rejects with fn's outcome;
   * `fn` may be sync or async (sync throws become rejections).
   */
  async run(fn) {
    // Re-check after every wake-up: another caller may have taken the slot.
    while (this.active >= this.max) {
      await new Promise((wake) => this.queue.push(wake));
    }
    this.active++;
    try {
      // Defer fn to a microtask (and capture sync throws as rejections).
      return await Promise.resolve().then(fn);
    } finally {
      this.active--;
      this._next();
    }
  }

  // Release the oldest waiting task, if any.
  _next() {
    const wake = this.queue.shift();
    if (wake) wake();
  }
}
|
||||
49
lib/imdbRatings.js
Normal file
49
lib/imdbRatings.js
Normal file
@@ -0,0 +1,49 @@
|
||||
import { createReadStream, statSync } from 'node:fs';
|
||||
import { createInterface } from 'node:readline';
|
||||
import { IMDB_RATINGS } from '../config.js';
|
||||
|
||||
// Lazily-built ratings Map (imdb id -> [rating, votes] strings); null until
// the first successful load.
let cache = null;
// mtimeMs of the ratings file `cache` was built from; used to detect updates.
let cacheMtime = 0;
|
||||
|
||||
/**
 * Parse an IMDb title.ratings TSV file into a Map of
 * id (tconst) -> [rating, votes] (both kept as raw strings).
 * The header line is skipped; blank lines and lines with fewer than two tabs
 * are ignored. Uses indexOf/slice instead of split('\t') so only the three
 * needed substrings are allocated per line.
 */
export async function loadRatings(filePath = IMDB_RATINGS) {
  const ratings = new Map();
  const reader = createInterface({
    input: createReadStream(filePath, { encoding: 'utf8' }),
    crlfDelay: Infinity,
  });
  let headerSkipped = false;
  for await (const row of reader) {
    if (!headerSkipped) {
      headerSkipped = true;
      continue;
    }
    if (!row) continue;
    const firstTab = row.indexOf('\t');
    if (firstTab < 0) continue;
    const secondTab = row.indexOf('\t', firstTab + 1);
    if (secondTab < 0) continue;
    ratings.set(row.slice(0, firstTab), [
      row.slice(firstTab + 1, secondTab),
      row.slice(secondTab + 1),
    ]);
  }
  return ratings;
}
|
||||
|
||||
/**
 * Return the ratings Map, reloading from disk only when the file's mtime
 * changed since the previous load. If stat/load fails but a previous cache
 * exists, the stale cache is served; with no cache the error propagates.
 */
export async function getRatings() {
  try {
    const { mtimeMs } = statSync(IMDB_RATINGS);
    if (cache && mtimeMs === cacheMtime) return cache;
    const fresh = await loadRatings(IMDB_RATINGS);
    cache = fresh;
    cacheMtime = mtimeMs;
    return fresh;
  } catch (err) {
    // Degrade gracefully: keep serving the last good data if we have any.
    if (cache) return cache;
    throw err;
  }
}
|
||||
|
||||
/**
 * Look up an IMDb id in a ratings Map (as built by loadRatings).
 * Returns numeric { rating, votes }; a missing/empty id, an absent row, or
 * unparseable values all fall back to 0.
 */
export function lookupRating(map, imdbId) {
  const row = imdbId ? map.get(imdbId) : undefined;
  if (!row) return { rating: 0, votes: 0 };
  const [rawRating, rawVotes] = row;
  return {
    rating: parseFloat(rawRating) || 0,
    votes: parseInt(rawVotes, 10) || 0,
  };
}
|
||||
47
lib/mbLevenshtein.js
Normal file
47
lib/mbLevenshtein.js
Normal file
@@ -0,0 +1,47 @@
|
||||
// UTF-8-safe Levenshtein distance with custom insertion/replacement/deletion costs.
|
||||
// Iterates by Unicode code point (matches the PHP mb_levenshtein behaviour).
|
||||
|
||||
/**
 * Levenshtein distance with per-operation costs, iterating by Unicode code
 * point (an astral character counts as one unit, matching the PHP
 * mb_levenshtein helper rather than UTF-16 .length semantics).
 *
 * @param {string} s1 source string
 * @param {string} s2 target string
 * @param {number} costIns cost of inserting a character of s2
 * @param {number} costRep cost of replacing a character
 * @param {number} costDel cost of deleting a character of s1
 * @returns {number} minimal total edit cost
 */
export function mbLevenshtein(s1, s2, costIns = 1, costRep = 1, costDel = 1) {
  const src = Array.from(s1);
  const dst = Array.from(s2);
  if (src.length === 0) return dst.length * costIns;
  if (dst.length === 0) return src.length * costDel;

  // Two-row dynamic programming: prevRow is row i-1, currRow is row i.
  let prevRow = Array.from({ length: dst.length + 1 }, (_, j) => j * costIns);
  let currRow = new Array(dst.length + 1);

  for (let i = 1; i <= src.length; i++) {
    currRow[0] = i * costDel;
    for (let j = 1; j <= dst.length; j++) {
      const substitute = prevRow[j - 1] + (src[i - 1] === dst[j - 1] ? 0 : costRep);
      currRow[j] = Math.min(prevRow[j] + costDel, currRow[j - 1] + costIns, substitute);
    }
    [prevRow, currRow] = [currRow, prevRow];
  }
  return prevRow[dst.length];
}
|
||||
|
||||
/**
 * Similarity ratio based on mbLevenshtein, normalised by the longer string's
 * code-point length: 1 - distance / max(len1, len2).
 *
 * NOTE(review): the early-exit branches disagree with the general formula.
 * Two empty strings return 0 (identical non-empty strings score 1), and
 * exactly one empty string returns l2/size or l1/size (= 1, a perfect score),
 * whereas the general formula would yield 1 - costIns (0 with default costs).
 * This may deliberately mirror the PHP original — confirm before changing.
 */
export function mbLevenshteinRatio(s1, s2, costIns = 1, costRep = 1, costDel = 1) {
  const l1 = [...s1].length; // code-point lengths, not UTF-16 .length
  const l2 = [...s2].length;
  const size = Math.max(l1, l2);
  if (!size) return 0;
  if (!s1) return l2 / size;
  if (!s2) return l1 / size;
  return 1 - mbLevenshtein(s1, s2, costIns, costRep, costDel) / size;
}
|
||||
|
||||
/** Code-point length of a string (PHP mb_strlen equivalent; '\u{1F600}' counts as 1). */
export function mbStrlen(s) {
  return Array.from(s).length;
}
|
||||
|
||||
/** Locale-aware lowercasing (stands in for PHP's mb_strtolower). */
export function mbStrtolower(s) {
  return s.toLocaleLowerCase();
}
|
||||
26
lib/paths.js
Normal file
26
lib/paths.js
Normal file
@@ -0,0 +1,26 @@
|
||||
import { join } from 'node:path';
|
||||
import { MOVIE_DIR, TV_DIR, JUSTWATCH_MOVIE_DIR, JUSTWATCH_TV_DIR } from '../config.js';
|
||||
|
||||
/**
 * Shard name for on-disk storage: ids are grouped per thousand
 * (e.g. 1234 -> "1", 999 -> "0").
 */
export function bucket(id) {
  return `${Math.floor(id / 1000)}`;
}
|
||||
|
||||
/** Path of one entry's JSON file: <movie|tv base>/<bucket>/<id>.json */
export function entryPath(type, id) {
  return join(type === 'movie' ? MOVIE_DIR : TV_DIR, bucket(id), `${id}.json`);
}
|
||||
|
||||
/** Directory shard that entryPath(type, id) lives in. */
export function entryDir(type, id) {
  return join(type === 'movie' ? MOVIE_DIR : TV_DIR, bucket(id));
}
|
||||
|
||||
/** Path of one JustWatch JSON file: <justwatch base>/<bucket>/<id>.json */
export function justwatchPath(type, id) {
  return join(type === 'movie' ? JUSTWATCH_MOVIE_DIR : JUSTWATCH_TV_DIR, bucket(id), `${id}.json`);
}
|
||||
|
||||
/** Directory shard that justwatchPath(type, id) lives in. */
export function justwatchDir(type, id) {
  return join(type === 'movie' ? JUSTWATCH_MOVIE_DIR : JUSTWATCH_TV_DIR, bucket(id));
}
|
||||
85
lib/queryParser.js
Normal file
85
lib/queryParser.js
Normal file
@@ -0,0 +1,85 @@
|
||||
// Replicates the query parsing logic shared by api.php and search.php:
|
||||
// - extract a year (last (19|20)\d{2} match, ignoring 1080/2160)
|
||||
// - extract an episode marker (SxxExxx, SxxExx, Sxx, partN, NxN, Exxx)
|
||||
// - choose movie vs tv accordingly
|
||||
// - extract titlein from the bytes before the year/episode
|
||||
|
||||
import { FILTER_RE } from './titleFilter.js';
|
||||
|
||||
// Year candidates: any (19|20)xx; callers skip 1080/2160 (video resolutions).
const YEAR_RE = /(19|20)\d{2}/g;
// Single-pass regex matching the PHP behaviour:
// - S/s and E/e and "part" are case-insensitive ([Ss], [Ee], [Pp]art)
// - the lowercase 'x' in NxN, and uppercase 'E' in standalone Exxx, are case-sensitive
// Greedy left-to-right alternation means "S01E02" is consumed whole, so the
// trailing "E02" alternative cannot match inside it.
const EPISODE_RE = /[Ss][0-9]{1,2}.?[Ee][0-9]{1,3}|[Ss][0-9]{2}|[Pp]art\.?[0-9]{1,3}|[0-9]{1,2}x[0-9]{1,3}|E[0-9]{1,3}/g;

// PHP uses byte offsets (substr). To stay byte-faithful, work on the UTF-8 bytes.
// NOTE(review): offsets come from match starts (ASCII digits/letters), so the
// slice should always land on a character boundary — verify if new patterns
// are added, since toString('utf8') would mangle a split multibyte character.
const utf8 = (s) => Buffer.from(s, 'utf8');
const sliceBytes = (s, end) => utf8(s).slice(0, end).toString('utf8');
|
||||
|
||||
/**
 * Collect every match of a /g regex in `str`, recording the matched text and
 * the match's UTF-8 BYTE offset (PHP-compatible), not its JS char index.
 * lastIndex is reset first so the shared module-level regexes stay stateless.
 */
function findAll(re, str) {
  re.lastIndex = 0;
  const matches = [];
  for (const m of str.matchAll(re)) {
    matches.push({
      value: m[0],
      byteOffset: Buffer.byteLength(str.slice(0, m.index), 'utf8'),
    });
  }
  return matches;
}
|
||||
|
||||
// Remove every non-Latin, non-digit character (FILTER_RE from titleFilter.js);
// used to check whether a candidate title span has any substance left.
function stripFilter(s) {
  return s.replace(FILTER_RE, '');
}
|
||||
|
||||
/**
 * Parse a raw release-name query into { type, titlein, yearin, episodein }.
 *
 * - yearin: the LAST (19|20)xx match that is not 1080/2160, else 0.
 * - episodein: the LAST episode marker (SxxEyy, Sxx, partN, NxN, Exxx), else ''.
 * - titlein: the bytes of the query BEFORE the chosen year/episode marker.
 * - type: 'tv' when an episode marker survives, otherwise 'movie'.
 *
 * Returns null for an empty query, and an { error, ... } object when neither
 * a year nor an episode marker is found.
 */
export function parseQuery(query) {
  if (!query) return null;

  let yearin = 0;
  let yearpos = -1; // byte offset of the chosen year match
  let titlein = '';

  // Scan years right-to-left so the LAST plausible year wins (a year-like
  // number inside the title loses to a trailing release year).
  const years = findAll(YEAR_RE, query).reverse();
  for (const m of years) {
    if (m.value === '1080' || m.value === '2160') continue;
    yearin = parseInt(m.value, 10);
    yearpos = m.byteOffset;
    titlein = sliceBytes(query, yearpos);
    break;
  }

  // Same idea for episode markers: keep only the last one.
  let episodein = '';
  let episodepos = -1;
  const eps = findAll(EPISODE_RE, query).reverse();
  for (const m of eps) {
    episodein = m.value;
    episodepos = m.byteOffset;
    break;
  }

  if (episodein) {
    if (!yearin) {
      // Episode only: title is everything before the marker.
      titlein = sliceBytes(query, episodepos);
    } else if (episodepos > yearpos) {
      // "<title> <year> ... <episode>": prefer cutting at the year, but if
      // that leaves no usable title, cut at the episode and drop the year.
      titlein = sliceBytes(query, yearpos);
      if (!stripFilter(titlein)) {
        titlein = sliceBytes(query, episodepos);
        yearin = 0;
      }
    } else {
      // "<title> <episode> ... <year>": prefer cutting at the episode, but if
      // that leaves no usable title, cut at the year and drop the episode.
      titlein = sliceBytes(query, episodepos);
      if (!stripFilter(titlein)) {
        titlein = sliceBytes(query, yearpos);
        episodein = '';
      }
    }
  }

  if (!yearin && !episodein) {
    return { error: 'Year or episode not found in query', titlein, yearin, episodein };
  }

  const type = episodein ? 'tv' : 'movie';
  return { type, titlein, yearin, episodein };
}
|
||||
80
lib/searchEngine.js
Normal file
80
lib/searchEngine.js
Normal file
@@ -0,0 +1,80 @@
|
||||
// Spawns N worker threads (one per searchTYPEi.json chunk) and orchestrates
|
||||
// queries across them. Workers are kept alive between requests so the chunks
|
||||
// stay loaded in memory (replaces the per-request `php searchmultithreads.php`
|
||||
// fork from the PHP version).
|
||||
|
||||
import { Worker } from 'node:worker_threads';
|
||||
import { join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dirname } from 'node:path';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { TMDBINTEGRAL_DIR, NB_WORKERS } from '../config.js';
|
||||
|
||||
// ESM has no __dirname; rebuild it so the worker script resolves relative to
// this module regardless of the process working directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const WORKER_PATH = join(__dirname, 'searchWorker.js');

// One long-lived WorkerPool per search type (keyed by `type`, e.g. movie/tv).
const pools = new Map();
|
||||
|
||||
/**
 * Pool of worker threads for one search type. Each worker owns one
 * search<type><i>.json chunk and keeps it parsed in memory between requests.
 */
class WorkerPool {
  constructor(type) {
    this.type = type;
    this.workers = [];
    this.nextId = 1;          // monotonically increasing request id
    this.pending = new Map(); // id -> { results, remaining, resolve }
    for (let i = 0; i < NB_WORKERS; i++) {
      const chunkPath = join(TMDBINTEGRAL_DIR, `search${type}${i}.json`);
      if (!existsSync(chunkPath)) {
        console.warn(`Missing search chunk: ${chunkPath}`);
        continue;
      }
      const w = new Worker(WORKER_PATH, { workerData: { chunkPath } });
      w.on('message', (msg) => this._onMessage(msg));
      w.on('error', (err) => console.error(`Worker ${type}/${i} error:`, err));
      // Don't keep the process alive for idle workers.
      w.unref();
      this.workers.push(w);
    }
  }

  // Fan-in: collect per-worker answers and resolve once every worker replied.
  // Worker-side errors (msg.type === 'error') still count as replies, so a
  // failing chunk cannot hang the request.
  _onMessage(msg) {
    const entry = this.pending.get(msg.id);
    if (!entry) return;
    if (msg.type === 'result') entry.results.push(...msg.results);
    entry.remaining--;
    if (entry.remaining === 0) {
      this.pending.delete(msg.id);
      entry.resolve(entry.results);
    }
  }

  /**
   * Broadcast one query to every worker and resolve with the concatenated
   * candidate list. An empty pool (no chunk file found at startup) resolves
   * with [] immediately — otherwise `remaining` would start at 0 and the
   * promise would never settle.
   */
  search(payload) {
    return new Promise((resolve) => {
      if (this.workers.length === 0) {
        resolve([]);
        return;
      }
      const id = this.nextId++;
      this.pending.set(id, { results: [], remaining: this.workers.length, resolve });
      for (const w of this.workers) {
        w.postMessage({ type: 'search', id, payload });
      }
    });
  }
}
|
||||
|
||||
/** Lazily create (then reuse) the worker pool for a search type. */
export function getPool(type) {
  let pool = pools.get(type);
  if (!pool) {
    pool = new WorkerPool(type);
    pools.set(type, pool);
  }
  return pool;
}
|
||||
|
||||
/**
 * Query all workers of `type` and rank the merged candidates.
 * Order: delta ASC, then pop ASC (pop is stored negated by the worker, so
 * this is popularity DESC), then deltaYear ASC, then tmdb id ASC —
 * equivalent to PHP's array_multisort($deltas, $pops, $deltayears, $tmdbs, ...).
 */
export async function search(type, filteredTitleIn, yearIn) {
  const pool = getPool(type);
  const candidates = await pool.search({ filteredTitleIn, yearIn });

  candidates.sort(
    (a, b) =>
      a.delta - b.delta ||
      a.pop - b.pop ||
      a.deltaYear - b.deltaYear ||
      a.tmdb - b.tmdb,
  );

  return candidates;
}
|
||||
109
lib/searchWorker.js
Normal file
109
lib/searchWorker.js
Normal file
@@ -0,0 +1,109 @@
|
||||
// Worker thread used by lib/searchEngine.js. Equivalent to one fork in
|
||||
// searchmultithreads.php: load one search chunk and emit candidate matches.
|
||||
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { parentPort, workerData } from 'node:worker_threads';
|
||||
import { mbLevenshtein, mbStrlen } from './mbLevenshtein.js';
|
||||
import {
|
||||
TITLE_TOLERANCE, LEV_INS, LEV_REP, LEV_DEL, LEV_SCALE, YEAR_TOLERANCE,
|
||||
} from '../config.js';
|
||||
|
||||
// Column indices of one row in a search<type><i>.json chunk.
const TMDB = 0;                  // TMDB id
const TITLE = 1;
const ENGLISHTITLE = 2;
const ORIGINALTITLE = 3;
const FILTEREDTITLE = 4;         // pre-filtered title variants used for matching
const FILTEREDENGLISHTITLE = 5;
const FILTEREDORIGINALTITLE = 6;
const YEAR = 7;                  // array of candidate years; [0] is the primary
const POPULARITY = 8;

// Chunk file assigned to this worker; parsed lazily on the first query.
let chunkPath;
let chunk = null;

if (workerData?.chunkPath) {
  chunkPath = workerData.chunkPath;
}
|
||||
|
||||
// Parse the chunk on first use and memoise it for the life of the worker.
function loadChunk() {
  if (chunk === null) {
    chunk = JSON.parse(readFileSync(chunkPath, 'utf8'));
  }
  return chunk;
}
|
||||
|
||||
/**
 * Similarity score (up to 100) between the incoming filtered title and one
 * stored filtered title; 0 when the stored title is empty/missing.
 * Normalises the weighted Levenshtein distance by the longer code-point
 * length times LEV_SCALE.
 */
function score(filteredIn, target, ftiLen) {
  if (!target) return 0;
  const targetLen = mbStrlen(target);
  const distance = mbLevenshtein(filteredIn, target, LEV_INS, LEV_REP, LEV_DEL);
  return 100 - (distance / (Math.max(ftiLen, targetLen) * LEV_SCALE)) * 100;
}
|
||||
|
||||
/**
 * Scan every row of this worker's chunk and return candidate matches.
 * A row qualifies when (a) one of its years is within YEAR_TOLERANCE of
 * yearIn (only enforced when yearIn is set) and (b) the best of the three
 * title scores is within TITLE_TOLERANCE.
 */
function search({ filteredTitleIn, yearIn }) {
  const db = loadChunk();
  const out = [];
  // Hoisted: the input's code-point length is reused for every score() call.
  const ftiLen = mbStrlen(filteredTitleIn);

  for (const row of db) {
    let deltaYear = 0;
    if (yearIn) {
      // Accept the first row year close enough to the requested year.
      let ok = false;
      for (const y of row[YEAR]) {
        const dy = Math.abs(yearIn - y);
        if (dy <= YEAR_TOLERANCE) { ok = true; deltaYear = dy; break; }
      }
      if (!ok) continue;
    }

    const fT = row[FILTEREDTITLE];
    const fE = row[FILTEREDENGLISHTITLE];
    const fO = row[FILTEREDORIGINALTITLE];

    const pO = score(filteredTitleIn, fO, ftiLen);

    // Reuse pO/pT when the filtered variants are identical strings, to skip
    // redundant Levenshtein computations.
    let pT;
    if (fT) {
      pT = (fT === fO) ? pO : score(filteredTitleIn, fT, ftiLen);
    } else pT = 0;

    let pE;
    if (fE) {
      if (fE === fO) pE = pO;
      else if (fE === fT) pE = pT;
      else pE = score(filteredTitleIn, fE, ftiLen);
    } else pE = 0;

    // delta = 100 - best score; smaller is better.
    const dT = 100 - pT;
    const dE = 100 - pE;
    const dO = 100 - pO;
    const delta = Math.min(dT, dE, dO);
    if (delta > TITLE_TOLERANCE) continue;

    out.push({
      delta,
      pop: -row[POPULARITY], // negated so an ascending sort = popularity DESC
      deltaYear,
      tmdb: row[TMDB],
      title: row[TITLE],
      englishTitle: row[ENGLISHTITLE],
      originalTitle: row[ORIGINALTITLE],
      filteredTitle: fT,
      filteredEnglishTitle: fE,
      filteredOriginalTitle: fO,
      year: row[YEAR][0],
    });
  }
  return out;
}
|
||||
|
||||
// RPC surface: reply to each 'search' request with either a result batch or
// an error envelope carrying the same request id.
if (parentPort) {
  parentPort.on('message', (msg) => {
    if (msg?.type !== 'search') return;
    try {
      const results = search(msg.payload);
      parentPort.postMessage({ type: 'result', id: msg.id, results });
    } catch (err) {
      parentPort.postMessage({ type: 'error', id: msg.id, error: err.message });
    }
  });
}
|
||||
29
lib/titleFilter.js
Normal file
29
lib/titleFilter.js
Normal file
@@ -0,0 +1,29 @@
|
||||
// Replicates the PHP search.php title normalization:
|
||||
// - replace ligatures and superscripts
|
||||
// - strip everything that is not Latin or 0-9
|
||||
// - lowercase
|
||||
|
||||
// Ligature/superscript substitutions applied before FILTER_RE stripping.
// NOTE(review): the 'é' -> 'é' and 'è' -> 'è' pairs look like no-ops here;
// they presumably map decomposed (e + combining accent) forms to precomposed
// ones — confirm the actual bytes before touching these arrays.
const TITLE_SEARCHES = ['œ', 'Œ', 'æ', 'Æ', 'é', 'è', '²', '³', '⁴'];
const TITLE_REPLACES = ['oe', 'Oe', 'ae', 'Ae', 'é', 'è', '2', '3', '4'];

// Everything that is not a Latin-script letter or an ASCII digit.
const FILTER_RE = /[^\p{Script=Latin}0-9]+/gu;
|
||||
|
||||
/**
 * Apply the TITLE_SEARCHES -> TITLE_REPLACES substitutions (ligatures,
 * accent normalisation, superscript digits). Returns '' for falsy input.
 */
export function translit(s) {
  if (!s) return '';
  let result = s;
  TITLE_SEARCHES.forEach((needle, i) => {
    result = result.replaceAll(needle, TITLE_REPLACES[i]);
  });
  return result;
}
|
||||
|
||||
/**
 * Normalise a title for fuzzy matching: transliterate ligatures/superscripts,
 * then strip everything that is not Latin or 0-9. Returns '' for falsy input.
 */
export function filterTitle(s) {
  return s ? translit(s).replace(FILTER_RE, '') : '';
}
|
||||
|
||||
/** Lower-cased (locale-aware) variant of filterTitle(). */
export function filterAndLower(s) {
  const filtered = filterTitle(s);
  return filtered.toLocaleLowerCase();
}
|
||||
|
||||
export { FILTER_RE };
|
||||
Reference in New Issue
Block a user