110 lines
2.8 KiB
JavaScript
110 lines
2.8 KiB
JavaScript
// Worker thread used by lib/searchEngine.js. Equivalent to one fork in
// searchmultithreads.php: load one search chunk and emit candidate matches.
|
|
|
|
import { readFileSync } from 'node:fs';
|
|
import { parentPort, workerData } from 'node:worker_threads';
|
|
import { mbLevenshtein, mbStrlen } from './mbLevenshtein.js';
|
|
import {
|
|
TITLE_TOLERANCE, LEV_INS, LEV_REP, LEV_DEL, LEV_SCALE, YEAR_TOLERANCE,
|
|
} from '../config.js';
|
|
|
|
// Positions of the fields inside one row of a search chunk. Each row is an
// array and these names index into it. The per-column notes are inferred
// from the names and from how search() uses them — confirm against the
// chunk generator.
const [
  TMDB, // 0: identifier reported back as `tmdb`
  TITLE, // 1: display title
  ENGLISHTITLE, // 2: English title
  ORIGINALTITLE, // 3: original title
  FILTEREDTITLE, // 4: filtered form of TITLE, used for matching
  FILTEREDENGLISHTITLE, // 5: filtered form of ENGLISHTITLE
  FILTEREDORIGINALTITLE, // 6: filtered form of ORIGINALTITLE
  YEAR, // 7: iterable of years; element 0 is reported as `year`
  POPULARITY, // 8: numeric popularity (negated in search results)
] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
|
|
|
// Parsed chunk contents; stays null until loadChunk() reads the file.
let chunk = null;

// Location of the chunk file handed over by the spawning thread. Remains
// undefined when workerData is absent or carries no truthy chunkPath.
let chunkPath = workerData?.chunkPath || undefined;
|
|
|
|
/**
 * Return the parsed search chunk, reading and JSON-parsing the file at
 * `chunkPath` on the first call and reusing the cached value afterwards.
 *
 * @returns {*} The parsed JSON content of the chunk file.
 * @throws {Error} When no chunk path was configured for this worker.
 * @throws {Error} Whatever readFileSync / JSON.parse throw for an unreadable
 *   or malformed file.
 */
function loadChunk() {
  if (chunk) return chunk;

  // Without this guard readFileSync(undefined) fails with a generic
  // "path argument must be of type string" error that hides the real cause.
  if (!chunkPath) {
    throw new Error('search worker has no chunkPath (workerData.chunkPath was not provided)');
  }

  chunk = JSON.parse(readFileSync(chunkPath, 'utf8'));
  return chunk;
}
|
|
|
|
/**
 * Similarity of the filtered input title to one candidate title.
 *
 * The weighted edit distance from mbLevenshtein is divided by
 * (longer of the two lengths × LEV_SCALE) and the resulting fraction is
 * subtracted, as a percentage, from 100.
 *
 * @param {string} filteredIn - Filtered input title.
 * @param {string} target - Candidate title; any falsy value scores 0.
 * @param {number} ftiLen - Length of `filteredIn`, precomputed by the caller.
 * @returns {number} 0 for a falsy target, otherwise 100 minus the scaled
 *   distance percentage.
 */
function score(filteredIn, target, ftiLen) {
  if (!target) {
    return 0;
  }

  const longest = Math.max(ftiLen, mbStrlen(target));
  const distance = mbLevenshtein(filteredIn, target, LEV_INS, LEV_REP, LEV_DEL);
  const fraction = distance / (longest * LEV_SCALE);
  return 100 - fraction * 100;
}
|
|
|
|
/**
 * Distance between `yearIn` and the first entry of `years` that lies within
 * YEAR_TOLERANCE, or null when no entry qualifies.
 *
 * NOTE(review): this stops at the first acceptable year, which is not
 * necessarily the closest one — confirm that is the intended behaviour.
 */
function firstYearDelta(years, yearIn) {
  for (const candidate of years) {
    const gap = Math.abs(yearIn - candidate);
    if (gap <= YEAR_TOLERANCE) return gap;
  }
  return null;
}

/**
 * Scan the loaded chunk and collect every row whose best title score is
 * within TITLE_TOLERANCE of a perfect 100 and, when a year is supplied, that
 * has at least one year within YEAR_TOLERANCE of it.
 *
 * @param {object} query
 * @param {string} query.filteredTitleIn - Filtered title to look for.
 * @param {number} [query.yearIn] - Year filter; skipped when falsy.
 * @returns {object[]} One entry per accepted row with `delta` (100 minus the
 *   best of the three title scores), `pop` (negated popularity),
 *   `deltaYear`, `tmdb`, the three raw and three filtered titles, and
 *   `year` (first element of the row's year list).
 */
function search({ filteredTitleIn, yearIn }) {
  const rows = loadChunk();
  const inputLen = mbStrlen(filteredTitleIn);
  const matches = [];

  for (const row of rows) {
    // Year filter: rows without an acceptable year are dropped early.
    let deltaYear = 0;
    if (yearIn) {
      const gap = firstYearDelta(row[YEAR], yearIn);
      if (gap === null) continue;
      deltaYear = gap;
    }

    const filteredTitle = row[FILTEREDTITLE];
    const filteredEnglishTitle = row[FILTEREDENGLISHTITLE];
    const filteredOriginalTitle = row[FILTEREDORIGINALTITLE];

    const originalScore = score(filteredTitleIn, filteredOriginalTitle, inputLen);

    // Identical strings reuse an already computed score instead of running
    // the distance calculation again.
    let titleScore = 0;
    if (filteredTitle) {
      titleScore = filteredTitle === filteredOriginalTitle
        ? originalScore
        : score(filteredTitleIn, filteredTitle, inputLen);
    }

    let englishScore = 0;
    if (filteredEnglishTitle) {
      if (filteredEnglishTitle === filteredOriginalTitle) {
        englishScore = originalScore;
      } else if (filteredEnglishTitle === filteredTitle) {
        englishScore = titleScore;
      } else {
        englishScore = score(filteredTitleIn, filteredEnglishTitle, inputLen);
      }
    }

    // Smallest shortfall from 100 across the three titles.
    const delta = 100 - Math.max(titleScore, englishScore, originalScore);
    if (delta > TITLE_TOLERANCE) continue;

    matches.push({
      delta,
      pop: -row[POPULARITY],
      deltaYear,
      tmdb: row[TMDB],
      title: row[TITLE],
      englishTitle: row[ENGLISHTITLE],
      originalTitle: row[ORIGINALTITLE],
      filteredTitle,
      filteredEnglishTitle,
      filteredOriginalTitle,
      year: row[YEAR][0],
    });
  }

  return matches;
}
|
|
|
|
/**
 * Turn any thrown value into a string that can be posted to the parent.
 * Error-like values contribute their `message`; everything else (strings,
 * numbers, null, undefined, …) is stringified.
 *
 * @param {*} err - The caught value.
 * @returns {string} A message describing the failure.
 */
function describeError(err) {
  return typeof err?.message === 'string' ? err.message : String(err);
}

// Message loop: only registered when this module runs as a worker
// (parentPort is null on the main thread).
if (parentPort) {
  parentPort.on('message', (msg) => {
    // Messages of any other type are ignored.
    if (msg?.type !== 'search') return;

    try {
      const results = search(msg.payload);
      parentPort.postMessage({ type: 'result', id: msg.id, results });
    } catch (err) {
      // Reading err.message directly would throw for `throw null` and yield
      // undefined for thrown strings; always reply with a real string so the
      // request identified by msg.id is answered.
      parentPort.postMessage({ type: 'error', id: msg.id, error: describeError(err) });
    }
  });
}
|