Files
proxy_tmdb/lib/mbLevenshtein.js

48 lines
1.4 KiB
JavaScript
Raw Permalink Normal View History

// Unicode-safe (code-point aware) Levenshtein distance with custom insertion/replacement/deletion costs.
// Iterates by Unicode code point (matches the PHP mb_levenshtein behaviour).
/**
 * Weighted Levenshtein distance between two strings, compared one Unicode
 * code point at a time (a surrogate pair counts as a single symbol).
 *
 * @param {string} s1 - String to transform.
 * @param {string} s2 - String to transform `s1` into.
 * @param {number} [costIns=1] - Cost of inserting one code point.
 * @param {number} [costRep=1] - Cost of replacing one code point.
 * @param {number} [costDel=1] - Cost of deleting one code point.
 * @returns {number} Minimum total cost of turning `s1` into `s2`.
 */
export function mbLevenshtein(s1, s2, costIns = 1, costRep = 1, costDel = 1) {
  const source = [...s1];
  const target = [...s2];
  const rowCount = source.length;
  const colCount = target.length;

  // With one side empty the only option is inserting / deleting everything.
  if (rowCount === 0) {
    return colCount * costIns;
  }
  if (colCount === 0) {
    return rowCount * costDel;
  }

  // Only two rows of the DP table are kept: the finished one (`above`) and
  // the one being filled (`current`). Row 0 is "build a prefix of s2 from
  // nothing", i.e. pure insertions.
  let above = Array.from({ length: colCount + 1 }, (_, col) => col * costIns);
  let current = Array.from({ length: colCount + 1 }, () => 0);

  for (const [rowIdx, fromChar] of source.entries()) {
    // Column 0 is "reduce a prefix of s1 to nothing", i.e. pure deletions.
    current[0] = (rowIdx + 1) * costDel;

    for (const [colIdx, toChar] of target.entries()) {
      const viaDelete = above[colIdx + 1] + costDel;
      const viaInsert = current[colIdx] + costIns;
      const viaReplace = above[colIdx] + (fromChar === toChar ? 0 : costRep);

      let cheapest;
      if (viaDelete < viaInsert) {
        cheapest = viaDelete < viaReplace ? viaDelete : viaReplace;
      } else {
        cheapest = viaInsert < viaReplace ? viaInsert : viaReplace;
      }
      current[colIdx + 1] = cheapest;
    }

    // The row just filled becomes the reference row; the old one is reused.
    const recycled = above;
    above = current;
    current = recycled;
  }

  // After the final swap the last completed row lives in `above`.
  return above[colCount];
}
/**
 * Similarity score derived from `mbLevenshtein`, normalised by the longer
 * input's length in Unicode code points.
 *
 * Special cases, checked before the general formula:
 *  - both inputs have zero code points: returns 0;
 *  - `s1` is falsy (empty string): returns `len(s2) / max(len)`, which is 1;
 *  - `s2` is falsy (empty string): returns `len(s1) / max(len)`, which is 1.
 *
 * NOTE(review): with the default costs the general formula would give 0 for
 * an empty-vs-non-empty pair, so the special cases are not continuous with
 * it. Presumably this is kept for parity with the PHP original — confirm
 * with callers before changing.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @param {number} [costIns=1] - Insertion cost forwarded to `mbLevenshtein`.
 * @param {number} [costRep=1] - Replacement cost forwarded to `mbLevenshtein`.
 * @param {number} [costDel=1] - Deletion cost forwarded to `mbLevenshtein`.
 * @returns {number} `1 - distance / max(len(s1), len(s2))` in the general case.
 */
export function mbLevenshteinRatio(s1, s2, costIns = 1, costRep = 1, costDel = 1) {
  const firstLength = [...s1].length;
  const secondLength = [...s2].length;
  const longest = Math.max(firstLength, secondLength);

  if (longest === 0) {
    return 0;
  }
  if (!s1) {
    return secondLength / longest;
  }
  if (!s2) {
    return firstLength / longest;
  }

  const distance = mbLevenshtein(s1, s2, costIns, costRep, costDel);
  return 1 - distance / longest;
}
/**
 * Length of a string in Unicode code points rather than UTF-16 code units,
 * so an astral character such as an emoji counts once.
 *
 * @param {string} s - String to measure.
 * @returns {number} Number of items produced by iterating `s`.
 */
export function mbStrlen(s) {
  // String iteration yields whole code points, so stepping the iterator
  // counts them without building an intermediate array.
  const iterator = s[Symbol.iterator]();
  let total = 0;
  while (!iterator.next().done) {
    total += 1;
  }
  return total;
}
/**
 * Lower-cases a string using Unicode's locale-independent case mapping.
 *
 * @param {string} s - String to convert.
 * @returns {string} Lower-cased copy of `s`.
 */
export function mbStrtolower(s) {
  // toLowerCase(), not toLocaleLowerCase(): the locale variant follows the
  // host's default locale when called without arguments, so on e.g. a
  // Turkish host "I" would become dotless "ı" and the result would depend on
  // where the process runs.
  return s.toLowerCase();
}