48 lines
1.4 KiB
JavaScript
48 lines
1.4 KiB
JavaScript
|
|
// UTF-8-safe Levenshtein distance with custom insertion/replacement/deletion costs.
|
||
|
|
// Iterates by Unicode code point (matches the PHP mb_levenshtein behaviour).
|
||
|
|
|
||
|
|
/**
 * Weighted Levenshtein edit distance between two strings, compared one
 * Unicode code point at a time (a surrogate pair counts as a single symbol).
 *
 * @param {string} s1 - Source string.
 * @param {string} s2 - Target string.
 * @param {number} [costIns=1] - Cost charged for each insertion.
 * @param {number} [costRep=1] - Cost charged for each replacement.
 * @param {number} [costDel=1] - Cost charged for each deletion.
 * @returns {number} Minimum total cost of the edits that turn s1 into s2.
 */
export function mbLevenshtein(s1, s2, costIns = 1, costRep = 1, costDel = 1) {
  // Spreading a string walks its iterator, which yields whole code points.
  const source = [...s1];
  const target = [...s2];
  const rows = source.length;
  const cols = target.length;

  // An empty side means the answer is pure insertions or pure deletions.
  if (rows === 0) {
    return cols * costIns;
  }
  if (cols === 0) {
    return rows * costDel;
  }

  // Only two rows of the DP table are kept: the finished row above and the
  // row currently being filled. Row 0 is "build each target prefix by inserting".
  let above = Array.from({ length: cols + 1 }, (_, col) => col * costIns);
  let current = new Array(cols + 1);

  for (let r = 1; r <= rows; r++) {
    const sourcePoint = source[r - 1];
    // Column 0 is "erase the first r source symbols".
    current[0] = r * costDel;

    for (let c = 1; c <= cols; c++) {
      const viaDelete = above[c] + costDel;
      const viaInsert = current[c - 1] + costIns;
      const viaReplace = above[c - 1] + (sourcePoint === target[c - 1] ? 0 : costRep);

      let best = viaDelete < viaInsert ? viaDelete : viaInsert;
      if (!(best < viaReplace)) {
        best = viaReplace;
      }
      current[c] = best;
    }

    // The row just filled becomes the reference row for the next source symbol.
    const finished = current;
    current = above;
    above = finished;
  }

  return above[cols];
}
|
||
|
|
|
||
|
|
/**
 * Levenshtein-based ratio of two strings, normalised by the code-point length
 * of the longer one: `1 - distance / longestLength`.
 *
 * NOTE(review): the empty-input branches do not follow that formula. Two empty
 * strings give 0, and exactly one empty string gives 1. This looks inherited
 * from the PHP original; confirm callers rely on it before changing it.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @param {number} [costIns=1] - Insertion cost forwarded to mbLevenshtein.
 * @param {number} [costRep=1] - Replacement cost forwarded to mbLevenshtein.
 * @param {number} [costDel=1] - Deletion cost forwarded to mbLevenshtein.
 * @returns {number} The ratio described above.
 */
export function mbLevenshteinRatio(s1, s2, costIns = 1, costRep = 1, costDel = 1) {
  // Lengths are measured in code points, not UTF-16 units.
  const [firstLength, secondLength] = [s1, s2].map((text) => [...text].length);
  const longest = firstLength > secondLength ? firstLength : secondLength;

  if (longest === 0) {
    return 0;
  }
  if (!s1) {
    return secondLength / longest;
  }
  if (!s2) {
    return firstLength / longest;
  }

  const distance = mbLevenshtein(s1, s2, costIns, costRep, costDel);
  return 1 - distance / longest;
}
|
||
|
|
|
||
|
|
/**
 * Length of a string in Unicode code points (a surrogate pair counts once),
 * as opposed to `s.length`, which counts UTF-16 code units.
 *
 * @param {string} s - String to measure.
 * @returns {number} Number of code points in s.
 */
export function mbStrlen(s) {
  let codePoints = 0;
  // The string iterator yields one whole code point per step.
  for (const _ of s) {
    codePoints += 1;
  }
  return codePoints;
}
|
||
|
|
|
||
|
|
/**
 * Lower-cases a string with the locale-independent Unicode case mapping.
 *
 * @param {string} s - String to convert.
 * @returns {string} Lower-cased copy of s.
 */
export function mbStrtolower(s) {
  // Deliberately toLowerCase rather than toLocaleLowerCase: the locale variant
  // follows the host's default locale, so "I" becomes dotless "ı" on a
  // Turkish/Azeri machine and the same input would give different output
  // per environment.
  return s.toLowerCase();
}
|