improved use of levenshtein distance

This commit is contained in:
adueck 2023-08-14 13:41:54 +04:00
parent ea50654689
commit d8119a1475
2 changed files with 21 additions and 24 deletions

View File

@ -31,7 +31,7 @@ const dictionaryInfoUrl = `https://storage.googleapis.com/lingdocs/dictionary-in
const dictionaryInfoLocalStorageKey = "dictionaryInfo5"; const dictionaryInfoLocalStorageKey = "dictionaryInfo5";
const dictionaryCollectionName = "dictionary3"; const dictionaryCollectionName = "dictionary3";
// const dictionaryDatabaseName = "dictdb.db"; // const dictionaryDatabaseName = "dictdb.db";
export const pageSize = 35; export const pageSize = 60;
const db = indexedDB.open("inPrivate"); const db = indexedDB.open("inPrivate");
db.onerror = (e) => { db.onerror = (e) => {
@ -364,18 +364,9 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({
: [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults]; : [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults];
// sort out each chunk (based on limit used multiple times by infinite scroll) // sort out each chunk (based on limit used multiple times by infinite scroll)
// so that when infinite scrolling, it doesn't re-sort the previous chunks given // so that when infinite scrolling, it doesn't re-sort the previous chunks given
const closeResultsLength = exactResults.length + slightlyFuzzyResults.length; // const closeResultsLength = exactResults.length + slightlyFuzzyResults.length;
const chunksToSort = chunkOutArray(results, pageSize); const chunksToSort = chunkOutArray(results, pageSize);
return chunksToSort.reduce( return chunksToSort.flatMap((c) => sortByRelevancy(c, search, index));
(acc, cur, i) =>
i === 0
? [
...sortByRelevancy(cur.slice(0, closeResultsLength), search, index),
...sortByRelevancy(cur.slice(closeResultsLength), search, index),
]
: [...acc, ...sortByRelevancy(cur, search, index)],
[]
);
} }
function sortByRelevancy<T extends Record<"p" | "g", string>>( function sortByRelevancy<T extends Record<"p" | "g", string>>(
@ -390,8 +381,8 @@ function sortByRelevancy<T extends Record<"p" | "g", string>>(
// then don't mess with the relevancy // then don't mess with the relevancy
// now instead of an extra pass for exact, we can just use this! // now instead of an extra pass for exact, we can just use this!
const similars = { const similars = {
p: ["دډتټ", "زذضظځ", "صسث", "رړڼ", "ڼن", "یيېۍ", "قک", "ګږ", "ښخحه"], p: ["دډتټ", "زذضظځ", "صسث", "رړڼ", "ڼن", "یيېۍ", "قک", "ګږ", "ښخحه", "پف"],
g: ["tdTD", "rRN", "nN", "ei", "xkg"], g: ["tdTD", "rRN", "nN", "ei", "xkg", "pf", "au"],
}; };
function insert() { function insert() {
return 1; return 1;
@ -401,18 +392,25 @@ function sortByRelevancy<T extends Record<"p" | "g", string>>(
return 1; return 1;
} }
/**
 * Substitution cost used by the weighted Levenshtein comparison.
 *
 * - identical characters cost nothing
 * - characters listed together in one of the `similars[index]` groups
 *   (letters treated as near-equivalent for the current index) cost 0.5
 * - any other substitution costs 1
 *
 * The equality check has to come first: every character in a group is
 * trivially "similar" to itself, so testing group membership first would
 * charge 0.5 for an exact match and make it tie with a near-match.
 *
 * NOTE(review): assumes the levenshtein helper also calls this for matching
 * pairs and expects 0 for them — confirm against the library in use.
 *
 * @param a character from the candidate string
 * @param b character from the search string
 * @returns 0, 0.5 or 1 as described above
 */
function update(a: string, b: string): number {
  if (a === b) {
    return 0;
  }
  const interchangeable = similars[index].some(
    (group) => group.includes(a) && group.includes(b)
  );
  return interchangeable ? 0.5 : 1;
}
/**
 * Smallest weighted Levenshtein distance between the search string and any
 * of the comma-separated variants contained in an entry field.
 *
 * @param g entry field value; split on "," and each piece is scored on its own
 * @param s the search string every variant is compared against
 * @returns the lowest `distance` reported across the variants
 */
function levenOverVars(g: string, s: string): number {
  let best = Infinity;
  for (const variant of g.split(",")) {
    const { distance } = levenshtein(variant, s, insert, remove, update);
    // Math.min (rather than a `<` comparison) keeps NaN propagation identical
    best = Math.min(best, distance);
  }
  return best;
}
const toSort = [...arr]; const toSort = [...arr];
toSort.sort((a, b) => { toSort.sort((a, b) => {
const aDist = levenshtein(a[index], searchI, insert, remove, update); const aDist = levenOverVars(a[index], searchI);
const bDist = levenshtein(b[index], searchI, insert, remove, update); const bDist = levenOverVars(b[index], searchI);
return aDist.distance - bDist.distance; return aDist - bDist;
}); });
return toSort; return toSort;
} }

View File

@ -98,11 +98,10 @@ const fReplacer = {
kh: hKhF, kh: hKhF,
ts: sSoundsF, ts: sSoundsF,
s: sSoundsF, s: sSoundsF,
// only used if ignoring accents a: "[a|á|u|ú]",
a: "[a|á]",
á: "[a|á|u|ú]", á: "[a|á|u|ú]",
u: "[u|ú|a|á]", u: "[u|ú|a|á]",
ú: "[u|ú]", ú: "[u|ú|a|á]",
o: "[o|ó]", o: "[o|ó]",
ó: "[o|ó]", ó: "[o|ó]",
i: "[i|í]", i: "[i|í]",
@ -118,7 +117,7 @@ const fReplacer = {
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g"); const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
const fRepRegex = /ey|ay|uy|ee|e|z|dz|x|kh|h|ts|s/g; const fRepRegex = /ey|ay|uy|ee|a|u|e|z|dz|x|kh|h|ts|s/g;
const fRepRegexWAccents = const fRepRegexWAccents =
/ey|éy|ay|áy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g; /ey|éy|ay|áy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;