improved use of levenshtein distance
This commit is contained in:
parent
ea50654689
commit
d8119a1475
|
@ -31,7 +31,7 @@ const dictionaryInfoUrl = `https://storage.googleapis.com/lingdocs/dictionary-in
|
||||||
const dictionaryInfoLocalStorageKey = "dictionaryInfo5";
|
const dictionaryInfoLocalStorageKey = "dictionaryInfo5";
|
||||||
const dictionaryCollectionName = "dictionary3";
|
const dictionaryCollectionName = "dictionary3";
|
||||||
// const dictionaryDatabaseName = "dictdb.db";
|
// const dictionaryDatabaseName = "dictdb.db";
|
||||||
export const pageSize = 35;
|
export const pageSize = 60;
|
||||||
|
|
||||||
const db = indexedDB.open("inPrivate");
|
const db = indexedDB.open("inPrivate");
|
||||||
db.onerror = (e) => {
|
db.onerror = (e) => {
|
||||||
|
@ -364,18 +364,9 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({
|
||||||
: [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults];
|
: [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults];
|
||||||
// sort out each chunk (based on limit used multiple times by infinite scroll)
|
// sort out each chunk (based on limit used multiple times by infinite scroll)
|
||||||
// so that when infinite scrolling, it doesn't re-sort the previous chunks given
|
// so that when infinite scrolling, it doesn't re-sort the previous chunks given
|
||||||
const closeResultsLength = exactResults.length + slightlyFuzzyResults.length;
|
// const closeResultsLength = exactResults.length + slightlyFuzzyResults.length;
|
||||||
const chunksToSort = chunkOutArray(results, pageSize);
|
const chunksToSort = chunkOutArray(results, pageSize);
|
||||||
return chunksToSort.reduce(
|
return chunksToSort.flatMap((c) => sortByRelevancy(c, search, index));
|
||||||
(acc, cur, i) =>
|
|
||||||
i === 0
|
|
||||||
? [
|
|
||||||
...sortByRelevancy(cur.slice(0, closeResultsLength), search, index),
|
|
||||||
...sortByRelevancy(cur.slice(closeResultsLength), search, index),
|
|
||||||
]
|
|
||||||
: [...acc, ...sortByRelevancy(cur, search, index)],
|
|
||||||
[]
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function sortByRelevancy<T extends Record<"p" | "g", string>>(
|
function sortByRelevancy<T extends Record<"p" | "g", string>>(
|
||||||
|
@ -390,8 +381,8 @@ function sortByRelevancy<T extends Record<"p" | "g", string>>(
|
||||||
// then don't mess with the relevancy
|
// then don't mess with the relevancy
|
||||||
// now instead of an extra pass for exact, we can just use this!
|
// now instead of an extra pass for exact, we can just use this!
|
||||||
const similars = {
|
const similars = {
|
||||||
p: ["دډتټ", "زذضظځ", "صسث", "رړڼ", "ڼن", "یيېۍ", "قک", "ګږ", "ښخحه"],
|
p: ["دډتټ", "زذضظځ", "صسث", "رړڼ", "ڼن", "یيېۍ", "قک", "ګږ", "ښخحه", "پف"],
|
||||||
g: ["tdTD", "rRN", "nN", "ei", "xkg"],
|
g: ["tdTD", "rRN", "nN", "ei", "xkg", "pf", "au"],
|
||||||
};
|
};
|
||||||
function insert() {
|
function insert() {
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -401,18 +392,25 @@ function sortByRelevancy<T extends Record<"p" | "g", string>>(
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
// Substitution cost passed to levenshtein() for aligning character `a`
// with character `b`:
//   0   - the two characters are identical
//   0.5 - they differ, but appear together in one of the `similars` groups
//         for the current `index`
//   1   - any other pair
// The equality test has to come first. Every character listed in a
// `similars` group trivially shares that group with itself, so checking the
// groups first charges 0.5 for an exact match on letters like "d" or "a",
// and exact matches end up tied with similar-letter spellings.
function update(a: string, b: string) {
  if (a === b) {
    return 0;
  }
  const interchangeable = similars[index].some(
    (group) => group.includes(a) && group.includes(b)
  );
  return interchangeable ? 0.5 : 1;
}
|
||||||
|
// Smallest edit distance between the search string `s` and any of the
// comma-separated variants contained in `g`, using the insert / remove /
// update cost callbacks defined alongside this helper.
function levenOverVars(g: string, s: string): number {
  const variants = g.split(",");
  return variants.reduce(
    (best, variant) =>
      Math.min(best, levenshtein(variant, s, insert, remove, update).distance),
    Infinity
  );
}
|
||||||
|
|
||||||
const toSort = [...arr];
|
const toSort = [...arr];
|
||||||
toSort.sort((a, b) => {
|
toSort.sort((a, b) => {
|
||||||
const aDist = levenshtein(a[index], searchI, insert, remove, update);
|
const aDist = levenOverVars(a[index], searchI);
|
||||||
const bDist = levenshtein(b[index], searchI, insert, remove, update);
|
const bDist = levenOverVars(b[index], searchI);
|
||||||
return aDist.distance - bDist.distance;
|
return aDist - bDist;
|
||||||
});
|
});
|
||||||
return toSort;
|
return toSort;
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,11 +98,10 @@ const fReplacer = {
|
||||||
kh: hKhF,
|
kh: hKhF,
|
||||||
ts: sSoundsF,
|
ts: sSoundsF,
|
||||||
s: sSoundsF,
|
s: sSoundsF,
|
||||||
// only used if ignoring accents
|
a: "[a|á|u|ú]",
|
||||||
a: "[a|á]",
|
|
||||||
á: "[a|á|u|ú]",
|
á: "[a|á|u|ú]",
|
||||||
u: "[u|ú|a|á]",
|
u: "[u|ú|a|á]",
|
||||||
ú: "[u|ú]",
|
ú: "[u|ú|a|á]",
|
||||||
o: "[o|ó]",
|
o: "[o|ó]",
|
||||||
ó: "[o|ó]",
|
ó: "[o|ó]",
|
||||||
i: "[i|í]",
|
i: "[i|í]",
|
||||||
|
@ -118,7 +117,7 @@ const fReplacer = {
|
||||||
|
|
||||||
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
|
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
|
||||||
|
|
||||||
const fRepRegex = /ey|ay|uy|ee|e|z|dz|x|kh|h|ts|s/g;
|
const fRepRegex = /ey|ay|uy|ee|a|u|e|z|dz|x|kh|h|ts|s/g;
|
||||||
|
|
||||||
const fRepRegexWAccents =
|
const fRepRegexWAccents =
|
||||||
/ey|éy|ay|áy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
|
/ey|éy|ay|áy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
|
||||||
|
|
Loading…
Reference in New Issue