BIG IMPROVEMENT TO FUZZY SEARCH - USING 3 PASSES
This commit is contained in:
parent
523bbb5b62
commit
254e33f48a
|
@ -7,7 +7,7 @@
|
|||
"name": "functions",
|
||||
"dependencies": {
|
||||
"@google-cloud/storage": "^5.8.1",
|
||||
"@lingdocs/pashto-inflector": "3.6.0",
|
||||
"@lingdocs/pashto-inflector": "3.6.2",
|
||||
"@types/cors": "^2.8.10",
|
||||
"@types/google-spreadsheet": "^3.0.2",
|
||||
"cors": "^2.8.5",
|
||||
|
@ -30,7 +30,7 @@
|
|||
"node": "16"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@lingdocs/pashto-inflector": "3.6.0"
|
||||
"@lingdocs/pashto-inflector": "3.6.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
|
@ -505,9 +505,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@lingdocs/pashto-inflector": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz",
|
||||
"integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==",
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz",
|
||||
"integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@formkit/auto-animate": "^1.0.0-beta.1",
|
||||
|
@ -3682,9 +3682,9 @@
|
|||
}
|
||||
},
|
||||
"@lingdocs/pashto-inflector": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz",
|
||||
"integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==",
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz",
|
||||
"integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==",
|
||||
"requires": {
|
||||
"@formkit/auto-animate": "^1.0.0-beta.1",
|
||||
"classnames": "^2.2.6",
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
"main": "lib/functions/src/index.js",
|
||||
"dependencies": {
|
||||
"@google-cloud/storage": "^5.8.1",
|
||||
"@lingdocs/pashto-inflector": "3.6.0",
|
||||
"@lingdocs/pashto-inflector": "3.6.2",
|
||||
"@types/cors": "^2.8.10",
|
||||
"@types/google-spreadsheet": "^3.0.2",
|
||||
"cors": "^2.8.5",
|
||||
|
@ -35,6 +35,6 @@
|
|||
},
|
||||
"private": true,
|
||||
"peerDependencies": {
|
||||
"@lingdocs/pashto-inflector": "3.6.0"
|
||||
"@lingdocs/pashto-inflector": "3.6.2"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
"version": "0.3.1",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@lingdocs/pashto-inflector": "3.6.0",
|
||||
"@lingdocs/pashto-inflector": "3.6.2",
|
||||
"lokijs": "^1.5.12",
|
||||
"nano": "^9.0.5",
|
||||
"passport-github2": "^0.1.12",
|
||||
|
@ -29,7 +29,7 @@
|
|||
"typescript": "^4.4.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@lingdocs/pashto-inflector": "3.6.0"
|
||||
"@lingdocs/pashto-inflector": "3.6.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
|
@ -312,9 +312,9 @@
|
|||
"dev": true
|
||||
},
|
||||
"node_modules/@lingdocs/pashto-inflector": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz",
|
||||
"integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==",
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz",
|
||||
"integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@formkit/auto-animate": "^1.0.0-beta.1",
|
||||
|
@ -4213,9 +4213,9 @@
|
|||
"dev": true
|
||||
},
|
||||
"@lingdocs/pashto-inflector": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz",
|
||||
"integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==",
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz",
|
||||
"integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==",
|
||||
"requires": {
|
||||
"@formkit/auto-animate": "^1.0.0-beta.1",
|
||||
"classnames": "^2.2.6",
|
||||
|
|
|
@ -17,12 +17,12 @@
|
|||
"url": "git@github.com-lingdocs:lingdocs/lingdocs-main.git"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@lingdocs/pashto-inflector": "3.6.0"
|
||||
"@lingdocs/pashto-inflector": "3.6.2"
|
||||
},
|
||||
"author": "lingdocs.com",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@lingdocs/pashto-inflector": "3.6.0",
|
||||
"@lingdocs/pashto-inflector": "3.6.2",
|
||||
"lokijs": "^1.5.12",
|
||||
"nano": "^9.0.5",
|
||||
"passport-github2": "^0.1.12",
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
"private": true,
|
||||
"dependencies": {
|
||||
"@fortawesome/fontawesome-free": "^5.15.2",
|
||||
"@lingdocs/pashto-inflector": "3.6.0",
|
||||
"@lingdocs/pashto-inflector": "3.6.2",
|
||||
"@testing-library/jest-dom": "^5.11.4",
|
||||
"@testing-library/react": "^11.1.0",
|
||||
"@testing-library/user-event": "^12.1.10",
|
||||
|
@ -110,6 +110,6 @@
|
|||
"user-event": "^4.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@lingdocs/pashto-inflector": "3.6.0"
|
||||
"@lingdocs/pashto-inflector": "3.6.2"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -271,7 +271,18 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tp
|
|||
.simplesort("i")
|
||||
.data();
|
||||
resultsGiven = exactResults.map((mpd: any) => mpd.$loki);
|
||||
|
||||
// Get slightly fuzzy matches
|
||||
const slightlyFuzzy = new RegExp(makeAWeeBitFuzzy(search, infIndex), "i");
|
||||
const slightlyFuzzyQuery = {
|
||||
[index]: { $regex: slightlyFuzzy },
|
||||
$loki: { $nin: resultsGiven },
|
||||
};
|
||||
const slightlyFuzzyResultsLimit = (pageSize * page) - resultsGiven.length;
|
||||
const slightlyFuzzyResults = dictDb.collection.chain()
|
||||
.find(slightlyFuzzyQuery)
|
||||
.limit(slightlyFuzzyResultsLimit)
|
||||
.data();
|
||||
resultsGiven.push(...slightlyFuzzyResults.map((mpd: any) => mpd.$loki));
|
||||
// Get fuzzy matches
|
||||
const pashtoRegExLogic = fuzzifyPashto(search, {
|
||||
script: index === "p" ? "Pashto" : "Latin",
|
||||
|
@ -301,17 +312,18 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tp
|
|||
.limit(fuzzyResultsLimit)
|
||||
.data();
|
||||
const results = tpFilter
|
||||
? [...exactResults, ...fuzzyResults].filter(tpFilter)
|
||||
: [...exactResults, ...fuzzyResults];
|
||||
const chunksToSort = chunkOutArray(results, pageSize);
|
||||
? [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults].filter(tpFilter)
|
||||
: [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults];
|
||||
// sort out each chunk (based on limit used multiple times by infinite scroll)
|
||||
// so that when infinite scrolling, it doesn't resort the previous chunks given
|
||||
// TODO: If on the first page, only sort the fuzzyResults
|
||||
// so that when infinite scrolling, it doesn't re-sort the previous chunks given
|
||||
const closeResultsLength = exactResults.length + slightlyFuzzyResults.length;
|
||||
const chunksToSort = chunkOutArray(results, pageSize);
|
||||
return chunksToSort
|
||||
.reduce((acc, cur, i) => ((i === 0)
|
||||
? [
|
||||
...sortByRelevancy(cur.slice(0, exactResults.length), search, index),
|
||||
...sortByRelevancy(cur.slice(exactResults.length), search, index),
|
||||
// don't sort the close results in the first chunk
|
||||
...cur.slice(0, closeResultsLength),
|
||||
...sortByRelevancy(cur.slice(closeResultsLength), search, index),
|
||||
]
|
||||
: [
|
||||
...acc,
|
||||
|
@ -386,14 +398,12 @@ function makeVerbLookupPortal(): T.EntryLookupPortal<T.VerbEntry> {
|
|||
page: 1,
|
||||
tpFilter: tp.isVerbDictionaryEntry,
|
||||
});
|
||||
const r = vEntries.map((entry): T.VerbEntry => ({
|
||||
return vEntries.map((entry): T.VerbEntry => ({
|
||||
entry,
|
||||
complement: (entry.c?.includes("comp.") && entry.l)
|
||||
? dictionary.findOneByTs(entry.l)
|
||||
: undefined,
|
||||
}));
|
||||
console.log(r);
|
||||
return r;
|
||||
},
|
||||
getByTs: (ts: number): T.VerbEntry | undefined => {
|
||||
const entry = dictDb.findOneByTs(ts);
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy";
|
||||
|
||||
const pMatches = [
|
||||
["پیټی", "پېټی"],
|
||||
["دوستی", "دوستي"],
|
||||
["پته", "پټه"],
|
||||
];
|
||||
|
||||
const fMatches = [
|
||||
["sareyy", "saRey"],
|
||||
["peyTey", "peTey"],
|
||||
];
|
||||
|
||||
pMatches.forEach((pair) => {
|
||||
test(`${pair[0]} should match ${pair[1]}`, () => {
|
||||
const re = makeAWeeBitFuzzy(pair[0], "p");
|
||||
const result = pair[1].match(new RegExp(re, "i"));
|
||||
expect(result).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
fMatches.forEach((pair) => {
|
||||
test(`${pair[0]} should match ${pair[1]}`, () => {
|
||||
const re = makeAWeeBitFuzzy(pair[0], "f");
|
||||
const result = pair[1].match(new RegExp(re, "i"));
|
||||
expect(result).toBeTruthy();
|
||||
});
|
||||
});
|
|
@ -6,36 +6,104 @@
|
|||
*
|
||||
*/
|
||||
|
||||
const matcher = {
|
||||
q: "[q|k]",
|
||||
k: "[q|k]",
|
||||
// TODO: this might not be the best way to handle
|
||||
// double aa's passing as a's - because it can totally ignore the a's
|
||||
a: "[a|á|ă]?a?",
|
||||
á: "[a|á|ă]?a?",
|
||||
ă: "[a|á|ă]?a?",
|
||||
u: "[u|ú]",
|
||||
ú: "[u|ú]",
|
||||
e: "[e|é]",
|
||||
é: "[e|é]",
|
||||
i: "[i|í]",
|
||||
í: "[i|í]",
|
||||
o: "[o|ó]",
|
||||
ó: "[o|ó]",
|
||||
g: "[g|G]",
|
||||
G: "[g|G]",
|
||||
r: "[r|R]",
|
||||
R: "[r|R]",
|
||||
// const matcher = {
|
||||
// q: "[q|k]",
|
||||
// k: "[q|k]",
|
||||
// // TODO: this might not be the best way to handle
|
||||
// // double aa's passing as a's - because it can totally ignore the a's
|
||||
// a: "[a|á|ă]?a?",
|
||||
// á: "[a|á|ă]?a?",
|
||||
// ă: "[a|á|ă]?a?",
|
||||
// u: "[u|ú]",
|
||||
// ú: "[u|ú]",
|
||||
// e: "[e|é]",
|
||||
// é: "[e|é]",
|
||||
// i: "[i|í]",
|
||||
// í: "[i|í]",
|
||||
// o: "[o|ó]",
|
||||
// ó: "[o|ó]",
|
||||
// g: "[g|G]",
|
||||
// G: "[g|G]",
|
||||
// r: "[r|R]",
|
||||
// R: "[r|R]",
|
||||
// };
|
||||
|
||||
const fiveYeys = "[ئ|ۍ|ي|ې|ی]";
|
||||
const sSounds = "[س|ص|ث|څ]";
|
||||
const zSounds = "[ز|ژ|ض|ظ|ذ|ځ]";
|
||||
const tSounds = "[ت|ط|ټ]";
|
||||
const dSounds = "[د|ډ]";
|
||||
const rSounds = "[ر|ړ|ڼ]";
|
||||
const nSounds = "[ن|ڼ]";
|
||||
|
||||
const pReplacer = {
|
||||
"ی": fiveYeys,
|
||||
"ي": fiveYeys,
|
||||
"ۍ": fiveYeys,
|
||||
"ئ": fiveYeys,
|
||||
"ې": fiveYeys,
|
||||
|
||||
"س": sSounds,
|
||||
"ص": sSounds,
|
||||
"ث": sSounds,
|
||||
"څ": sSounds,
|
||||
|
||||
"ز": zSounds,
|
||||
"ظ": zSounds,
|
||||
"ذ": zSounds,
|
||||
"ض": zSounds,
|
||||
"ژ": zSounds,
|
||||
"ځ": zSounds,
|
||||
|
||||
"ت": tSounds,
|
||||
"ط": tSounds,
|
||||
"ټ": tSounds,
|
||||
|
||||
"د": dSounds,
|
||||
"ډ": dSounds,
|
||||
|
||||
"ر": rSounds,
|
||||
"ړ": rSounds,
|
||||
|
||||
"ن": nSounds,
|
||||
"ڼ": nSounds,
|
||||
};
|
||||
|
||||
const fRepRegex = /r|R|q|k|a|á|ă|e|é|i|í|o|ó|g|G|u|ú/g;
|
||||
const fiveYeysF = "(?:eyy|ey|ee|e|uy)";
|
||||
const zSoundsF = "(?:z|dz)";
|
||||
|
||||
const fReplacer = {
|
||||
"eyy": fiveYeysF,
|
||||
"ey": fiveYeysF,
|
||||
"uy": fiveYeysF,
|
||||
"ee": fiveYeysF,
|
||||
"e": fiveYeysF,
|
||||
|
||||
"z": zSoundsF,
|
||||
"dz": zSoundsF,
|
||||
};
|
||||
|
||||
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
|
||||
|
||||
const fRepRegex = /eyy|ey|uy|ee|e|z|dz/g;
|
||||
|
||||
function makePAWeeBitFuzzy(s: string): string {
|
||||
// + s.replace(/ /g, "").split("").join(" *");
|
||||
return "^" + s.replace(pRepRegex, mtch => {
|
||||
// @ts-ignore
|
||||
return pReplacer[mtch];
|
||||
});
|
||||
}
|
||||
|
||||
function makeFAWeeBitFuzzy(s: string): string {
|
||||
return "^" + s.replace(fRepRegex, mtch => {
|
||||
// @ts-ignore
|
||||
return fReplacer[mtch];
|
||||
});
|
||||
}
|
||||
|
||||
export function makeAWeeBitFuzzy(s: string, i: "f" | "p"): string {
|
||||
const logic = i === "f"
|
||||
? "^" + s.replace(/ /g, "").split("").join("['|`]? *").replace(fRepRegex, (mtch) => {
|
||||
// @ts-ignore
|
||||
return matcher[mtch];
|
||||
})
|
||||
: "^" + s.replace(/ /g, "").split("").join(" *");
|
||||
return logic;
|
||||
return i === "p"
|
||||
? makePAWeeBitFuzzy(s)
|
||||
: makeFAWeeBitFuzzy(s);
|
||||
}
|
|
@ -1590,10 +1590,10 @@
|
|||
"@types/yargs" "^16.0.0"
|
||||
chalk "^4.0.0"
|
||||
|
||||
"@lingdocs/pashto-inflector@3.6.0":
|
||||
version "3.6.0"
|
||||
resolved "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz#89143246341bbca70c340f9cbcd72650f8348231"
|
||||
integrity sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==
|
||||
"@lingdocs/pashto-inflector@3.6.2":
|
||||
version "3.6.2"
|
||||
resolved "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz#122eaeaac59253ea0ee708d772e860502aa1d6b7"
|
||||
integrity sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==
|
||||
dependencies:
|
||||
"@formkit/auto-animate" "^1.0.0-beta.1"
|
||||
classnames "^2.2.6"
|
||||
|
|
|
@ -178,10 +178,10 @@
|
|||
"resolved" "https://registry.npmjs.org/@hapi/hoek/-/hoek-9.2.1.tgz"
|
||||
"version" "9.2.1"
|
||||
|
||||
"@lingdocs/pashto-inflector@3.6.0":
|
||||
"integrity" "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg=="
|
||||
"resolved" "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz"
|
||||
"version" "3.6.0"
|
||||
"@lingdocs/pashto-inflector@3.6.2":
|
||||
"integrity" "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ=="
|
||||
"resolved" "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz"
|
||||
"version" "3.6.2"
|
||||
dependencies:
|
||||
"@formkit/auto-animate" "^1.0.0-beta.1"
|
||||
"classnames" "^2.2.6"
|
||||
|
|
Loading…
Reference in New Issue