From cecd3e56a8fd68f43809afea11a9aee129d28443 Mon Sep 17 00:00:00 2001 From: adueck Date: Sun, 25 Sep 2022 17:06:54 +0400 Subject: [PATCH] much better reverse inflection search --- .../InflectionSearchResultDisplay.tsx | 2 +- website/src/lib/search-all-inflections.ts | 31 ++++++----------- website/src/lib/wee-bit-fuzzy.ts | 33 +++++++++++++++---- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/website/src/components/InflectionSearchResultDisplay.tsx b/website/src/components/InflectionSearchResultDisplay.tsx index 4339e34..fda17ae 100644 --- a/website/src/components/InflectionSearchResultDisplay.tsx +++ b/website/src/components/InflectionSearchResultDisplay.tsx @@ -44,7 +44,7 @@ function InflectionSearchResultDisplay( }; return
{displayFormResult(result.form)}
- {result.matches.map((match) =>
+ {result.matches.map((match, i) =>
{match.ps}
diff --git a/website/src/lib/search-all-inflections.ts b/website/src/lib/search-all-inflections.ts index 8e3c107..0020aad 100644 --- a/website/src/lib/search-all-inflections.ts +++ b/website/src/lib/search-all-inflections.ts @@ -22,32 +22,21 @@ import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy"; // That's so much better I'm removing the option of skipping compounds // ~4th iteration:~ ignore perfective or imperfective if wasn't present in verb info (not worth it - scrapped) -function fFuzzy(f: string): string { - return f.replace(/e|é/g, "[e|é]") - .replace(/i|í/g, "[i|í]") - .replace(/o|ó/g, "[o|ó]") - .replace(/u|ú/g, "[u|ú]") - .replace(/a|á/g, "[a|á]") - .replace(/U|Ú/g, "[Ú|U]"); -} - export function searchAllInflections(allDocs: T.DictionaryEntry[], searchValue: string): { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[] { // const timerLabel = "Search inflections"; const script = isPashtoScript(searchValue) ? "p" : "f"; - const beg = script === "p" - ? makeAWeeBitFuzzy(searchValue.slice(0, 2), script) - : fFuzzy(searchValue.slice(0, 2)); - const preSearchFun = isPashtoScript(searchValue) - ? (ps: T.PsString) => !!ps.p.slice(0, 2).match(beg) - : (ps: T.PsString) => !!ps.f.slice(0, 2).match(beg); - const searchRegex = new RegExp("^" - + (script === "f" ? fFuzzy(searchValue) : makeAWeeBitFuzzy(searchValue, "p")) - + "$"); + const begRegex = new RegExp( + makeAWeeBitFuzzy(searchValue.slice(0, 3), script, true), + "i", + ); + const preSearchFun = (ps: T.PsString) => !!ps[script].match(begRegex); + const searchRegex = new RegExp( + makeAWeeBitFuzzy(searchValue, script, true) + "$", + "i", + ); // add little bit fuzzy // also do version without directional pronoun on front - const searchFun = isPashtoScript(searchValue) - ? 
(ps: T.PsString) => !!ps.p.match(searchRegex) - : (ps: T.PsString) => !!ps.f.match(searchRegex); + const searchFun = (ps: T.PsString) => !!ps[script].match(searchRegex) // console.time(timerLabel); const results = allDocs.reduce((all: { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[], entry) => { const type = isNounAdjOrVerb(entry); diff --git a/website/src/lib/wee-bit-fuzzy.ts b/website/src/lib/wee-bit-fuzzy.ts index f7b591a..9922108 100644 --- a/website/src/lib/wee-bit-fuzzy.ts +++ b/website/src/lib/wee-bit-fuzzy.ts @@ -79,9 +79,10 @@ const pReplacer = { "آ": alef, }; -const fiveYeysF = "(?:eyy|ey|ee|e|uy)"; +const fiveYeysF = "(?:eyy|ey|ee|é|e|uy)"; const hKhF = "(?:kh|h|x)"; const zSoundsF = "(?:z|dz)"; +const sSoundsF = "(?:ts|s)"; const fReplacer = { "eyy": fiveYeysF, @@ -95,11 +96,31 @@ const fReplacer = { "x": hKhF, "h": hKhF, "kh": hKhF, + "ts": sSoundsF, + "s": sSoundsF, + // only used if ignoring accents + "a": "[a|á]", + "á": "[a|á]", + "u": "[u|ú]", + "ú": "[u|ú]", + "o": "[o|ó]", + "ó": "[o|ó]", + "i": "[i|í]", + "í": "[i|í]", + "U": "[U|Ú]", + "Ú": "[U|Ú]", + "éy": fiveYeysF, + "éyy": fiveYeysF, + "úy": fiveYeysF, + "ée": fiveYeysF, + "é": fiveYeysF, }; const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g"); -const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h/g; +const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h|ts|s/g; + +const fRepRegexWAccents = /eyy|éyy|ey|éy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g; function makePAWeeBitFuzzy(s: string): string { // + s.replace(/ /g, "").split("").join(" *"); @@ -109,15 +130,15 @@ function makePAWeeBitFuzzy(s: string): string { }); } -function makeFAWeeBitFuzzy(s: string): string { - return "^" + s.replace(fRepRegex, mtch => { +function makeFAWeeBitFuzzy(s: string, ignoreAccent?: boolean): string { + return "^" + s.replace((ignoreAccent ? 
fRepRegexWAccents : fRepRegex), mtch => { // @ts-ignore return fReplacer[mtch]; }); } -export function makeAWeeBitFuzzy(s: string, i: "f" | "p"): string { +export function makeAWeeBitFuzzy(s: string, i: "f" | "p", ignoreAccent?: boolean): string { return i === "p" ? makePAWeeBitFuzzy(s) - : makeFAWeeBitFuzzy(s); + : makeFAWeeBitFuzzy(s, ignoreAccent); } \ No newline at end of file