much better reverse inflection search

This commit is contained in:
adueck 2022-09-25 17:06:54 +04:00
parent 513de28779
commit cecd3e56a8
3 changed files with 38 additions and 28 deletions

View File

@ -44,7 +44,7 @@ function InflectionSearchResultDisplay(
};
return <div className="mb-4">
<div className="mb-2"><strong>{displayFormResult(result.form)}</strong></div>
{result.matches.map((match) => <div className="ml-2">
{result.matches.map((match, i) => <div className="ml-2" key={i}>
<InlinePs opts={textOptions}>{match.ps}</InlinePs>
<div className="ml-3 my-2">
<em>

View File

@ -22,32 +22,21 @@ import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy";
// That's so much better I'm removing the option of skipping compounds
// ~4th iteration:~ ignore perfective or imperfective if wasn't present in verb info (not worth it - scrapped)
// Regex character class for each vowel that should match with or without an
// acute accent. The classes deliberately contain no "|": inside [...] a pipe is
// a literal character, so a class written as "[e|é]" would also match "|".
const accentInsensitiveVowel: Record<string, string> = {
  "e": "[eé]",
  "é": "[eé]",
  "i": "[ií]",
  "í": "[ií]",
  "o": "[oó]",
  "ó": "[oó]",
  "u": "[uú]",
  "ú": "[uú]",
  "a": "[aá]",
  "á": "[aá]",
  "U": "[ÚU]",
  "Ú": "[ÚU]",
};

/**
 * Turns phonetic (Latin-script) text into a regex source fragment in which each
 * plain or acute-accented vowel matches either form of that vowel.
 *
 * Only a, e, i, o, u and capital U (plus their acute-accented forms) are
 * expanded. Every other character is copied through unchanged, which means
 * regex metacharacters in `f` are NOT escaped.
 *
 * @param f - phonetic text to make accent-insensitive
 * @returns a regex source string; no anchors are added
 */
function fFuzzy(f: string): string {
  // Single pass, so text produced by one substitution is never re-scanned.
  return f.replace(/[aáeéiíoóuúUÚ]/g, (vowel) => accentInsensitiveVowel[vowel] ?? vowel);
}
export function searchAllInflections(allDocs: T.DictionaryEntry[], searchValue: string): { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[] {
// const timerLabel = "Search inflections";
const script = isPashtoScript(searchValue) ? "p" : "f";
const beg = script === "p"
? makeAWeeBitFuzzy(searchValue.slice(0, 2), script)
: fFuzzy(searchValue.slice(0, 2));
const preSearchFun = isPashtoScript(searchValue)
? (ps: T.PsString) => !!ps.p.slice(0, 2).match(beg)
: (ps: T.PsString) => !!ps.f.slice(0, 2).match(beg);
const searchRegex = new RegExp("^"
+ (script === "f" ? fFuzzy(searchValue) : makeAWeeBitFuzzy(searchValue, "p"))
+ "$");
const begRegex = new RegExp(
makeAWeeBitFuzzy(searchValue.slice(0, 3), script, true),
"i",
);
const preSearchFun = (ps: T.PsString) => !!ps[script].match(begRegex);
const searchRegex = new RegExp(
makeAWeeBitFuzzy(searchValue, script, true) + "$",
"i",
);
// add little bit fuzzy
// also do version without directional pronoun on front
const searchFun = isPashtoScript(searchValue)
? (ps: T.PsString) => !!ps.p.match(searchRegex)
: (ps: T.PsString) => !!ps.f.match(searchRegex);
const searchFun = (ps: T.PsString) => !!ps[script].match(searchRegex)
// console.time(timerLabel);
const results = allDocs.reduce((all: { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[], entry) => {
const type = isNounAdjOrVerb(entry);

View File

@ -79,9 +79,10 @@ const pReplacer = {
"آ": alef,
};
// Regex source fragments (non-capturing alternations) that serve as the
// replacement values in `fReplacer`: any one spelling in a group is rewritten
// to the whole group, so it matches every other spelling in that group.
// NOTE(review): `fiveYeysF` appears twice in this span; the two lines differ
// only in whether the accented "é" is part of the group.
const fiveYeysF = "(?:eyy|ey|ee|e|uy)";
const fiveYeysF = "(?:eyy|ey|ee|é|e|uy)";
// "kh", "h" and "x" are treated as interchangeable.
const hKhF = "(?:kh|h|x)";
// "z" and "dz" are treated as interchangeable.
const zSoundsF = "(?:z|dz)";
// "ts" and "s" are treated as interchangeable.
const sSoundsF = "(?:ts|s)";
const fReplacer = {
"eyy": fiveYeysF,
@ -95,11 +96,31 @@ const fReplacer = {
"x": hKhF,
"h": hKhF,
"kh": hKhF,
"ts": sSoundsF,
"s": sSoundsF,
// only used if ignoring accents
"a": "[a|á]",
"á": "[a|á]",
"u": "[u|ú]",
"ú": "[u|ú]",
"o": "[o|ó]",
"ó": "[o|ó]",
"i": "[i|í]",
"í": "[i|í]",
"U": "[U|Ú]",
"Ú": "[U|Ú]",
"éy": fiveYeysF,
"éyy": fiveYeysF,
"úy": fiveYeysF,
"ée": fiveYeysF,
"é": fiveYeysF,
};
// Global regex that matches any key of `pReplacer` (keys joined with "|").
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
// Global regexes that pick out the phonetic sequences to be swapped for their
// `fReplacer` value. Alternation is ordered, so sequences sharing a prefix are
// listed longest-first ("eyy" before "ey" before "e").
// NOTE(review): `fRepRegex` appears twice in this span; the second line
// additionally matches "ts" and "s".
const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h/g;
const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h|ts|s/g;
// Same idea, but also matches the vowels a/i/o/u/U and the acute-accented forms
// of the sequences and vowels.
const fRepRegexWAccents = /eyy|éyy|ey|éy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
function makePAWeeBitFuzzy(s: string): string {
// + s.replace(/ /g, "").split("").join(" *");
@ -109,15 +130,15 @@ function makePAWeeBitFuzzy(s: string): string {
});
}
// Builds a regex source string from phonetic (Latin-script) text: the result is
// "^" followed by `s`, with every match of the replacement regex swapped for
// the value stored under that match in `fReplacer`.
// Nothing else in `s` is altered, so regex metacharacters are not escaped.
// NOTE(review): the signature/return line pair appears twice in this span --
// once without and once with the optional `ignoreAccent` parameter.
function makeFAWeeBitFuzzy(s: string): string {
return "^" + s.replace(fRepRegex, mtch => {
function makeFAWeeBitFuzzy(s: string, ignoreAccent?: boolean): string {
// A truthy `ignoreAccent` selects `fRepRegexWAccents`; otherwise `fRepRegex` is used.
return "^" + s.replace((ignoreAccent ? fRepRegexWAccents : fRepRegex), mtch => {
// The lookup below is unchecked: a match with no `fReplacer` key would yield
// undefined, which `replace` inserts as the text "undefined".
// @ts-ignore
return fReplacer[mtch];
});
}
// Public entry point: dispatches on `i` to the Pashto-script ("p") or
// phonetic ("f") fuzzy-regex builder and returns its regex source string.
// NOTE(review): the signature and the "f" branch each appear twice in this
// span -- once without and once with the optional `ignoreAccent` flag.
export function makeAWeeBitFuzzy(s: string, i: "f" | "p"): string {
export function makeAWeeBitFuzzy(s: string, i: "f" | "p", ignoreAccent?: boolean): string {
return i === "p"
// `ignoreAccent` is not passed to the Pashto-script builder.
? makePAWeeBitFuzzy(s)
: makeFAWeeBitFuzzy(s);
// `ignoreAccent` is forwarded only to the phonetic builder.
: makeFAWeeBitFuzzy(s, ignoreAccent);
}