much better reverse inflection search
This commit is contained in:
parent
513de28779
commit
cecd3e56a8
|
@ -44,7 +44,7 @@ function InflectionSearchResultDisplay(
|
|||
};
|
||||
return <div className="mb-4">
|
||||
<div className="mb-2"><strong>{displayFormResult(result.form)}</strong></div>
|
||||
{result.matches.map((match) => <div className="ml-2">
|
||||
{result.matches.map((match, i) => <div className="ml-2" key={i}>
|
||||
<InlinePs opts={textOptions}>{match.ps}</InlinePs>
|
||||
<div className="ml-3 my-2">
|
||||
<em>
|
||||
|
|
|
@ -22,32 +22,21 @@ import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy";
|
|||
// That's so much better I'm removing the option of skipping compounds
|
||||
// ~4th iteration:~ ignore perfective or imperfective if wasn't present in verb info (not worth it - scrapped)
|
||||
|
||||
/**
 * Turns a phonetic (Latin-script) search string into regex source that
 * matches each vowel with or without its acute accent.
 *
 * Every occurrence of e/é, i/í, o/ó, u/ú, a/á and U/Ú is replaced by a
 * two-character class containing both the plain and the accented form.
 * All other characters are passed through unchanged (they are NOT
 * regex-escaped).
 *
 * @param f - the phonetic text to make accent-insensitive
 * @returns regex source text, suitable for passing to `new RegExp(...)`
 */
function fFuzzy(f: string): string {
    // Inside a character class `|` is a literal pipe, not alternation, so the
    // classes list only the two vowel forms; otherwise "|" would match too.
    // None of the inserted classes contain a vowel handled by a later step,
    // so the replacements cannot re-process each other's output.
    return f.replace(/[eé]/g, "[eé]")
        .replace(/[ií]/g, "[ií]")
        .replace(/[oó]/g, "[oó]")
        .replace(/[uú]/g, "[uú]")
        .replace(/[aá]/g, "[aá]")
        .replace(/[UÚ]/g, "[UÚ]");
}
|
||||
|
||||
export function searchAllInflections(allDocs: T.DictionaryEntry[], searchValue: string): { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[] {
|
||||
// const timerLabel = "Search inflections";
|
||||
const script = isPashtoScript(searchValue) ? "p" : "f";
|
||||
const beg = script === "p"
|
||||
? makeAWeeBitFuzzy(searchValue.slice(0, 2), script)
|
||||
: fFuzzy(searchValue.slice(0, 2));
|
||||
const preSearchFun = isPashtoScript(searchValue)
|
||||
? (ps: T.PsString) => !!ps.p.slice(0, 2).match(beg)
|
||||
: (ps: T.PsString) => !!ps.f.slice(0, 2).match(beg);
|
||||
const searchRegex = new RegExp("^"
|
||||
+ (script === "f" ? fFuzzy(searchValue) : makeAWeeBitFuzzy(searchValue, "p"))
|
||||
+ "$");
|
||||
const begRegex = new RegExp(
|
||||
makeAWeeBitFuzzy(searchValue.slice(0, 3), script, true),
|
||||
"i",
|
||||
);
|
||||
const preSearchFun = (ps: T.PsString) => !!ps[script].match(begRegex);
|
||||
const searchRegex = new RegExp(
|
||||
makeAWeeBitFuzzy(searchValue, script, true) + "$",
|
||||
"i",
|
||||
);
|
||||
// add little bit fuzzy
|
||||
// also do version without directional pronoun on front
|
||||
const searchFun = isPashtoScript(searchValue)
|
||||
? (ps: T.PsString) => !!ps.p.match(searchRegex)
|
||||
: (ps: T.PsString) => !!ps.f.match(searchRegex);
|
||||
const searchFun = (ps: T.PsString) => !!ps[script].match(searchRegex)
|
||||
// console.time(timerLabel);
|
||||
const results = allDocs.reduce((all: { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[], entry) => {
|
||||
const type = isNounAdjOrVerb(entry);
|
||||
|
|
|
@ -79,9 +79,10 @@ const pReplacer = {
|
|||
"آ": alef,
|
||||
};
|
||||
|
||||
const fiveYeysF = "(?:eyy|ey|ee|e|uy)";
|
||||
const fiveYeysF = "(?:eyy|ey|ee|é|e|uy)";
|
||||
const hKhF = "(?:kh|h|x)";
|
||||
const zSoundsF = "(?:z|dz)";
|
||||
const sSoundsF = "(?:ts|s)";
|
||||
|
||||
const fReplacer = {
|
||||
"eyy": fiveYeysF,
|
||||
|
@ -95,11 +96,31 @@ const fReplacer = {
|
|||
"x": hKhF,
|
||||
"h": hKhF,
|
||||
"kh": hKhF,
|
||||
"ts": sSoundsF,
|
||||
"s": sSoundsF,
|
||||
// only used if ignoring accents
|
||||
"a": "[a|á]",
|
||||
"á": "[a|á]",
|
||||
"u": "[u|ú]",
|
||||
"ú": "[u|ú]",
|
||||
"o": "[o|ó]",
|
||||
"ó": "[o|ó]",
|
||||
"i": "[i|í]",
|
||||
"í": "[i|í]",
|
||||
"U": "[U|Ú]",
|
||||
"Ú": "[U|Ú]",
|
||||
"éy": fiveYeysF,
|
||||
"éyy": fiveYeysF,
|
||||
"úy": fiveYeysF,
|
||||
"ée": fiveYeysF,
|
||||
"é": fiveYeysF,
|
||||
};
|
||||
|
||||
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
|
||||
|
||||
const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h/g;
|
||||
const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h|ts|s/g;
|
||||
|
||||
const fRepRegexWAccents = /eyy|éyy|ey|éy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
|
||||
|
||||
function makePAWeeBitFuzzy(s: string): string {
|
||||
// + s.replace(/ /g, "").split("").join(" *");
|
||||
|
@ -109,15 +130,15 @@ function makePAWeeBitFuzzy(s: string): string {
|
|||
});
|
||||
}
|
||||
|
||||
function makeFAWeeBitFuzzy(s: string): string {
|
||||
return "^" + s.replace(fRepRegex, mtch => {
|
||||
function makeFAWeeBitFuzzy(s: string, ignoreAccent?: boolean): string {
|
||||
return "^" + s.replace((ignoreAccent ? fRepRegexWAccents : fRepRegex), mtch => {
|
||||
// @ts-ignore
|
||||
return fReplacer[mtch];
|
||||
});
|
||||
}
|
||||
|
||||
export function makeAWeeBitFuzzy(s: string, i: "f" | "p"): string {
|
||||
export function makeAWeeBitFuzzy(s: string, i: "f" | "p", ignoreAccent?: boolean): string {
|
||||
return i === "p"
|
||||
? makePAWeeBitFuzzy(s)
|
||||
: makeFAWeeBitFuzzy(s);
|
||||
: makeFAWeeBitFuzzy(s, ignoreAccent);
|
||||
}
|
Loading…
Reference in New Issue