much better reverse inflection search
This commit is contained in:
parent
513de28779
commit
cecd3e56a8
|
@ -44,7 +44,7 @@ function InflectionSearchResultDisplay(
|
||||||
};
|
};
|
||||||
return <div className="mb-4">
|
return <div className="mb-4">
|
||||||
<div className="mb-2"><strong>{displayFormResult(result.form)}</strong></div>
|
<div className="mb-2"><strong>{displayFormResult(result.form)}</strong></div>
|
||||||
{result.matches.map((match) => <div className="ml-2">
|
{result.matches.map((match, i) => <div className="ml-2" key={i}>
|
||||||
<InlinePs opts={textOptions}>{match.ps}</InlinePs>
|
<InlinePs opts={textOptions}>{match.ps}</InlinePs>
|
||||||
<div className="ml-3 my-2">
|
<div className="ml-3 my-2">
|
||||||
<em>
|
<em>
|
||||||
|
|
|
@ -22,32 +22,21 @@ import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy";
|
||||||
// That's so much better I'm removing the option of skipping compounds
|
// That's so much better I'm removing the option of skipping compounds
|
||||||
// ~4th iteration:~ ignore perfective or imperfective if wasn't present in verb info (not worth it - scrapped)
|
// ~4th iteration:~ ignore perfective or imperfective if wasn't present in verb info (not worth it - scrapped)
|
||||||
|
|
||||||
function fFuzzy(f: string): string {
|
|
||||||
return f.replace(/e|é/g, "[e|é]")
|
|
||||||
.replace(/i|í/g, "[i|í]")
|
|
||||||
.replace(/o|ó/g, "[o|ó]")
|
|
||||||
.replace(/u|ú/g, "[u|ú]")
|
|
||||||
.replace(/a|á/g, "[a|á]")
|
|
||||||
.replace(/U|Ú/g, "[Ú|U]");
|
|
||||||
}
|
|
||||||
|
|
||||||
export function searchAllInflections(allDocs: T.DictionaryEntry[], searchValue: string): { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[] {
|
export function searchAllInflections(allDocs: T.DictionaryEntry[], searchValue: string): { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[] {
|
||||||
// const timerLabel = "Search inflections";
|
// const timerLabel = "Search inflections";
|
||||||
const script = isPashtoScript(searchValue) ? "p" : "f";
|
const script = isPashtoScript(searchValue) ? "p" : "f";
|
||||||
const beg = script === "p"
|
const begRegex = new RegExp(
|
||||||
? makeAWeeBitFuzzy(searchValue.slice(0, 2), script)
|
makeAWeeBitFuzzy(searchValue.slice(0, 3), script, true),
|
||||||
: fFuzzy(searchValue.slice(0, 2));
|
"i",
|
||||||
const preSearchFun = isPashtoScript(searchValue)
|
);
|
||||||
? (ps: T.PsString) => !!ps.p.slice(0, 2).match(beg)
|
const preSearchFun = (ps: T.PsString) => !!ps[script].match(begRegex);
|
||||||
: (ps: T.PsString) => !!ps.f.slice(0, 2).match(beg);
|
const searchRegex = new RegExp(
|
||||||
const searchRegex = new RegExp("^"
|
makeAWeeBitFuzzy(searchValue, script, true) + "$",
|
||||||
+ (script === "f" ? fFuzzy(searchValue) : makeAWeeBitFuzzy(searchValue, "p"))
|
"i",
|
||||||
+ "$");
|
);
|
||||||
// add little bit fuzzy
|
// add little bit fuzzy
|
||||||
// also do version without directional pronoun on front
|
// also do version without directional pronoun on front
|
||||||
const searchFun = isPashtoScript(searchValue)
|
const searchFun = (ps: T.PsString) => !!ps[script].match(searchRegex)
|
||||||
? (ps: T.PsString) => !!ps.p.match(searchRegex)
|
|
||||||
: (ps: T.PsString) => !!ps.f.match(searchRegex);
|
|
||||||
// console.time(timerLabel);
|
// console.time(timerLabel);
|
||||||
const results = allDocs.reduce((all: { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[], entry) => {
|
const results = allDocs.reduce((all: { entry: T.DictionaryEntry, results: InflectionSearchResult[] }[], entry) => {
|
||||||
const type = isNounAdjOrVerb(entry);
|
const type = isNounAdjOrVerb(entry);
|
||||||
|
|
|
@ -79,9 +79,10 @@ const pReplacer = {
|
||||||
"آ": alef,
|
"آ": alef,
|
||||||
};
|
};
|
||||||
|
|
||||||
const fiveYeysF = "(?:eyy|ey|ee|e|uy)";
|
const fiveYeysF = "(?:eyy|ey|ee|é|e|uy)";
|
||||||
const hKhF = "(?:kh|h|x)";
|
const hKhF = "(?:kh|h|x)";
|
||||||
const zSoundsF = "(?:z|dz)";
|
const zSoundsF = "(?:z|dz)";
|
||||||
|
const sSoundsF = "(?:ts|s)";
|
||||||
|
|
||||||
const fReplacer = {
|
const fReplacer = {
|
||||||
"eyy": fiveYeysF,
|
"eyy": fiveYeysF,
|
||||||
|
@ -95,11 +96,31 @@ const fReplacer = {
|
||||||
"x": hKhF,
|
"x": hKhF,
|
||||||
"h": hKhF,
|
"h": hKhF,
|
||||||
"kh": hKhF,
|
"kh": hKhF,
|
||||||
|
"ts": sSoundsF,
|
||||||
|
"s": sSoundsF,
|
||||||
|
// only used if ignoring accents
|
||||||
|
"a": "[a|á]",
|
||||||
|
"á": "[a|á]",
|
||||||
|
"u": "[u|ú]",
|
||||||
|
"ú": "[u|ú]",
|
||||||
|
"o": "[o|ó]",
|
||||||
|
"ó": "[o|ó]",
|
||||||
|
"i": "[i|í]",
|
||||||
|
"í": "[i|í]",
|
||||||
|
"U": "[U|Ú]",
|
||||||
|
"Ú": "[U|Ú]",
|
||||||
|
"éy": fiveYeysF,
|
||||||
|
"éyy": fiveYeysF,
|
||||||
|
"úy": fiveYeysF,
|
||||||
|
"ée": fiveYeysF,
|
||||||
|
"é": fiveYeysF,
|
||||||
};
|
};
|
||||||
|
|
||||||
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
|
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
|
||||||
|
|
||||||
const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h/g;
|
const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h|ts|s/g;
|
||||||
|
|
||||||
|
const fRepRegexWAccents = /eyy|éyy|ey|éy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
|
||||||
|
|
||||||
function makePAWeeBitFuzzy(s: string): string {
|
function makePAWeeBitFuzzy(s: string): string {
|
||||||
// + s.replace(/ /g, "").split("").join(" *");
|
// + s.replace(/ /g, "").split("").join(" *");
|
||||||
|
@ -109,15 +130,15 @@ function makePAWeeBitFuzzy(s: string): string {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function makeFAWeeBitFuzzy(s: string): string {
|
function makeFAWeeBitFuzzy(s: string, ignoreAccent?: boolean): string {
|
||||||
return "^" + s.replace(fRepRegex, mtch => {
|
return "^" + s.replace((ignoreAccent ? fRepRegexWAccents : fRepRegex), mtch => {
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
return fReplacer[mtch];
|
return fReplacer[mtch];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export function makeAWeeBitFuzzy(s: string, i: "f" | "p"): string {
|
export function makeAWeeBitFuzzy(s: string, i: "f" | "p", ignoreAccent?: boolean): string {
|
||||||
return i === "p"
|
return i === "p"
|
||||||
? makePAWeeBitFuzzy(s)
|
? makePAWeeBitFuzzy(s)
|
||||||
: makeFAWeeBitFuzzy(s);
|
: makeFAWeeBitFuzzy(s, ignoreAccent);
|
||||||
}
|
}
|
Loading…
Reference in New Issue