inflector with plurals beta working!
This commit is contained in:
parent
9baa2d5e58
commit
916bc24487
|
@ -118,6 +118,7 @@ export function removeAccents(s: T.PsString | string): T.PsString | string {
|
|||
*
|
||||
* @param s a string of Pashto phonetics
|
||||
*/
|
||||
export function hasAccents(s: string): boolean {
|
||||
export function hasAccents(s: string | T.PsString): boolean {
  // A PsString is checked by its phonetics (f) only
  const phonetics = typeof s === "string" ? s : s.f;
  return accentReplacer.some(({ accented }) => phonetics.includes(accented));
}
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
|
||||
import {
|
||||
concatPsString,
|
||||
firstPhonetics,
|
||||
makePsString,
|
||||
removeEndingL,
|
||||
yulEndingInfinitive,
|
||||
|
@ -22,6 +21,11 @@ import {
|
|||
removeRetroflexR,
|
||||
splitDoubleWord,
|
||||
endsInConsonant,
|
||||
addOEnding,
|
||||
removeFVarients,
|
||||
endsInShwa,
|
||||
removeAynEnding,
|
||||
splitPsByVarients,
|
||||
} from "./p-text-helpers";
|
||||
import * as T from "../types";
|
||||
import {
|
||||
|
@ -617,9 +621,11 @@ test(`complementInflects`, () => {
|
|||
})).toBe(false);
|
||||
});
|
||||
|
||||
test(`firstPhonetics should work`, () => {
|
||||
expect(firstPhonetics("ist'imaal, istimaal")).toBe("ist'imaal");
|
||||
expect(firstPhonetics("kor")).toBe("kor");
|
||||
test(`removeFVarients`, () => {
  // plain phonetics string with variations: only the first variation is kept
  const fromString = removeFVarients("ist'imaal, istimaal");
  expect(fromString).toBe("ist'imaal");

  // PsString: the Pashto text is untouched, only the first phonetic variation is kept
  const fromPsString = removeFVarients({ p: "معالوم", f: "ma'aalóom, maalóom" });
  expect(fromPsString).toEqual({ p: "معالوم", f: "ma'aalóom" });

  // nothing to remove
  const untouched = removeFVarients("kor");
  expect(untouched).toBe("kor");
});
|
||||
|
||||
test(`makePsString should work`, () => {
|
||||
|
@ -1034,4 +1040,86 @@ test("endsInAConsonant", () => {
|
|||
];
|
||||
does.forEach((x) => expect(endsInConsonant(x)).toBe(true));
|
||||
doesnt.forEach((x) => expect(endsInConsonant(x)).toBe(false));
|
||||
})
|
||||
|
||||
test("addOEnding", () => {
  // each row: [word given to addOEnding, expected form(s) with the و - o ending]
  const cases: [T.PsString, T.PsString[]][] = [
    [{ p: "کتابونه", f: "kitaabóona" }, [{ p: "کتابونو", f: "kitaabóono" }]],
    [{ p: "کارغان", f: "kaargháan" }, [{ p: "کارغانو", f: "kaargháano" }]],
    [{ p: "کارغانې", f: "kaargháane" }, [{ p: "کارغانو", f: "kaargháano" }]],
    [{ p: "ښځې", f: "xúdze" }, [{ p: "ښځو", f: "xúdzo" }]],
    // TODO: Make this last thing accented??
    [{ p: "کور", f: "kor" }, [{ p: "کورو", f: "koro" }]],
    [
      { p: "سړی", f: "saRéy" },
      [{ p: "سړیو", f: "saRíyo" }, { p: "سړو", f: "saRó" }],
    ],
    [{ p: "افغانۍ", f: "afghaanúy" }, [{ p: "افغانیو", f: "afghaanúyo" }]],
    [{ p: "اوبه", f: "oobú" }, [{ p: "اوبو", f: "oobó" }]],
    [{ p: "شودې", f: "shoodé" }, [{ p: "شودو", f: "shoodó" }]],
    [{ p: "منابع", f: "manaabí" }, [{ p: "منابو", f: "manaabó" }]],
    [{ p: "انبیا", f: "ambiyáa" }, [{ p: "انبیاوو", f: "ambiyáawo" }]],
    [{ p: "مراجع", f: "maraají'" }, [{ p: "مراجو", f: "maraajó" }]],
    [{ p: "اتباع", f: "atbaa" }, [{ p: "اتباعوو", f: "atbaawo" }]],
    [{ p: "اتباع", f: "atbáa'" }, [{ p: "اتباعوو", f: "atbáawo" }]],
  ];
  for (const [input, expected] of cases) {
    expect(addOEnding(input)).toEqual(expected);
  }
});
|
||||
|
||||
test("endsInShwa", () => {
  // each row: [word, whether it should count as ending in a shwa]
  const cases: [T.PsString, boolean][] = [
    [{ p: "ښایسته", f: "xaaystú" }, true],
    [{ p: "ښایسته", f: "xaaystu" }, true],
    [{ p: "ښایسته", f: "xaaysta" }, false],
    [{ p: "کور", f: "kor" }, false],
  ];
  for (const [word, expected] of cases) {
    expect(endsInShwa(word)).toBe(expected);
  }
});
|
||||
|
||||
test("splitPsByVarients", () => {
  const bothVarients: T.PsString[] = [
    { p: "حوادث", f: "hawáadis" },
    { p: "حادثات", f: "haadisáat" },
  ];

  // Latin comma in the Pashto text
  const withLatinComma = splitPsByVarients({ p: "حوادث, حادثات", f: "hawáadis, haadisáat" });
  expect(withLatinComma).toEqual(bothVarients);

  // should work with Pashto comma too
  const withPashtoComma = splitPsByVarients({ p: "حوادث، حادثات", f: "hawáadis, haadisáat" });
  expect(withPashtoComma).toEqual(bothVarients);

  // a word without varients comes back as a single-element array
  const single = splitPsByVarients({ p: "کور", f: "kor" });
  expect(single).toEqual([{ p: "کور", f: "kor" }]);
});
|
|
@ -15,8 +15,8 @@ import {
|
|||
getPersonInflectionsKey,
|
||||
} from "./misc-helpers";
|
||||
import * as T from "../types";
|
||||
import { removeAccents } from "./accent-helpers";
|
||||
import { pashtoConsonants, phoneticsConsonants } from "./pashto-consonants";
|
||||
import { hasAccents, removeAccents } from "./accent-helpers";
|
||||
import { phoneticsConsonants } from "./pashto-consonants";
|
||||
import { simplifyPhonetics } from "./simplify-phonetics";
|
||||
|
||||
// export function concatPsStringWithVars(...items: Array<T.PsString | " " | "">): T.PsString[] {
|
||||
|
@ -190,14 +190,12 @@ export function removeFVarients(x: string | T.PsString | T.DictionaryEntry): T.F
|
|||
return {
|
||||
...x,
|
||||
f: removeFVarients(x.f),
|
||||
__brand: "name for a dictionary entry with all the phonetics variations removed",
|
||||
} as T.DictionaryEntryNoFVars;
|
||||
} as unknown as T.DictionaryEntryNoFVars;
|
||||
}
|
||||
return {
|
||||
...x,
|
||||
f: removeFVarients(x.f),
|
||||
__brand: "name for a ps string with all the phonetics variations removed",
|
||||
} as T.PsStringNoFVars;
|
||||
} as unknown as T.PsStringNoFVars;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -796,7 +794,7 @@ export function ensureUnisexInflections(infs: T.InflectorOutput, w: T.Dictionary
|
|||
|
||||
export function endsInAaOrOo(w: T.PsString): boolean {
|
||||
const fEnd = simplifyPhonetics(w.f).slice(-2);
|
||||
const pEnd = w.p.slice(-1);
|
||||
const pEnd = w.p.slice(-1) === "ع" ? w.p.slice(-2, -1) : w.p.slice(-1);
|
||||
return (
|
||||
pEnd === "و" && fEnd.endsWith("o")
|
||||
||
|
||||
|
@ -804,7 +802,6 @@ export function endsInAaOrOo(w: T.PsString): boolean {
|
|||
);
|
||||
}
|
||||
|
||||
|
||||
export function endsInConsonant(w: T.PsString): boolean {
|
||||
// TODO: Add reporting back that the plural ending will need a space?
|
||||
|
||||
|
@ -823,4 +820,106 @@ export function endsInConsonant(w: T.PsString): boolean {
|
|||
// const pCons = pashtoConsonants.includes(w.p.slice(-1));
|
||||
const fCons = phoneticsConsonants.includes(simplifyPhonetics(w.f).slice(-1));
|
||||
return fCons;
|
||||
}
|
||||
|
||||
/**
 * Adds a و - o ending (used in the 2nd inflection of plurals) to a given PsString.
 *
 * A trailing ' on the phonetics is dropped first, then:
 * - ی with ey / éy: two forms are returned, یو - iyo / íyo and و - o / ó
 * - ۍ: the last two phonetic letters are replaced, giving یو - uyo / úyo
 * - ا or اع: وو - wo is appended to the whole word
 * - ه / ع pronounced as a short vowel, or ې - e / é: the final letter is
 *   replaced by the و
 * - anything else: the و is appended to the whole word
 *
 * In the last two cases the o is accented (ó) when the final phonetic letter
 * carried an accent.
 *
 * @param ps the word to add the ending to
 * @returns one or more forms of the word with the و - o ending
 */
export function addOEnding(ps: T.PsString): T.ArrayOneOrMore<T.PsString> {
  const w = removeEndTick(ps);
  const lastLetter = makePsString(w.p.slice(-1), w.f.slice(-1));
  const lastTwoF = w.f.slice(-2);

  if (lastLetter.p === "ی" && ["ey", "éy"].includes(lastTwoF)) {
    const base = makePsString(w.p.slice(0, -1), w.f.slice(0, -2));
    const endHadAccent = lastTwoF === "éy";
    return [
      concatPsString(base, { p: "یو", f: endHadAccent ? "íyo" : "iyo" }),
      concatPsString(base, { p: "و", f: endHadAccent ? "ó" : "o" }),
    ];
  }

  if (lastLetter.p === "ۍ") {
    const base = makePsString(w.p.slice(0, -1), w.f.slice(0, -2));
    const endHadAccent = lastTwoF === "úy";
    return [
      concatPsString(base, { p: "یو", f: endHadAccent ? "úyo" : "uyo" }),
    ];
  }

  if (lastLetter.p === "ا" || w.p.slice(-2) === "اع") {
    return [concatPsString(w, { p: "وو", f: "wo" })];
  }

  // "|" is not an alternation operator inside a character class, so the
  // vowels are listed without separators
  const endsInShortVowel =
    ["ه", "ع"].includes(lastLetter.p) && /[auiUáúíÚ]/.test(lastLetter.f);
  const endsInE = lastLetter.p === "ې" && ["e", "é"].includes(lastLetter.f);
  const base = (endsInShortVowel || endsInE)
    ? makePsString(w.p.slice(0, -1), w.f.slice(0, -1))
    : w;

  return [
    concatPsString(
      base,
      makePsString("و", hasAccents(lastLetter.f) ? "ó" : "o"),
    ),
  ];
}
|
||||
|
||||
/**
 * Tells whether a word ends in a shwa: the Pashto ends in ه and the
 * phonetics end in u (accented or not)
 *
 * @param w the word to check
 */
export function endsInShwa(w: T.PsString): boolean {
  const lastP = w.p.slice(-1);
  const lastF = w.f.slice(-1);
  if (lastP !== "ه") return false;
  return lastF === "u" || lastF === "ú";
}
|
||||
|
||||
/**
 * Applies the function f to both the p and the f of a PsString
 *
 * @param ps the PsString whose p and f are passed to f
 * @param f the function applied to each of the two strings
 * @returns an object holding the result for p and the result for f
 */
// The type parameter is named U so it cannot be confused with the `T`
// namespace that `T.PsString` comes from.
export function mapPsString<U>(ps: T.PsString, f: (s: string) => U): { p: U, f: U } {
  return {
    p: f(ps.p),
    f: f(ps.f),
  };
}
|
||||
|
||||
/**
 * Splits up a given PsString by comma-separated varients. Both the Latin
 * comma and the Pashto comma (،) count as separators, and each piece is trimmed.
 *
 * @param w a PsString whose p and f may each hold several varients
 * @returns one PsString per varient found in the Pashto text
 * @throws Error when a Pashto varient has no matching phonetics varient
 */
export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore<T.PsString> {
  // "|" is not an alternation operator inside a character class; listing it
  // there would make a literal "|" act as a separator too
  const cut = (s: string): string[] => s.split(/[,،]/).map((piece) => piece.trim());
  const ps = mapPsString(w, cut);
  return ps.p.map((p, i) => {
    const f = ps.f[i];
    if (!f) throw new Error("uneven comma seperated ps varients: " + JSON.stringify(w));
    return makePsString(p, f);
  }) as T.ArrayOneOrMore<T.PsString>;
}
|
||||
|
||||
|
||||
export function removeEndTick(w: T.PsString): T.PsString;
export function removeEndTick(w: string): string;
// Drops a single trailing ' from a phonetics string, or from the f of a PsString
export function removeEndTick(w: T.PsString | string): T.PsString | string {
  if (typeof w === "string") {
    return w.endsWith("'") ? w.slice(0, -1) : w;
  }
  // for a PsString only the phonetics are touched
  return makePsString(w.p, removeEndTick(w.f));
}
|
|
@ -442,19 +442,7 @@ const nouns: Array<{
|
|||
},
|
||||
// Masculine irregular
|
||||
{
|
||||
in: {
|
||||
ts: 1527813809,
|
||||
p: "لمونځ",
|
||||
f: "lamoondz",
|
||||
g: "",
|
||||
e: "Muslim ritual prayers (namaz, salah, salat)",
|
||||
c: "n. m. irreg.",
|
||||
i: 9835,
|
||||
infap: "لمانځه",
|
||||
infaf: "lamaandzu",
|
||||
infbp: "لمنځ",
|
||||
infbf: "lamandz",
|
||||
},
|
||||
in: {"ts":1527813809,"i":11318,"p":"لمونځ","f":"lamoondz","g":"lamoondz","e":"Muslim ritual prayers (namaz, salah, salat)","c":"n. m. irreg.","infap":"لمانځه","infaf":"lamaandzu","infbp":"لمنځ","infbf":"lamandz","ppp":"لمونځونه","ppf":"lamoondzóona"},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
|
@ -463,17 +451,17 @@ const nouns: Array<{
|
|||
[{p: "لمنځو", f: "lamandzo"}],
|
||||
],
|
||||
},
|
||||
// plural: {
|
||||
// masc: [
|
||||
// [{ p: "لمونځونه", f: "lamoondzóona" }],
|
||||
// [{ p: "لمونځونو", f: "lamoondzóono" }],
|
||||
// ],
|
||||
// },
|
||||
plural: {
|
||||
masc: [
|
||||
[{ p: "لمونځونه", f: "lamoondzóona" }],
|
||||
[{ p: "لمونځونو", f: "lamoondzóono" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Masculine short squish
|
||||
{
|
||||
in: {"i":9049,"ts":1527813593,"p":"غر","f":"ghar, ghur","g":"ghar,ghur","e":"mountain","c":"n. m.","infap":"غره","infaf":"ghru","infbp":"غرو","infbf":"ghro"},
|
||||
in: {"i":9049,"ts":1527813593,"p":"غر","f":"ghar, ghur","g":"ghar,ghur","e":"mountain","c":"n. m.","infap":"غره","infaf":"ghru","infbp":"غر","infbf":"ghr"},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
|
@ -500,6 +488,11 @@ const nouns: Array<{
|
|||
[{ p: "خره", f: "khru" }],
|
||||
[{ p: "خرو", f: "khro" }],
|
||||
],
|
||||
fem: [
|
||||
[{ p: "خره", f: "khra" }],
|
||||
[{ p: "خرې", f: "khre" }],
|
||||
[{ p: "خرو", f: "khro" }],
|
||||
],
|
||||
},
|
||||
plural: {
|
||||
masc: [
|
||||
|
@ -587,12 +580,12 @@ const nouns: Array<{
|
|||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527815394,"i":13991,"p":"واده","f":"waadú","g":"waadu","e":"wedding, marriage","c":"n. m."},
|
||||
in: {"ts":1527815394,"i":13991,"p":"واده","f":"waadú","g":"waadu","e":"wedding, marriage","c":"n. m.","ppp":"ودونه","ppf":"wadóona"},
|
||||
out: {
|
||||
plural: {
|
||||
masc: [
|
||||
[{ p: "وادونه", f: "waadóona" }],
|
||||
[{ p: "وادونو", f: "waadóono" }],
|
||||
[{ p: "ودونه", f: "wadóona" }],
|
||||
[{ p: "ودونو", f: "wadóono" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -655,8 +648,8 @@ const nouns: Array<{
|
|||
inflections: {
|
||||
fem: [
|
||||
[{p: "اره", f: "ará"}],
|
||||
[{p: "ارې", f: "are"}],
|
||||
[{p: "ارو", f: "aro"}],
|
||||
[{p: "ارې", f: "aré"}],
|
||||
[{p: "ارو", f: "aró"}],
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -672,7 +665,7 @@ const nouns: Array<{
|
|||
c: "n. f.",
|
||||
i: 10661,
|
||||
app: "مراجع",
|
||||
apf: "maraají’",
|
||||
apf: "maraají'",
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
|
@ -682,6 +675,12 @@ const nouns: Array<{
|
|||
[{p: "مرجعو", f: "marjo"}],
|
||||
],
|
||||
},
|
||||
arabicPlural: {
|
||||
fem: [
|
||||
[{ p: "مراجع", f: "maraají'" }],
|
||||
[{ p: "مراجو", f: "maraajó" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -700,8 +699,128 @@ const nouns: Array<{
|
|||
inflections: {
|
||||
fem: [
|
||||
[{p: "منبع", f: "manbá"}],
|
||||
[{p: "منبعې", f: "manbe"}],
|
||||
[{p: "منبعو", f: "manbo"}],
|
||||
[{p: "منبعې", f: "manbé"}],
|
||||
[{p: "منبعو", f: "manbó"}],
|
||||
],
|
||||
},
|
||||
arabicPlural: {
|
||||
fem: [
|
||||
[{ p: "منابع", f: "manaabí" }],
|
||||
[{ p: "منابو", f: "manaabó" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527823093,"i":13207,"p":"نبي","f":"nabee","g":"nabee","e":"prophet","c":"n. m. anim.","app":"انبیا","apf":"ambiyáa"},
|
||||
out: {
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "انبیا", f: "ambiyáa" }],
|
||||
[{ p: "انبیاوو", f: "ambiyáawo" }],
|
||||
],
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
in: {"ts":1527819536,"i":3063,"p":"تبع","f":"taba'","g":"taba","e":"follower, adherent, supporter, subject, national","c":"n. m. unisex anim.","app":"اتباع","apf":"atbaa"},
|
||||
out: {
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "اتباع", f: "atbaa" }],
|
||||
[{ p: "اتباعوو", f: "atbaawo" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527816113,"i":3072,"p":"تبلیغ","f":"tableegh","g":"tableegh","e":"propaganda; preaching, evangelism","c":"n. m.","app":"تبلیغات","apf":"tableegháat"},
|
||||
out: {
|
||||
plural: {
|
||||
masc: [
|
||||
[{ p: "تبلیغونه", f: "tableeghóona" }],
|
||||
[{ p: "تبلیغونو", f: "tableeghóono" }],
|
||||
],
|
||||
},
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "تبلیغات", f: "tableegháat" }],
|
||||
[{ p: "تبلیغاتو", f: "tableegháato" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527815921,"i":3844,"p":"توقع","f":"tawaqqU","g":"tawakkU","e":"expectation, hope, anticipation","c":"n. f.","app":"توقعات","apf":"tawaqqUaat"},
|
||||
out: {
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "توقعات", f: "tawaqqUaat" }],
|
||||
[{ p: "توقعاتو", f: "tawaqqUaato" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527815820,"i":5177,"p":"حادثه","f":"haadisá","g":"haadisa","e":"accident, event","c":"n. f.","app":"حوادث, حادثات","apf":"hawaadis, haadisaat"},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{ p: "حادثه", f: "haadisá" }],
|
||||
[{ p: "حادثې", f: "haadisé" }],
|
||||
[{ p: "حادثو", f: "haadisó" }],
|
||||
],
|
||||
},
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "حوادث", f: "hawaadis"}, { p: "حادثات", f: "haadisaat" }],
|
||||
[{ p: "حوادثو", f: "hawaadiso"}, { p: "حادثاتو", f: "haadisaato" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527815329,"i":3097,"p":"تجربه","f":"tajrabá, tajribá","g":"tajraba,tajriba","e":"experience","c":"n. f.","app":"تجارب","apf":"tajaarib"},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{ p: "تجربه", f: "tajrabá" }],
|
||||
[{ p: "تجربې", f: "tajrabé" }],
|
||||
[{ p: "تجربو", f: "tajrabó" }],
|
||||
],
|
||||
},
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "تجارب", f: "tajaarib"}],
|
||||
[{ p: "تجاربو", f: "tajaaribo"}],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527814069,"i":5194,"p":"حال","f":"haal","g":"haal","e":"state, condition, circumstance","c":"n. m.","app":"احوال","apf":"ahwáal"},
|
||||
out: {
|
||||
plural: {
|
||||
masc: [
|
||||
[{ p: "حالونه", f: "haalóona" }],
|
||||
[{ p: "حالونو", f: "haalóono" }],
|
||||
],
|
||||
},
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "احوال", f: "ahwáal" }],
|
||||
[{ p: "احوالو", f: "ahwáalo" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {"ts":1527819536,"i":3063,"p":"تبع","f":"taba'","g":"taba","e":"follower, adherent, supporter, subject, national","c":"n. m. unisex anim.","app":"اتباع","apf":"atbáa'"},
|
||||
out: {
|
||||
arabicPlural: {
|
||||
masc: [
|
||||
[{ p: "اتباع", f: "atbáa'" }],
|
||||
[{ p: "اتباعوو", f: "atbáawo" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -856,15 +975,14 @@ const nouns: Array<{
|
|||
c: "n. f.",
|
||||
i: 12205,
|
||||
},
|
||||
out: false,
|
||||
// out: {
|
||||
// plural: {
|
||||
// fem: [
|
||||
// [{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}],
|
||||
// [{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}],
|
||||
// ],
|
||||
// },
|
||||
// },
|
||||
out: {
|
||||
plural: {
|
||||
fem: [
|
||||
[{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}],
|
||||
[{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// TODO: Plaar plaroona paaraan - wrooNa
|
||||
// Word with no inflections
|
||||
|
@ -881,7 +999,6 @@ const nouns: Array<{
|
|||
},
|
||||
out: false,
|
||||
},
|
||||
// TODO: WORDS THAT ARE ALREADY PLURAL!
|
||||
];
|
||||
|
||||
const others: T.DictionaryEntry[] = [
|
||||
|
@ -912,6 +1029,7 @@ adjectives.forEach((word) => {
|
|||
});
|
||||
|
||||
nouns.forEach((word) => {
|
||||
// if (word.in.p !== "نبي") return;
|
||||
test(`${word.in.p} should inflect properly`, () => {
|
||||
expect(inflectWord(word.in)).toEqual(word.out);
|
||||
});
|
||||
|
@ -936,4 +1054,4 @@ test(`inflectRegularYeyUnisex should work`, () => {
|
|||
[{p: "لیدونکو", f: "leedóonko"}],
|
||||
],
|
||||
});
|
||||
})
|
||||
});
|
||||
|
|
|
@ -16,8 +16,13 @@ import {
|
|||
concatPsString,
|
||||
endsInConsonant,
|
||||
endsInAaOrOo,
|
||||
addOEnding,
|
||||
endsInShwa,
|
||||
splitPsByVarients,
|
||||
removeEndTick,
|
||||
} from "./p-text-helpers";
|
||||
import {
|
||||
hasAccents,
|
||||
removeAccents,
|
||||
} from "./accent-helpers";
|
||||
import * as T from "../types";
|
||||
|
@ -43,6 +48,9 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
|
|||
) as T.UnisexInflections,
|
||||
};
|
||||
}
|
||||
if (w.c && w.c.includes("pl.")) {
|
||||
return handlePluralNoun(w);
|
||||
}
|
||||
if (w.c && (w.c.includes("adj.") || w.c.includes("unisex"))) {
|
||||
return handleUnisexWord(w);
|
||||
}
|
||||
|
@ -61,24 +69,24 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
|||
// Get last letter of Pashto and last two letters of phonetics
|
||||
// TODO: !!! Handle weird endings / symbols ' etc.
|
||||
const pEnd = word.p.slice(-1);
|
||||
const plural = makePlural(word);
|
||||
const plurals = makePlural(word);
|
||||
if (word.infap && word.infaf && word.infbp && word.infbf) {
|
||||
return {
|
||||
inflections: inflectIrregularUnisex(word.p, word.f, [
|
||||
{p: word.infap, f: word.infaf},
|
||||
{p: word.infbp, f: word.infbf},
|
||||
]),
|
||||
plural,
|
||||
...plurals,
|
||||
};
|
||||
}
|
||||
if (pEnd === "ی" && word.f.slice(-2) === "ey") {
|
||||
return { inflections: inflectRegularYeyUnisex(word.p, word.f), plural };
|
||||
return { inflections: inflectRegularYeyUnisex(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (pEnd === "ه" && word.g.slice(-1) === "u") {
|
||||
return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), plural };
|
||||
return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (pEnd === "ی" && word.f.slice(-2) === "éy") {
|
||||
return { inflections: inflectEmphasizedYeyUnisex(word.p, word.f), plural };
|
||||
return { inflections: inflectEmphasizedYeyUnisex(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (
|
||||
pashtoConsonants.includes(pEnd) ||
|
||||
|
@ -86,15 +94,23 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
|||
word.p.slice(-2) === "ای" ||
|
||||
(word.p.slice(-1) === "ه" && word.f.slice(-1) === "h")
|
||||
) {
|
||||
return { inflections: inflectConsonantEndingUnisex(word.p, word.f), plural };
|
||||
return { inflections: inflectConsonantEndingUnisex(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (plurals) return plurals;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Builds the output for an entry that is already plural: only entries whose
// c includes "n." are handled, and only the plural forms are returned
function handlePluralNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
  if (!w.c?.includes("n.")) return false;
  const plurals = makePlural(w);
  return plurals ? { ...plurals } : false;
}
|
||||
|
||||
function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
||||
// Get last letter of Pashto and last two letters of phonetics
|
||||
// TODO: !!! Handle weird endings / symbols ' etc.
|
||||
const plural = makePlural(w);
|
||||
const plurals = makePlural(w);
|
||||
const pEnd = w.p.slice(-1);
|
||||
const fEnd = w.f.slice(-2);
|
||||
if (w.infap && w.infaf && w.infbp && w.infbf) {
|
||||
|
@ -103,20 +119,20 @@ function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
|||
{p: w.infap, f: w.infaf},
|
||||
{p: w.infbp, f: w.infbf},
|
||||
]),
|
||||
plural,
|
||||
...plurals,
|
||||
};
|
||||
}
|
||||
const isTobEnding = (w.p.slice(-3) === "توب" && ["tób", "tob"].includes(w.f.slice(-3)) && w.p.length > 3);
|
||||
if (isTobEnding) {
|
||||
return { inflections: inflectTobMasc(w.p, w.f), plural };
|
||||
return { inflections: inflectTobMasc(w.p, w.f), ...plurals };
|
||||
}
|
||||
if (pEnd === "ی" && fEnd === "ey") {
|
||||
return { inflections: inflectRegularYeyMasc(w.p, w.f), plural };
|
||||
return { inflections: inflectRegularYeyMasc(w.p, w.f), ...plurals };
|
||||
}
|
||||
if (pEnd === "ی" && fEnd === "éy") {
|
||||
return { inflections: inflectRegularEmphasizedYeyMasc(w.p, w.f), plural };
|
||||
return { inflections: inflectRegularEmphasizedYeyMasc(w.p, w.f), ...plurals };
|
||||
}
|
||||
return plural ? { plural } : false
|
||||
return plurals ? { ...plurals } : false
|
||||
}
|
||||
|
||||
function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
||||
|
@ -126,27 +142,27 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
|||
const animate = c.includes("anim.");
|
||||
const pEnd = word.p.slice(-1);
|
||||
|
||||
const plural = makePlural(word);
|
||||
const plurals = makePlural(word);
|
||||
|
||||
if (endingInHeyOrAynRegex.test(word.p) && endingInSingleARegex.test(word.f)) {
|
||||
return { inflections: inflectRegularAFem(word.p, word.f), plural };
|
||||
return { inflections: inflectRegularAFem(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(word.f)) {
|
||||
return { inflections: inflectRegularAWithHimPEnding(word.p, word.f), plural };
|
||||
return { inflections: inflectRegularAWithHimPEnding(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (pashtoConsonants.includes(pEnd) && !animate) {
|
||||
return { inflections: inflectRegularInanMissingAFem(word.p, word.f), plural };
|
||||
return { inflections: inflectRegularInanMissingAFem(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (pEnd === "ي" && (!animate)) {
|
||||
return { inflections: inflectRegularInanEeFem(word.p, word.f), plural };
|
||||
return { inflections: inflectRegularInanEeFem(word.p, word.f), ...plurals };
|
||||
}
|
||||
if (pEnd === "ۍ") {
|
||||
return { inflections: inflectRegularUyFem(word.p, word.f), plural };
|
||||
return { inflections: inflectRegularUyFem(word.p, word.f), ...plurals };
|
||||
}
|
||||
// if (endingInAlefRegex.test(word.p)) {
|
||||
// return { inflections: inflectRegularAaFem(word.p, f) };
|
||||
// }
|
||||
return plural ? { plural } : false;
|
||||
return plurals ? { ...plurals } : false;
|
||||
}
|
||||
|
||||
// LEVEL 3 FUNCTIONS
|
||||
|
@ -294,13 +310,15 @@ function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: strin
|
|||
}
|
||||
|
||||
function inflectRegularAFem(p: string, f: string): T.Inflections {
|
||||
const baseF = ["'", "’"].includes(f.slice(-1)) ? f.slice(0, -2) : f.slice(0, -1);
|
||||
const withoutTrailingComma = ["'", "’"].includes(f.slice(-1)) ? f.slice(0, -1) : f;
|
||||
const accentLast = hasAccents(withoutTrailingComma.slice(-1));
|
||||
const baseF = withoutTrailingComma.slice(0, -1);
|
||||
const baseP = p.slice(-1) === "ع" ? p : p.slice(0, -1);
|
||||
return {
|
||||
fem: [
|
||||
[{p, f}],
|
||||
[{p: `${baseP}ې`, f: `${baseF}e`}],
|
||||
[{p: `${baseP}و`, f: `${baseF}o`}],
|
||||
[{p: `${baseP}ې`, f: `${baseF}${accentLast ? "é" : "e"}`}],
|
||||
[{p: `${baseP}و`, f: `${baseF}${accentLast ? "ó" : "o"}`}],
|
||||
],
|
||||
};
|
||||
}
|
||||
|
@ -356,53 +374,91 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections {
|
|||
function makePashtoPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
|
||||
if (!(word.ppp && word.ppf)) return undefined;
|
||||
const base = makePsString(word.ppp, word.ppf);
|
||||
// TODO: Add male Pashto plural
|
||||
function getBaseAndO(): T.PluralInflectionSet {
|
||||
return [[base], addOEnding(base)];
|
||||
}
|
||||
if (word.c?.includes("n. m.")) {
|
||||
return { masc: getBaseAndO() };
|
||||
}
|
||||
if (word.c?.includes("n. f.")) {
|
||||
return {
|
||||
fem: [
|
||||
[base],
|
||||
// todo: function to add و ending automatically
|
||||
[concatPsString(
|
||||
makePsString(base.p.slice(0, -1), base.f.slice(0, -1)),
|
||||
{ p: "و", f: "o" },
|
||||
)],
|
||||
],
|
||||
}
|
||||
return { fem: getBaseAndO() };
|
||||
}
|
||||
// TODO: handle masculine and unisex
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function makePlural(w: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
|
||||
// TODO: Include the Pashto plural thing here
|
||||
const pashtoPlural = makePashtoPlural(w);
|
||||
if (pashtoPlural) return pashtoPlural;
|
||||
function addMascPluralSuffix(animate?: boolean): T.PluralInflectionSet {
|
||||
const base = removeAccents(w);
|
||||
// Builds the Arabic plural forms (plain, and with the و - o ending) from the
// app / apf fields of an entry; returns undefined when either field is missing
function makeArabicPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
  const { app, apf } = word;
  if (!(apf && app)) return undefined;
  const varients = splitPsByVarients(makePsString(app, apf));
  // last phonetic letter of the plural, ignoring a trailing ' and any accent
  const lastSound = removeAccents(removeEndTick(apf).slice(-1));
  // again typescript not letting a typed key be used here
  const inflectionSet = [
    varients,
    varients.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
  ] as T.PluralInflectionSet;
  // feminine words that have arabic plurals stay feminine with the plural - ie مرجع - مراجع
  // but masculine words that appear feminine in the plural aren't feminine with the Arabic plural - ie. نبي - انبیا
  const staysFeminine = ["i", "e", "a"].includes(lastSound) && word.c?.includes("n. f.");
  return staysFeminine ? { fem: inflectionSet } : { masc: inflectionSet };
}
|
||||
|
||||
function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections } | { arabicPlural: T.PluralInflections } | undefined {
|
||||
function addSecondInf(plur: T.ArrayOneOrMore<T.PsString> | T.PsString): T.PluralInflectionSet {
|
||||
if (!Array.isArray(plur)) {
|
||||
return addSecondInf([plur]);
|
||||
}
|
||||
return [
|
||||
[concatPsString(base, animate ? { p: "ان", f: "áan" } : { p: "ونه", f: "óona" })],
|
||||
[concatPsString(base, animate ? { p: "انو", f: "áano" } : { p: "ونو", f: "óono" })],
|
||||
plur,
|
||||
plur.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
|
||||
];
|
||||
}
|
||||
if (w.c && w.c.includes("pl.")) {
|
||||
const plural = addSecondInf(makePsString(w.p, w.f));
|
||||
// Typescript being dumb and not letting me do a typed variable for the key
|
||||
// could try refactoring with an updated TypeScript dependency
|
||||
if (w.c.includes("n. m.")) return { plural: { masc: plural }};
|
||||
if (w.c.includes("n. f.")) return { plural: { fem: plural }};
|
||||
}
|
||||
// TODO: MAKE ARABIC PLURAL HERE IF THERE IS ARABIC PLURAL
|
||||
const arabicPlural = makeArabicPlural(w);
|
||||
const pashtoPlural = makePashtoPlural(w);
|
||||
if (pashtoPlural) return { plural: pashtoPlural, arabicPlural };
|
||||
function addMascPluralSuffix(animate?: boolean, shortSquish?: boolean): T.PluralInflectionSet {
|
||||
if (shortSquish && (w.infap == undefined || w.infaf === undefined)) {
|
||||
throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`);
|
||||
}
|
||||
const b = removeAccents(shortSquish
|
||||
? makePsString((w.infap as string).slice(0, -1), (w.infaf as string).slice(0, -1))
|
||||
: w
|
||||
);
|
||||
const base = endsInShwa(b)
|
||||
? makePsString(b.p.slice(0, -1), b.f.slice(0, -1))
|
||||
: b;
|
||||
return addSecondInf(
|
||||
concatPsString(base, (animate && !shortSquish) ? { p: "ان", f: "áan" } : { p: "ونه", f: "óona" }),
|
||||
);
|
||||
}
|
||||
function addAnimUnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
|
||||
const base = removeAccents(w);
|
||||
return {
|
||||
masc: addMascPluralSuffix(true),
|
||||
fem: [
|
||||
[concatPsString(base, { p: "انې", f: "áane" })],
|
||||
[concatPsString(base, { p: "انو", f: "áano" })],
|
||||
],
|
||||
fem: addSecondInf(concatPsString(base, { p: "انې", f: "áane" })),
|
||||
};
|
||||
}
|
||||
function addFemLongVowelSuffix(): T.PluralInflectionSet {
|
||||
const base = makePsString(w.p, w.f);
|
||||
const base = removeEndTick(makePsString(w.p, w.f));
|
||||
const baseWOutAccents = removeAccents(base);
|
||||
return [
|
||||
[concatPsString(base, { p: "وې", f: "we" }), concatPsString(baseWOutAccents, { p: "ګانې", f: "gáane" })],
|
||||
[concatPsString(base, { p: "وو", f: "wo" }), concatPsString(baseWOutAccents, { p: "ګانو", f: "gáano" })],
|
||||
];
|
||||
const space = (w.p.slice(-1) === "ع" || w.p.slice(-1) === "ه") ? { p: " ", f: "" } : "";
|
||||
return addSecondInf([
|
||||
concatPsString(base, space, { p: "وې", f: "we" }),
|
||||
concatPsString(baseWOutAccents, space, { p: "ګانې", f: "gáane" })
|
||||
]);
|
||||
}
|
||||
|
||||
const shortSquish = !!w.infap && !w.infap.includes("ا");
|
||||
const anim = w.c?.includes("anim.");
|
||||
const type = (w.c?.includes("unisex"))
|
||||
? "unisex noun"
|
||||
|
@ -411,19 +467,33 @@ function makePlural(w: T.DictionaryEntryNoFVars): T.PluralInflections | undefine
|
|||
: (w.c?.includes("n. f."))
|
||||
? "fem noun"
|
||||
: "other";
|
||||
if (type === "unisex noun" && endsInConsonant(w) && (!w.infap) && anim) {
|
||||
return addAnimUnisexPluralSuffix();
|
||||
if (type === "unisex noun") {
|
||||
if (endsInConsonant(w) && (!w.infap) && anim) {
|
||||
return { arabicPlural, plural: addAnimUnisexPluralSuffix() };
|
||||
}
|
||||
if (shortSquish) {
|
||||
return { arabicPlural, plural: { masc: addMascPluralSuffix(anim, shortSquish) }};
|
||||
}
|
||||
}
|
||||
if (type === "masc noun" && endsInConsonant(w) && (!w.infap) && (w.p.slice(-3) !== "توب")) {
|
||||
if (type === "masc noun" && (shortSquish || (endsInConsonant(w) || endsInShwa(w) && (!w.infap))) && (w.p.slice(-3) !== "توب")) {
|
||||
return {
|
||||
masc: addMascPluralSuffix(anim),
|
||||
arabicPlural,
|
||||
plural: {
|
||||
masc: addMascPluralSuffix(anim, shortSquish),
|
||||
},
|
||||
};
|
||||
}
|
||||
// TODO: What about endings in long ee / animate at inanimate
|
||||
if (type === "fem noun" && endsInAaOrOo(w) && (!w.infap)) {
|
||||
return {
|
||||
fem: addFemLongVowelSuffix(),
|
||||
arabicPlural,
|
||||
plural: {
|
||||
fem: addFemLongVowelSuffix(),
|
||||
},
|
||||
};
|
||||
}
|
||||
if (arabicPlural) {
|
||||
return { arabicPlural, plural: pashtoPlural };
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
import { standardizePashto } from "./standardize-pashto";
|
||||
import { standardizePashto, standardizePhonetics } from "./standardize-pashto";
|
||||
|
||||
const testPairs = [
|
||||
["گوگل", "ګوګل"],
|
||||
|
@ -31,3 +31,14 @@ testPairs.forEach((pair) => {
|
|||
expect(result).toBe(pair[1]);
|
||||
});
|
||||
});
|
||||
|
||||
// The function under test is standardizePhonetics, so the test is labelled
// with that name rather than standardizePashto
test("standardizePhonetics", () => {
  // each row: [input phonetics, expected phonetics with a plain ' apostrophe]
  const pairs: [string, string][] = [
    ["ma’aaloom", "ma'aaloom"],
    ["ma‘aaloom", "ma'aaloom"],
    ["ma'aaloom", "ma'aaloom"],
  ];
  pairs.forEach(([input, expected]) => {
    expect(standardizePhonetics(input)).toBe(expected);
  });
});
|
||||
|
|
|
@ -20,3 +20,8 @@ export function standardizePashto(input: string): string {
|
|||
// Replace آ two character version with combined آ character
|
||||
.replace(/آ/g, "آ");
|
||||
}
|
||||
|
||||
/**
 * Replaces curly ("smart") single quotes in a phonetics string with the
 * plain ' apostrophe
 *
 * @param input phonetics text that may contain ‘ or ’
 * @returns the text with every ‘ and ’ replaced by '
 */
export function standardizePhonetics(input: string): string {
  // TODO: check that these are the only kinds of smart quotes
  // "|" is not an alternation operator inside a character class; listing it
  // there would turn a literal "|" into an apostrophe as well
  return input.replace(/[‘’]/g, "'");
}
|
||||
|
|
|
@ -352,7 +352,12 @@ export type Inflections = GenderedSet<InflectionSet>;
|
|||
export type PluralInflections = GenderedSet<PluralInflectionSet>;
|
||||
|
||||
export type InflectorOutput = {
|
||||
arabicPlural: PluralInflections,
|
||||
plural?: PluralInflections,
|
||||
inflections?: Inflections,
|
||||
} | {
|
||||
plural: PluralInflections,
|
||||
arabicPlural?: PluralInflections,
|
||||
inflections?: Inflections,
|
||||
} | {
|
||||
inflections: Inflections,
|
||||
|
|
Loading…
Reference in New Issue