getting going on plural-suffixes

This commit is contained in:
lingdocs 2021-09-07 15:49:57 +04:00
parent a4eddf82d2
commit 47551aa7dd
7 changed files with 571 additions and 318 deletions

View File

@ -21,6 +21,7 @@ import {
psStringEquals,
removeRetroflexR,
splitDoubleWord,
endsInConsonant,
} from "./p-text-helpers";
import * as T from "../types";
import {
@ -1003,10 +1004,34 @@ test("psStringEquals", () => {
expect(
psStringEquals({ p: "بور", f: "bor" }, { p: "تور", f: "tor" })
).toBe(false);
expect(
psStringEquals({ p: "ملګری", f: "malgúrey" }, { p: "ملګری", f: "malgurey" })
).toBe(false);
expect(
psStringEquals({ p: "ملګری", f: "malgúrey" }, { p: "ملګری", f: "malgurey" }, true)
).toBe(true);
});
// removeRetroflexR: the trailing ړ / "R" is dropped from both the script and the phonetics
test("removeRetroflexR", () => {
    const withR: T.PsString = { p: "وکړ", f: "óokR" };
    const withoutR: T.PsString = { p: "وک", f: "óok" };
    expect(removeRetroflexR(withR)).toEqual(withoutR);
});
});
// endsInConsonant: words ending in a consonant sound (including the long
// diphthongs "aay" / "ooy") versus words ending in a vowel sound
test("endsInConsonant", () => {
    const does: T.PsString[] = [
        { p: "پښتون", f: "puxtoon" },
        { p: "کور", f: "kor" },
        { p: "ګناه", f: "gUnaah" },
        { p: "زوی", f: "zooy" },
        { p: "ځای", f: "dzaay" },
    ];
    const doesnt: T.PsString[] = [
        { p: "بابا", f: "baabaa" },
        { p: "قاضي", f: "qaazee" },
        { p: "ګناه", f: "gunaa" },
        { p: "اطلاع", f: "itlaa" },
    ];
    // Collect the misclassified words instead of asserting inside a loop,
    // so that a failure report names exactly which words were wrong.
    expect(does.filter((x) => !endsInConsonant(x))).toEqual([]);
    expect(doesnt.filter((x) => endsInConsonant(x))).toEqual([]);
});

View File

@ -15,6 +15,9 @@ import {
getPersonInflectionsKey,
} from "./misc-helpers";
import * as T from "../types";
import { removeAccents } from "./accent-helpers";
import { pashtoConsonants, phoneticsConsonants } from "./pashto-consonants";
import { simplifyPhonetics } from "./simplify-phonetics";
// export function concatPsStringWithVars(...items: Array<T.PsString | " " | "">): T.PsString[] {
@ -90,7 +93,7 @@ export function concatPsString(...items: Array<T.PsString | T.LengthOptions<T.Ps
* @param w
* @returns
*/
export function splitDoubleWord(w: T.DictionaryEntry): [T.DictionaryEntry, T.DictionaryEntry] {
export function splitDoubleWord(w: T.DictionaryEntryNoFVars): [T.DictionaryEntryNoFVars, T.DictionaryEntryNoFVars] {
const pSplit = w.p.split(" ");
const fSplit = w.f.split(" ");
const c = w.c?.replace(" doub.", "");
@ -176,25 +179,25 @@ export function ensureBaAt(ps: T.FullForm<T.PsString>, pos: number): T.FullForm<
return baInserted;
}
/**
 * Picks the first entry out of a comma-separated list of phonetics
 *
 * @param f - a phonetics string, possibly holding several comma-separated variations
 * @returns everything before the first comma, or the whole string when there is no comma
 */
export function firstPhonetics(f: string): string {
    const firstComma = f.indexOf(",");
    if (firstComma === -1) {
        return f;
    }
    return f.slice(0, firstComma);
}
/**
returns a PsString or DictionaryEntry ensuring only one phonetics variation
*
* @param ps
*/
export function removeFVariants(ps: T.PsString): T.PsString {
export function removeFVarients(x: T.DictionaryEntry): T.DictionaryEntryNoFVars;
export function removeFVarients(x: T.PsString): T.PsStringNoFVars;
export function removeFVarients(x: string): T.FStringNoFVars;
export function removeFVarients(x: string | T.PsString | T.DictionaryEntry): T.FStringNoFVars | T.PsStringNoFVars | T.DictionaryEntryNoFVars {
if (typeof x === "string") {
return x.split(",")[0] as T.FStringNoFVars;
}
if ("ts" in x) {
return {
...x,
f: removeFVarients(x.f),
__brand: "name for a dictionary entry with all the phonetics variations removed",
} as T.DictionaryEntryNoFVars;
}
return {
...ps,
f: firstPhonetics(ps.f),
};
...x,
f: removeFVarients(x.f),
__brand: "name for a ps string with all the phonetics variations removed",
} as T.PsStringNoFVars;
}
/**
@ -514,13 +517,6 @@ export function yulEndingInfinitive(s: T.PsString): boolean {
return ((pEnding === "یل") && (["yul", "yúl"].includes(fEnding)));
}
/**
 * Builds a PsString from a dictionary entry, using the entry's Pashto text
 * and only the first of its comma-separated phonetics
 *
 * @param entry - the dictionary entry to read p and f from
 */
export function psStringFromEntry(entry: T.DictionaryEntry): T.PsString {
    const { p, f } = entry;
    const singleF = firstPhonetics(f);
    return makePsString(p, singleF);
}
export function allOnePersonInflection(block: T.ImperativeForm, person: T.Person): T.SingleOrLengthOpts<T.ImperativeBlock>;
export function allOnePersonInflection(block: T.VerbForm, person: T.Person): T.SingleOrLengthOpts<T.VerbBlock>;
export function allOnePersonInflection(block: T.SingleOrLengthOpts<T.UnisexInflections>, person: T.Person): T.SingleOrLengthOpts<T.UnisexInflections>;
@ -617,8 +613,9 @@ export function complementInflects(inf: T.UnisexInflections): boolean {
// );
}
export function psStringEquals(ps1: T.PsString, ps2: T.PsString): boolean {
return (ps1.p === ps2.p) && (ps1.f === ps2.f);
/**
 * Compares two PsStrings on both their Pashto script and their phonetics
 *
 * @param ps1 - first string
 * @param ps2 - second string
 * @param ignoreAccents - when truthy, both strings are passed through removeAccents before comparing
 * @returns true only if p and f are both strictly equal
 */
export function psStringEquals(ps1: T.PsString, ps2: T.PsString, ignoreAccents?: boolean): boolean {
    const left = ignoreAccents ? removeAccents(ps1) : ps1;
    const right = ignoreAccents ? removeAccents(ps2) : ps2;
    if (left.p !== right.p) {
        return false;
    }
    return left.f === right.f;
}
export function removeRetroflexR(ps: T.PsString): T.PsString {
@ -754,33 +751,76 @@ export function ensureShortWurShwaShift(ps: T.PsString): T.PsString {
return ps;
}
export function ensureUnisexInflections(infs: T.Inflections | false, w: T.DictionaryEntry): T.UnisexInflections {
const ps = { p: w.p, f: firstPhonetics(w.f) };
if (infs === false) {
export function ensureUnisexInflections(infs: T.InflectorOutput, w: T.DictionaryEntryNoFVars): {
inflections: T.UnisexInflections,
plural?: T.PluralInflections,
} {
const ps = { p: w.p, f: w.f };
if (infs === false || infs.inflections === undefined) {
return {
masc: [
[ps],
[ps],
[ps],
],
fem: [
[ps],
[ps],
[ps],
],
inflections: {
masc: [
[ps],
[ps],
[ps],
],
fem: [
[ps],
[ps],
[ps],
],
},
};
}
if (!("fem" in infs)) {
if (!("fem" in infs.inflections)) {
return {
...infs,
fem: [[ps], [ps], [ps]],
inflections: {
...infs.inflections,
fem: [[ps], [ps], [ps]],
}
};
}
if (!("masc" in infs)) {
if (!("masc" in infs.inflections)) {
return {
...infs,
masc: [[ps], [ps], [ps]],
inflections: {
...infs.inflections,
masc: [[ps], [ps], [ps]],
},
};
}
return infs;
// for some dumb reason have to do this for type safety
return {
inflections: infs.inflections,
};
}
/**
 * Tells whether a word ends either in و with phonetics ending in "o",
 * or in ا with phonetics ending in "aa"
 *
 * The phonetics are run through simplifyPhonetics before the ending is inspected.
 *
 * @param w - the word to check
 */
export function endsInAaOrOo(w: T.PsString): boolean {
    const lastTwoSounds = simplifyPhonetics(w.f).slice(-2);
    const lastLetter = w.p.slice(-1);
    if (lastLetter === "و") {
        return lastTwoSounds.endsWith("o");
    }
    if (lastLetter === "ا") {
        return lastTwoSounds === "aa";
    }
    return false;
}
/**
 * Tells whether a word ends in a consonant sound
 *
 * Two endings are treated as consonant endings up front: ای / "aay" and وی / "ooy"
 * (compared with accents ignored). Otherwise the decision is made on the phonetics
 * alone: the last character of the simplified phonetics must be listed in
 * phoneticsConsonants. The Pashto script is not consulted for that second check.
 *
 * @param w - the word to check
 */
export function endsInConsonant(w: T.PsString): boolean {
    // TODO: Add reporting back that the plural ending will need a space?
    const longDiphthongs: T.PsString[] = [
        { p: "ای", f: "aay" },
        { p: "وی", f: "ooy" },
    ];
    // last two letters of the script against the last three characters of the phonetics
    const tail = makePsString(
        w.p.slice(-2),
        w.f.slice(-3),
    );
    if (longDiphthongs.some((diphthong) => psStringEquals(tail, diphthong, true))) {
        return true;
    }
    const lastSound = simplifyPhonetics(w.f).slice(-1);
    return phoneticsConsonants.includes(lastSound);
}

View File

@ -7,3 +7,6 @@
*/
// Pashto-script letters treated as consonants (each letter is listed exactly once)
export const pashtoConsonants = ["ب", "پ", "ت", "ټ", "ث", "ج", "چ", "ح", "خ", "څ", "ځ", "د", "ډ", "ذ", "ر", "ړ", "ز", "ژ", "ږ", "س", "ش", "ښ", "ص", "ض", "ط", "ظ", "غ", "ف", "ق", "ک", "ګ", "گ", "ل", "م", "ن", "ڼ"];
// Single phonetic characters that count as a consonant ending.
// NOTE(review): "x" and "y" are not in this list — confirm whether that is intended
// given what simplifyPhonetics outputs for words ending in those sounds.
export const phoneticsConsonants = "bdfghjklmnpqrstwz".split("");

View File

@ -16,7 +16,7 @@ import * as T from "../types";
const adjectives: Array<{
in: T.DictionaryEntry,
out: T.Inflections | false,
out: T.InflectorOutput,
}> = [
// irregular adj.
{
@ -34,16 +34,18 @@ const adjectives: Array<{
infbf: "zaR",
},
out: {
masc: [
[{p: "زوړ", f: "zoR"}],
[{p: "زاړه", f: "zaaRu"}],
[{p: "زړو", f: "zaRo"}],
],
fem: [
[{p: "زړه", f: "zaRa"}],
[{p: "زړې", f: "zaRe"}],
[{p: "زړو", f: "zaRo"}],
],
inflections:{
masc: [
[{p: "زوړ", f: "zoR"}],
[{p: "زاړه", f: "zaaRu"}],
[{p: "زړو", f: "zaRo"}],
],
fem: [
[{p: "زړه", f: "zaRa"}],
[{p: "زړې", f: "zaRe"}],
[{p: "زړو", f: "zaRo"}],
],
},
},
},
// regular adjective ending in ی
@ -58,16 +60,18 @@ const adjectives: Array<{
i: 6564,
},
out: {
masc: [
[{p: "ستړی", f: "stúRey"}],
[{p: "ستړي", f: "stúRee"}],
[{p: "ستړیو", f: "stúRiyo"}, {p: "ستړو", f: "stúRo"}],
],
fem: [
[{p: "ستړې", f: "stúRe"}],
[{p: "ستړې", f: "stúRe"}],
[{p: "ستړو", f: "stúRo"}],
],
inflections: {
masc: [
[{p: "ستړی", f: "stúRey"}],
[{p: "ستړي", f: "stúRee"}],
[{p: "ستړیو", f: "stúRiyo"}, {p: "ستړو", f: "stúRo"}],
],
fem: [
[{p: "ستړې", f: "stúRe"}],
[{p: "ستړې", f: "stúRe"}],
[{p: "ستړو", f: "stúRo"}],
],
}
},
},
// regular adjective ending in ی with stress on the end
@ -82,16 +86,18 @@ const adjectives: Array<{
i: 12026,
},
out: {
masc: [
[{p: "وروستی", f: "wroostéy"}],
[{p: "وروستي", f: "wroostée"}],
[{p: "وروستیو", f: "wroostiyo"}, {p: "وروستو", f: "wroostó"}],
],
fem: [
[{p: "وروستۍ", f: "wroostúy"}],
[{p: "وروستۍ", f: "wroostúy"}],
[{p: "وروستیو", f: "wroostúyo"}, {p: "وروستو", f: "wroostó"}],
],
inflections: {
masc: [
[{p: "وروستی", f: "wroostéy"}],
[{p: "وروستي", f: "wroostée"}],
[{p: "وروستیو", f: "wroostiyo"}, {p: "وروستو", f: "wroostó"}],
],
fem: [
[{p: "وروستۍ", f: "wroostúy"}],
[{p: "وروستۍ", f: "wroostúy"}],
[{p: "وروستیو", f: "wroostúyo"}, {p: "وروستو", f: "wroostó"}],
],
}
},
},
// regular adjective ending in a consonant
@ -106,16 +112,18 @@ const adjectives: Array<{
i: 6502,
},
out: {
masc: [
[{p: "سپک", f: "spuk"}],
[{p: "سپک", f: "spuk"}],
[{p: "سپکو", f: "spuko"}],
],
fem: [
[{p: "سپکه", f: "spuka"}],
[{p: "سپکې", f: "spuke"}],
[{p: "سپکو", f: "spuko"}],
],
inflections: {
masc: [
[{p: "سپک", f: "spuk"}],
[{p: "سپک", f: "spuk"}],
[{p: "سپکو", f: "spuko"}],
],
fem: [
[{p: "سپکه", f: "spuka"}],
[{p: "سپکې", f: "spuke"}],
[{p: "سپکو", f: "spuko"}],
],
},
},
},
{
@ -129,16 +137,18 @@ const adjectives: Array<{
i: 9945,
},
out: {
masc: [
[{p: "لوی", f: "looy"}],
[{p: "لوی", f: "looy"}],
[{p: "لویو", f: "looyo"}],
],
fem: [
[{p: "لویه", f: "looya"}],
[{p: "لویې", f: "looye"}],
[{p: "لویو", f: "looyo"}],
],
inflections: {
masc: [
[{p: "لوی", f: "looy"}],
[{p: "لوی", f: "looy"}],
[{p: "لویو", f: "looyo"}],
],
fem: [
[{p: "لویه", f: "looya"}],
[{p: "لویې", f: "looye"}],
[{p: "لویو", f: "looyo"}],
],
},
},
},
{
@ -152,16 +162,18 @@ const adjectives: Array<{
i: 2430,
},
out: {
masc: [
[{p: "پوه", f: "poh"}],
[{p: "پوه", f: "poh"}],
[{p: "پوهو", f: "poho"}],
],
fem: [
[{p: "پوهه", f: "poha"}],
[{p: "پوهې", f: "pohe"}],
[{p: "پوهو", f: "poho"}],
],
inflections: {
masc: [
[{p: "پوه", f: "poh"}],
[{p: "پوه", f: "poh"}],
[{p: "پوهو", f: "poho"}],
],
fem: [
[{p: "پوهه", f: "poha"}],
[{p: "پوهې", f: "pohe"}],
[{p: "پوهو", f: "poho"}],
],
},
},
},
// adjective ending in u
@ -176,16 +188,18 @@ const adjectives: Array<{
i: 1,
},
out: {
masc: [
[{p: "ویده", f: "weedú"}],
[{p: "ویده", f: "weedú"}],
[{p: "ویدو", f: "weedó"}],
],
fem: [
[{p: "ویده", f: "weedá"}],
[{p: "ویدې", f: "weedé"}],
[{p: "ویدو", f: "weedó"}],
],
inflections: {
masc: [
[{p: "ویده", f: "weedú"}],
[{p: "ویده", f: "weedú"}],
[{p: "ویدو", f: "weedó"}],
],
fem: [
[{p: "ویده", f: "weedá"}],
[{p: "ویدې", f: "weedé"}],
[{p: "ویدو", f: "weedó"}],
],
},
},
},
// adjective non-inflecting
@ -225,23 +239,25 @@ const adjectives: Array<{
i: 1,
},
out: {
masc: [
[{ p: "ګډ وډ", f: "guD wuD" }],
[{ p: "ګډ وډ", f: "guD wuD" }],
[{ p: "ګډو وډو", f: "guDo wuDo" }],
],
fem: [
[{ p: "ګډه وډه", f: "guDa wuDa" }],
[{ p: "ګډې وډې", f: "guDe wuDe" }],
[{ p: "ګډو وډو", f: "guDo wuDo" }],
],
}
}
inflections: {
masc: [
[{ p: "ګډ وډ", f: "guD wuD" }],
[{ p: "ګډ وډ", f: "guD wuD" }],
[{ p: "ګډو وډو", f: "guDo wuDo" }],
],
fem: [
[{ p: "ګډه وډه", f: "guDa wuDa" }],
[{ p: "ګډې وډې", f: "guDe wuDe" }],
[{ p: "ګډو وډو", f: "guDo wuDo" }],
],
},
},
},
];
const nouns: Array<{
in: T.DictionaryEntry,
out: T.Inflections | false,
out: T.InflectorOutput,
}> = [
// ## UNISEX
// Unisex noun irregular
@ -260,16 +276,18 @@ const nouns: Array<{
infbf: "melman",
},
out: {
masc: [
[{p: "مېلمه", f: "melmá"}],
[{p: "مېلمانه", f: "melmaanu"}],
[{p: "مېلمنو", f: "melmano"}],
],
fem: [
[{p: "مېلمنه", f: "melmana"}],
[{p: "مېلمنې", f: "melmane"}],
[{p: "مېلمنو", f: "melmano"}],
],
inflections: {
masc: [
[{p: "مېلمه", f: "melmá"}],
[{p: "مېلمانه", f: "melmaanu"}],
[{p: "مېلمنو", f: "melmano"}],
],
fem: [
[{p: "مېلمنه", f: "melmana"}],
[{p: "مېلمنې", f: "melmane"}],
[{p: "مېلمنو", f: "melmano"}],
],
},
},
},
// Unisex noun ending with ی
@ -284,16 +302,18 @@ const nouns: Array<{
i: 10943,
},
out: {
masc: [
[{p: "ملګری", f: "malgúrey"}],
[{p: "ملګري", f: "malgúree"}],
[{p: "ملګریو", f: "malgúriyo"}, {p: "ملګرو", f: "malgúro"}],
],
fem: [
[{p: "ملګرې", f: "malgúre"}],
[{p: "ملګرې", f: "malgúre"}],
[{p: "ملګرو", f: "malgúro"}],
],
inflections: {
masc: [
[{p: "ملګری", f: "malgúrey"}],
[{p: "ملګري", f: "malgúree"}],
[{p: "ملګریو", f: "malgúriyo"}, {p: "ملګرو", f: "malgúro"}],
],
fem: [
[{p: "ملګرې", f: "malgúre"}],
[{p: "ملګرې", f: "malgúre"}],
[{p: "ملګرو", f: "malgúro"}],
],
},
},
},
// Unisex noun ending on ی with emphasis on the end
@ -308,16 +328,24 @@ const nouns: Array<{
i: 2900,
},
out: {
masc: [
[{p: "ترورزی", f: "trorzéy"}],
[{p: "ترورزي", f: "trorzée"}],
[{p: "ترورزیو", f: "trorziyo"}, {p: "ترورزو", f: "trorzó"}],
],
fem: [
[{p: "ترورزۍ", f: "trorzúy"}],
[{p: "ترورزۍ", f: "trorzúy"}],
[{p: "ترورزیو", f: "trorzúyo"}, {p: "ترورزو", f: "trorzó"}],
],
inflections: {
masc: [
[{p: "ترورزی", f: "trorzéy"}],
[{p: "ترورزي", f: "trorzée"}],
[{p: "ترورزیو", f: "trorziyo"}, {p: "ترورزو", f: "trorzó"}],
],
fem: [
[{p: "ترورزۍ", f: "trorzúy"}],
[{p: "ترورزۍ", f: "trorzúy"}],
[{p: "ترورزیو", f: "trorzúyo"}, {p: "ترورزو", f: "trorzó"}],
],
},
// plural: {
// masc: [
// [{ p: "ترورزامن", f: "trorzaamun" }],
// [{ p: "ترورزامنو", f: "trorzaamuno" }],
// ],
// },
},
},
// Unisex noun ending with a consonant
@ -328,20 +356,32 @@ const nouns: Array<{
f: "churg",
g: "",
e: "rooster, cock; chicken, poultry",
c: "n. m. unisex",
c: "n. m. unisex anim.",
i: 4101,
},
out: {
masc: [
[{p: "چرګ", f: "churg"}],
[{p: "چرګ", f: "churg"}],
[{p: "چرګو", f: "churgo"}],
],
fem: [
[{p: "چرګه", f: "churga"}],
[{p: "چرګې", f: "churge"}],
[{p: "چرګو", f: "churgo"}],
],
inflections: {
masc: [
[{p: "چرګ", f: "churg"}],
[{p: "چرګ", f: "churg"}],
[{p: "چرګو", f: "churgo"}],
],
fem: [
[{p: "چرګه", f: "churga"}],
[{p: "چرګې", f: "churge"}],
[{p: "چرګو", f: "churgo"}],
],
},
plural: {
masc: [
[{p: "چرګان", f: "churgáan"}],
[{p: "چرګانو", f: "churgáano"}],
],
fem: [
[{p: "چرګانې", f: "churgáane"}],
[{p: "چرګانو", f: "churgáano"}],
],
},
},
},
// ## MASCULINE
@ -357,11 +397,13 @@ const nouns: Array<{
i: 6750,
},
out: {
masc: [
[{p: "سړی", f: "saRey"}],
[{p: "سړي", f: "saRee"}],
[{p: "سړیو", f: "saRiyo"}, {p: "سړو", f: "saRo"}],
],
inflections: {
masc: [
[{p: "سړی", f: "saRey"}],
[{p: "سړي", f: "saRee"}],
[{p: "سړیو", f: "saRiyo"}, {p: "سړو", f: "saRo"}],
],
}
},
},
// Masculine regular ending in ی with emphasis on end
@ -376,11 +418,13 @@ const nouns: Array<{
i: 2931,
},
out: {
masc: [
[{p: "ترېلی", f: "treléy"}],
[{p: "ترېلي", f: "trelée"}],
[{p: "ترېلیو", f: "treliyo"}, {p: "ترېلو", f: "trelo"}],
],
inflections: {
masc: [
[{p: "ترېلی", f: "treléy"}],
[{p: "ترېلي", f: "trelée"}],
[{p: "ترېلیو", f: "treliyo"}, {p: "ترېلو", f: "trelo"}],
],
},
},
},
// Masculine ending in tob
@ -395,11 +439,13 @@ const nouns: Array<{
c: "n. m.",
},
out: {
masc: [
[{p: "مشرتوب", f: "mushurtob"}],
[{p: "مشرتابه", f: "mushurtaabu"}],
[{p: "مشرتبو", f: "mushurtabo"}],
],
inflections: {
masc: [
[{p: "مشرتوب", f: "mushurtob"}],
[{p: "مشرتابه", f: "mushurtaabu"}],
[{p: "مشرتبو", f: "mushurtabo"}],
],
},
},
},
// Masculine irregular
@ -418,11 +464,19 @@ const nouns: Array<{
infbf: "lamandz",
},
out: {
masc: [
[{p: "لمونځ", f: "lamoondz"}],
[{p: "لمانځه", f: "lamaandzu"}],
[{p: "لمنځو", f: "lamandzo"}],
],
inflections: {
masc: [
[{p: "لمونځ", f: "lamoondz"}],
[{p: "لمانځه", f: "lamaandzu"}],
[{p: "لمنځو", f: "lamandzo"}],
],
},
// plural: {
// masc: [
// [{ p: "لمونځونه", f: "lamoondzóona" }],
// [{ p: "لمونځونو", f: "lamoondzóono" }],
// ],
// },
},
},
// Masculine non-inflecting
@ -436,7 +490,14 @@ const nouns: Array<{
c: "n. m.",
i: 8640,
},
out: false,
out: {
plural: {
masc: [
[{ p: "کتابونه", f: "kitaabóona" }],
[{ p: "کتابونو", f: "kitaabóono" }],
],
},
},
},
// ## FEMININE
// Feminine regular ending in ه
@ -451,11 +512,13 @@ const nouns: Array<{
i: 7444,
},
out: {
fem: [
[{p: "ښځه", f: "xudza"}],
[{p: "ښځې", f: "xudze"}],
[{p: "ښځو", f: "xudzo"}],
],
inflections: {
fem: [
[{p: "ښځه", f: "xudza"}],
[{p: "ښځې", f: "xudze"}],
[{p: "ښځو", f: "xudzo"}],
],
},
},
},
{
@ -469,11 +532,13 @@ const nouns: Array<{
i: 365,
},
out: {
fem: [
[{p: "اره", f: "ará"}],
[{p: "ارې", f: "are"}],
[{p: "ارو", f: "aro"}],
],
inflections: {
fem: [
[{p: "اره", f: "ará"}],
[{p: "ارې", f: "are"}],
[{p: "ارو", f: "aro"}],
],
},
},
},
// Feminine regular ending in ع - a'
@ -490,11 +555,13 @@ const nouns: Array<{
apf: "maraají",
},
out: {
fem: [
[{p: "مرجع", f: "marja'"}],
[{p: "مرجعې", f: "marje"}],
[{p: "مرجعو", f: "marjo"}],
],
inflections: {
fem: [
[{p: "مرجع", f: "marja'"}],
[{p: "مرجعې", f: "marje"}],
[{p: "مرجعو", f: "marjo"}],
],
},
},
},
{
@ -510,11 +577,13 @@ const nouns: Array<{
apf: "manaabí",
},
out: {
fem: [
[{p: "منبع", f: "manbá"}],
[{p: "منبعې", f: "manbe"}],
[{p: "منبعو", f: "manbo"}],
],
inflections: {
fem: [
[{p: "منبع", f: "manbá"}],
[{p: "منبعې", f: "manbe"}],
[{p: "منبعو", f: "manbo"}],
],
},
},
},
// Feminine regular ending in ح - a
@ -529,11 +598,13 @@ const nouns: Array<{
i: 5813,
},
out: {
fem: [
[{p: "ذبح", f: "zabha"}],
[{p: "ذبحې", f: "zabhe"}],
[{p: "ذبحو", f: "zabho"}],
],
inflections: {
fem: [
[{p: "ذبح", f: "zabha"}],
[{p: "ذبحې", f: "zabhe"}],
[{p: "ذبحو", f: "zabho"}],
],
},
},
},
// Feminine inanimate regular with missing ه
@ -548,14 +619,17 @@ const nouns: Array<{
i: 9593,
},
out: {
fem: [
[{p: "لار", f: "laar"}],
[{p: "لارې", f: "laare"}],
[{p: "لارو", f: "laaro"}],
],
inflections: {
fem: [
[{p: "لار", f: "laar"}],
[{p: "لارې", f: "laare"}],
[{p: "لارو", f: "laaro"}],
],
},
},
},
// Feminine animate ending in a consonant
// TODO: ALLOW FOR MULTIPLE PLURAL POSSIBILITIES میندې, میېنې etc.
{
in: {
ts: 1527812928,
@ -564,9 +638,18 @@ const nouns: Array<{
g: "",
e: "mother, mom",
c: "n. f. anim.",
ppp: "میندې",
ppf: "meynde",
i: 11113,
},
out: false,
out: {
plural: {
fem: [
[{ p: "میندې", f: "meynde" }],
[{ p: "میندو", f: "meyndo" }],
],
},
},
},
// Feminine regular inanimate ending in ي
{
@ -580,11 +663,13 @@ const nouns: Array<{
i: 5503,
},
out: {
fem: [
[{p: "دوستي", f: "dostee"}],
[{p: "دوستۍ", f: "dostuy"}],
[{p: "دوستیو", f: "dostuyo"}],
],
inflections: {
fem: [
[{p: "دوستي", f: "dostee"}],
[{p: "دوستۍ", f: "dostuy"}],
[{p: "دوستیو", f: "dostuyo"}],
],
},
},
},
// Feminine regular ending in ۍ
@ -599,11 +684,13 @@ const nouns: Array<{
i: 8718,
},
out: {
fem: [
[{p: "کرسۍ", f: "kUrsuy"}],
[{p: "کرسۍ", f: "kUrsuy"}],
[{p: "کرسیو", f: "kUrsuyo"}, { p: "کرسو", f: "kUrso"}],
],
inflections: {
fem: [
[{p: "کرسۍ", f: "kUrsuy"}],
[{p: "کرسۍ", f: "kUrsuy"}],
[{p: "کرسیو", f: "kUrsuyo"}, { p: "کرسو", f: "kUrso"}],
],
},
},
},
// Feminine regular ending in ا
@ -611,18 +698,19 @@ const nouns: Array<{
in: {
ts: 1527812456,
p: "اړتیا",
f: "aRtiyaa, aRtyaa",
f: "aRtiyáa, aRtyáa",
g: "",
e: "need, necessity",
c: "n. f.",
i: 376,
},
out: {
fem: [
[{p: "اړتیا", f: "aRtiyaa"}],
[{p: "اړتیاوې", f: "aRtiyaawe"}],
[{p: "اړتیاوو", f: "aRtiyaawo"}],
],
plural: {
fem: [
[{p: "اړتیاوې", f: "aRtiyáawe"}, { p: "اړتیاګانې", f:"aRtiyaagáane"}],
[{p: "اړتیاوو", f: "aRtiyáawo"}, { p: "اړتیاګانو", f:"aRtiyaagáano"}],
],
},
},
},
// Feminine regular ending in اع
@ -636,13 +724,15 @@ const nouns: Array<{
c: "n. f.",
i: 12205,
},
out: {
fem: [
[{p: "وداع", f: "widáa'"}],
[{p: "وداعوې", f: "widáawe"}],
[{p: "وداعوو", f: "widáawo"}],
],
},
out: false,
// out: {
// plural: {
// fem: [
// [{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}],
// [{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}],
// ],
// },
// },
},
// Word with no inflections
{
@ -658,6 +748,7 @@ const nouns: Array<{
},
out: false,
},
// TODO: WORDS THAT ARE ALREADY PLURAL!
];
const others: T.DictionaryEntry[] = [

View File

@ -12,6 +12,10 @@ import {
splitDoubleWord,
ensureUnisexInflections,
makePsString,
removeFVarients,
concatPsString,
endsInConsonant,
endsInAaOrOo,
} from "./p-text-helpers";
import {
removeAccents,
@ -20,95 +24,102 @@ import * as T from "../types";
const endingInSingleARegex = /[^a]'??[aá]'??$/;
const endingInHeyOrAynRegex = /[^ا][هع]$/;
const endingInAlefRegex = /اع?$/;
// const endingInAlefRegex = /اع?$/;
export function inflectWord(word: T.DictionaryEntry): T.Inflections | false {
export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
// If it's a noun/adj, inflect accordingly
// TODO: What about n. f. / adj. that end in ي ??
if (word.noInf) {
const w = removeFVarients(word);
if (w.noInf) {
return false;
}
if (word.c?.includes("doub.")) {
const words = splitDoubleWord(word);
const inflected = words.map((word) => ensureUnisexInflections(inflectWord(word), word));
return concatInflections(
inflected[0],
inflected[1],
) as T.UnisexInflections;
if (w.c?.includes("doub.")) {
const words = splitDoubleWord(w);
const inflected = words.map((x) => ensureUnisexInflections(inflectWord(x), x));
return {
inflections: concatInflections(
inflected[0].inflections,
inflected[1].inflections,
) as T.UnisexInflections,
};
}
if (word.c && (word.c.includes("adj.") || word.c.includes("unisex"))) {
return handleUnisexWord(word);
if (w.c && (w.c.includes("adj.") || w.c.includes("unisex"))) {
return handleUnisexWord(w);
}
if (word.c && (word.c.includes("n. m."))) {
return handleMascNoun(word);
if (w.c && (w.c.includes("n. m."))) {
return handleMascNoun(w);
}
if (word.c && (word.c.includes("n. f."))) {
return handleFemNoun(word);
if (w.c && (w.c.includes("n. f."))) {
return handleFemNoun(w);
}
// It's not a noun/adj
return false;
}
// LEVEL 2 FUNCTIONS
function handleUnisexWord(word: T.DictionaryEntry): T.Inflections | false {
// Get first of comma seperated phonetics entries
const f = word.f.split(",")[0].trim();
function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
// Get last letter of Pashto and last two letters of phonetics
// TODO: !!! Handle weird endings / symbols ' etc.
const pEnd = word.p.slice(-1);
const plural = makePlural(word);
if (word.infap && word.infaf && word.infbp && word.infbf) {
return inflectIrregularUnisex(word.p, f, [
{p: word.infap, f: word.infaf},
{p: word.infbp, f: word.infbf},
]);
return {
inflections: inflectIrregularUnisex(word.p, word.f, [
{p: word.infap, f: word.infaf},
{p: word.infbp, f: word.infbf},
]),
plural,
};
}
if (pEnd === "ی" && f.slice(-2) === "ey") {
return inflectRegularYeyUnisex(word.p, f);
if (pEnd === "ی" && word.f.slice(-2) === "ey") {
return { inflections: inflectRegularYeyUnisex(word.p, word.f), plural };
}
if (pEnd === "ه" && word.g.slice(-1) === "u") {
return inflectRegularShwaEndingUnisex(word.p, f);
return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), plural };
}
if (pEnd === "ی" && f.slice(-2) === "éy") {
return inflectEmphasizedYeyUnisex(word.p, f);
if (pEnd === "ی" && word.f.slice(-2) === "éy") {
return { inflections: inflectEmphasizedYeyUnisex(word.p, word.f), plural };
}
if (
pashtoConsonants.includes(pEnd) ||
word.p.slice(-2) === "وی" ||
word.p.slice(-2) === "ای" ||
(word.p.slice(-1) === "ه" && f.slice(-1) === "h")
(word.p.slice(-1) === "ه" && word.f.slice(-1) === "h")
) {
return inflectConsonantEndingUnisex(word.p, f);
return { inflections: inflectConsonantEndingUnisex(word.p, word.f), plural };
}
return false;
}
function handleMascNoun(word: T.DictionaryEntry): T.Inflections | false {
// Get first of comma seperated phonetics entries
const f = word.f.split(",")[0].trim();
function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
// Get last letter of Pashto and last two letters of phonetics
// TODO: !!! Handle weird endings / symbols ' etc.
const pEnd = word.p.slice(-1);
const fEnd = f.slice(-2);
if (word.infap && word.infaf && word.infbp && word.infbf) {
return inflectIrregularMasc(word.p, f, [
{p: word.infap, f: word.infaf},
{p: word.infbp, f: word.infbf},
]);
const plural = makePlural(w);
const pEnd = w.p.slice(-1);
const fEnd = w.f.slice(-2);
if (w.infap && w.infaf && w.infbp && w.infbf) {
return {
inflections: inflectIrregularMasc(w.p, w.f, [
{p: w.infap, f: w.infaf},
{p: w.infbp, f: w.infbf},
]),
plural,
};
}
const isTobEnding = (word.p.slice(-3) === "توب" && ["tób", "tob"].includes(f.slice(-3)) && word.p.length > 3);
const isTobEnding = (w.p.slice(-3) === "توب" && ["tób", "tob"].includes(w.f.slice(-3)) && w.p.length > 3);
if (isTobEnding) {
return inflectTobMasc(word.p, f);
return { inflections: inflectTobMasc(w.p, w.f), plural };
}
if (pEnd === "ی" && fEnd === "ey") {
return inflectRegularYeyMasc(word.p, f);
return { inflections: inflectRegularYeyMasc(w.p, w.f), plural };
}
if (pEnd === "ی" && fEnd === "éy") {
return inflectRegularEmphasizedYeyMasc(word.p, f);
return { inflections: inflectRegularEmphasizedYeyMasc(w.p, w.f), plural };
}
return false;
return plural ? { plural } : false
}
function handleFemNoun(word: T.DictionaryEntry): T.Inflections | false {
function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
// Get first of comma seperated phonetics entries
const f = word.f.split(",")[0].trim();
/* istanbul ignore next */ // will always have word.c at this point
@ -116,25 +127,27 @@ function handleFemNoun(word: T.DictionaryEntry): T.Inflections | false {
const animate = c.includes("anim.");
const pEnd = word.p.slice(-1);
const plural = makePlural(word);
if (endingInHeyOrAynRegex.test(word.p) && endingInSingleARegex.test(f)) {
return inflectRegularAFem(word.p, f);
return { inflections: inflectRegularAFem(word.p, f), plural };
}
if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(f)) {
return inflectRegularAWithHimPEnding(word.p, f);
return { inflections: inflectRegularAWithHimPEnding(word.p, f), plural };
}
if (pashtoConsonants.includes(pEnd) && !animate) {
return inflectRegularInanMissingAFem(word.p, f);
return { inflections: inflectRegularInanMissingAFem(word.p, f), plural };
}
if (pEnd === "ي" && (!animate)) {
return inflectRegularInanEeFem(word.p, f);
return { inflections: inflectRegularInanEeFem(word.p, f), plural };
}
if (pEnd === "ۍ") {
return inflectRegularUyFem(word.p, f);
return { inflections: inflectRegularUyFem(word.p, f), plural };
}
if (endingInAlefRegex.test(word.p)) {
return inflectRegularAaFem(word.p, f);
}
return false;
// if (endingInAlefRegex.test(word.p)) {
// return { inflections: inflectRegularAaFem(word.p, f) };
// }
return plural ? { plural } : false;
}
// LEVEL 3 FUNCTIONS
@ -341,13 +354,77 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections {
};
}
function inflectRegularAaFem(p: string, f: string): T.Inflections {
const baseF = ["'", ""].includes(f.slice(-1)) ? f.slice(0, -1) : f;
return {
fem: [
[{p, f}],
[{p: `${p}وې`, f: `${baseF}we`}],
[{p: `${p}وو`, f: `${baseF}wo`}],
],
};
/**
 * Builds the plural forms from the plural recorded on the entry itself (ppp / ppf)
 *
 * Only feminine nouns ("n. f." in the entry's c field) are handled so far.
 *
 * @param word - the entry, with phonetic variations already removed
 * @returns the plural and its inflected form, or undefined when the entry has no
 * ppp/ppf pair or is not a feminine noun
 */
function makePashtoPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
    const { ppp, ppf } = word;
    if (!ppp || !ppf) {
        return undefined;
    }
    const plural = makePsString(ppp, ppf);
    // TODO: Add male Pashto plural
    // TODO: handle masculine and unisex
    if (!word.c?.includes("n. f.")) {
        return undefined;
    }
    // todo: function to add و ending automatically
    // the last character of both the script and the phonetics is swapped for و / "o"
    const stem = makePsString(plural.p.slice(0, -1), plural.f.slice(0, -1));
    return {
        fem: [
            [plural],
            [concatPsString(stem, { p: "و", f: "o" })],
        ],
    };
}
/**
 * Works out the plural forms of a noun
 *
 * A plural recorded on the entry (see makePashtoPlural) takes precedence. Otherwise a
 * plural suffix is attached, but only for entries without an infap field, in these cases:
 *  - animate unisex nouns ending in a consonant: masculine and feminine animate suffixes
 *  - masculine nouns ending in a consonant, except those ending in توب
 *  - feminine nouns for which endsInAaOrOo is true
 *
 * @param w - the entry, with phonetic variations already removed
 * @returns the plural forms, or undefined when none of the cases apply
 */
function makePlural(w: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
    const recorded = makePashtoPlural(w);
    if (recorded) {
        return recorded;
    }

    // masculine plural + inflected plural; animate and inanimate take different suffixes
    const mascForms = (animate?: boolean): T.PluralInflectionSet => {
        const stem = removeAccents(w);
        const plainSuffix = animate ? { p: "ان", f: "áan" } : { p: "ونه", f: "óona" };
        const inflectedSuffix = animate ? { p: "انو", f: "áano" } : { p: "ونو", f: "óono" };
        return [
            [concatPsString(stem, plainSuffix)],
            [concatPsString(stem, inflectedSuffix)],
        ];
    };

    // animate plurals for both genders of a unisex noun
    const animUnisexForms = (): T.UnisexSet<T.PluralInflectionSet> => {
        const stem = removeAccents(w);
        return {
            masc: mascForms(true),
            fem: [
                [concatPsString(stem, { p: "انې", f: "áane" })],
                [concatPsString(stem, { p: "انو", f: "áano" })],
            ],
        };
    };

    // two alternatives per form: a و-suffix keeping the word's accent, and a ګان-suffix
    // which carries its own accent and therefore goes on the accent-less word
    const femLongVowelForms = (): T.PluralInflectionSet => {
        const accented = makePsString(w.p, w.f);
        const unaccented = removeAccents(accented);
        return [
            [
                concatPsString(accented, { p: "وې", f: "we" }),
                concatPsString(unaccented, { p: "ګانې", f: "gáane" }),
            ],
            [
                concatPsString(accented, { p: "وو", f: "wo" }),
                concatPsString(unaccented, { p: "ګانو", f: "gáano" }),
            ],
        ];
    };

    const anim = w.c?.includes("anim.");
    const noIrregularInflection = !w.infap;

    // "unisex" is tested first, so a "n. m. unisex" entry is never treated as a plain masc noun
    if (w.c?.includes("unisex")) {
        if (endsInConsonant(w) && noIrregularInflection && anim) {
            return animUnisexForms();
        }
        return undefined;
    }
    if (w.c?.includes("n. m.")) {
        if (endsInConsonant(w) && noIrregularInflection && (w.p.slice(-3) !== "توب")) {
            return {
                masc: mascForms(anim),
            };
        }
        return undefined;
    }
    if (w.c?.includes("n. f.")) {
        // TODO: What about endings in long ee / animate at inanimate
        if (endsInAaOrOo(w) && noIrregularInflection) {
            return {
                fem: femLongVowelForms(),
            };
        }
        return undefined;
    }
    return undefined;
}

View File

@ -121,7 +121,7 @@ export function getVerbInfo(
return getGenerativeStativeCompoundVerbInfo(entry, complement as T.DictionaryEntry);
}
}
const comp = complement ? ensureUnisexInflections(complement) : undefined;
const comp = complement ? ensureUnisexInf(complement) : undefined;
const root = getVerbRoots(entry, transitivity, comp);
const stem = getVerbStems(entry, root, transitivity, comp);
const infinitive = "mascSing" in root.imperfective ? root.imperfective.mascSing.long : root.imperfective.long;
@ -877,7 +877,7 @@ function addOoPrefix(
};
}
function ensureUnisexInflections(complement: T.DictionaryEntry): T.UnisexInflections {
function ensureUnisexInf(complement: T.DictionaryEntry): T.UnisexInflections {
const inflected = inflectWord(complement);
const isUnisex = inflected && (("masc" in inflected) && ("fem" in inflected));
if (isUnisex) {

View File

@ -114,6 +114,10 @@ export type DictionaryEntry = {
ep?: string;
}
// Branded variants of DictionaryEntry / PsString / string: each one is the base type
// intersected with a __brand member whose string-literal type differs per variant, so a
// plain value cannot be passed where a "no phonetics variations" value is required.
// In this change, values of these types are produced by removeFVarients in p-text-helpers.

/** a DictionaryEntry with the phonetics variations removed */
export type DictionaryEntryNoFVars = DictionaryEntry & { __brand: "name for a dictionary entry with all the phonetics variations removed" };
/** a PsString with the phonetics variations removed */
export type PsStringNoFVars = PsString & { __brand: "name for a ps string with all the phonetics variations removed" };
/** a phonetics string with the phonetics variations removed */
export type FStringNoFVars = string & { __brand: "name for a phonetics string with all the phonetics variations removed" };
export type DictionaryEntryTextField = "p" | "f" | "e" | "c" | "infap" | "infaf" | "infbp" | "infbf" | "app" | "apf" | "ppp" | "ppf" | "psp" | "psf" | "ssp" | "ssf" | "prp" | "prf" | "pprtp" | "pprtf" | "tppp" | "tppf" | "ec" | "ep";
export type DictionaryEntryBooleanField = "noInf" | "shortIntrans" | "noOo" | "sepOo" | "diacExcept";
export type DictionaryEntryNumberField = "ts" | "i" | "l" | "separationAtP" | "separationAtF";
@ -334,12 +338,25 @@ export type PerfectContent = {
// Plain, 1st, and 2nd Inflection
export type InflectionSet = ArrayFixed<ArrayOneOrMore<PsString>, 3>;
// Plural and Second Inflection
export type PluralInflectionSet = ArrayFixed<ArrayOneOrMore<PsString>, 2>
export type Gender = "masc" | "fem";
export type UnisexInflections = Record<Gender, InflectionSet>;
export type UnisexSet<T> = Record<Gender, T>;
export type GenderedSet<T> = UnisexSet<T> | Omit<UnisexSet<T>, "fem"> | Omit<UnisexSet<T>, "masc">;
export type UnisexInflections = UnisexSet<InflectionSet>;
export type Inflections = UnisexInflections
| Omit<UnisexInflections, "fem"> | Omit<UnisexInflections, "masc">;
export type Inflections = GenderedSet<InflectionSet>;
export type PluralInflections = GenderedSet<PluralInflectionSet>;
/**
 * What the inflector returns for a word:
 *  - false: neither inflections nor plural forms
 *  - inflections only
 *  - plural forms, optionally together with inflections
 */
export type InflectorOutput =
    | false
    | { inflections: Inflections }
    | { plural: PluralInflections, inflections?: Inflections };
export type PersonLine = [
/** singular form of person */