added vocatives!

This commit is contained in:
adueck 2024-07-24 14:50:48 -04:00
parent 635a2cd805
commit b9269b8559
17 changed files with 1070 additions and 144 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{
"name": "pashto-inflector",
"version": "7.1.7",
"version": "7.2.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "pashto-inflector",
"version": "7.1.7",
"version": "7.2.0",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "pashto-inflector",
"version": "7.1.7",
"version": "7.2.0",
"author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com",

View File

@ -1,12 +1,12 @@
{
"name": "@lingdocs/ps-react",
"version": "7.1.7",
"version": "7.2.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@lingdocs/ps-react",
"version": "7.1.7",
"version": "7.2.0",
"license": "MIT",
"dependencies": {
"@formkit/auto-animate": "^1.0.0-beta.3",

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/ps-react",
"version": "7.1.7",
"version": "7.2.0",
"description": "Pashto inflector library module with React components",
"main": "dist/components/library.js",
"module": "dist/components/library.js",

View File

@ -43,10 +43,12 @@ const InflectionTable = ({
inf,
textOptions,
hideTitle,
vocative,
}: {
inf: T.Inflections | T.PluralInflections;
textOptions: T.TextOptions;
hideTitle?: boolean;
vocative?: boolean;
}) => {
// const [showingExplanation, setShowingExplanation] = useState(false);
/* istanbul ignore next */ // Insanely can't see the modal to close it
@ -105,6 +107,8 @@ const InflectionTable = ({
<tbody>
{(!isPluralInfs
? ["Plain", "1st", "2nd"]
: vocative
? ["Voc.", "Plur. Voc."]
: ["Plural", "2nd Inf."]
).map((title, i) => (
<tr key={title}>

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/inflect",
"version": "7.1.7",
"version": "7.2.0",
"description": "Pashto inflector library",
"main": "dist/index.js",
"types": "dist/lib/library.d.ts",

View File

@ -15,6 +15,7 @@ import {
splitUpSyllables,
hasAccents,
countSyllables,
getAccentPos,
} from "./accent-helpers";
const toAccentFront = [
@ -40,6 +41,27 @@ test(`accentOnFront should work`, () => {
});
});
// Expected accent positions, counted in syllables from the end of the word
// (0 = last syllable, 1 = second-to-last, -1 = no accent at all)
const toGetAccentPos = [
  { input: makePsString("کورنۍ", "koranúy"), output: 0 },
  { input: makePsString("ستړی", "stúRay"), output: 1 },
  { input: makePsString("لیدلی", "leedulay"), output: -1 },
];

test(`getAccentPos should work`, () => {
  for (const { input, output } of toGetAccentPos) {
    expect(getAccentPos(input)).toEqual(output);
  }
});
const toAccentPastParticiple = [
{
input: makePsString("پرېښی", "prexay"),
@ -59,6 +81,8 @@ test(`accentPastParticiple should work`, () => {
// Checks syllable splitting of phonetic text; inputs such as "kh" and "x"
// are expected to produce an empty list of syllables
test(`splitUpSyllables should work`, () => {
expect(splitUpSyllables("akheestul")).toEqual(["akh", "eest", "ul"]);
expect(splitUpSyllables("kh")).toEqual([]);
expect(splitUpSyllables("x")).toEqual([]);
});
test("countSyllables", () => {
@ -74,9 +98,19 @@ test("countSyllables", () => {
// accentFSylsOnNFromEnd takes either pre-split syllables or a raw string,
// with n counted from the end (0 = last syllable)
test(`accentOnFSylsOnNFromEnd should work`, () => {
expect(accentFSylsOnNFromEnd(["pu", "xtaa", "nu"], 0)).toBe("puxtaanú");
expect(accentFSylsOnNFromEnd(["leed", "ul", "ay"], 1)).toBe("leedúlay");
// no syllables at all gives an empty string
expect(accentFSylsOnNFromEnd([], 0)).toBe("");
// a raw string that yields no syllables comes back unchanged
expect(accentFSylsOnNFromEnd("x", 0)).toBe("x");
});
test(`accentOnNFromEnd should work`, () => {
expect(accentOnNFromEnd({ p: "ښه", f: "xu" }, 0)).toEqual({
p: "ښه",
f: "xú",
});
expect(accentOnNFromEnd({ p: "ښ", f: "x" }, 0)).toEqual({
p: "ښ",
f: "x",
});
expect(accentOnNFromEnd({ p: "پښتانه", f: "puxtaanu" }, 0)).toEqual({
p: "پښتانه",
f: "puxtaanú",

View File

@ -85,7 +85,11 @@ export function accentFSylsOnNFromEnd(
n: number
): string {
if (typeof syls === "string") {
return accentFSylsOnNFromEnd(splitUpSyllables(syls), n);
const s = splitUpSyllables(syls);
if (s.length === 0) {
return syls;
}
return accentFSylsOnNFromEnd(s, n);
}
if (syls.length === 0) {
return "";
@ -100,9 +104,9 @@ export function accentFSylsOnNFromEnd(
/**
 * Re-accents the phonetics of a word so that the accent sits on the
 * syllable `n` places from the end (0 being the last syllable).
 * Any existing accents are stripped first.
 *
 * @param ps - the word to accent
 * @param n - which syllable to accent, counted from the end
 * @returns the word with its phonetics re-accented, or `ps` itself,
 * untouched, when its phonetics contain no syllables (e.g. "x")
 */
export function accentOnNFromEnd(ps: T.PsString, n: number): T.PsString {
  const syllables = splitUpSyllables(removeAccents(ps.f));
  return syllables.length === 0
    ? ps
    : makePsString(ps.p, accentFSylsOnNFromEnd(syllables, n));
}
@ -127,6 +131,26 @@ export function accentLetter(s: string): string {
});
}
/**
 * Finds which syllable of a word carries the accent, counting backwards
 * from the end of the word. Only the phonetics (`f`) are examined.
 *
 * @param ps - the word to inspect
 * @returns 0 when the last syllable is accented, 1 for the second-to-last,
 * and so on; -1 when no syllable has an accent
 */
export function getAccentPos(ps: T.PsString): number {
  // walk the syllables last-to-first so the index found is already
  // "distance from the end"; findIndex yields -1 when nothing matches
  const fromEnd = [...splitUpSyllables(ps.f)].reverse();
  return fromEnd.findIndex((syl) => hasAccents(syl || ""));
}
/**
 * Tells whether the accent of a word falls on its final syllable,
 * i.e. whether `getAccentPos` reports position 0 for it.
 */
export function accentIsOnEnd(ps: T.PsString): boolean {
  const accentPos = getAccentPos(ps);
  return accentPos === 0;
}
export function accentPsSyllable(ps: T.PsString): T.PsString {
return {
p: ps.p,

View File

@ -76,6 +76,42 @@ export function pureSingleOrLengthOpts<A>(a: A): T.SingleOrLengthOpts<A> {
return a;
}
/**
 * Transforms the fields of a PsString with the supplied string functions.
 *
 * `f` may carry a function for the Pashto script (`p`), for the phonetics
 * (`f`), or for both. A field with no matching function is copied over
 * unchanged.
 *
 * @param f - object holding the transformation(s) to apply
 * @param x - the PsString to transform
 * @returns a new PsString with the transformations applied
 */
export function applyPsString(
  f:
    | {
        p: (x: string) => string;
      }
    | {
        f: (x: string) => string;
      }
    | {
        p: (x: string) => string;
        f: (x: string) => string;
      },
  x: T.PsString
): T.PsString {
  return {
    p: "p" in f ? f.p(x.p) : x.p,
    f: "f" in f ? f.f(x.f) : x.f,
  };
}
/**
 * Applies the function `f` to the value `x` and returns the result.
 *
 * @param f - the function to apply
 * @param x - the value handed to `f`
 * @returns whatever `f` returns for `x`
 */
export function mapGen<A, B>(f: (x: A) => B, x: A): B {
return f(x);
}
/**
* like an applicative <*> operator for SingleOrLengthOpts
*

View File

@ -11,7 +11,7 @@ import {
} from "./type-predicates";
export function getInflectionPattern(
e: T.NounEntry | T.AdjectiveEntry
e: T.InflectableEntry
): T.InflectionPattern {
return isPattern1Entry(e)
? T.InflectionPattern.Basic

View File

@ -21,7 +21,6 @@ import {
splitDoubleWord,
endsInConsonant,
addOEnding,
endsInShwa,
splitPsByVarients,
endsWith,
trimOffPs,
@ -1596,13 +1595,6 @@ test("addOEnding", () => {
});
});
test("endsInShwa", () => {
expect(endsInShwa({ p: "ښایسته", f: "xaaystú" })).toBe(true);
expect(endsInShwa({ p: "ښایسته", f: "xaaystu" })).toBe(true);
expect(endsInShwa({ p: "ښایسته", f: "xaaysta" })).toBe(false);
expect(endsInShwa({ p: "کور", f: "kor" })).toBe(false);
});
test("splitPsByVarients", () => {
expect(
splitPsByVarients({ p: "حوادث, حادثات", f: "hawáadis, haadisáat" })

View File

@ -563,6 +563,44 @@ export function unisexInfToObjectMatrix(
};
}
/**
 * Joins two sets of plural (or vocative) inflections into one, pairing up
 * the forms position by position and putting a space between them.
 * Used for building the forms of double words.
 *
 * A gender is only present in the result when both `a` and `b` have it.
 *
 * @param a - inflections for the first word
 * @param b - inflections for the second word
 * @returns the combined inflections
 * @throws Error when paired lists of variants differ in length, or when
 * `a` and `b` have no gender in common
 */
export function concatPlurals(
  a: T.PluralInflections,
  b: T.PluralInflections
): T.PluralInflections {
  // joins the variants found at the same index, separated by a space
  const joinVariants = (
    first: T.ArrayOneOrMore<T.PsString>,
    second: T.ArrayOneOrMore<T.PsString>
  ): T.ArrayOneOrMore<T.PsString> => {
    if (first.length !== second.length) {
      throw new Error("arrays of plural/vocative inflections are different!");
    }
    return first.map((ps, i) =>
      concatPsString(ps, " ", second[i])
    ) as T.ArrayOneOrMore<T.PsString>;
  };
  // joins both rows of an inflection set
  const joinSet = (
    left: T.PluralInflectionSet,
    right: T.PluralInflectionSet
  ): T.PluralInflectionSet => [
    joinVariants(left[0], right[0]),
    joinVariants(left[1], right[1]),
  ];

  const masc = "masc" in a && "masc" in b ? joinSet(a.masc, b.masc) : undefined;
  const fem = "fem" in a && "fem" in b ? joinSet(a.fem, b.fem) : undefined;

  if (masc) {
    return fem ? { masc, fem } : { masc };
  }
  if (fem) {
    return { fem };
  }
  throw new Error("error concating plural/vocative inflections for double!");
}
export function concatInflections(
comp: T.PsString | T.SingleOrLengthOpts<T.UnisexInflections>,
infs: T.SingleOrLengthOpts<T.UnisexInflections>
@ -640,6 +678,10 @@ export function allOnePersonInflection(
return block;
}
/**
 * Checks whether the phonetics of a word end in a shwa, written as
 * "u" or, when accented, "ú". Only the phonetics are looked at.
 */
export function hasShwaEnding({ f }: T.PsString): boolean {
  return ["u", "ú"].some((shwa) => f.endsWith(shwa));
}
export function choosePersInf<T extends object>(
x: T.FullForm<T>,
persInf: T.PersonInflectionsField
@ -950,9 +992,10 @@ export function ensureUnisexInflections(
): {
inflections: T.UnisexInflections;
plural?: T.PluralInflections;
vocative?: T.PluralInflections;
} {
const ps = { p: w.p, f: w.f };
if (infs === false || infs.inflections === undefined) {
if (infs === false) {
return {
inflections: {
masc: [[ps], [ps], [ps]],
@ -960,12 +1003,24 @@ export function ensureUnisexInflections(
},
};
}
if (!infs.inflections) {
return {
inflections: {
masc: [[ps], [ps], [ps]],
fem: [[ps], [ps], [ps]],
},
...("plural" in infs ? { plural: infs.plural } : {}),
...("vocative" in infs ? { vocative: infs.vocative } : {}),
};
}
if (!("fem" in infs.inflections)) {
return {
inflections: {
...infs.inflections,
fem: [[ps], [ps], [ps]],
},
...("plural" in infs ? { plural: infs.plural } : {}),
...("vocative" in infs ? { vocative: infs.vocative } : {}),
};
}
if (!("masc" in infs.inflections)) {
@ -974,11 +1029,14 @@ export function ensureUnisexInflections(
...infs.inflections,
masc: [[ps], [ps], [ps]],
},
...("plural" in infs ? { plural: infs.plural } : {}),
...("vocative" in infs ? { vocative: infs.vocative } : {}),
};
}
// for some dumb reason have to do this for type safety
return {
inflections: infs.inflections,
...("plural" in infs ? { plural: infs.plural } : {}),
...("vocative" in infs ? { vocative: infs.vocative } : {}),
};
}
@ -990,24 +1048,27 @@ export function endsInAaOrOo(w: T.PsString): boolean {
);
}
/**
 * Checks whether a word ends with the "tob" suffix: the Pashto must end
 * in "توب" and the phonetics in "tob" (accented or not). The word has to
 * be longer than the suffix itself.
 */
export function endsInTob(ps: T.PsString): boolean {
  // a word that is nothing but the ending does not count
  if (ps.p.length <= 3) {
    return false;
  }
  const pEnding = ps.p.slice(-3);
  const fEnding = ps.f.slice(-3);
  return pEnding === "توب" && (fEnding === "tób" || fEnding === "tob");
}
export function endsInConsonant(w: T.PsString): boolean {
// TODO: Add reporting back that the plural ending will need a space?
function endsInLongDipthong(w: T.PsString): boolean {
function isLongDipthong(end: T.PsString): boolean {
return (
psStringEquals(end, { p: "ای", f: "aay" }, true) ||
psStringEquals(end, { p: "وی", f: "ooy" }, true)
);
}
const end = makePsString(w.p.slice(-2), w.f.slice(-3));
return isLongDipthong(end);
}
if (endsInLongDipthong(w)) return true;
// const pCons = pashtoConsonants.includes(w.p.slice(-1));
const fCons = phoneticsConsonants.includes(simplifyPhonetics(w.f).slice(-1));
return fCons;
return (
phoneticsConsonants.includes(simplifyPhonetics(w.f).slice(-1)) ||
endsWith(
[
{ p: "ای", f: "aay" },
{ p: "وی", f: "ooy" },
],
w
) ||
endsWith([{ p: "ه", f: "h" }], w) ||
endsWith([{ p: "و", f: "w" }], w)
);
}
/**
@ -1052,24 +1113,13 @@ export function addOEnding(ps: T.PsString): T.ArrayOneOrMore<T.PsString> {
];
}
/**
* Determines whether a string ends in a shwa or not
*
* @param w
*/
export function endsInShwa(w: T.PsString): boolean {
const p = w.p.slice(-1);
const f = w.f.slice(-1);
return p === "ه" && ["u", "ú"].includes(f);
}
/**
* applies f function to both the p and f in a PsString
*
*/
export function mapPsString<T>(
ps: T.PsString,
f: (s: string) => T
f: (s: string) => T,
ps: T.PsString
): { p: T; f: T } {
return {
p: f(ps.p),
@ -1084,7 +1134,7 @@ export function mapPsString<T>(
* @returns
*/
export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore<T.PsString> {
const { p, f } = mapPsString(w, splitVarients);
const { p, f } = mapPsString(splitVarients, w);
return zipWith(makePsString, p, f) as T.ArrayOneOrMore<T.PsString>;
}

View File

@ -43,6 +43,10 @@ const adjectives: {
[{ p: "زړو", f: "zaRó" }],
],
},
vocative: {
masc: [[{ p: "زوړه", f: "zóRa" }], [{ p: "زړو", f: "zaRó" }]],
fem: [[{ p: "زړې", f: "zaRé" }], [{ p: "زړو", f: "zaRó" }]],
},
},
},
// regular adjective ending in ی
@ -75,6 +79,22 @@ const adjectives: {
],
],
},
vocative: {
masc: [
[{ p: "ستړیه", f: "stúRiya" }],
[
{ p: "ستړیو", f: "stúRiyo" },
{ p: "ستړو", f: "stúRo" },
],
],
fem: [
[{ p: "ستړې", f: "stúRe" }],
[
{ p: "ستړیو", f: "stúRiyo" },
{ p: "ستړو", f: "stúRo" },
],
],
},
},
},
// regular adjective ending in ی with stress on the end
@ -107,6 +127,22 @@ const adjectives: {
],
],
},
vocative: {
masc: [
[{ p: "وروستیه", f: "wroostúya" }],
[
{ p: "وروستیو", f: "wroostúyo" },
{ p: "وروستو", f: "wroostó" },
],
],
fem: [
[{ p: "وروستۍ", f: "wroostúy" }],
[
{ p: "وروستیو", f: "wroostúyo" },
{ p: "وروستو", f: "wroostó" },
],
],
},
},
},
// regular adjective ending in a consonant
@ -133,6 +169,10 @@ const adjectives: {
[{ p: "سپکو", f: "spúko" }],
],
},
vocative: {
masc: [[{ p: "سپکه", f: "spúka" }], [{ p: "سپکو", f: "spúko" }]],
fem: [[{ p: "سپکې", f: "spúke" }], [{ p: "سپکو", f: "spúko" }]],
},
},
},
// regular adjective ending in a consonant with an accent already
@ -159,6 +199,10 @@ const adjectives: {
[{ p: "ارتو", f: "aráto" }],
],
},
vocative: {
masc: [[{ p: "ارته", f: "aráta" }], [{ p: "ارتو", f: "aráto" }]],
fem: [[{ p: "ارتې", f: "aráte" }], [{ p: "ارتو", f: "aráto" }]],
},
},
},
{
@ -184,6 +228,10 @@ const adjectives: {
[{ p: "لویو", f: "lóoyo" }],
],
},
vocative: {
masc: [[{ p: "لویه", f: "lóoya" }], [{ p: "لویو", f: "lóoyo" }]],
fem: [[{ p: "لویې", f: "lóoye" }], [{ p: "لویو", f: "lóoyo" }]],
},
},
},
{
@ -209,6 +257,10 @@ const adjectives: {
[{ p: "پوهو", f: "póho" }],
],
},
vocative: {
masc: [[{ p: "پوهه", f: "póha" }], [{ p: "پوهو", f: "póho" }]],
fem: [[{ p: "پوهې", f: "póhe" }], [{ p: "پوهو", f: "póho" }]],
},
},
},
// adjective ending in u
@ -235,6 +287,10 @@ const adjectives: {
[{ p: "ویدو", f: "weedó" }],
],
},
vocative: {
masc: [[{ p: "ویده", f: "weedá" }], [{ p: "ویدو", f: "weedó" }]],
fem: [[{ p: "ویدې", f: "weedé" }], [{ p: "ویدو", f: "weedó" }]],
},
},
},
{
@ -260,6 +316,13 @@ const adjectives: {
[{ p: "ښایستو", f: "xaaysto" }],
],
},
vocative: {
masc: [
[{ p: "ښایسته", f: "xaaysta" }],
[{ p: "ښایستو", f: "xaaysto" }],
],
fem: [[{ p: "ښایستې", f: "xaayste" }], [{ p: "ښایستو", f: "xaaysto" }]],
},
},
},
// numbers should inflect just like adjectives
@ -286,6 +349,10 @@ const adjectives: {
[{ p: "شپږو", f: "shpúGo" }],
],
},
vocative: {
masc: [[{ p: "شپږه", f: "shpúGa" }], [{ p: "شپږو", f: "shpúGo" }]],
fem: [[{ p: "شپږې", f: "shpúGe" }], [{ p: "شپږو", f: "shpúGo" }]],
},
},
},
// without accents
@ -312,6 +379,46 @@ const adjectives: {
[{ p: "ښو", f: "xo" }],
],
},
vocative: {
masc: [[{ p: "ښه", f: "xa" }], [{ p: "ښو", f: "xo" }]],
fem: [[{ p: "ښې", f: "xe" }], [{ p: "ښو", f: "xo" }]],
},
},
},
// pattern 5 adjectives
{
in: {
ts: 1527815265,
i: 10891,
p: "شین",
f: "sheen",
g: "sheen",
e: "green, blue; unripe, immature; bright, sunny",
r: 4,
c: "adj.",
infap: "شنه",
infaf: "shnu",
infbp: "شن",
infbf: "shn",
a: 1,
},
out: {
inflections: {
masc: [
[{ p: "شین", f: "sheen" }],
[{ p: "شنه", f: "shnu" }],
[{ p: "شنو", f: "shno" }],
],
fem: [
[{ p: "شنه", f: "shna" }],
[{ p: "شنې", f: "shne" }],
[{ p: "شنو", f: "shno" }],
],
},
vocative: {
masc: [[{ p: "شنه", f: "shna" }], [{ p: "شنو", f: "shno" }]],
fem: [[{ p: "شنې", f: "shne" }], [{ p: "شنو", f: "shno" }]],
},
},
},
// adjective non-inflecting
@ -363,6 +470,16 @@ const adjectives: {
[{ p: "ګډو وډو", f: "gúDo wúDo" }],
],
},
vocative: {
masc: [
[{ p: "ګډه وډه", f: "gúDa wúDa" }],
[{ p: "ګډو وډو", f: "gúDo wúDo" }],
],
fem: [
[{ p: "ګډې وډې", f: "gúDe wúDe" }],
[{ p: "ګډو وډو", f: "gúDo wúDo" }],
],
},
},
},
];
@ -400,6 +517,10 @@ const nouns: {
[{ p: "مېلمنو", f: "melmanó" }],
],
},
vocative: {
masc: [[{ p: "مېلمه", f: "melmá" }], [{ p: "مېلمنو", f: "melmanó" }]],
fem: [[{ p: "مېلمنې", f: "melmané" }], [{ p: "مېلمنو", f: "melmanó" }]],
},
},
},
// Unisex noun ending with ی
@ -432,6 +553,22 @@ const nouns: {
],
],
},
vocative: {
masc: [
[{ p: "ملګریه", f: "malgúriya" }],
[
{ p: "ملګریو", f: "malgúriyo" },
{ p: "ملګرو", f: "malgúro" },
],
],
fem: [
[{ p: "ملګرې", f: "malgúre" }],
[
{ p: "ملګریو", f: "malgúriyo" },
{ p: "ملګرو", f: "malgúro" },
],
],
},
},
},
// Unisex noun ending on ی with emphasis on the end
@ -466,6 +603,22 @@ const nouns: {
],
],
},
vocative: {
masc: [
[{ p: "ترورزیه", f: "trorzúya" }],
[
{ p: "ترورزیو", f: "trorzúyo" },
{ p: "ترورزو", f: "trorzó" },
],
],
fem: [
[{ p: "ترورزۍ", f: "trorzúy" }],
[
{ p: "ترورزیو", f: "trorzúyo" },
{ p: "ترورزو", f: "trorzó" },
],
],
},
plural: {
masc: [
[{ p: "ترورزامن", f: "trorzaamun" }],
@ -498,6 +651,10 @@ const nouns: {
[{ p: "چرګو", f: "chúrgo" }],
],
},
vocative: {
masc: [[{ p: "چرګه", f: "chúrga" }], [{ p: "چرګو", f: "chúrgo" }]],
fem: [[{ p: "چرګې", f: "chúrge" }], [{ p: "چرګو", f: "chúrgo" }]],
},
plural: {
masc: [
[{ p: "چرګان", f: "churgáan" }],
@ -543,6 +700,22 @@ const nouns: {
],
],
},
vocative: {
masc: [
[{ p: "پلویه", f: "palawúya" }],
[
{ p: "پلویو", f: "palawúyo" },
{ p: "پلوو", f: "palawó" },
],
],
fem: [
[{ p: "پلوۍ", f: "palawúy" }],
[
{ p: "پلویو", f: "palawúyo" },
{ p: "پلوو", f: "palawó" },
],
],
},
plural: {
masc: [
[{ p: "پلویان", f: "palawiyáan" }],
@ -578,6 +751,16 @@ const nouns: {
],
],
},
vocative: {
masc: [
[{ p: "سړیه", f: "saRúya" }],
[
{ p: "سړیو", f: "saRúyo" },
{ p: "سړو", f: "saRó" },
],
],
},
},
},
// Masculine #3 anim
@ -590,6 +773,7 @@ const nouns: {
f: "saylaanáy",
g: "saylaanay",
e: "tourist, sightseer, visitor",
// only masculine here for testing purposes
c: "n. m. anim.",
},
out: {
@ -603,6 +787,15 @@ const nouns: {
],
],
},
vocative: {
masc: [
[{ p: "سیلانیه", f: "saylaanúya" }],
[
{ p: "سیلانیو", f: "saylaanúyo" },
{ p: "سیلانو", f: "saylaanó" },
],
],
},
plural: {
masc: [
[{ p: "سیلانیان", f: "saylaaniyáan" }],
@ -633,6 +826,15 @@ const nouns: {
],
],
},
vocative: {
masc: [
[{ p: "ترېلیه", f: "trelúya" }],
[
{ p: "ترېلیو", f: "trelúyo" },
{ p: "ترېلو", f: "treló" },
],
],
},
},
},
// Masculine ending in tob
@ -641,7 +843,7 @@ const nouns: {
i: 11998,
ts: 1586760783536,
p: "مشرتوب",
f: "mushurtob",
f: "mushurtób",
g: "",
e: "leadership, authority, presidency",
c: "n. m.",
@ -649,9 +851,15 @@ const nouns: {
out: {
inflections: {
masc: [
[{ p: "مشرتوب", f: "mushurtob" }],
[{ p: "مشرتابه", f: "mushurtaabu" }],
[{ p: "مشرتبو", f: "mushurtabo" }],
[{ p: "مشرتوب", f: "mushurtób" }],
[{ p: "مشرتابه", f: "mushurtaabú" }],
[{ p: "مشرتبو", f: "mushurtábo" }],
],
},
vocative: {
masc: [
[{ p: "مشرتوبه", f: "mushurtóba" }],
[{ p: "مشرتبو", f: "mushurtábo" }],
],
},
},
@ -662,12 +870,12 @@ const nouns: {
ts: 1527813809,
i: 11318,
p: "لمونځ",
f: "lamoondz",
g: "lamoondz",
f: "lamóondz",
g: "lamóondz",
e: "Muslim ritual prayers (namaz, salah, salat)",
c: "n. m. irreg.",
infap: "لمانځه",
infaf: "lamaandzu",
infaf: "lamaandzú",
infbp: "لمنځ",
infbf: "lamandz",
ppp: "لمونځونه",
@ -676,11 +884,17 @@ const nouns: {
out: {
inflections: {
masc: [
[{ p: "لمونځ", f: "lamoondz" }],
[{ p: "لمونځ", f: "lamóondz" }],
[{ p: "لمانځه", f: "lamaandzú" }],
[{ p: "لمنځو", f: "lamandzó" }],
],
},
vocative: {
masc: [
[{ p: "لمونځه", f: "lamóondza" }],
[{ p: "لمنځو", f: "lamandzó" }],
],
},
plural: {
masc: [
[{ p: "لمونځونه", f: "lamoondzóona" }],
@ -712,6 +926,9 @@ const nouns: {
[{ p: "غرو", f: "ghro" }],
],
},
vocative: {
masc: [[{ p: "غره", f: "ghra" }], [{ p: "غرو", f: "ghro" }]],
},
plural: {
masc: [[{ p: "غرونه", f: "ghróona" }], [{ p: "غرونو", f: "ghróono" }]],
},
@ -749,6 +966,10 @@ const nouns: {
[{ p: "خرو", f: "khro" }],
],
},
vocative: {
masc: [[{ p: "خره", f: "khra" }], [{ p: "خرو", f: "khro" }]],
fem: [[{ p: "خرې", f: "khre" }], [{ p: "خرو", f: "khro" }]],
},
},
},
// masc plural
@ -793,9 +1014,18 @@ const nouns: {
g: "zRu",
e: "heart",
c: "n. m.",
noInf: true,
},
out: {
inflections: {
masc: [
[{ p: "زړه", f: "zRu" }],
[{ p: "زړه", f: "zRu" }],
[{ p: "زړو", f: "zRo" }],
],
},
vocative: {
masc: [[{ p: "زړه", f: "zRa" }], [{ p: "زړو", f: "zRo" }]],
},
plural: {
masc: [[{ p: "زړونه", f: "zRóona" }], [{ p: "زړونو", f: "zRóono" }]],
},
@ -928,6 +1158,16 @@ const nouns: {
i: 8640,
},
out: {
inflections: {
masc: [
[{ p: "کتاب", f: "kitaab" }],
[{ p: "کتاب", f: "kitaab" }],
[{ p: "کتابو", f: "kitaabo" }],
],
},
vocative: {
masc: [[{ p: "کتابه", f: "kitaaba" }], [{ p: "کتابو", f: "kitaabo" }]],
},
plural: {
masc: [
[{ p: "کتابونه", f: "kitaabóona" }],
@ -952,6 +1192,16 @@ const nouns: {
ep: "teeth",
},
out: {
inflections: {
masc: [
[{ p: "غاښ", f: "ghaax" }],
[{ p: "غاښ", f: "ghaax" }],
[{ p: "غاښو", f: "gháaxo" }],
],
},
vocative: {
masc: [[{ p: "غاښه", f: "gháaxa" }], [{ p: "غاښو", f: "gháaxo" }]],
},
plural: {
masc: [
[{ p: "غاښونه", f: "ghaaxóona" }],
@ -976,6 +1226,16 @@ const nouns: {
ppf: "wadóona",
},
out: {
inflections: {
masc: [
[{ p: "واده", f: "waadú" }],
[{ p: "واده", f: "waadú" }],
[{ p: "وادو", f: "waadó" }],
],
},
vocative: {
masc: [[{ p: "واده", f: "waadá" }], [{ p: "وادو", f: "waadó" }]],
},
plural: {
masc: [[{ p: "ودونه", f: "wadóona" }], [{ p: "ودونو", f: "wadóono" }]],
},
@ -986,12 +1246,22 @@ const nouns: {
ts: 1527817768,
i: 9791,
p: "کارګه",
f: "kaargu",
g: "kaargu",
f: "kaargú",
g: "kaargú",
e: "raven, crow",
c: "n. m. anim.",
},
out: {
inflections: {
masc: [
[{ p: "کارګه", f: "kaargú" }],
[{ p: "کارګه", f: "kaargú" }],
[{ p: "کارګو", f: "kaargó" }],
],
},
vocative: {
masc: [[{ p: "کارګه", f: "kaargá" }], [{ p: "کارګو", f: "kaargó" }]],
},
plural: {
masc: [
[{ p: "کارګان", f: "kaargáan" }],
@ -1011,6 +1281,16 @@ const nouns: {
c: "n. m.",
},
out: {
inflections: {
masc: [
[{ p: "لو", f: "law" }],
[{ p: "لو", f: "law" }],
[{ p: "لوو", f: "láwo" }],
],
},
vocative: {
masc: [[{ p: "لوه", f: "láwa" }], [{ p: "لوو", f: "láwo" }]],
},
plural: {
masc: [[{ p: "لوونه", f: "lawóona" }], [{ p: "لوونو", f: "lawóono" }]],
},
@ -1025,7 +1305,7 @@ const nouns: {
in: {
ts: 1527812797,
p: "ښځه",
f: "xudza",
f: "xúdza",
g: "",
e: "woman, wife",
c: "n. f.",
@ -1034,11 +1314,14 @@ const nouns: {
out: {
inflections: {
fem: [
[{ p: "ښځه", f: "xudza" }],
[{ p: "ښځې", f: "xudze" }],
[{ p: "ښځو", f: "xudzo" }],
[{ p: "ښځه", f: "xúdza" }],
[{ p: "ښځې", f: "xúdze" }],
[{ p: "ښځو", f: "xúdzo" }],
],
},
vocative: {
fem: [[{ p: "ښځې", f: "xúdze" }], [{ p: "ښځو", f: "xúdzo" }]],
},
},
},
{
@ -1059,6 +1342,9 @@ const nouns: {
[{ p: "ارو", f: "aró" }],
],
},
vocative: {
fem: [[{ p: "ارې", f: "aré" }], [{ p: "ارو", f: "aró" }]],
},
},
},
// Feminine regular ending in ع - a'
@ -1082,6 +1368,9 @@ const nouns: {
[{ p: "مرجعو", f: "marjo" }],
],
},
vocative: {
fem: [[{ p: "مرجعې", f: "marje" }], [{ p: "مرجعو", f: "marjo" }]],
},
arabicPlural: {
fem: [[{ p: "مراجع", f: "maraají'" }], [{ p: "مراجو", f: "maraajó" }]],
},
@ -1107,6 +1396,9 @@ const nouns: {
[{ p: "منبعو", f: "manbó" }],
],
},
vocative: {
fem: [[{ p: "منبعې", f: "manbé" }], [{ p: "منبعو", f: "manbó" }]],
},
arabicPlural: {
fem: [[{ p: "منابع", f: "manaabí" }], [{ p: "منابو", f: "manaabó" }]],
},
@ -1162,14 +1454,27 @@ const nouns: {
ts: 1527816113,
i: 3072,
p: "تبلیغ",
f: "tableegh",
g: "tableegh",
f: "tabléegh",
g: "tabléegh",
e: "propaganda; preaching, evangelism",
c: "n. m.",
app: "تبلیغات",
apf: "tableegháat",
},
out: {
inflections: {
masc: [
[{ p: "تبلیغ", f: "tabléegh" }],
[{ p: "تبلیغ", f: "tabléegh" }],
[{ p: "تبلیغو", f: "tabléegho" }],
],
},
vocative: {
masc: [
[{ p: "تبلیغه", f: "tabléegha" }],
[{ p: "تبلیغو", f: "tabléegho" }],
],
},
plural: {
masc: [
[{ p: "تبلیغونه", f: "tableeghóona" }],
@ -1178,8 +1483,8 @@ const nouns: {
},
bundledPlural: {
masc: [
[{ p: "تبلیغه", f: "tableegha" }],
[{ p: "تبلیغو", f: "tableegho" }],
[{ p: "تبلیغه", f: "tabléegha" }],
[{ p: "تبلیغو", f: "tabléegho" }],
],
},
arabicPlural: {
@ -1231,6 +1536,9 @@ const nouns: {
[{ p: "حادثو", f: "haadisó" }],
],
},
vocative: {
fem: [[{ p: "حادثې", f: "haadisé" }], [{ p: "حادثو", f: "haadisó" }]],
},
arabicPlural: {
masc: [
[
@ -1265,6 +1573,9 @@ const nouns: {
[{ p: "تجربو", f: "tajrabó" }],
],
},
vocative: {
fem: [[{ p: "تجربې", f: "tajrabé" }], [{ p: "تجربو", f: "tajrabó" }]],
},
arabicPlural: {
masc: [
[{ p: "تجارب", f: "tajaarib" }],
@ -1286,6 +1597,16 @@ const nouns: {
apf: "ahwáal",
},
out: {
inflections: {
masc: [
[{ p: "حال", f: "haal" }],
[{ p: "حال", f: "haal" }],
[{ p: "حالو", f: "háalo" }],
],
},
vocative: {
masc: [[{ p: "حاله", f: "háala" }], [{ p: "حالو", f: "háalo" }]],
},
plural: {
masc: [
[{ p: "حالونه", f: "haalóona" }],
@ -1337,6 +1658,9 @@ const nouns: {
[{ p: "ذبحو", f: "zabho" }],
],
},
vocative: {
fem: [[{ p: "ذبحې", f: "zabhe" }], [{ p: "ذبحو", f: "zabho" }]],
},
},
},
// Feminine inanimate regular with missing ه
@ -1358,6 +1682,9 @@ const nouns: {
[{ p: "لارو", f: "láaro" }],
],
},
vocative: {
fem: [[{ p: "لارې", f: "láare" }], [{ p: "لارو", f: "láaro" }]],
},
},
},
// Feminine animate ending in a consonant
@ -1375,6 +1702,9 @@ const nouns: {
i: 11113,
},
out: {
vocative: {
fem: [[{ p: "مورې", f: "móre" }], [{ p: "میندو", f: "mayndo" }]],
},
plural: {
fem: [[{ p: "میندې", f: "maynde" }], [{ p: "میندو", f: "mayndo" }]],
},
@ -1432,6 +1762,15 @@ const nouns: {
],
],
},
vocative: {
fem: [
[{ p: "کرسۍ", f: "kUrsúy" }],
[
{ p: "کرسیو", f: "kUrsúyo" },
{ p: "کرسو", f: "kUrsó" },
],
],
},
},
},
{
@ -1456,6 +1795,15 @@ const nouns: {
],
],
},
vocative: {
fem: [
[{ p: "قاضۍ", f: "qaazúy" }],
[
{ p: "قاضیو", f: "qaazúyo" },
{ p: "قاضو", f: "qaazó" },
],
],
},
plural: {
fem: [
[{ p: "قاضیانې", f: "qaaziyáane" }],
@ -1561,7 +1909,16 @@ const nouns: {
[{ p: "شی", f: "shay" }],
[{ p: "شي", f: "shee" }],
[
{ p: "شیو", f: "shiyo" },
{ p: "شیو", f: "shúyo" },
{ p: "شو", f: "sho" },
],
],
},
vocative: {
masc: [
[{ p: "شیه", f: "shúya" }],
[
{ p: "شیو", f: "shúyo" },
{ p: "شو", f: "sho" },
],
],
@ -1623,6 +1980,16 @@ const nouns: {
[{ p: "رشوت خورو", f: "rishwat khwaró" }],
],
},
vocative: {
masc: [
[{ p: "رشوت خوره", f: "rishwat khóra" }],
[{ p: "رشوت خورو", f: "rishwat khwaró" }],
],
fem: [
[{ p: "رشوت خورې", f: "rishwat khwaré" }],
[{ p: "رشوت خورو", f: "rishwat khwaró" }],
],
},
},
},
];
@ -1655,7 +2022,6 @@ adjectives.forEach((word) => {
});
nouns.forEach((word) => {
// if (word.in.p !== "نبي") return;
test(`${word.in.p} should inflect properly`, () => {
expect(inflectWord(word.in)).toEqual(word.out);
});

View File

@ -15,10 +15,13 @@ import {
endsInConsonant,
endsInAaOrOo,
addOEnding,
endsInShwa,
splitPsByVarients,
removeEndTick,
endsWith,
concatPlurals,
hasShwaEnding,
mapPsString,
endsInTob,
} from "./p-text-helpers";
import { makePsString, removeFVarients } from "./accent-and-ps-utils";
import {
@ -30,7 +33,13 @@ import {
splitUpSyllables,
} from "./accent-helpers";
import * as T from "../../types";
import { fmapSingleOrLengthOpts } from "./fp-ps";
import { applyPsString, fmapSingleOrLengthOpts } from "./fp-ps";
import { getVocatives } from "./vocatives";
import {
isAdjectiveEntry,
isNumberEntry,
isPattern1Entry,
} from "./type-predicates";
const endingInSingleARegex = /[^a]'??[aá]'??$/;
const endingInHayOrAynRegex = /[^ا][هع]$/;
@ -42,14 +51,24 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
const w = removeFVarients(word);
if (w.c?.includes("doub.")) {
const words = splitDoubleWord(w);
const inflected = words.map((x) =>
ensureUnisexInflections(inflectWord(x), x)
);
// TODO: Make this work for non-unisex double words
// Right now this is an extremely bad and complex way to do this
// with ensureUnisexInflections
const inflected = words.map((x) => {
const res = inflectWord(x);
return ensureUnisexInflections(res, x);
});
const vocatives = inflected
.map((x) => "vocative" in x && x.vocative)
.filter((x) => x) as T.PluralInflections[];
return {
inflections: concatInflections(
inflected[0].inflections,
inflected[1].inflections
) as T.UnisexInflections,
...(vocatives.length
? { vocative: concatPlurals(vocatives[0], vocatives[1]) }
: {}),
};
}
if (w.c && w.c.includes("pl.")) {
@ -57,7 +76,7 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
}
if (
w.c &&
(w.c.includes("adj.") || w.c.includes("unisex") || w.c.includes("num"))
(isAdjectiveEntry(word) || w.c.includes("unisex") || isNumberEntry(word))
) {
return handleUnisexWord(w);
}
@ -77,6 +96,7 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
// TODO: !!! Handle weird endings / symbols ' etc.
const pEnd = word.p.slice(-1);
const plurals = makePlural(word);
const vocative = getVocatives(word);
if (word.noInf) {
return !plurals ? false : { ...plurals };
}
@ -86,21 +106,28 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
{ p: word.infap, f: word.infaf },
{ p: word.infbp, f: word.infbf },
]),
vocative,
...plurals,
};
}
if (pEnd === "ی" && word.f.slice(-2) === "ay") {
return { inflections: inflectRegularYayUnisex(word.p, word.f), ...plurals };
return {
inflections: inflectRegularYayUnisex(word.p, word.f),
vocative,
...plurals,
};
}
if (pEnd === "ه" && word.g.slice(-1) === "u") {
return {
inflections: inflectRegularShwaEndingUnisex(word.p, word.f),
vocative,
...plurals,
};
}
if (pEnd === "ی" && word.f.slice(-2) === "áy") {
return {
inflections: inflectEmphasizedYayUnisex(word.p, word.f),
vocative,
...plurals,
};
}
@ -113,6 +140,7 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
) {
return {
inflections: inflectConsonantEndingUnisex(word.p, word.f),
vocative,
...plurals,
};
}
@ -134,6 +162,7 @@ function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
// Get last letter of Pashto and last two letters of phonetics
// TODO: !!! Handle weird endings / symbols ' etc.
const plurals = makePlural(w);
const vocative = getVocatives(w);
if (w.noInf) {
return !plurals ? false : { ...plurals };
}
@ -145,22 +174,42 @@ function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
{ p: w.infap, f: w.infaf },
{ p: w.infbp, f: w.infbf },
]),
vocative,
...plurals,
};
}
const isTobEnding =
w.p.slice(-3) === "توب" &&
["tób", "tob"].includes(w.f.slice(-3)) &&
w.p.length > 3;
if (isTobEnding) {
return { inflections: inflectTobMasc(w.p, w.f), ...plurals };
if (endsInTob(w)) {
return { inflections: inflectTobMasc(w.p, w.f), vocative, ...plurals };
}
// TODO: stopgap before refactoring
// @ts-ignore
if (isPattern1Entry(w)) {
return {
inflections: {
masc: inflectPattern1Masc(
// @ts-ignore
makePsString(w.p, w.f)
),
},
vocative,
...plurals,
};
}
if (
pEnd === "ی" &&
(fEnd === "áy" || (fEnd === "ay" && countSyllables(w) === 1))
) {
const inflections = inflectRegularEmphasizedYayMasc(w.p, w.f);
return {
inflections,
vocative,
...plurals,
};
}
if (pEnd === "ی" && fEnd === "ay") {
return { inflections: inflectRegularYayMasc(w.p, w.f), ...plurals };
}
if (pEnd === "ی" && fEnd === "áy") {
return {
inflections: inflectRegularEmphasizedYayMasc(w.p, w.f),
inflections: inflectRegularYayMasc(w.p, w.f),
vocative,
...plurals,
};
}
@ -173,17 +222,22 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
const c = word.c || "";
const animate = c.includes("anim.");
const pEnd = word.p.slice(-1);
const vocative = getVocatives(word);
const plurals = makePlural(word);
if (word.noInf) {
return !plurals ? false : { ...plurals };
}
if (endingInHayOrAynRegex.test(word.p) && endingInSingleARegex.test(word.f)) {
return { inflections: inflectRegularAFem(word.p, word.f), ...plurals };
return {
inflections: inflectRegularAFem(word.p, word.f),
vocative,
...plurals,
};
}
if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(word.f)) {
return {
vocative,
inflections: inflectRegularAWithHimPEnding(word.p, word.f),
...plurals,
};
@ -194,20 +248,38 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
!animate
) {
return {
vocative,
inflections: inflectRegularInanMissingAFem(word.p, word.f),
...plurals,
};
}
if (pEnd === "ي" && !animate) {
return { inflections: inflectRegularInanEeFem(word.p, word.f), ...plurals };
return {
inflections: inflectRegularInanEeFem(word.p, word.f),
vocative,
...plurals,
};
}
if (pEnd === "ۍ") {
return { inflections: inflectRegularUyFem(word.p, word.f), ...plurals };
return {
inflections: inflectRegularUyFem(word.p, word.f),
vocative,
...plurals,
};
}
// if (endingInAlefRegex.test(word.p)) {
// return { inflections: inflectRegularAaFem(word.p, f) };
// }
return plurals ? { ...plurals } : false;
return plurals || vocative
? {
...(plurals ? plurals : {}),
...(vocative
? {
vocative,
}
: {}),
}
: false;
}
// LEVEL 3 FUNCTIONS
@ -309,6 +381,20 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections {
};
}
/**
 * Builds the masculine inflection set for a pattern 1 word.
 *
 * The plain form and the first inflection are the word itself. The second
 * inflection is the word with any final shwa removed and an "o" ending
 * added. For a one-syllable word the base is accented on its last syllable.
 * The ending is accented ("ó") only when the word ends in an accented "ú".
 *
 * @param e - the word to inflect
 * @returns the plain form, first inflection and second inflection
 */
function inflectPattern1Masc(e: T.PsString): T.InflectionSet {
  const dropShwa = hasShwaEnding(e);
  // the word with a final shwa, if any, cut off both script and phonetics
  const trimmed = mapPsString(
    (s: string): string => (dropShwa ? s.slice(0, -1) : s),
    e
  );
  const base = applyPsString(
    {
      f: (phonetics) =>
        countSyllables(e) === 1
          ? accentFSylsOnNFromEnd(phonetics, 0)
          : phonetics,
    },
    trimmed
  );
  // an accented shwa passes its accent on to the new ending
  const oEnding = e.f.endsWith("ú") ? "ó" : "o";
  return [[e], [e], [{ p: `${base.p}و`, f: `${base.f}${oEnding}` }]];
}
function inflectConsonantEndingUnisex(
p: string,
f: string
@ -344,13 +430,14 @@ function inflectRegularYayMasc(p: string, f: string): T.Inflections {
}
/**
 * Builds the masculine inflections for a word whose last three letters are
 * swapped out for the تابه / تبو endings (by its name and the caller's check,
 * presumably words ending in توب — confirm against callers).
 *
 * @param p the Pashto script of the word
 * @param f the phonetics of the word
 * @returns the masculine inflection rows: plain, first and second inflection
 */
function inflectTobMasc(p: string, f: string): T.Inflections {
  // Drop the last three characters of both script and phonetics and strip
  // any accents from what remains, because the endings carry their own accent.
  const base = removeAccents(
    mapPsString((x) => x.slice(0, -3), makePsString(p, f))
  );
  return {
    masc: [
      [{ p, f }],
      [{ p: `${base.p}تابه`, f: `${base.f}taabú` }],
      // the phonetics must spell out the full تبو ending ("tábo"), not just "bo"
      [{ p: `${base.p}تبو`, f: `${base.f}tábo` }],
    ],
  };
}
@ -358,6 +445,19 @@ function inflectTobMasc(p: string, f: string): T.Inflections {
function inflectRegularEmphasizedYayMasc(p: string, f: string): T.Inflections {
const baseP = p.slice(0, -1);
const baseF = f.slice(0, -2);
if (countSyllables(makePsString(p, f)) === 1) {
return {
masc: [
[{ p, f }],
[{ p: `${baseP}ي`, f: `${baseF}ee` }],
[
{ p: `${baseP}یو`, f: `${baseF}úyo` },
{ p: `${baseP}و`, f: `${baseF}o` },
],
],
};
}
return {
masc: [
[{ p, f }],
@ -576,7 +676,7 @@ function makePlural(
)
: w
);
const base = endsInShwa(b)
const base = hasShwaEnding(b)
? makePsString(b.p.slice(0, -1), b.f.slice(0, -1))
: b;
return addSecondInf(
@ -707,7 +807,7 @@ function makePlural(
}
if (
type === "masc noun" &&
(shortSquish || ((endsInConsonant(w) || endsInShwa(w)) && !w.infap)) &&
(shortSquish || ((endsInConsonant(w) || hasShwaEnding(w)) && !w.infap)) &&
w.p.slice(-3) !== "توب"
) {
return {

View File

@ -1,6 +1,6 @@
import * as T from "../../types";
import { pashtoConsonants } from "./pashto-consonants";
import { endsWith } from "./p-text-helpers";
import { endsInConsonant, endsWith, hasShwaEnding } from "./p-text-helpers";
import { countSyllables } from "./accent-helpers";
const verbTenses: T.VerbTense[] = [
@ -61,6 +61,24 @@ export function isNounOrAdjEntry(
return isNounEntry(e) || isAdjectiveEntry(e);
}
/**
 * Type guard for entries that can be inflected.
 *
 * Anything carrying an `entry` property is rejected; otherwise the entry
 * qualifies when it is a noun, an adjective or a number entry.
 *
 * @param e the entry to test
 * @returns true when `e` is a noun, adjective or number entry
 */
export function isInflectableEntry(
  e: T.Entry | T.DictionaryEntry | T.DictionaryEntryNoFVars
): e is T.InflectableEntry {
  return (
    !("entry" in e) &&
    (isNounEntry(e) || isAdjectiveEntry(e) || isNumberEntry(e))
  );
}
/**
 * Type guard for number entries.
 *
 * Anything carrying an `entry` property is rejected; otherwise the entry
 * qualifies when its `c` field is present and contains "num.".
 *
 * @param e the entry to test
 * @returns true when `e.c` contains "num."
 */
export function isNumberEntry(
  e: T.Entry | T.DictionaryEntry
): e is T.NumberEntry {
  return !("entry" in e) && !!e.c && e.c.includes("num.");
}
export function isVerbDictionaryEntry(
e: T.DictionaryEntry | T.DictionaryEntryNoFVars
): e is T.VerbDictionaryEntry {
@ -76,47 +94,41 @@ export function isVerbEntry(
return "entry" in e && isVerbDictionaryEntry(e.entry);
}
export function isMascNounEntry(
e: T.NounEntry | T.AdjectiveEntry
): e is T.MascNounEntry {
export function isMascNounEntry(e: T.InflectableEntry): e is T.MascNounEntry {
return !!e.c && e.c.includes("n. m.");
}
export function isFemNounEntry(
e: T.NounEntry | T.AdjectiveEntry
): e is T.FemNounEntry {
export function isFemNounEntry(e: T.InflectableEntry): e is T.FemNounEntry {
return !!e.c && e.c.includes("n. f.");
}
export function isUnisexNounEntry(
e: T.NounEntry | T.AdjectiveEntry
e: T.InflectableEntry
): e is T.UnisexNounEntry {
return isNounEntry(e) && e.c.includes("unisex");
}
export function isAnimNounEntry(
e: T.NounEntry | T.AdverbEntry
): e is T.AnimNounEntry {
export function isAnimNounEntry(e: T.InflectableEntry): e is T.AnimNounEntry {
return e.c.includes("anim.");
}
export function isUnisexAnimNounEntry(
e: T.NounEntry | T.AdjectiveEntry
e: T.InflectableEntry
): e is T.UnisexAnimNounEntry {
return isUnisexNounEntry(e) && isAnimNounEntry(e);
}
export function isAdjOrUnisexNounEntry(
e: T.Entry
e: T.Entry | T.InflectableEntry
): e is T.AdjectiveEntry | T.UnisexNounEntry {
return isAdjectiveEntry(e) || (isNounEntry(e) && isUnisexNounEntry(e));
}
export function isPattern(
p: T.InflectionPattern | "all"
): (entry: T.NounEntry | T.AdjectiveEntry) => boolean {
): (entry: T.InflectableEntry) => boolean {
if (p === 0) {
return (e: T.NounEntry | T.AdjectiveEntry) =>
return (e: T.InflectableEntry) =>
!isPattern1Entry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
@ -151,40 +163,27 @@ export function isPattern(
* @param e
* @returns
*/
export function isPattern1Entry<T extends T.NounEntry | T.AdjectiveEntry>(
export function isPattern1Entry<T extends T.InflectableEntry>(
e: T
): e is T.Pattern1Entry<T> {
if (e.noInf) return false;
if (e.infap) return false;
if (e.infap || e.infbp) return false;
if (isFemNounEntry(e)) {
return (
endsWith(
(endsWith(
[
{ p: "ه", f: "a" },
{ p: "ح", f: "a" },
{ p: "ع", f: "a" },
{ p: "ع", f: "a'" },
],
e
) ||
) &&
!e.p.endsWith("اع")) ||
(endsWith({ p: pashtoConsonants }, e) && !e.c.includes("anim."))
);
}
return (
endsWith([{ p: pashtoConsonants }], e) ||
endsWith(
[
{ p: "ه", f: "u" },
{ p: "ه", f: "h" },
],
e
) ||
endsWith(
[
{ p: "ای", f: "aay" },
{ p: "وی", f: "ooy" },
],
e
)
);
return endsInConsonant(e) || hasShwaEnding(e);
}
/**
@ -193,7 +192,7 @@ export function isPattern1Entry<T extends T.NounEntry | T.AdjectiveEntry>(
* @param e
* @returns
*/
export function isPattern2Entry<T extends T.NounEntry | T.AdjectiveEntry>(
export function isPattern2Entry<T extends T.InflectableEntry>(
e: T
): e is T.Pattern2Entry<T> {
if (e.noInf) return false;
@ -211,7 +210,7 @@ export function isPattern2Entry<T extends T.NounEntry | T.AdjectiveEntry>(
* @param e
* @returns
*/
export function isPattern3Entry<T extends T.NounEntry | T.AdjectiveEntry>(
export function isPattern3Entry<T extends T.InflectableEntry>(
e: T
): e is T.Pattern3Entry<T> {
if (e.noInf) return false;
@ -230,7 +229,7 @@ export function isPattern3Entry<T extends T.NounEntry | T.AdjectiveEntry>(
* @param e
* @returns
*/
export function isPattern4Entry<T extends T.NounEntry | T.AdjectiveEntry>(
export function isPattern4Entry<T extends T.InflectableEntry>(
e: T
): e is T.Pattern4Entry<T> {
if (e.noInf) return false;
@ -247,7 +246,7 @@ export function isPattern4Entry<T extends T.NounEntry | T.AdjectiveEntry>(
* @param e
* @returns
*/
export function isPattern5Entry<T extends T.NounEntry | T.AdjectiveEntry>(
export function isPattern5Entry<T extends T.InflectableEntry>(
e: T
): e is T.Pattern5Entry<T> {
if (e.noInf) return false;
@ -259,7 +258,7 @@ export function isPattern5Entry<T extends T.NounEntry | T.AdjectiveEntry>(
}
export function isPattern6FemEntry(
e: T.NounEntry | T.AdjectiveEntry
e: T.InflectableEntry
): e is T.Pattern6FemEntry<T.FemNounEntry> {
if (!isFemNounEntry(e)) return false;
if (e.c.includes("anim.")) return false;

310
src/lib/src/vocatives.ts Normal file
View File

@ -0,0 +1,310 @@
import * as T from "../../types";
import { makePsString } from "./accent-and-ps-utils";
import {
accentIsOnEnd,
accentOnNFromEnd,
countSyllables,
removeAccents,
} from "./accent-helpers";
import { applyPsString, mapGen } from "./fp-ps";
import { getInflectionPattern } from "./inflection-pattern";
import {
endsInConsonant,
endsInTob,
hasShwaEnding,
mapPsString,
endsWith,
} from "./p-text-helpers";
import {
isAdjOrUnisexNounEntry,
isAnimNounEntry,
isFemNounEntry,
isInflectableEntry,
isMascNounEntry,
isNounEntry,
isNumberEntry,
} from "./type-predicates";
/**
 * Produces the vocative forms (singular row and plural row) for an entry.
 *
 * - Non-inflectable entries yield `undefined`.
 * - Feminine animate nouns of pattern 0 that end in a consonant are handled
 *   by `vocFemAnimException`.
 * - All other entries of pattern 0 or 6 yield `undefined`.
 * - Otherwise the builders registered in `patternFuncs` for the entry's
 *   pattern are used: masculine only, feminine only, or both for
 *   adjectives, unisex nouns and numbers.
 *
 * @param e the dictionary entry to build vocatives for
 * @returns the vocative forms keyed by gender, or `undefined` if there are none
 */
export function getVocatives(
  e: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined {
  if (!isInflectableEntry(e)) {
    return undefined;
  }
  const entry: T.InflectableEntry = e;
  const pattern = getInflectionPattern(entry);
  if (
    pattern === 0 &&
    isFemNounEntry(e) &&
    isAnimNounEntry(e) &&
    endsInConsonant(e)
  ) {
    return vocFemAnimException(e);
  }

  let gender: T.Gender | "unisex";
  if (isAdjOrUnisexNounEntry(entry) || isNumberEntry(entry)) {
    gender = "unisex";
  } else if (isMascNounEntry(entry)) {
    gender = "masc";
  } else {
    gender = "fem";
  }

  if (pattern === 0 || pattern === 6) {
    return undefined;
  }
  const { masc: buildMasc, fem: buildFem } = patternFuncs[pattern];
  switch (gender) {
    case "masc":
      return { masc: buildMasc(e) };
    case "fem":
      return { fem: buildFem(e) };
    case "unisex":
      return { masc: buildMasc(e), fem: buildFem(e) };
  }
}
/** Signature shared by every per-pattern vocative builder. */
type VocativeBuilder = (e: T.DictionaryEntryNoFVars) => T.PluralInflectionSet;

/**
 * Lookup table from inflection pattern (1-5) and gender to the function
 * that builds the vocative forms for that combination.
 */
const patternFuncs: Record<
  1 | 2 | 3 | 4 | 5,
  Record<T.Gender, VocativeBuilder>
> = {
  1: { masc: vocPattern1Masc, fem: vocPattern1Fem },
  2: { masc: vocPattern2Masc, fem: vocPattern2Fem },
  3: { masc: vocPattern3Masc, fem: vocPattern3Fem },
  4: { masc: vocPattern4Masc, fem: vocPattern4Fem },
  5: { masc: vocPattern5Masc, fem: vocPattern5Fem },
};
/**
 * Vocatives for the feminine animate exception nouns.
 *
 * The singular vocative is the word itself (accented first when it has a
 * single syllable) with ې / "e" appended. The plural vocative is the
 * entry's recorded plural (`ppp` / `ppf`) minus its last character, with
 * و / "o" appended.
 *
 * @param e the noun entry; it must carry both `ppp` and `ppf`
 * @returns the feminine vocative forms
 * @throws Error when the entry has no plural recorded
 */
function vocFemAnimException(e: T.NounEntry): T.PluralInflections {
  const { ppp, ppf } = e;
  if (!ppp || !ppf) {
    throw new Error(
      `plural missing for feminine animate exception noun ${e.p}`
    );
  }
  // TODO: HANDLE BETTER WITH PLURALS!
  const pluralStem = mapPsString(
    (s) => s.slice(0, -1),
    makePsString(ppp, ppf)
  );
  const singularStem =
    countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f);
  const singular = { p: `${singularStem.p}ې`, f: `${singularStem.f}e` };
  const plural = { p: `${pluralStem.p}و`, f: `${pluralStem.f}o` };
  return {
    fem: [[singular], [plural]],
  };
}
/**
 * Vocatives for pattern 1 masculine words.
 *
 * Noun entries for which `endsInTob` holds get ه / "a" appended to the whole
 * word for the singular, and their last three characters replaced by
 * تبو / "tábo" for the plural.
 *
 * For everything else the stem is the word minus its final letter when it
 * has a shwa ending (accented first when the word has one syllable). The
 * endings are ه and و, phonetically accented ("á" / "ó") only when the word
 * has a shwa ending and its phonetics end in "ú".
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern1Masc(
  e: T.DictionaryEntryNoFVars | T.NounEntry
): T.PluralInflectionSet {
  if (isNounEntry(e) && endsInTob(e)) {
    const tobStem = mapPsString((s) => s.slice(0, -3), e);
    return [
      [{ p: `${e.p}ه`, f: `${e.f}a` }],
      [{ p: `${tobStem.p}تبو`, f: `${tobStem.f}tábo` }],
    ];
  }
  const dropsShwa = hasShwaEnding(e);
  const trimmed = mapPsString(
    (s: string): string => (dropsShwa ? s.slice(0, -1) : s),
    e
  );
  const stem = mapGen(
    (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
    trimmed
  );
  const stressedEnding = dropsShwa && e.f.endsWith("ú");
  return [
    [{ p: `${stem.p}ه`, f: `${stem.f}${stressedEnding ? "á" : "a"}` }],
    [{ p: `${stem.p}و`, f: `${stem.f}${stressedEnding ? "ó" : "o"}` }],
  ];
}
/**
 * Vocatives for pattern 1 feminine words.
 *
 * Every branch attaches ې to the singular and و to the plural; the branches
 * differ only in how the stem is cut and whether the phonetic endings are
 * accented ("é" / "ó") or plain ("e" / "o"):
 *
 * - words ending in ع with "a" or "a'": the script is kept whole, the
 *   phonetics lose the final "a" (and the apostrophe, if present);
 * - words ending in ح with "a": the script is kept whole, the phonetics lose
 *   the final "a";
 * - all others: the final letter is dropped when the word ends in ه / "a" or
 *   has a shwa ending, and one-syllable words are accented.
 *
 * For the ع / ح branches the endings are accented when the accent is on the
 * end of the word; in the general case that additionally requires a
 * feminine ending.
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern1Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const attachEndings = (
    stem: { p: string; f: string },
    stressed: boolean
  ): T.PluralInflectionSet => [
    [{ p: `${stem.p}ې`, f: `${stem.f}${stressed ? "é" : "e"}` }],
    [{ p: `${stem.p}و`, f: `${stem.f}${stressed ? "ó" : "o"}` }],
  ];

  const shwaEnding = hasShwaEnding(e);
  const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding;
  const generalStem = mapGen(
    (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
    hasFemEnding
      ? mapPsString((x) => x.slice(0, -1), e)
      : makePsString(e.p, e.f)
  );

  const endsInAyn = endsWith(
    [
      { p: "ع", f: "a" },
      { p: "ع", f: "a'" },
    ],
    e
  );
  if (endsInAyn) {
    const aynStem = applyPsString(
      {
        f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1),
      },
      e
    );
    return attachEndings(aynStem, accentIsOnEnd(e));
  }

  if (endsWith([{ p: "ح", f: "a" }], e)) {
    const heStem = applyPsString(
      {
        f: (f) => f.slice(0, -1),
      },
      e
    );
    return attachEndings(heStem, accentIsOnEnd(e));
  }

  return attachEndings(generalStem, hasFemEnding && accentIsOnEnd(e));
}
/**
 * Vocatives for pattern 2 masculine words.
 *
 * The stem is the script minus its last letter and the phonetics minus their
 * last two characters. Singular: stem + یه / "iya". Plural: stem + یو / "iyo"
 * and the shorter stem + و / "o".
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern2Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const { p: stemP, f: stemF } = makePsString(
    e.p.slice(0, -1),
    e.f.slice(0, -2)
  );
  const singular = { p: `${stemP}یه`, f: `${stemF}iya` };
  const longPlural = { p: `${stemP}یو`, f: `${stemF}iyo` };
  const shortPlural = { p: `${stemP}و`, f: `${stemF}o` };
  return [[singular], [longPlural, shortPlural]];
}
/**
 * Vocatives for pattern 2 feminine words.
 *
 * The stem is the script minus its last letter; the phonetics lose two
 * characters when they end in "ay" and one character otherwise. Singular:
 * stem + ې / "e". Plural: stem + یو / "iyo" and the shorter stem + و / "o".
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern2Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const phoneticCut = e.f.endsWith("ay") ? -2 : -1;
  const { p: stemP, f: stemF } = makePsString(
    e.p.slice(0, -1),
    e.f.slice(0, phoneticCut)
  );
  const singular = { p: `${stemP}ې`, f: `${stemF}e` };
  const longPlural = { p: `${stemP}یو`, f: `${stemF}iyo` };
  const shortPlural = { p: `${stemP}و`, f: `${stemF}o` };
  return [[singular], [longPlural, shortPlural]];
}
/**
 * Vocatives for pattern 3 masculine words.
 *
 * The stem is the script minus its last letter and the phonetics minus their
 * last two characters, with accents removed. Singular: stem + یه / "úya".
 * Plural: stem + یو / "úyo" and the shorter stem + و, whose phonetic ending
 * is accented ("ó") only when the stem itself still contains a syllable.
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern3Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const stem = makePsString(
    e.p.slice(0, -1),
    // shouldn't be accents here but remove just to be sure
    removeAccents(e.f.slice(0, -2))
  );
  const shortPluralEnding = countSyllables(stem) !== 0 ? "ó" : "o";
  const singular = { p: `${stem.p}یه`, f: `${stem.f}úya` };
  const longPlural = { p: `${stem.p}یو`, f: `${stem.f}úyo` };
  const shortPlural = { p: `${stem.p}و`, f: `${stem.f}${shortPluralEnding}` };
  return [[singular], [longPlural, shortPlural]];
}
/**
 * Vocatives for pattern 3 feminine words.
 *
 * The stem is the script minus its last letter and the phonetics minus their
 * last two characters, with accents removed. Singular: stem + ۍ / "úy".
 * Plural: stem + یو / "úyo" and the shorter stem + و / "ó".
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern3Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const stem = makePsString(
    e.p.slice(0, -1),
    // shouldn't be accents here but remove just to be sure
    removeAccents(e.f.slice(0, -2))
  );
  // TODO: This works well for unisex nouns/adjs but would be redundant for fem. nouns?
  const singular = { p: `${stem.p}ۍ`, f: `${stem.f}úy` };
  const longPlural = { p: `${stem.p}یو`, f: `${stem.f}úyo` };
  const shortPlural = { p: `${stem.p}و`, f: `${stem.f}ó` };
  return [[singular], [longPlural, shortPlural]];
}
/**
 * Vocatives for pattern 4 masculine words.
 *
 * The plural is always the entry's `infbp` / `infbf` base (empty strings when
 * absent) + و / "ó". The singular depends on the word's ending:
 *
 * - consonant ending: the word (accented when one syllable) + ه / "a";
 * - shwa ending: the word minus its final letter + ه / "á";
 * - anything else: the word unchanged.
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern4Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const singularBase = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e;
  const pluralBase = makePsString(e.infbp || "", e.infbf || "");
  const plural = { p: `${pluralBase.p}و`, f: `${pluralBase.f}ó` };

  if (endsInConsonant(e)) {
    return [
      [{ p: `${singularBase.p}ه`, f: `${singularBase.f}a` }],
      [plural],
    ];
  }
  // TODO: is this even possible?
  if (hasShwaEnding(e)) {
    return [
      [
        {
          p: `${singularBase.p.slice(0, -1)}ه`,
          f: `${singularBase.f.slice(0, -1)}á`,
        },
      ],
      [plural],
    ];
  }
  // exception for مېلمه, کوربه
  return [[{ p: e.p, f: e.f }], [plural]];
}
/**
 * Vocatives for pattern 4 feminine words: the entry's `infbp` / `infbf` base
 * (empty strings when absent) + ې / "é" for the singular and + و / "ó" for
 * the plural.
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern4Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const { p: stemP, f: stemF } = makePsString(e.infbp || "", e.infbf || "");
  const singular = { p: `${stemP}ې`, f: `${stemF}é` };
  const plural = { p: `${stemP}و`, f: `${stemF}ó` };
  return [[singular], [plural]];
}
/**
 * Vocatives for pattern 5 masculine words: the entry's `infbp` / `infbf` base
 * (empty strings when absent) + ه / "a" for the singular and + و / "o" for
 * the plural.
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern5Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const { p: stemP, f: stemF } = makePsString(e.infbp || "", e.infbf || "");
  const singular = { p: `${stemP}ه`, f: `${stemF}a` };
  const plural = { p: `${stemP}و`, f: `${stemF}o` };
  return [[singular], [plural]];
}
/**
 * Vocatives for pattern 5 feminine words: the entry's `infbp` / `infbf` base
 * (empty strings when absent) + ې / "e" for the singular and + و / "o" for
 * the plural.
 *
 * @param e the entry to build vocatives for
 * @returns `[singular vocatives, plural vocatives]`
 */
function vocPattern5Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
  const { p: stemP, f: stemF } = makePsString(e.infbp || "", e.infbf || "");
  const singular = { p: `${stemP}ې`, f: `${stemF}e` };
  const plural = { p: `${stemP}و`, f: `${stemF}o` };
  return [[singular], [plural]];
}

View File

@ -496,15 +496,18 @@ export type InflectorOutput =
plural?: PluralInflections;
bundledPlural?: PluralInflections;
inflections?: Inflections;
vocative?: PluralInflections;
}
| {
plural: PluralInflections;
arabicPlural?: PluralInflections;
bundledPlural?: PluralInflections;
inflections?: Inflections;
vocative?: PluralInflections;
}
| {
inflections: Inflections;
inflections?: Inflections;
vocative?: PluralInflections;
}
| false;
@ -627,9 +630,17 @@ export type DisplayFormSubgroup = {
export type AayTail = "ey" | "aay";
/**
 * Union of the entry kinds accepted as input by the inflection-related
 * type predicates (noun, adjective, number and adverb entries).
 */
export type InflectableEntry =
| NounEntry
| AdjectiveEntry
| NumberEntry
| AdverbEntry;
/** A dictionary entry with a required `c` string, branded as a noun entry. */
export type NounEntry = DictionaryEntry & { c: string } & {
__brand: "a noun entry";
};
/** A dictionary entry with a required `c` string, branded as a number entry. */
export type NumberEntry = DictionaryEntry & { c: string } & {
__brand: "a number entry";
};
// Further brands layered on top of NounEntry; each uses its own brand key
// (`__brand2`, `__brand3`), so the gender and animacy brands can be combined.
export type MascNounEntry = NounEntry & { __brand2: "a masc noun entry" };
export type FemNounEntry = NounEntry & { __brand2: "a fem noun entry" };
export type AnimNounEntry = NounEntry & { __brand3: "a anim noun entry" };