add plurals into vocatives

This commit is contained in:
adueck 2024-07-25 16:08:41 -04:00
parent b9269b8559
commit 0861c03c82
8 changed files with 228 additions and 80 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{
"name": "pashto-inflector",
"version": "7.2.0",
"version": "7.2.1",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "pashto-inflector",
"version": "7.2.0",
"version": "7.2.1",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "pashto-inflector",
"version": "7.2.0",
"version": "7.2.1",
"author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com",

View File

@ -1,12 +1,12 @@
{
"name": "@lingdocs/ps-react",
"version": "7.2.0",
"version": "7.2.1",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@lingdocs/ps-react",
"version": "7.2.0",
"version": "7.2.1",
"license": "MIT",
"dependencies": {
"@formkit/auto-animate": "^1.0.0-beta.3",

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/ps-react",
"version": "7.2.0",
"version": "7.2.1",
"description": "Pashto inflector library module with React components",
"main": "dist/components/library.js",
"module": "dist/components/library.js",

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/inflect",
"version": "7.2.0",
"version": "7.2.1",
"description": "Pashto inflector library",
"main": "dist/index.js",
"types": "dist/lib/library.d.ts",

View File

@ -609,6 +609,7 @@ const nouns: {
[
{ p: "ترورزیو", f: "trorzúyo" },
{ p: "ترورزو", f: "trorzó" },
{ p: "ترورزامنو", f: "trorzaamuno" },
],
],
fem: [
@ -652,8 +653,20 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "چرګه", f: "chúrga" }], [{ p: "چرګو", f: "chúrgo" }]],
fem: [[{ p: "چرګې", f: "chúrge" }], [{ p: "چرګو", f: "chúrgo" }]],
masc: [
[{ p: "چرګه", f: "chúrga" }],
[
{ p: "چرګو", f: "chúrgo" },
{ p: "چرګانو", f: "churgáano" },
],
],
fem: [
[{ p: "چرګې", f: "chúrge" }],
[
{ p: "چرګو", f: "chúrgo" },
{ p: "چرګانو", f: "churgáano" },
],
],
},
plural: {
masc: [
@ -706,6 +719,7 @@ const nouns: {
[
{ p: "پلویو", f: "palawúyo" },
{ p: "پلوو", f: "palawó" },
{ p: "پلویانو", f: "palawiyáano" },
],
],
fem: [
@ -713,6 +727,7 @@ const nouns: {
[
{ p: "پلویو", f: "palawúyo" },
{ p: "پلوو", f: "palawó" },
{ p: "پلویانو", f: "palawiyáano" },
],
],
},
@ -793,6 +808,7 @@ const nouns: {
[
{ p: "سیلانیو", f: "saylaanúyo" },
{ p: "سیلانو", f: "saylaanó" },
{ p: "سیلانیانو", f: "saylaaniyáano" },
],
],
},
@ -892,7 +908,10 @@ const nouns: {
vocative: {
masc: [
[{ p: "لمونځه", f: "lamóondza" }],
[{ p: "لمنځو", f: "lamandzó" }],
[
{ p: "لمنځو", f: "lamandzó" },
{ p: "لمونځونو", f: "lamoondzóono" },
],
],
},
plural: {
@ -927,7 +946,13 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "غره", f: "ghra" }], [{ p: "غرو", f: "ghro" }]],
masc: [
[{ p: "غره", f: "ghra" }],
[
{ p: "غرو", f: "ghro" },
{ p: "غرونو", f: "ghróono" },
],
],
},
plural: {
masc: [[{ p: "غرونه", f: "ghróona" }], [{ p: "غرونو", f: "ghróono" }]],
@ -1024,7 +1049,13 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "زړه", f: "zRa" }], [{ p: "زړو", f: "zRo" }]],
masc: [
[{ p: "زړه", f: "zRa" }],
[
{ p: "زړو", f: "zRo" },
{ p: "زړونو", f: "zRóono" },
],
],
},
plural: {
masc: [[{ p: "زړونه", f: "zRóona" }], [{ p: "زړونو", f: "zRóono" }]],
@ -1166,7 +1197,13 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "کتابه", f: "kitaaba" }], [{ p: "کتابو", f: "kitaabo" }]],
masc: [
[{ p: "کتابه", f: "kitaaba" }],
[
{ p: "کتابو", f: "kitaabo" },
{ p: "کتابونو", f: "kitaabóono" },
],
],
},
plural: {
masc: [
@ -1200,7 +1237,13 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "غاښه", f: "gháaxa" }], [{ p: "غاښو", f: "gháaxo" }]],
masc: [
[{ p: "غاښه", f: "gháaxa" }],
[
{ p: "غاښو", f: "gháaxo" },
{ p: "غاښونو", f: "ghaaxóono" },
],
],
},
plural: {
masc: [
@ -1234,7 +1277,13 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "واده", f: "waadá" }], [{ p: "وادو", f: "waadó" }]],
masc: [
[{ p: "واده", f: "waadá" }],
[
{ p: "وادو", f: "waadó" },
{ p: "ودونو", f: "wadóono" },
],
],
},
plural: {
masc: [[{ p: "ودونه", f: "wadóona" }], [{ p: "ودونو", f: "wadóono" }]],
@ -1260,7 +1309,13 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "کارګه", f: "kaargá" }], [{ p: "کارګو", f: "kaargó" }]],
masc: [
[{ p: "کارګه", f: "kaargá" }],
[
{ p: "کارګو", f: "kaargó" },
{ p: "کارګانو", f: "kaargáano" },
],
],
},
plural: {
masc: [
@ -1289,7 +1344,13 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "لوه", f: "láwa" }], [{ p: "لوو", f: "láwo" }]],
masc: [
[{ p: "لوه", f: "láwa" }],
[
{ p: "لوو", f: "láwo" },
{ p: "لوونو", f: "lawóono" },
],
],
},
plural: {
masc: [[{ p: "لوونه", f: "lawóona" }], [{ p: "لوونو", f: "lawóono" }]],
@ -1369,7 +1430,13 @@ const nouns: {
],
},
vocative: {
fem: [[{ p: "مرجعې", f: "marje" }], [{ p: "مرجعو", f: "marjo" }]],
fem: [
[{ p: "مرجعې", f: "marje" }],
[
{ p: "مرجعو", f: "marjo" },
{ p: "مراجو", f: "maraajó" },
],
],
},
arabicPlural: {
fem: [[{ p: "مراجع", f: "maraají'" }], [{ p: "مراجو", f: "maraajó" }]],
@ -1397,7 +1464,13 @@ const nouns: {
],
},
vocative: {
fem: [[{ p: "منبعې", f: "manbé" }], [{ p: "منبعو", f: "manbó" }]],
fem: [
[{ p: "منبعې", f: "manbé" }],
[
{ p: "منبعو", f: "manbó" },
{ p: "منابو", f: "manaabó" },
],
],
},
arabicPlural: {
fem: [[{ p: "منابع", f: "manaabí" }], [{ p: "منابو", f: "manaabó" }]],
@ -1472,7 +1545,11 @@ const nouns: {
vocative: {
masc: [
[{ p: "تبلیغه", f: "tabléegha" }],
[{ p: "تبلیغو", f: "tabléegho" }],
[
{ p: "تبلیغو", f: "tabléegho" },
{ p: "تبلیغونو", f: "tableeghóono" },
{ p: "تبلیغاتو", f: "tableegháato" },
],
],
},
plural: {
@ -1605,7 +1682,14 @@ const nouns: {
],
},
vocative: {
masc: [[{ p: "حاله", f: "háala" }], [{ p: "حالو", f: "háalo" }]],
masc: [
[{ p: "حاله", f: "háala" }],
[
{ p: "حالو", f: "háalo" },
{ p: "حالونو", f: "haalóono" },
{ p: "احوالو", f: "ahwáalo" },
],
],
},
plural: {
masc: [
@ -1801,6 +1885,7 @@ const nouns: {
[
{ p: "قاضیو", f: "qaazúyo" },
{ p: "قاضو", f: "qaazó" },
{ p: "قاضیانو", f: "qaaziyáano" },
],
],
},
@ -1920,6 +2005,8 @@ const nouns: {
[
{ p: "شیو", f: "shúyo" },
{ p: "شو", f: "sho" },
{ p: "شیانو", f: "shayáano" },
{ p: "شیونو", f: "shayóono" },
],
],
},

View File

@ -96,7 +96,7 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
// TODO: !!! Handle weird endings / symbols ' etc.
const pEnd = word.p.slice(-1);
const plurals = makePlural(word);
const vocative = getVocatives(word);
const vocative = getVocatives(word, plurals);
if (word.noInf) {
return !plurals ? false : { ...plurals };
}
@ -162,7 +162,7 @@ function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
// Get last letter of Pashto and last two letters of phonetics
// TODO: !!! Handle weird endings / symbols ' etc.
const plurals = makePlural(w);
const vocative = getVocatives(w);
const vocative = getVocatives(w, plurals);
if (w.noInf) {
return !plurals ? false : { ...plurals };
}
@ -222,8 +222,8 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
const c = word.c || "";
const animate = c.includes("anim.");
const pEnd = word.p.slice(-1);
const vocative = getVocatives(word);
const plurals = makePlural(word);
const vocative = getVocatives(word, plurals);
if (word.noInf) {
return !plurals ? false : { ...plurals };
}

View File

@ -15,6 +15,7 @@ import {
mapPsString,
endsWith,
} from "./p-text-helpers";
import { removeDuplicates } from "./phrase-building/vp-tools";
import {
isAdjOrUnisexNounEntry,
isAnimNounEntry,
@ -25,26 +26,35 @@ import {
isNumberEntry,
} from "./type-predicates";
// Plural forms that have already been generated for an entry and are handed
// to getVocatives so they can be merged into the vocative plural forms.
// `undefined` is accepted; genderPlural treats it as "no plurals".
type Plurals =
| {
// plural inflections stored under the `plural` key, if any
plural?: T.PluralInflections;
// plural inflections stored under the `arabicPlural` key, if any
arabicPlural?: T.PluralInflections;
}
| undefined;
export function getVocatives(
e: T.DictionaryEntryNoFVars
entry: T.DictionaryEntryNoFVars,
plurals: Plurals
): T.PluralInflections | undefined {
if (!isInflectableEntry(e)) {
if (!isInflectableEntry(entry)) {
return undefined;
}
const entry: T.InflectableEntry = e;
const pattern = getInflectionPattern(entry);
// @ts-ignore
const e: T.InflectableEntry = entry as T.InflectableEntry;
const pattern = getInflectionPattern(e);
if (
pattern === 0 &&
isFemNounEntry(e) &&
isAnimNounEntry(e) &&
endsInConsonant(e)
) {
return vocFemAnimException(e);
return vocFemAnimException({ e, plurals: genderPlural("fem", plurals) });
}
const gender: T.Gender | "unisex" =
isAdjOrUnisexNounEntry(entry) || isNumberEntry(entry)
isAdjOrUnisexNounEntry(e) || isNumberEntry(e)
? "unisex"
: isMascNounEntry(entry)
: isMascNounEntry(e)
? "masc"
: "fem";
if (pattern === 0 || pattern === 6) {
@ -53,25 +63,30 @@ export function getVocatives(
const funcs = patternFuncs[pattern];
if (gender === "masc") {
return {
masc: funcs.masc(e),
masc: funcs.masc({ e, plurals: genderPlural("masc", plurals) }),
};
}
if (gender === "fem") {
return {
fem: funcs.fem(e),
fem: funcs.fem({ e, plurals: genderPlural("fem", plurals) }),
};
}
if (gender === "unisex") {
return {
masc: funcs.masc(e),
fem: funcs.fem(e),
masc: funcs.masc({ e, plurals: genderPlural("masc", plurals) }),
fem: funcs.fem({ e, plurals: genderPlural("fem", plurals) }),
};
}
}
// Single argument object accepted by every vocative pattern function.
type PatternInput = {
// the entry whose vocative forms are being built
e: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry;
// extra plural forms to append to the vocative plural; getVocatives fills
// this with the result of genderPlural for the gender being built
plurals: T.PsString[];
};
const patternFuncs: Record<
1 | 2 | 3 | 4 | 5,
Record<T.Gender, (e: T.DictionaryEntryNoFVars) => T.PluralInflectionSet>
Record<T.Gender, (inp: PatternInput) => T.PluralInflectionSet>
> = {
1: {
masc: vocPattern1Masc,
@ -95,7 +110,20 @@ const patternFuncs: Record<
},
};
function vocFemAnimException(e: T.NounEntry): T.PluralInflections {
/**
 * Appends extra plural forms to a list of vocative plural forms.
 *
 * @param e - the vocative plural forms produced by a pattern function
 * @param plurals - additional plural forms to append after `e`
 * @returns `e` itself when `plurals` is falsy; otherwise the forms of `e`
 * followed by `plurals`, passed through `removeDuplicates` (presumably so a
 * form present in both lists is only listed once)
 */
function addPlurals(
  e: T.ArrayOneOrMore<T.PsString>,
  plurals: T.PsString[]
): T.ArrayOneOrMore<T.PsString> {
  if (plurals) {
    const combined = [...e, ...plurals];
    // `e` always contributes at least one form, so the merged list is
    // asserted back to the one-or-more array type
    return removeDuplicates(combined) as T.ArrayOneOrMore<T.PsString>;
  }
  return e;
}
function vocFemAnimException({
e,
plurals,
}: PatternInput): T.PluralInflections {
if (!e.ppp || !e.ppf) {
throw new Error(
"plural missing for feminine animate exception noun " + e.p
@ -111,19 +139,17 @@ function vocFemAnimException(e: T.NounEntry): T.PluralInflections {
return {
fem: [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
[{ p: `${plurBase.p}و`, f: `${plurBase.f}o` }],
addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}o` }], plurals),
],
};
}
function vocPattern1Masc(
e: T.DictionaryEntryNoFVars | T.NounEntry
): T.PluralInflectionSet {
function vocPattern1Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
if (isNounEntry(e) && endsInTob(e)) {
const base = mapPsString((x) => x.slice(0, -3), e);
return [
[{ p: `${e.p}ه`, f: `${e.f}a` }],
[{ p: `${base.p}تبو`, f: `${base.f}tábo` }],
addPlurals([{ p: `${base.p}تبو`, f: `${base.f}tábo` }], plurals),
];
}
const shwaEnding = hasShwaEnding(e);
@ -134,16 +160,16 @@ function vocPattern1Masc(
if (shwaEnding && e.f.endsWith("ú")) {
return [
[{ p: `${base.p}ه`, f: `${base.f}á` }],
[{ p: `${base.p}و`, f: `${base.f}ó` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals),
];
}
return [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
[{ p: `${base.p}و`, f: `${base.f}o` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals),
];
}
function vocPattern1Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern1Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
const shwaEnding = hasShwaEnding(e);
const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding;
const base = mapGen(
@ -170,12 +196,12 @@ function vocPattern1Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
if (accentIsOnEnd(e)) {
return [
[{ p: `${base.p}ې`, f: `${base.f}é` }],
[{ p: `${base.p}و`, f: `${base.f}ó` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals),
];
}
return [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
[{ p: `${base.p}و`, f: `${base.f}o` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals),
];
}
if (endsWith([{ p: "ح", f: "a" }], e)) {
@ -188,52 +214,58 @@ function vocPattern1Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
if (accentIsOnEnd(e)) {
return [
[{ p: `${base.p}ې`, f: `${base.f}é` }],
[{ p: `${base.p}و`, f: `${base.f}ó` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals),
];
}
return [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
[{ p: `${base.p}و`, f: `${base.f}o` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals),
];
}
if (hasFemEnding && accentIsOnEnd(e)) {
return [
[{ p: `${base.p}ې`, f: `${base.f}é` }],
[{ p: `${base.p}و`, f: `${base.f}ó` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals),
];
}
return [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
[{ p: `${base.p}و`, f: `${base.f}o` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals),
];
}
/**
 * Builds the masculine vocative forms for a pattern 2 entry.
 *
 * The base is the entry with its last Pashto character and its last two
 * phonetic characters removed. The singular is base + "یه"/"iya"; the plural
 * offers base + "یو"/"iyo" and base + "و"/"o", with the supplied `plurals`
 * appended through addPlurals.
 */
function vocPattern2Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern2Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
const base = makePsString(e.p.slice(0, -1), e.f.slice(0, -2));
return [
[{ p: `${base.p}یه`, f: `${base.f}iya` }],
[
{ p: `${base.p}یو`, f: `${base.f}iyo` },
{ p: `${base.p}و`, f: `${base.f}o` },
],
addPlurals(
[
{ p: `${base.p}یو`, f: `${base.f}iyo` },
{ p: `${base.p}و`, f: `${base.f}o` },
],
plurals
),
];
}
/**
 * Builds the feminine vocative forms for a pattern 2 entry.
 *
 * The singular is base + "ې"/"e"; the plural offers base + "یو"/"iyo" and
 * base + "و"/"o", with the supplied `plurals` appended through addPlurals.
 */
function vocPattern2Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern2Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
const base = makePsString(
e.p.slice(0, -1),
// strip two phonetic characters when the entry ends in "ay", otherwise one
e.f.slice(0, e.f.endsWith("ay") ? -2 : -1)
);
return [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
[
{ p: `${base.p}یو`, f: `${base.f}iyo` },
{ p: `${base.p}و`, f: `${base.f}o` },
],
addPlurals(
[
{ p: `${base.p}یو`, f: `${base.f}iyo` },
{ p: `${base.p}و`, f: `${base.f}o` },
],
plurals
),
];
}
function vocPattern3Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern3Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
const base = makePsString(
e.p.slice(0, -1),
// shouldn't be accents here but remove just to be sure
@ -242,14 +274,17 @@ function vocPattern3Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
const baseSyls = countSyllables(base);
return [
[{ p: `${base.p}یه`, f: `${base.f}úya` }],
[
{ p: `${base.p}یو`, f: `${base.f}úyo` },
{ p: `${base.p}و`, f: `${base.f}${baseSyls ? "ó" : "o"}` },
],
addPlurals(
[
{ p: `${base.p}یو`, f: `${base.f}úyo` },
{ p: `${base.p}و`, f: `${base.f}${baseSyls ? "ó" : "o"}` },
],
plurals
),
];
}
function vocPattern3Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern3Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
const base = makePsString(
e.p.slice(0, -1),
// shouldn't be accents here but remove just to be sure
@ -258,53 +293,79 @@ function vocPattern3Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
// TODO: This works well for unisex nouns/adjs but would be redundant for fem. nouns?
return [
[{ p: `${base.p}ۍ`, f: `${base.f}úy` }],
[
{ p: `${base.p}یو`, f: `${base.f}úyo` },
{ p: `${base.p}و`, f: `${base.f}ó` },
],
addPlurals(
[
{ p: `${base.p}یو`, f: `${base.f}úyo` },
{ p: `${base.p}و`, f: `${base.f}ó` },
],
plurals
),
];
}
/**
 * Builds the masculine vocative forms for a pattern 4 entry.
 *
 * The plural is always built from the entry's `infbp`/`infbf` base (empty
 * strings when those fields are missing) + "و"/"ó". The singular depends on
 * the ending: consonant-final entries get "ه"/"a" appended, shwa-final
 * entries have their last character replaced by "ه"/"á", and anything else
 * is returned unchanged.
 */
function vocPattern4Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern4Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
// one-syllable entries go through accentOnNFromEnd(e, 0) first
// (presumably putting the accent on that syllable - confirm in the helper)
const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e;
const plurBase = makePsString(e.infbp || "", e.infbf || "");
if (endsInConsonant(e)) {
return [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
[{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }],
addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }], plurals),
];
}
// TODO: is this even possible?
if (hasShwaEnding(e)) {
return [
[{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }],
[{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }],
addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }], plurals),
];
}
// exception for مېلمه, کوربه
// NOTE(review): this is the only return in the function that does not pass
// its plural through addPlurals, so any supplied `plurals` are dropped for
// these exception words - confirm whether that is intentional.
return [[{ p: e.p, f: e.f }], [{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }]];
}
/**
 * Builds the feminine vocative forms for a pattern 4 entry.
 *
 * Both forms are built on the entry's `infbp`/`infbf` base (empty strings
 * when those fields are missing): singular base + "ې"/"é", plural
 * base + "و"/"ó" with the supplied `plurals` appended through addPlurals.
 */
function vocPattern4Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern4Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
const base = makePsString(e.infbp || "", e.infbf || "");
return [
[{ p: `${base.p}ې`, f: `${base.f}é` }],
[{ p: `${base.p}و`, f: `${base.f}ó` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals),
];
}
/**
 * Builds the masculine vocative forms for a pattern 5 entry.
 *
 * Both forms are built on the entry's `infbp`/`infbf` base (empty strings
 * when those fields are missing): singular base + "ه"/"a", plural
 * base + "و"/"o" with the supplied `plurals` appended through addPlurals.
 */
function vocPattern5Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern5Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
const base = makePsString(e.infbp || "", e.infbf || "");
return [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
[{ p: `${base.p}و`, f: `${base.f}o` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals),
];
}
/**
 * Builds the feminine vocative forms for a pattern 5 entry.
 *
 * Both forms are built on the entry's `infbp`/`infbf` base (empty strings
 * when those fields are missing): singular base + "ې"/"e", plural
 * base + "و"/"o" with the supplied `plurals` appended through addPlurals.
 */
function vocPattern5Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet {
function vocPattern5Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
const base = makePsString(e.infbp || "", e.infbf || "");
return [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
[{ p: `${base.p}و`, f: `${base.f}o` }],
addPlurals([{ p: `${base.p}و`, f: `${base.f}o` }], plurals),
];
}
/**
 * Collects the second (index 1) row of every plural set available for one
 * gender.
 *
 * @param gender - which gender's forms to pick ("masc", anything else is
 * treated as feminine)
 * @param plurals - the `plural` / `arabicPlural` inflections of an entry, or
 * `undefined` when there are none
 * @returns the index-1 forms of `plural` followed by those of `arabicPlural`
 * for that gender; sets that are missing or lack the gender contribute
 * nothing, so the result may be empty
 */
function genderPlural(gender: T.Gender, plurals: Plurals): T.PsString[] {
  if (!plurals) {
    return [];
  }
  const { plural, arabicPlural } = plurals;
  if (gender === "masc") {
    const mascRegular = plural && "masc" in plural ? plural.masc[1] : [];
    const mascArabic =
      arabicPlural && "masc" in arabicPlural ? arabicPlural.masc[1] : [];
    return [...mascRegular, ...mascArabic];
  }
  const femRegular = plural && "fem" in plural ? plural.fem[1] : [];
  const femArabic =
    arabicPlural && "fem" in arabicPlural ? arabicPlural.fem[1] : [];
  return [...femRegular, ...femArabic];
}