From b9269b855927e7582d8cc2fcbbd25d9a5226889c Mon Sep 17 00:00:00 2001 From: adueck Date: Wed, 24 Jul 2024 14:50:48 -0400 Subject: [PATCH] added vocatives! --- package-lock.json | 4 +- package.json | 2 +- src/components/package-lock.json | 4 +- src/components/package.json | 2 +- src/components/src/InflectionsTable.tsx | 4 + src/lib/package.json | 2 +- src/lib/src/accent-helpers.test.ts | 34 ++ src/lib/src/accent-helpers.ts | 32 +- src/lib/src/fp-ps.ts | 36 +++ src/lib/src/inflection-pattern.ts | 2 +- src/lib/src/p-text-helpers.test.ts | 8 - src/lib/src/p-text-helpers.ts | 116 +++++-- src/lib/src/pashto-inflector.test.ts | 408 ++++++++++++++++++++++-- src/lib/src/pashto-inflector.ts | 156 +++++++-- src/lib/src/type-predicates.ts | 81 +++-- src/lib/src/vocatives.ts | 310 ++++++++++++++++++ src/types.ts | 13 +- 17 files changed, 1070 insertions(+), 144 deletions(-) create mode 100644 src/lib/src/vocatives.ts diff --git a/package-lock.json b/package-lock.json index 5a6506a..0940f38 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "pashto-inflector", - "version": "7.1.7", + "version": "7.2.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "pashto-inflector", - "version": "7.1.7", + "version": "7.2.0", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index ed519f0..55ad165 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pashto-inflector", - "version": "7.1.7", + "version": "7.2.0", "author": "lingdocs.com", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "homepage": "https://verbs.lingdocs.com", diff --git a/src/components/package-lock.json b/src/components/package-lock.json index 8218fa8..05e84d6 100644 --- a/src/components/package-lock.json +++ b/src/components/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lingdocs/ps-react", - "version": "7.1.7", + "version": "7.2.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@lingdocs/ps-react", - "version": "7.1.7", + "version": "7.2.0", "license": "MIT", "dependencies": { "@formkit/auto-animate": "^1.0.0-beta.3", diff --git a/src/components/package.json b/src/components/package.json index e7dd1b6..9ee9f55 100644 --- a/src/components/package.json +++ b/src/components/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/ps-react", - "version": "7.1.7", + "version": "7.2.0", "description": "Pashto inflector library module with React components", "main": "dist/components/library.js", "module": "dist/components/library.js", diff --git a/src/components/src/InflectionsTable.tsx b/src/components/src/InflectionsTable.tsx index 2cfc924..eb3f561 100644 --- a/src/components/src/InflectionsTable.tsx +++ b/src/components/src/InflectionsTable.tsx @@ -43,10 +43,12 @@ const InflectionTable = ({ inf, textOptions, hideTitle, + vocative, }: { inf: T.Inflections | T.PluralInflections; textOptions: T.TextOptions; hideTitle?: boolean; + vocative?: boolean; }) => { // const [showingExplanation, setShowingExplanation] = useState(false); /* istanbul ignore next */ // Insanely can't see the modal to close it @@ -105,6 +107,8 @@ const InflectionTable = ({ {(!isPluralInfs ? ["Plain", "1st", "2nd"] + : vocative + ? ["Voc.", "Plur. Voc."] : ["Plural", "2nd Inf."] ).map((title, i) => ( diff --git a/src/lib/package.json b/src/lib/package.json index b24c0b5..8ce39ac 100644 --- a/src/lib/package.json +++ b/src/lib/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/inflect", - "version": "7.1.7", + "version": "7.2.0", "description": "Pashto inflector library", "main": "dist/index.js", "types": "dist/lib/library.d.ts", diff --git a/src/lib/src/accent-helpers.test.ts b/src/lib/src/accent-helpers.test.ts index 99e8527..73d106c 100644 --- a/src/lib/src/accent-helpers.test.ts +++ b/src/lib/src/accent-helpers.test.ts @@ -15,6 +15,7 @@ import { splitUpSyllables, hasAccents, countSyllables, + getAccentPos, } from "./accent-helpers"; const toAccentFront = [ @@ -40,6 +41,27 @@ test(`accentOnFront should work`, () => { }); }); +const toGetAccentPos = [ + { + input: makePsString("کورنۍ", "koranúy"), + output: 0, + }, + { + input: makePsString("ستړی", "stúRay"), + output: 1, + }, + { + input: makePsString("لیدلی", "leedulay"), + output: -1, + }, +]; + +test(`getAccentPos should work`, () => { + toGetAccentPos.forEach((item) => { + expect(getAccentPos(item.input)).toEqual(item.output); + }); +}); + const toAccentPastParticiple = [ { input: makePsString("پرېښی", "prexay"), @@ -59,6 +81,8 @@ test(`accentPastParticiple should work`, () => { test(`splitUpSyllables should work`, () => { expect(splitUpSyllables("akheestul")).toEqual(["akh", "eest", "ul"]); + expect(splitUpSyllables("kh")).toEqual([]); + expect(splitUpSyllables("x")).toEqual([]); }); test("countSyllables", () => { @@ -74,9 +98,19 @@ test("countSyllables", () => { test(`accentOnFSylsOnNFromEnd should work`, () => { expect(accentFSylsOnNFromEnd(["pu", "xtaa", "nu"], 0)).toBe("puxtaanú"); expect(accentFSylsOnNFromEnd(["leed", "ul", "ay"], 1)).toBe("leedúlay"); + expect(accentFSylsOnNFromEnd([], 0)).toBe(""); + expect(accentFSylsOnNFromEnd("x", 0)).toBe("x"); }); test(`accentOnNFromEnd should work`, () => { + expect(accentOnNFromEnd({ p: "ښه", f: "xu" }, 0)).toEqual({ + p: "ښه", + f: "xú", + }); + expect(accentOnNFromEnd({ p: "ښ", f: "x" }, 0)).toEqual({ + p: "ښ", + f: "x", + }); expect(accentOnNFromEnd({ p: "پښتانه", f: "puxtaanu" }, 0)).toEqual({ p: "پښتانه", f: "puxtaanú", diff --git a/src/lib/src/accent-helpers.ts b/src/lib/src/accent-helpers.ts index 9713a52..c1e93f2 100644 --- a/src/lib/src/accent-helpers.ts +++ b/src/lib/src/accent-helpers.ts @@ -85,7 +85,11 @@ export function accentFSylsOnNFromEnd( n: number ): string { if (typeof syls === "string") { - return accentFSylsOnNFromEnd(splitUpSyllables(syls), n); + const s = splitUpSyllables(syls); + if (s.length === 0) { + return syls; + } + return accentFSylsOnNFromEnd(s, n); } if (syls.length === 0) { return ""; @@ -100,9 +104,9 @@ export function accentFSylsOnNFromEnd( export function accentOnNFromEnd(ps: T.PsString, n: number): T.PsString { const fNoAccents = removeAccents(ps.f); const fSyls = splitUpSyllables(fNoAccents); - // TODO: enable this and fix the tests it breaks!!! - // don't add accent if only one syllable - // if (fSyls.length === 1) return makePsString(ps.p, fNoAccents); + if (fSyls.length === 0) { + return ps; + } return makePsString(ps.p, accentFSylsOnNFromEnd(fSyls, n)); } @@ -127,6 +131,26 @@ export function accentLetter(s: string): string { }); } +/** + * returns the position of an accent on a word, 0 being the last syllable + * -1 means there is no accent + * + * @param ps + */ +export function getAccentPos(ps: T.PsString): number { + const syls = splitUpSyllables(ps.f); + for (let i = 0; i < syls.length; i++) { + if (hasAccents(syls.at(-(i + 1)) || "")) { + return i; + } + } + return -1; +} + +export function accentIsOnEnd(ps: T.PsString): boolean { + return getAccentPos(ps) === 0; +} + export function accentPsSyllable(ps: T.PsString): T.PsString { return { p: ps.p, diff --git a/src/lib/src/fp-ps.ts b/src/lib/src/fp-ps.ts index b9a3833..d1f5c7c 100644 --- a/src/lib/src/fp-ps.ts +++ b/src/lib/src/fp-ps.ts @@ -76,6 +76,42 @@ export function pureSingleOrLengthOpts(a: A): T.SingleOrLengthOpts { return a; } +export function applyPsString( + f: + | { + p: (x: string) => string; + } + | { + f: (x: string) => string; + } + | { + p: (x: string) => string; + f: (x: string) => string; + }, + x: T.PsString +): T.PsString { + if ("p" in f && "f" in f) { + return { + p: f.p(x.p), + f: f.f(x.f), + }; + } + if ("p" in f) { + return { + p: f.p(x.p), + f: x.f, + }; + } + return { + p: x.p, + f: f.f(x.f), + }; +} + +export function mapGen(f: (x: A) => B, x: A): B { + return f(x); +} + /** * like and applicative <*> operator for SingleOrLengthOpts * diff --git a/src/lib/src/inflection-pattern.ts b/src/lib/src/inflection-pattern.ts index 44e8dc2..2b5190f 100644 --- a/src/lib/src/inflection-pattern.ts +++ b/src/lib/src/inflection-pattern.ts @@ -11,7 +11,7 @@ import { } from "./type-predicates"; export function getInflectionPattern( - e: T.NounEntry | T.AdjectiveEntry + e: T.InflectableEntry ): T.InflectionPattern { return isPattern1Entry(e) ? T.InflectionPattern.Basic diff --git a/src/lib/src/p-text-helpers.test.ts b/src/lib/src/p-text-helpers.test.ts index e6c2ab3..103f60b 100644 --- a/src/lib/src/p-text-helpers.test.ts +++ b/src/lib/src/p-text-helpers.test.ts @@ -21,7 +21,6 @@ import { splitDoubleWord, endsInConsonant, addOEnding, - endsInShwa, splitPsByVarients, endsWith, trimOffPs, @@ -1596,13 +1595,6 @@ test("addOEnding", () => { }); }); -test("endsInShwa", () => { - expect(endsInShwa({ p: "ښایسته", f: "xaaystú" })).toBe(true); - expect(endsInShwa({ p: "ښایسته", f: "xaaystu" })).toBe(true); - expect(endsInShwa({ p: "ښایسته", f: "xaaysta" })).toBe(false); - expect(endsInShwa({ p: "کور", f: "kor" })).toBe(false); -}); - test("splitPsByVarients", () => { expect( splitPsByVarients({ p: "حوادث, حادثات", f: "hawáadis, haadisáat" }) diff --git a/src/lib/src/p-text-helpers.ts b/src/lib/src/p-text-helpers.ts index 109f831..1527dfd 100644 --- a/src/lib/src/p-text-helpers.ts +++ b/src/lib/src/p-text-helpers.ts @@ -563,6 +563,44 @@ export function unisexInfToObjectMatrix( }; } +export function concatPlurals( + a: T.PluralInflections, + b: T.PluralInflections +): T.PluralInflections { + function concatPsArraysWSpace( + a: T.ArrayOneOrMore, + b: T.ArrayOneOrMore + ): T.ArrayOneOrMore { + if (a.length !== b.length) { + throw new Error("arrays of plural/vocative inflections are different!"); + } + return a.map((x, i) => + concatPsString(x, " ", b[i]) + ) as T.ArrayOneOrMore; + } + + function concatPluralSet( + a: T.PluralInflectionSet, + b: T.PluralInflectionSet + ): T.PluralInflectionSet { + return [concatPsArraysWSpace(a[0], b[0]), concatPsArraysWSpace(a[1], b[1])]; + } + const masc = + "masc" in a && "masc" in b ? concatPluralSet(a.masc, b.masc) : undefined; + const fem = + "fem" in a && "fem" in b ? concatPluralSet(a.fem, b.fem) : undefined; + if (masc && fem) { + return { masc, fem }; + } + if (masc) { + return { masc }; + } + if (fem) { + return { fem }; + } + throw new Error("error concating plural/vocative inflections for double!"); +} + export function concatInflections( comp: T.PsString | T.SingleOrLengthOpts, infs: T.SingleOrLengthOpts @@ -640,6 +678,10 @@ export function allOnePersonInflection( return block; } +export function hasShwaEnding({ f }: T.PsString): boolean { + return f.endsWith("u") || f.endsWith("ú"); +} + export function choosePersInf( x: T.FullForm, persInf: T.PersonInflectionsField @@ -950,9 +992,10 @@ export function ensureUnisexInflections( ): { inflections: T.UnisexInflections; plural?: T.PluralInflections; + vocative?: T.PluralInflections; } { const ps = { p: w.p, f: w.f }; - if (infs === false || infs.inflections === undefined) { + if (infs === false) { return { inflections: { masc: [[ps], [ps], [ps]], @@ -960,12 +1003,24 @@ export function ensureUnisexInflections( }, }; } + if (!infs.inflections) { + return { + inflections: { + masc: [[ps], [ps], [ps]], + fem: [[ps], [ps], [ps]], + }, + ...("plural" in infs ? { plural: infs.plural } : {}), + ...("vocative" in infs ? { vocative: infs.vocative } : {}), + }; + } if (!("fem" in infs.inflections)) { return { inflections: { ...infs.inflections, fem: [[ps], [ps], [ps]], }, + ...("plural" in infs ? { plural: infs.plural } : {}), + ...("vocative" in infs ? { vocative: infs.vocative } : {}), }; } if (!("masc" in infs.inflections)) { @@ -974,11 +1029,14 @@ export function ensureUnisexInflections( ...infs.inflections, masc: [[ps], [ps], [ps]], }, + ...("plural" in infs ? { plural: infs.plural } : {}), + ...("vocative" in infs ? { vocative: infs.vocative } : {}), }; } - // for some dumb reason have to do this for type safety return { inflections: infs.inflections, + ...("plural" in infs ? { plural: infs.plural } : {}), + ...("vocative" in infs ? { vocative: infs.vocative } : {}), }; } @@ -990,24 +1048,27 @@ export function endsInAaOrOo(w: T.PsString): boolean { ); } +export function endsInTob(ps: T.PsString): boolean { + return ( + ps.p.slice(-3) === "توب" && + ["tób", "tob"].includes(ps.f.slice(-3)) && + ps.p.length > 3 + ); +} + export function endsInConsonant(w: T.PsString): boolean { - // TODO: Add reporting back that the plural ending will need a space? - - function endsInLongDipthong(w: T.PsString): boolean { - function isLongDipthong(end: T.PsString): boolean { - return ( - psStringEquals(end, { p: "ای", f: "aay" }, true) || - psStringEquals(end, { p: "وی", f: "ooy" }, true) - ); - } - const end = makePsString(w.p.slice(-2), w.f.slice(-3)); - return isLongDipthong(end); - } - - if (endsInLongDipthong(w)) return true; - // const pCons = pashtoConsonants.includes(w.p.slice(-1)); - const fCons = phoneticsConsonants.includes(simplifyPhonetics(w.f).slice(-1)); - return fCons; + return ( + phoneticsConsonants.includes(simplifyPhonetics(w.f).slice(-1)) || + endsWith( + [ + { p: "ای", f: "aay" }, + { p: "وی", f: "ooy" }, + ], + w + ) || + endsWith([{ p: "ه", f: "h" }], w) || + endsWith([{ p: "و", f: "w" }], w) + ); } /** @@ -1052,24 +1113,13 @@ export function addOEnding(ps: T.PsString): T.ArrayOneOrMore { ]; } -/** - * Determines whether a string ends in a shwa or not - * - * @param w - */ -export function endsInShwa(w: T.PsString): boolean { - const p = w.p.slice(-1); - const f = w.f.slice(-1); - return p === "ه" && ["u", "ú"].includes(f); -} - /** * applies f function to both the p and f in a PsString * */ export function mapPsString( - ps: T.PsString, - f: (s: string) => T + f: (s: string) => T, + ps: T.PsString ): { p: T; f: T } { return { p: f(ps.p), @@ -1084,7 +1134,7 @@ export function mapPsString( * @returns */ export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore { - const { p, f } = mapPsString(w, splitVarients); + const { p, f } = mapPsString(splitVarients, w); return zipWith(makePsString, p, f) as T.ArrayOneOrMore; } diff --git a/src/lib/src/pashto-inflector.test.ts b/src/lib/src/pashto-inflector.test.ts index 9afbc86..0bf8f0c 100644 --- a/src/lib/src/pashto-inflector.test.ts +++ b/src/lib/src/pashto-inflector.test.ts @@ -43,6 +43,10 @@ const adjectives: { [{ p: "زړو", f: "zaRó" }], ], }, + vocative: { + masc: [[{ p: "زوړه", f: "zóRa" }], [{ p: "زړو", f: "zaRó" }]], + fem: [[{ p: "زړې", f: "zaRé" }], [{ p: "زړو", f: "zaRó" }]], + }, }, }, // regular adjective ending in ی @@ -75,6 +79,22 @@ const adjectives: { ], ], }, + vocative: { + masc: [ + [{ p: "ستړیه", f: "stúRiya" }], + [ + { p: "ستړیو", f: "stúRiyo" }, + { p: "ستړو", f: "stúRo" }, + ], + ], + fem: [ + [{ p: "ستړې", f: "stúRe" }], + [ + { p: "ستړیو", f: "stúRiyo" }, + { p: "ستړو", f: "stúRo" }, + ], + ], + }, }, }, // regular adjective ending in ی with stress on the end @@ -107,6 +127,22 @@ const adjectives: { ], ], }, + vocative: { + masc: [ + [{ p: "وروستیه", f: "wroostúya" }], + [ + { p: "وروستیو", f: "wroostúyo" }, + { p: "وروستو", f: "wroostó" }, + ], + ], + fem: [ + [{ p: "وروستۍ", f: "wroostúy" }], + [ + { p: "وروستیو", f: "wroostúyo" }, + { p: "وروستو", f: "wroostó" }, + ], + ], + }, }, }, // regular adjective ending in a consonant @@ -133,6 +169,10 @@ const adjectives: { [{ p: "سپکو", f: "spúko" }], ], }, + vocative: { + masc: [[{ p: "سپکه", f: "spúka" }], [{ p: "سپکو", f: "spúko" }]], + fem: [[{ p: "سپکې", f: "spúke" }], [{ p: "سپکو", f: "spúko" }]], + }, }, }, // regular adjective ending in a consonant with an accent already @@ -159,6 +199,10 @@ const adjectives: { [{ p: "ارتو", f: "aráto" }], ], }, + vocative: { + masc: [[{ p: "ارته", f: "aráta" }], [{ p: "ارتو", f: "aráto" }]], + fem: [[{ p: "ارتې", f: "aráte" }], [{ p: "ارتو", f: "aráto" }]], + }, }, }, { @@ -184,6 +228,10 @@ const adjectives: { [{ p: "لویو", f: "lóoyo" }], ], }, + vocative: { + masc: [[{ p: "لویه", f: "lóoya" }], [{ p: "لویو", f: "lóoyo" }]], + fem: [[{ p: "لویې", f: "lóoye" }], [{ p: "لویو", f: "lóoyo" }]], + }, }, }, { @@ -209,6 +257,10 @@ const adjectives: { [{ p: "پوهو", f: "póho" }], ], }, + vocative: { + masc: [[{ p: "پوهه", f: "póha" }], [{ p: "پوهو", f: "póho" }]], + fem: [[{ p: "پوهې", f: "póhe" }], [{ p: "پوهو", f: "póho" }]], + }, }, }, // adjective ending in u @@ -235,6 +287,10 @@ const adjectives: { [{ p: "ویدو", f: "weedó" }], ], }, + vocative: { + masc: [[{ p: "ویده", f: "weedá" }], [{ p: "ویدو", f: "weedó" }]], + fem: [[{ p: "ویدې", f: "weedé" }], [{ p: "ویدو", f: "weedó" }]], + }, }, }, { @@ -260,6 +316,13 @@ const adjectives: { [{ p: "ښایستو", f: "xaaysto" }], ], }, + vocative: { + masc: [ + [{ p: "ښایسته", f: "xaaysta" }], + [{ p: "ښایستو", f: "xaaysto" }], + ], + fem: [[{ p: "ښایستې", f: "xaayste" }], [{ p: "ښایستو", f: "xaaysto" }]], + }, }, }, // numbers should inflect just like adjectives @@ -286,6 +349,10 @@ const adjectives: { [{ p: "شپږو", f: "shpúGo" }], ], }, + vocative: { + masc: [[{ p: "شپږه", f: "shpúGa" }], [{ p: "شپږو", f: "shpúGo" }]], + fem: [[{ p: "شپږې", f: "shpúGe" }], [{ p: "شپږو", f: "shpúGo" }]], + }, }, }, // without accents @@ -312,6 +379,46 @@ const adjectives: { [{ p: "ښو", f: "xo" }], ], }, + vocative: { + masc: [[{ p: "ښه", f: "xa" }], [{ p: "ښو", f: "xo" }]], + fem: [[{ p: "ښې", f: "xe" }], [{ p: "ښو", f: "xo" }]], + }, + }, + }, + // pattern 5 adjectives + { + in: { + ts: 1527815265, + i: 10891, + p: "شین", + f: "sheen", + g: "sheen", + e: "green, blue; unripe, immature; bright, sunny", + r: 4, + c: "adj.", + infap: "شنه", + infaf: "shnu", + infbp: "شن", + infbf: "shn", + a: 1, + }, + out: { + inflections: { + masc: [ + [{ p: "شین", f: "sheen" }], + [{ p: "شنه", f: "shnu" }], + [{ p: "شنو", f: "shno" }], + ], + fem: [ + [{ p: "شنه", f: "shna" }], + [{ p: "شنې", f: "shne" }], + [{ p: "شنو", f: "shno" }], + ], + }, + vocative: { + masc: [[{ p: "شنه", f: "shna" }], [{ p: "شنو", f: "shno" }]], + fem: [[{ p: "شنې", f: "shne" }], [{ p: "شنو", f: "shno" }]], + }, }, }, // adjective non-inflecting @@ -363,6 +470,16 @@ const adjectives: { [{ p: "ګډو وډو", f: "gúDo wúDo" }], ], }, + vocative: { + masc: [ + [{ p: "ګډه وډه", f: "gúDa wúDa" }], + [{ p: "ګډو وډو", f: "gúDo wúDo" }], + ], + fem: [ + [{ p: "ګډې وډې", f: "gúDe wúDe" }], + [{ p: "ګډو وډو", f: "gúDo wúDo" }], + ], + }, }, }, ]; @@ -400,6 +517,10 @@ const nouns: { [{ p: "مېلمنو", f: "melmanó" }], ], }, + vocative: { + masc: [[{ p: "مېلمه", f: "melmá" }], [{ p: "مېلمنو", f: "melmanó" }]], + fem: [[{ p: "مېلمنې", f: "melmané" }], [{ p: "مېلمنو", f: "melmanó" }]], + }, }, }, // Unisex noun ending with ی @@ -432,6 +553,22 @@ const nouns: { ], ], }, + vocative: { + masc: [ + [{ p: "ملګریه", f: "malgúriya" }], + [ + { p: "ملګریو", f: "malgúriyo" }, + { p: "ملګرو", f: "malgúro" }, + ], + ], + fem: [ + [{ p: "ملګرې", f: "malgúre" }], + [ + { p: "ملګریو", f: "malgúriyo" }, + { p: "ملګرو", f: "malgúro" }, + ], + ], + }, }, }, // Unisex noun ending on ی with emphasis on the end @@ -466,6 +603,22 @@ const nouns: { ], ], }, + vocative: { + masc: [ + [{ p: "ترورزیه", f: "trorzúya" }], + [ + { p: "ترورزیو", f: "trorzúyo" }, + { p: "ترورزو", f: "trorzó" }, + ], + ], + fem: [ + [{ p: "ترورزۍ", f: "trorzúy" }], + [ + { p: "ترورزیو", f: "trorzúyo" }, + { p: "ترورزو", f: "trorzó" }, + ], + ], + }, plural: { masc: [ [{ p: "ترورزامن", f: "trorzaamun" }], @@ -498,6 +651,10 @@ const nouns: { [{ p: "چرګو", f: "chúrgo" }], ], }, + vocative: { + masc: [[{ p: "چرګه", f: "chúrga" }], [{ p: "چرګو", f: "chúrgo" }]], + fem: [[{ p: "چرګې", f: "chúrge" }], [{ p: "چرګو", f: "chúrgo" }]], + }, plural: { masc: [ [{ p: "چرګان", f: "churgáan" }], @@ -543,6 +700,22 @@ const nouns: { ], ], }, + vocative: { + masc: [ + [{ p: "پلویه", f: "palawúya" }], + [ + { p: "پلویو", f: "palawúyo" }, + { p: "پلوو", f: "palawó" }, + ], + ], + fem: [ + [{ p: "پلوۍ", f: "palawúy" }], + [ + { p: "پلویو", f: "palawúyo" }, + { p: "پلوو", f: "palawó" }, + ], + ], + }, plural: { masc: [ [{ p: "پلویان", f: "palawiyáan" }], @@ -578,6 +751,16 @@ const nouns: { ], ], }, + vocative: { + masc: [ + [{ p: "سړیه", f: "saRúya" }], + + [ + { p: "سړیو", f: "saRúyo" }, + { p: "سړو", f: "saRó" }, + ], + ], + }, }, }, // Masculine #3 anim @@ -590,6 +773,7 @@ const nouns: { f: "saylaanáy", g: "saylaanay", e: "tourist, sightseer, visitor", + // only masculine here for testing purposes c: "n. m. anim.", }, out: { @@ -603,6 +787,15 @@ const nouns: { ], ], }, + vocative: { + masc: [ + [{ p: "سیلانیه", f: "saylaanúya" }], + [ + { p: "سیلانیو", f: "saylaanúyo" }, + { p: "سیلانو", f: "saylaanó" }, + ], + ], + }, plural: { masc: [ [{ p: "سیلانیان", f: "saylaaniyáan" }], @@ -633,6 +826,15 @@ const nouns: { ], ], }, + vocative: { + masc: [ + [{ p: "ترېلیه", f: "trelúya" }], + [ + { p: "ترېلیو", f: "trelúyo" }, + { p: "ترېلو", f: "treló" }, + ], + ], + }, }, }, // Masculine ending in tob @@ -641,7 +843,7 @@ const nouns: { i: 11998, ts: 1586760783536, p: "مشرتوب", - f: "mushurtob", + f: "mushurtób", g: "", e: "leadership, authority, presidency", c: "n. m.", @@ -649,9 +851,15 @@ const nouns: { out: { inflections: { masc: [ - [{ p: "مشرتوب", f: "mushurtob" }], - [{ p: "مشرتابه", f: "mushurtaabu" }], - [{ p: "مشرتبو", f: "mushurtabo" }], + [{ p: "مشرتوب", f: "mushurtób" }], + [{ p: "مشرتابه", f: "mushurtaabú" }], + [{ p: "مشرتبو", f: "mushurtábo" }], + ], + }, + vocative: { + masc: [ + [{ p: "مشرتوبه", f: "mushurtóba" }], + [{ p: "مشرتبو", f: "mushurtábo" }], ], }, }, @@ -662,12 +870,12 @@ const nouns: { ts: 1527813809, i: 11318, p: "لمونځ", - f: "lamoondz", - g: "lamoondz", + f: "lamóondz", + g: "lamóondz", e: "Muslim ritual prayers (namaz, salah, salat)", c: "n. m. irreg.", infap: "لمانځه", - infaf: "lamaandzu", + infaf: "lamaandzú", infbp: "لمنځ", infbf: "lamandz", ppp: "لمونځونه", @@ -676,11 +884,17 @@ const nouns: { out: { inflections: { masc: [ - [{ p: "لمونځ", f: "lamoondz" }], + [{ p: "لمونځ", f: "lamóondz" }], [{ p: "لمانځه", f: "lamaandzú" }], [{ p: "لمنځو", f: "lamandzó" }], ], }, + vocative: { + masc: [ + [{ p: "لمونځه", f: "lamóondza" }], + [{ p: "لمنځو", f: "lamandzó" }], + ], + }, plural: { masc: [ [{ p: "لمونځونه", f: "lamoondzóona" }], @@ -712,6 +926,9 @@ const nouns: { [{ p: "غرو", f: "ghro" }], ], }, + vocative: { + masc: [[{ p: "غره", f: "ghra" }], [{ p: "غرو", f: "ghro" }]], + }, plural: { masc: [[{ p: "غرونه", f: "ghróona" }], [{ p: "غرونو", f: "ghróono" }]], }, @@ -749,6 +966,10 @@ const nouns: { [{ p: "خرو", f: "khro" }], ], }, + vocative: { + masc: [[{ p: "خره", f: "khra" }], [{ p: "خرو", f: "khro" }]], + fem: [[{ p: "خرې", f: "khre" }], [{ p: "خرو", f: "khro" }]], + }, }, }, // masc plural @@ -793,9 +1014,18 @@ const nouns: { g: "zRu", e: "heart", c: "n. m.", - noInf: true, }, out: { + inflections: { + masc: [ + [{ p: "زړه", f: "zRu" }], + [{ p: "زړه", f: "zRu" }], + [{ p: "زړو", f: "zRo" }], + ], + }, + vocative: { + masc: [[{ p: "زړه", f: "zRa" }], [{ p: "زړو", f: "zRo" }]], + }, plural: { masc: [[{ p: "زړونه", f: "zRóona" }], [{ p: "زړونو", f: "zRóono" }]], }, @@ -928,6 +1158,16 @@ const nouns: { i: 8640, }, out: { + inflections: { + masc: [ + [{ p: "کتاب", f: "kitaab" }], + [{ p: "کتاب", f: "kitaab" }], + [{ p: "کتابو", f: "kitaabo" }], + ], + }, + vocative: { + masc: [[{ p: "کتابه", f: "kitaaba" }], [{ p: "کتابو", f: "kitaabo" }]], + }, plural: { masc: [ [{ p: "کتابونه", f: "kitaabóona" }], @@ -952,6 +1192,16 @@ const nouns: { ep: "teeth", }, out: { + inflections: { + masc: [ + [{ p: "غاښ", f: "ghaax" }], + [{ p: "غاښ", f: "ghaax" }], + [{ p: "غاښو", f: "gháaxo" }], + ], + }, + vocative: { + masc: [[{ p: "غاښه", f: "gháaxa" }], [{ p: "غاښو", f: "gháaxo" }]], + }, plural: { masc: [ [{ p: "غاښونه", f: "ghaaxóona" }], @@ -976,6 +1226,16 @@ const nouns: { ppf: "wadóona", }, out: { + inflections: { + masc: [ + [{ p: "واده", f: "waadú" }], + [{ p: "واده", f: "waadú" }], + [{ p: "وادو", f: "waadó" }], + ], + }, + vocative: { + masc: [[{ p: "واده", f: "waadá" }], [{ p: "وادو", f: "waadó" }]], + }, plural: { masc: [[{ p: "ودونه", f: "wadóona" }], [{ p: "ودونو", f: "wadóono" }]], }, @@ -986,12 +1246,22 @@ const nouns: { ts: 1527817768, i: 9791, p: "کارګه", - f: "kaargu", - g: "kaargu", + f: "kaargú", + g: "kaargú", e: "raven, crow", c: "n. m. anim.", }, out: { + inflections: { + masc: [ + [{ p: "کارګه", f: "kaargú" }], + [{ p: "کارګه", f: "kaargú" }], + [{ p: "کارګو", f: "kaargó" }], + ], + }, + vocative: { + masc: [[{ p: "کارګه", f: "kaargá" }], [{ p: "کارګو", f: "kaargó" }]], + }, plural: { masc: [ [{ p: "کارګان", f: "kaargáan" }], @@ -1011,6 +1281,16 @@ const nouns: { c: "n. m.", }, out: { + inflections: { + masc: [ + [{ p: "لو", f: "law" }], + [{ p: "لو", f: "law" }], + [{ p: "لوو", f: "láwo" }], + ], + }, + vocative: { + masc: [[{ p: "لوه", f: "láwa" }], [{ p: "لوو", f: "láwo" }]], + }, plural: { masc: [[{ p: "لوونه", f: "lawóona" }], [{ p: "لوونو", f: "lawóono" }]], }, @@ -1025,7 +1305,7 @@ const nouns: { in: { ts: 1527812797, p: "ښځه", - f: "xudza", + f: "xúdza", g: "", e: "woman, wife", c: "n. f.", @@ -1034,11 +1314,14 @@ const nouns: { out: { inflections: { fem: [ - [{ p: "ښځه", f: "xudza" }], - [{ p: "ښځې", f: "xudze" }], - [{ p: "ښځو", f: "xudzo" }], + [{ p: "ښځه", f: "xúdza" }], + [{ p: "ښځې", f: "xúdze" }], + [{ p: "ښځو", f: "xúdzo" }], ], }, + vocative: { + fem: [[{ p: "ښځې", f: "xúdze" }], [{ p: "ښځو", f: "xúdzo" }]], + }, }, }, { @@ -1059,6 +1342,9 @@ const nouns: { [{ p: "ارو", f: "aró" }], ], }, + vocative: { + fem: [[{ p: "ارې", f: "aré" }], [{ p: "ارو", f: "aró" }]], + }, }, }, // Feminine regular ending in ع - a' @@ -1082,6 +1368,9 @@ const nouns: { [{ p: "مرجعو", f: "marjo" }], ], }, + vocative: { + fem: [[{ p: "مرجعې", f: "marje" }], [{ p: "مرجعو", f: "marjo" }]], + }, arabicPlural: { fem: [[{ p: "مراجع", f: "maraají'" }], [{ p: "مراجو", f: "maraajó" }]], }, @@ -1107,6 +1396,9 @@ const nouns: { [{ p: "منبعو", f: "manbó" }], ], }, + vocative: { + fem: [[{ p: "منبعې", f: "manbé" }], [{ p: "منبعو", f: "manbó" }]], + }, arabicPlural: { fem: [[{ p: "منابع", f: "manaabí" }], [{ p: "منابو", f: "manaabó" }]], }, @@ -1162,14 +1454,27 @@ const nouns: { ts: 1527816113, i: 3072, p: "تبلیغ", - f: "tableegh", - g: "tableegh", + f: "tabléegh", + g: "tabléegh", e: "propaganda; preaching, evangelism", c: "n. m.", app: "تبلیغات", apf: "tableegháat", }, out: { + inflections: { + masc: [ + [{ p: "تبلیغ", f: "tabléegh" }], + [{ p: "تبلیغ", f: "tabléegh" }], + [{ p: "تبلیغو", f: "tabléegho" }], + ], + }, + vocative: { + masc: [ + [{ p: "تبلیغه", f: "tabléegha" }], + [{ p: "تبلیغو", f: "tabléegho" }], + ], + }, plural: { masc: [ [{ p: "تبلیغونه", f: "tableeghóona" }], @@ -1178,8 +1483,8 @@ const nouns: { }, bundledPlural: { masc: [ - [{ p: "تبلیغه", f: "tableegha" }], - [{ p: "تبلیغو", f: "tableegho" }], + [{ p: "تبلیغه", f: "tabléegha" }], + [{ p: "تبلیغو", f: "tabléegho" }], ], }, arabicPlural: { @@ -1231,6 +1536,9 @@ const nouns: { [{ p: "حادثو", f: "haadisó" }], ], }, + vocative: { + fem: [[{ p: "حادثې", f: "haadisé" }], [{ p: "حادثو", f: "haadisó" }]], + }, arabicPlural: { masc: [ [ @@ -1265,6 +1573,9 @@ const nouns: { [{ p: "تجربو", f: "tajrabó" }], ], }, + vocative: { + fem: [[{ p: "تجربې", f: "tajrabé" }], [{ p: "تجربو", f: "tajrabó" }]], + }, arabicPlural: { masc: [ [{ p: "تجارب", f: "tajaarib" }], @@ -1286,6 +1597,16 @@ const nouns: { apf: "ahwáal", }, out: { + inflections: { + masc: [ + [{ p: "حال", f: "haal" }], + [{ p: "حال", f: "haal" }], + [{ p: "حالو", f: "háalo" }], + ], + }, + vocative: { + masc: [[{ p: "حاله", f: "háala" }], [{ p: "حالو", f: "háalo" }]], + }, plural: { masc: [ [{ p: "حالونه", f: "haalóona" }], @@ -1337,6 +1658,9 @@ const nouns: { [{ p: "ذبحو", f: "zabho" }], ], }, + vocative: { + fem: [[{ p: "ذبحې", f: "zabhe" }], [{ p: "ذبحو", f: "zabho" }]], + }, }, }, // Feminine inanimate regular with missing ه @@ -1358,6 +1682,9 @@ const nouns: { [{ p: "لارو", f: "láaro" }], ], }, + vocative: { + fem: [[{ p: "لارې", f: "láare" }], [{ p: "لارو", f: "láaro" }]], + }, }, }, // Feminine animate ending in a consonant @@ -1375,6 +1702,9 @@ const nouns: { i: 11113, }, out: { + vocative: { + fem: [[{ p: "مورې", f: "móre" }], [{ p: "میندو", f: "mayndo" }]], + }, plural: { fem: [[{ p: "میندې", f: "maynde" }], [{ p: "میندو", f: "mayndo" }]], }, @@ -1432,6 +1762,15 @@ const nouns: { ], ], }, + vocative: { + fem: [ + [{ p: "کرسۍ", f: "kUrsúy" }], + [ + { p: "کرسیو", f: "kUrsúyo" }, + { p: "کرسو", f: "kUrsó" }, + ], + ], + }, }, }, { @@ -1456,6 +1795,15 @@ const nouns: { ], ], }, + vocative: { + fem: [ + [{ p: "قاضۍ", f: "qaazúy" }], + [ + { p: "قاضیو", f: "qaazúyo" }, + { p: "قاضو", f: "qaazó" }, + ], + ], + }, plural: { fem: [ [{ p: "قاضیانې", f: "qaaziyáane" }], @@ -1561,7 +1909,16 @@ const nouns: { [{ p: "شی", f: "shay" }], [{ p: "شي", f: "shee" }], [ - { p: "شیو", f: "shiyo" }, + { p: "شیو", f: "shúyo" }, + { p: "شو", f: "sho" }, + ], + ], + }, + vocative: { + masc: [ + [{ p: "شیه", f: "shúya" }], + [ + { p: "شیو", f: "shúyo" }, { p: "شو", f: "sho" }, ], ], @@ -1623,6 +1980,16 @@ const nouns: { [{ p: "رشوت خورو", f: "rishwat khwaró" }], ], }, + vocative: { + masc: [ + [{ p: "رشوت خوره", f: "rishwat khóra" }], + [{ p: "رشوت خورو", f: "rishwat khwaró" }], + ], + fem: [ + [{ p: "رشوت خورې", f: "rishwat khwaré" }], + [{ p: "رشوت خورو", f: "rishwat khwaró" }], + ], + }, }, }, ]; @@ -1655,7 +2022,6 @@ adjectives.forEach((word) => { }); nouns.forEach((word) => { - // if (word.in.p !== "نبي") return; test(`${word.in.p} should inflect properly`, () => { expect(inflectWord(word.in)).toEqual(word.out); }); diff --git a/src/lib/src/pashto-inflector.ts b/src/lib/src/pashto-inflector.ts index 943d7ea..62d59c5 100644 --- a/src/lib/src/pashto-inflector.ts +++ b/src/lib/src/pashto-inflector.ts @@ -15,10 +15,13 @@ import { endsInConsonant, endsInAaOrOo, addOEnding, - endsInShwa, splitPsByVarients, removeEndTick, endsWith, + concatPlurals, + hasShwaEnding, + mapPsString, + endsInTob, } from "./p-text-helpers"; import { makePsString, removeFVarients } from "./accent-and-ps-utils"; import { @@ -30,7 +33,13 @@ import { splitUpSyllables, } from "./accent-helpers"; import * as T from "../../types"; -import { fmapSingleOrLengthOpts } from "./fp-ps"; +import { applyPsString, fmapSingleOrLengthOpts } from "./fp-ps"; +import { getVocatives } from "./vocatives"; +import { + isAdjectiveEntry, + isNumberEntry, + isPattern1Entry, +} from "./type-predicates"; const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; const endingInHayOrAynRegex = /[^ا][هع]$/; @@ -42,14 +51,24 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput { const w = removeFVarients(word); if (w.c?.includes("doub.")) { const words = splitDoubleWord(w); - const inflected = words.map((x) => - ensureUnisexInflections(inflectWord(x), x) - ); + // TODO: Make this work for non-unisex double words + // Right now this an extremely bad and complex way to do this + // with ensureUnisexInflections + const inflected = words.map((x) => { + const res = inflectWord(x); + return ensureUnisexInflections(res, x); + }); + const vocatives = inflected + .map((x) => "vocative" in x && x.vocative) + .filter((x) => x) as T.PluralInflections[]; return { inflections: concatInflections( inflected[0].inflections, inflected[1].inflections ) as T.UnisexInflections, + ...(vocatives.length + ? { vocative: concatPlurals(vocatives[0], vocatives[1]) } + : {}), }; } if (w.c && w.c.includes("pl.")) { @@ -57,7 +76,7 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput { } if ( w.c && - (w.c.includes("adj.") || w.c.includes("unisex") || w.c.includes("num")) + (isAdjectiveEntry(word) || w.c.includes("unisex") || isNumberEntry(word)) ) { return handleUnisexWord(w); } @@ -77,6 +96,7 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput { // TODO: !!! Handle weird endings / symbols ' etc. const pEnd = word.p.slice(-1); const plurals = makePlural(word); + const vocative = getVocatives(word); if (word.noInf) { return !plurals ? false : { ...plurals }; } @@ -86,21 +106,28 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput { { p: word.infap, f: word.infaf }, { p: word.infbp, f: word.infbf }, ]), + vocative, ...plurals, }; } if (pEnd === "ی" && word.f.slice(-2) === "ay") { - return { inflections: inflectRegularYayUnisex(word.p, word.f), ...plurals }; + return { + inflections: inflectRegularYayUnisex(word.p, word.f), + vocative, + ...plurals, + }; } if (pEnd === "ه" && word.g.slice(-1) === "u") { return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), + vocative, ...plurals, }; } if (pEnd === "ی" && word.f.slice(-2) === "áy") { return { inflections: inflectEmphasizedYayUnisex(word.p, word.f), + vocative, ...plurals, }; } @@ -113,6 +140,7 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput { ) { return { inflections: inflectConsonantEndingUnisex(word.p, word.f), + vocative, ...plurals, }; } @@ -134,6 +162,7 @@ function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput { // Get last letter of Pashto and last two letters of phonetics // TODO: !!! Handle weird endings / symbols ' etc. const plurals = makePlural(w); + const vocative = getVocatives(w); if (w.noInf) { return !plurals ? false : { ...plurals }; } @@ -145,22 +174,42 @@ function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput { { p: w.infap, f: w.infaf }, { p: w.infbp, f: w.infbf }, ]), + vocative, ...plurals, }; } - const isTobEnding = - w.p.slice(-3) === "توب" && - ["tób", "tob"].includes(w.f.slice(-3)) && - w.p.length > 3; - if (isTobEnding) { - return { inflections: inflectTobMasc(w.p, w.f), ...plurals }; + if (endsInTob(w)) { + return { inflections: inflectTobMasc(w.p, w.f), vocative, ...plurals }; + } + // TODO: stopgap before refactoring + // @ts-ignore + if (isPattern1Entry(w)) { + return { + inflections: { + masc: inflectPattern1Masc( + // @ts-ignore + makePsString(w.p, w.f) + ), + }, + vocative, + ...plurals, + }; + } + if ( + pEnd === "ی" && + (fEnd === "áy" || (fEnd === "ay" && countSyllables(w) === 1)) + ) { + const inflections = inflectRegularEmphasizedYayMasc(w.p, w.f); + return { + inflections, + vocative, + ...plurals, + }; } if (pEnd === "ی" && fEnd === "ay") { - return { inflections: inflectRegularYayMasc(w.p, w.f), ...plurals }; - } - if (pEnd === "ی" && fEnd === "áy") { return { - inflections: inflectRegularEmphasizedYayMasc(w.p, w.f), + inflections: inflectRegularYayMasc(w.p, w.f), + vocative, ...plurals, }; } @@ -173,17 +222,22 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput { const c = word.c || ""; const animate = c.includes("anim."); const pEnd = word.p.slice(-1); - + const vocative = getVocatives(word); const plurals = makePlural(word); if (word.noInf) { return !plurals ? false : { ...plurals }; } if (endingInHayOrAynRegex.test(word.p) && endingInSingleARegex.test(word.f)) { - return { inflections: inflectRegularAFem(word.p, word.f), ...plurals }; + return { + inflections: inflectRegularAFem(word.p, word.f), + vocative, + ...plurals, + }; } if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(word.f)) { return { + vocative, inflections: inflectRegularAWithHimPEnding(word.p, word.f), ...plurals, }; @@ -194,20 +248,38 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput { !animate ) { return { + vocative, inflections: inflectRegularInanMissingAFem(word.p, word.f), ...plurals, }; } if (pEnd === "ي" && !animate) { - return { inflections: inflectRegularInanEeFem(word.p, word.f), ...plurals }; + return { + inflections: inflectRegularInanEeFem(word.p, word.f), + vocative, + ...plurals, + }; } if (pEnd === "ۍ") { - return { inflections: inflectRegularUyFem(word.p, word.f), ...plurals }; + return { + inflections: inflectRegularUyFem(word.p, word.f), + vocative, + ...plurals, + }; } // if (endingInAlefRegex.test(word.p)) { // return { inflections: inflectRegularAaFem(word.p, f) }; // } - return plurals ? { ...plurals } : false; + return plurals || vocative + ? { + ...(plurals ? plurals : {}), + ...(vocative + ? { + vocative, + } + : {}), + } + : false; } // LEVEL 3 FUNCTIONS @@ -309,6 +381,20 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections { }; } +function inflectPattern1Masc(e: T.PsString): T.InflectionSet { + const shwaEnding = hasShwaEnding(e); + const base = applyPsString( + { + f: (x) => (countSyllables(e) === 1 ? accentFSylsOnNFromEnd(x, 0) : x), + }, + mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), e) + ); + if (e.f.endsWith("ú")) { + return [[e], [e], [{ p: `${base.p}و`, f: `${base.f}ó` }]]; + } + return [[e], [e], [{ p: `${base.p}و`, f: `${base.f}o` }]]; +} + function inflectConsonantEndingUnisex( p: string, f: string @@ -344,13 +430,14 @@ function inflectRegularYayMasc(p: string, f: string): T.Inflections { } function inflectTobMasc(p: string, f: string): T.Inflections { - const baseP = p.slice(0, -3); - const baseF = f.slice(0, -3); + const base = removeAccents( + mapPsString((x) => x.slice(0, -3), makePsString(p, f)) + ); return { masc: [ [{ p, f }], - [{ p: `${baseP}تابه`, f: `${baseF}taabu` }], - [{ p: `${baseP}تبو`, f: `${baseF}tabo` }], + [{ p: `${base.p}تابه`, f: `${base.f}taabú` }], + [{ p: `${base.p}تبو`, f: `${base.f}tábo` }], ], }; } @@ -358,6 +445,19 @@ function inflectTobMasc(p: string, f: string): T.Inflections { function inflectRegularEmphasizedYayMasc(p: string, f: string): T.Inflections { const baseP = p.slice(0, -1); const baseF = f.slice(0, -2); + + if (countSyllables(makePsString(p, f)) === 1) { + return { + masc: [ + [{ p, f }], + [{ p: `${baseP}ي`, f: `${baseF}ee` }], + [ + { p: `${baseP}یو`, f: `${baseF}úyo` }, + { p: `${baseP}و`, f: `${baseF}o` }, + ], + ], + }; + } return { masc: [ [{ p, f }], @@ -576,7 +676,7 @@ function makePlural( ) : w ); - const base = endsInShwa(b) + const base = hasShwaEnding(b) ? makePsString(b.p.slice(0, -1), b.f.slice(0, -1)) : b; return addSecondInf( @@ -707,7 +807,7 @@ function makePlural( } if ( type === "masc noun" && - (shortSquish || ((endsInConsonant(w) || endsInShwa(w)) && !w.infap)) && + (shortSquish || ((endsInConsonant(w) || hasShwaEnding(w)) && !w.infap)) && w.p.slice(-3) !== "توب" ) { return { diff --git a/src/lib/src/type-predicates.ts b/src/lib/src/type-predicates.ts index 1c1435b..5df4d27 100644 --- a/src/lib/src/type-predicates.ts +++ b/src/lib/src/type-predicates.ts @@ -1,6 +1,6 @@ import * as T from "../../types"; import { pashtoConsonants } from "./pashto-consonants"; -import { endsWith } from "./p-text-helpers"; +import { endsInConsonant, endsWith, hasShwaEnding } from "./p-text-helpers"; import { countSyllables } from "./accent-helpers"; const verbTenses: T.VerbTense[] = [ @@ -61,6 +61,24 @@ export function isNounOrAdjEntry( return isNounEntry(e) || isAdjectiveEntry(e); } +export function isInflectableEntry( + e: T.Entry | T.DictionaryEntry | T.DictionaryEntryNoFVars +): e is T.InflectableEntry { + if ("entry" in e) { + return false; + } + return isNounEntry(e) || isAdjectiveEntry(e) || isNumberEntry(e); +} + +export function isNumberEntry( + e: T.Entry | T.DictionaryEntry +): e is T.NumberEntry { + if ("entry" in e) { + return false; + } + return e.c ? e.c.includes("num.") : false; +} + export function isVerbDictionaryEntry( e: T.DictionaryEntry | T.DictionaryEntryNoFVars ): e is T.VerbDictionaryEntry { @@ -76,47 +94,41 @@ export function isVerbEntry( return "entry" in e && isVerbDictionaryEntry(e.entry); } -export function isMascNounEntry( - e: T.NounEntry | T.AdjectiveEntry -): e is T.MascNounEntry { +export function isMascNounEntry(e: T.InflectableEntry): e is T.MascNounEntry { return !!e.c && e.c.includes("n. m."); } -export function isFemNounEntry( - e: T.NounEntry | T.AdjectiveEntry -): e is T.FemNounEntry { +export function isFemNounEntry(e: T.InflectableEntry): e is T.FemNounEntry { return !!e.c && e.c.includes("n. f."); } export function isUnisexNounEntry( - e: T.NounEntry | T.AdjectiveEntry + e: T.InflectableEntry ): e is T.UnisexNounEntry { return isNounEntry(e) && e.c.includes("unisex"); } -export function isAnimNounEntry( - e: T.NounEntry | T.AdverbEntry -): e is T.AnimNounEntry { +export function isAnimNounEntry(e: T.InflectableEntry): e is T.AnimNounEntry { return e.c.includes("anim."); } export function isUnisexAnimNounEntry( - e: T.NounEntry | T.AdjectiveEntry + e: T.InflectableEntry ): e is T.UnisexAnimNounEntry { return isUnisexNounEntry(e) && isAnimNounEntry(e); } export function isAdjOrUnisexNounEntry( - e: T.Entry + e: T.Entry | T.InflectableEntry ): e is T.AdjectiveEntry | T.UnisexNounEntry { return isAdjectiveEntry(e) || (isNounEntry(e) && isUnisexNounEntry(e)); } export function isPattern( p: T.InflectionPattern | "all" -): (entry: T.NounEntry | T.AdjectiveEntry) => boolean { +): (entry: T.InflectableEntry) => boolean { if (p === 0) { - return (e: T.NounEntry | T.AdjectiveEntry) => + return (e: T.InflectableEntry) => !isPattern1Entry(e) && !isPattern2Entry(e) && !isPattern3Entry(e) && @@ -151,40 +163,27 @@ export function isPattern( * @param e * @returns */ -export function isPattern1Entry( +export function isPattern1Entry( e: T ): e is T.Pattern1Entry { if (e.noInf) return false; - if (e.infap) return false; + if (e.infap || e.infbp) return false; if (isFemNounEntry(e)) { return ( - endsWith( + (endsWith( [ { p: "ه", f: "a" }, { p: "ح", f: "a" }, + { p: "ع", f: "a" }, + { p: "ع", f: "a'" }, ], e - ) || + ) && + !e.p.endsWith("اع")) || (endsWith({ p: pashtoConsonants }, e) && !e.c.includes("anim.")) ); } - return ( - endsWith([{ p: pashtoConsonants }], e) || - endsWith( - [ - { p: "ه", f: "u" }, - { p: "ه", f: "h" }, - ], - e - ) || - endsWith( - [ - { p: "ای", f: "aay" }, - { p: "وی", f: "ooy" }, - ], - e - ) - ); + return endsInConsonant(e) || hasShwaEnding(e); } /** @@ -193,7 +192,7 @@ export function isPattern1Entry( * @param e * @returns T.T.T.T. */ -export function isPattern2Entry( +export function isPattern2Entry( e: T ): e is T.Pattern2Entry { if (e.noInf) return false; @@ -211,7 +210,7 @@ export function isPattern2Entry( * @param e * @returns */ -export function isPattern3Entry( +export function isPattern3Entry( e: T ): e is T.Pattern3Entry { if (e.noInf) return false; @@ -230,7 +229,7 @@ export function isPattern3Entry( * @param e * @returns */ -export function isPattern4Entry( +export function isPattern4Entry( e: T ): e is T.Pattern4Entry { if (e.noInf) return false; @@ -247,7 +246,7 @@ export function isPattern4Entry( * @param e * @returns */ -export function isPattern5Entry( +export function isPattern5Entry( e: T ): e is T.Pattern5Entry { if (e.noInf) return false; @@ -259,7 +258,7 @@ export function isPattern5Entry( } export function isPattern6FemEntry( - e: T.NounEntry | T.AdjectiveEntry + e: T.InflectableEntry ): e is T.Pattern6FemEntry { if (!isFemNounEntry(e)) return false; if (e.c.includes("anim.")) return false; diff --git a/src/lib/src/vocatives.ts b/src/lib/src/vocatives.ts new file mode 100644 index 0000000..cc45968 --- /dev/null +++ b/src/lib/src/vocatives.ts @@ -0,0 +1,310 @@ +import * as T from "../../types"; +import { makePsString } from "./accent-and-ps-utils"; +import { + accentIsOnEnd, + accentOnNFromEnd, + countSyllables, + removeAccents, +} from "./accent-helpers"; +import { applyPsString, mapGen } from "./fp-ps"; +import { getInflectionPattern } from "./inflection-pattern"; +import { + endsInConsonant, + endsInTob, + hasShwaEnding, + mapPsString, + endsWith, +} from "./p-text-helpers"; +import { + isAdjOrUnisexNounEntry, + isAnimNounEntry, + isFemNounEntry, + isInflectableEntry, + isMascNounEntry, + isNounEntry, + isNumberEntry, +} from "./type-predicates"; + +export function getVocatives( + e: T.DictionaryEntryNoFVars +): T.PluralInflections | undefined { + if (!isInflectableEntry(e)) { + return undefined; + } + const entry: T.InflectableEntry = e; + const pattern = getInflectionPattern(entry); + if ( + pattern === 0 && + isFemNounEntry(e) && + isAnimNounEntry(e) && + endsInConsonant(e) + ) { + return vocFemAnimException(e); + } + const gender: T.Gender | "unisex" = + isAdjOrUnisexNounEntry(entry) || isNumberEntry(entry) + ? "unisex" + : isMascNounEntry(entry) + ? "masc" + : "fem"; + if (pattern === 0 || pattern === 6) { + return undefined; + } + const funcs = patternFuncs[pattern]; + if (gender === "masc") { + return { + masc: funcs.masc(e), + }; + } + if (gender === "fem") { + return { + fem: funcs.fem(e), + }; + } + if (gender === "unisex") { + return { + masc: funcs.masc(e), + fem: funcs.fem(e), + }; + } +} + +const patternFuncs: Record< + 1 | 2 | 3 | 4 | 5, + Record T.PluralInflectionSet> +> = { + 1: { + masc: vocPattern1Masc, + fem: vocPattern1Fem, + }, + 2: { + masc: vocPattern2Masc, + fem: vocPattern2Fem, + }, + 3: { + masc: vocPattern3Masc, + fem: vocPattern3Fem, + }, + 4: { + masc: vocPattern4Masc, + fem: vocPattern4Fem, + }, + 5: { + masc: vocPattern5Masc, + fem: vocPattern5Fem, + }, +}; + +function vocFemAnimException(e: T.NounEntry): T.PluralInflections { + if (!e.ppp || !e.ppf) { + throw new Error( + "plural missing for feminine animate exception noun " + e.p + ); + } + // TODO: HANDLE BETTER WITH PLURALS! + const plurBase = mapPsString( + (x) => x.slice(0, -1), + makePsString(e.ppp, e.ppf) + ); + const base = + countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f); + return { + fem: [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + [{ p: `${plurBase.p}و`, f: `${plurBase.f}o` }], + ], + }; +} + +function vocPattern1Masc( + e: T.DictionaryEntryNoFVars | T.NounEntry +): T.PluralInflectionSet { + if (isNounEntry(e) && endsInTob(e)) { + const base = mapPsString((x) => x.slice(0, -3), e); + return [ + [{ p: `${e.p}ه`, f: `${e.f}a` }], + [{ p: `${base.p}تبو`, f: `${base.f}tábo` }], + ]; + } + const shwaEnding = hasShwaEnding(e); + const base = mapGen( + (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), + mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), e) + ); + if (shwaEnding && e.f.endsWith("ú")) { + return [ + [{ p: `${base.p}ه`, f: `${base.f}á` }], + [{ p: `${base.p}و`, f: `${base.f}ó` }], + ]; + } + return [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + [{ p: `${base.p}و`, f: `${base.f}o` }], + ]; +} + +function vocPattern1Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const shwaEnding = hasShwaEnding(e); + const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding; + const base = mapGen( + (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), + hasFemEnding + ? mapPsString((x) => x.slice(0, -1), e) + : makePsString(e.p, e.f) + ); + if ( + endsWith( + [ + { p: "ع", f: "a" }, + { p: "ع", f: "a'" }, + ], + e + ) + ) { + const base = applyPsString( + { + f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1), + }, + e + ); + if (accentIsOnEnd(e)) { + return [ + [{ p: `${base.p}ې`, f: `${base.f}é` }], + [{ p: `${base.p}و`, f: `${base.f}ó` }], + ]; + } + return [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + [{ p: `${base.p}و`, f: `${base.f}o` }], + ]; + } + if (endsWith([{ p: "ح", f: "a" }], e)) { + const base = applyPsString( + { + f: (f) => f.slice(0, -1), + }, + e + ); + if (accentIsOnEnd(e)) { + return [ + [{ p: `${base.p}ې`, f: `${base.f}é` }], + [{ p: `${base.p}و`, f: `${base.f}ó` }], + ]; + } + return [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + [{ p: `${base.p}و`, f: `${base.f}o` }], + ]; + } + if (hasFemEnding && accentIsOnEnd(e)) { + return [ + [{ p: `${base.p}ې`, f: `${base.f}é` }], + [{ p: `${base.p}و`, f: `${base.f}ó` }], + ]; + } + return [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + [{ p: `${base.p}و`, f: `${base.f}o` }], + ]; +} + +function vocPattern2Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = makePsString(e.p.slice(0, -1), e.f.slice(0, -2)); + return [ + [{ p: `${base.p}یه`, f: `${base.f}iya` }], + [ + { p: `${base.p}یو`, f: `${base.f}iyo` }, + { p: `${base.p}و`, f: `${base.f}o` }, + ], + ]; +} + +function vocPattern2Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = makePsString( + e.p.slice(0, -1), + e.f.slice(0, e.f.endsWith("ay") ? -2 : -1) + ); + return [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + [ + { p: `${base.p}یو`, f: `${base.f}iyo` }, + { p: `${base.p}و`, f: `${base.f}o` }, + ], + ]; +} + +function vocPattern3Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = makePsString( + e.p.slice(0, -1), + // shouldn't be accents here but remove just to be sure + removeAccents(e.f.slice(0, -2)) + ); + const baseSyls = countSyllables(base); + return [ + [{ p: `${base.p}یه`, f: `${base.f}úya` }], + [ + { p: `${base.p}یو`, f: `${base.f}úyo` }, + { p: `${base.p}و`, f: `${base.f}${baseSyls ? "ó" : "o"}` }, + ], + ]; +} + +function vocPattern3Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = makePsString( + e.p.slice(0, -1), + // shouldn't be accents here but remove just to be sure + removeAccents(e.f.slice(0, -2)) + ); + // TODO: This works well for unisex nouns/adjs but would be redundant for fem. nouns? + return [ + [{ p: `${base.p}ۍ`, f: `${base.f}úy` }], + [ + { p: `${base.p}یو`, f: `${base.f}úyo` }, + { p: `${base.p}و`, f: `${base.f}ó` }, + ], + ]; +} + +function vocPattern4Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e; + const plurBase = makePsString(e.infbp || "", e.infbf || ""); + if (endsInConsonant(e)) { + return [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + [{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }], + ]; + } + // TODO: is this even possible? + if (hasShwaEnding(e)) { + return [ + [{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }], + [{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }], + ]; + } + // exception for مېلمه, کوربه + return [[{ p: e.p, f: e.f }], [{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }]]; +} + +function vocPattern4Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = makePsString(e.infbp || "", e.infbf || ""); + return [ + [{ p: `${base.p}ې`, f: `${base.f}é` }], + [{ p: `${base.p}و`, f: `${base.f}ó` }], + ]; +} + +function vocPattern5Masc(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = makePsString(e.infbp || "", e.infbf || ""); + return [ + [{ p: `${base.p}ه`, f: `${base.f}a` }], + [{ p: `${base.p}و`, f: `${base.f}o` }], + ]; +} + +function vocPattern5Fem(e: T.DictionaryEntryNoFVars): T.PluralInflectionSet { + const base = makePsString(e.infbp || "", e.infbf || ""); + return [ + [{ p: `${base.p}ې`, f: `${base.f}e` }], + [{ p: `${base.p}و`, f: `${base.f}o` }], + ]; +} diff --git a/src/types.ts b/src/types.ts index a04ea8e..16192e6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -496,15 +496,18 @@ export type InflectorOutput = plural?: PluralInflections; bundledPlural?: PluralInflections; inflections?: Inflections; + vocative?: PluralInflections; } | { plural: PluralInflections; arabicPlural?: PluralInflections; bundledPlural?: PluralInflections; inflections?: Inflections; + vocative?: PluralInflections; } | { - inflections: Inflections; + inflections?: Inflections; + vocative?: PluralInflections; } | false; @@ -627,9 +630,17 @@ export type DisplayFormSubgroup = { export type AayTail = "ey" | "aay"; +export type InflectableEntry = + | NounEntry + | AdjectiveEntry + | NumberEntry + | AdverbEntry; export type NounEntry = DictionaryEntry & { c: string } & { __brand: "a noun entry"; }; +export type NumberEntry = DictionaryEntry & { c: string } & { + __brand: "a number entry"; +}; export type MascNounEntry = NounEntry & { __brand2: "a masc noun entry" }; export type FemNounEntry = NounEntry & { __brand2: "a fem noun entry" }; export type AnimNounEntry = NounEntry & { __brand3: "a anim noun entry" };