From 46fd6e66e84fd1677717065862b3c33bf1b4ce8a Mon Sep 17 00:00:00 2001 From: adueck Date: Sat, 27 Jul 2024 12:10:32 -0400 Subject: [PATCH] refactor with more abstraction --- src/lib/src/inflections-and-vocative.ts | 479 ++++++++++++------------ src/lib/src/nouns-plural.ts | 320 ++++++++++++++++ src/lib/src/pashto-inflector.test.ts | 3 +- src/lib/src/pashto-inflector.ts | 318 +--------------- 4 files changed, 560 insertions(+), 560 deletions(-) create mode 100644 src/lib/src/nouns-plural.ts diff --git a/src/lib/src/inflections-and-vocative.ts b/src/lib/src/inflections-and-vocative.ts index 3b26e2e..1e64832 100644 --- a/src/lib/src/inflections-and-vocative.ts +++ b/src/lib/src/inflections-and-vocative.ts @@ -14,6 +14,7 @@ import { hasShwaEnding, mapPsString, endsWith, + psStringFromEntry, } from "./p-text-helpers"; import { removeDuplicates } from "./phrase-building/vp-tools"; import { @@ -25,6 +26,16 @@ import { isNounEntry, isNumberEntry, } from "./type-predicates"; +import { semigroupPsString } from "../dist/lib/src/fp-ps"; + +const concatPs = semigroupPsString.concat; + +const o = { p: "و", f: "o" }; +const ó = { p: "و", f: "ó" }; +const a = { p: "ه", f: "a" }; +const á = { p: "ه", f: "á" }; +const e = { p: "ې", f: "e" }; +const é = { p: "ې", f: "é" }; type Plurals = | { @@ -33,11 +44,11 @@ type Plurals = } | undefined; -const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; -const endingInHayOrAynRegex = /[^ا][هع]$/; +// const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; +// const endingInHayOrAynRegex = /[^ا][هع]$/; export function getInfsAndVocative( - entry: T.DictionaryEntryNoFVars, + entryR: T.DictionaryEntryNoFVars, plurals: Plurals ): | { @@ -45,51 +56,51 @@ export function getInfsAndVocative( vocative?: T.PluralInflections; } | false { - if (!isInflectableEntry(entry)) { + if (!isInflectableEntry(entryR)) { return false; } // @ts-ignore - const e: T.InflectableEntry = entry as T.InflectableEntry; - const pattern = getInflectionPattern(e); + const entry: T.InflectableEntry = entryR as T.InflectableEntry; + const pattern = getInflectionPattern(entry); if ( pattern === 0 && - isFemNounEntry(e) && - isAnimNounEntry(e) && - endsInConsonant(e) + isFemNounEntry(entry) && + isAnimNounEntry(entry) && + endsInConsonant(entry) ) { return { vocative: vocFemAnimException({ - e, + entry, plurals: genderPlural("fem", plurals), }), }; } const gender: T.Gender | "unisex" = - isAdjOrUnisexNounEntry(e) || isNumberEntry(e) + isAdjOrUnisexNounEntry(entry) || isNumberEntry(entry) ? "unisex" - : isMascNounEntry(e) + : isMascNounEntry(entry) ? "masc" : "fem"; if (pattern === 0) { return false; } if (pattern === 6) { - return pattern6({ e, plurals: genderPlural("fem", plurals) }); + return pattern6({ entry, plurals: genderPlural("fem", plurals) }); } const funcs = patternFuncs[pattern]; const masc = gender === "unisex" || gender === "masc" - ? funcs.masc({ e, plurals: genderPlural("masc", plurals) }) + ? funcs.masc({ entry, plurals: genderPlural("masc", plurals) }) : undefined; const fem = gender === "unisex" || gender === "fem" - ? funcs.fem({ e, plurals: genderPlural("fem", plurals) }) + ? funcs.fem({ entry, plurals: genderPlural("fem", plurals) }) : undefined; return aggregateInfsAndVoc(masc, fem); } type PatternInput = { - e: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry; + entry: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry; plurals: T.PsString[]; }; @@ -107,45 +118,45 @@ const patternFuncs: Record< fem: vocPattern1Fem, }, 2: { - masc: vocPattern2Masc, - fem: vocPattern2Fem, + masc: pattern2Masc, + fem: pattern2Fem, }, 3: { - masc: vocPattern3Masc, - fem: vocPattern3Fem, + masc: pattern3Masc, + fem: pattern3Fem, }, 4: { - masc: vocPattern4Masc, - fem: vocPattern4Fem, + masc: pattern4Masc, + fem: pattern4Fem, }, 5: { masc: vocPattern5Masc, - fem: vocPattern5Fem, + fem: pattern5Fem, }, }; function addPlurals( - e: T.ArrayOneOrMore, + x: T.ArrayOneOrMore, plurals: T.PsString[] ): T.ArrayOneOrMore { if (!plurals) { - return e; + return x; } - return removeDuplicates([...e, ...plurals]) as T.ArrayOneOrMore; + return removeDuplicates([...x, ...plurals]) as T.ArrayOneOrMore; } -function pattern6({ e, plurals }: PatternInput): { +function pattern6({ entry, plurals }: PatternInput): { inflections: T.Inflections; vocative: T.PluralInflections; } { - const base = removeAccents({ p: e.p.slice(0, -1), f: e.f.slice(0, -2) }); + const base = removeAccents({ + p: entry.p.slice(0, -1), + f: entry.f.slice(0, -2), + }); const inflections: T.InflectionSet = [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ۍ`, f: `${base.f}úy` }], - [ - { p: `${base.p}یو`, f: `${base.f}úyo` }, - { p: `${base.p}و`, f: `${base.f}ó` }, - ], + [psStringFromEntry(entry)], + [concatPs(base, { p: "ۍ", f: "úy" })], + [concatPs(base, { p: "یو", f: "úyo" }), concatPs(base, ó)], ]; return { inflections: { @@ -158,83 +169,88 @@ function pattern6({ e, plurals }: PatternInput): { } function vocFemAnimException({ - e, + entry, plurals, }: PatternInput): T.PluralInflections { - if (!e.ppp || !e.ppf) { + if (!entry.ppp || !entry.ppf) { throw new Error( - "plural missing for feminine animate exception noun " + e.p + "plural missing for feminine animate exception noun " + entry.p ); } // TODO: HANDLE BETTER WITH PLURALS! const plurBase = mapPsString( (x) => x.slice(0, -1), - makePsString(e.ppp, e.ppf) + makePsString(entry.ppp, entry.ppf) ); const base = - countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f); + countSyllables(entry) === 1 + ? accentOnNFromEnd(entry, 0) + : psStringFromEntry(entry); return { - fem: [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}o` }], plurals), - ], + fem: [[concatPs(base, e)], addPlurals([concatPs(plurBase, o)], plurals)], }; } -function vocPattern1Masc({ e, plurals }: PatternInput): InflectionsAndVocative { - if (isNounEntry(e) && endsInTob(e)) { - const base = mapPsString((x) => x.slice(0, -3), e); - const second: T.ArrayOneOrMore = [ - { p: `${base.p}تبو`, f: `${base.f}tábo` }, +function vocPattern1Masc({ + entry, + plurals, +}: PatternInput): InflectionsAndVocative { + const plain = psStringFromEntry(entry); + if (isNounEntry(entry) && endsInTob(entry)) { + const base = mapPsString((x) => x.slice(0, -3), entry); + const inflections: T.InflectionSet = [ + [plain], + [concatPs(base, { p: "تابه", f: "taabú" })], + [concatPs(base, { p: "تبو", f: "tábo" })], ]; return { - inflections: [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}تابه`, f: `${base.f}taabú` }], - second, - ], - vocative: [[{ p: `${e.p}ه`, f: `${e.f}a` }], addPlurals(second, plurals)], + inflections, + vocative: [[concatPs(plain, a)], addPlurals(inflections[2], plurals)], }; } - const shwaEnding = hasShwaEnding(e); + const shwaEnding = hasShwaEnding(entry); const base = mapGen( - (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), - mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), e) + (ps) => (countSyllables(entry) === 1 ? accentOnNFromEnd(ps, 0) : ps), + mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), entry) ); - if (shwaEnding && e.f.endsWith("ú")) { - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}ó` }, + if (shwaEnding && entry.f.endsWith("ú")) { + const inflections: T.InflectionSet = [ + [plain], + [plain], + [concatPs(base, ó)], ]; return { - inflections: [[{ p: e.p, f: e.f }], [{ p: e.p, f: e.f }], second], - vocative: [ - [{ p: `${base.p}ه`, f: `${base.f}á` }], - addPlurals(second, plurals), - ], + inflections, + vocative: [[concatPs(base, á)], addPlurals(inflections[2], plurals)], }; } // TODO: shouldn't this be accent-sensitive? - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}o` }, - ]; + const inflections: T.InflectionSet = [[plain], [plain], [concatPs(base, o)]]; return { - inflections: [[{ p: e.p, f: e.f }], [{ p: e.p, f: e.f }], second], + inflections, vocative: [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - addPlurals(second, plurals), + [concatPs(base, { p: "ه", f: "a" })], + addPlurals(inflections[2], plurals), ], }; } // TODO this is HUGELY repetitive refactor this! -function vocPattern1Fem({ e, plurals }: PatternInput): InflectionsAndVocative { - const shwaEnding = hasShwaEnding(e); - const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding; +function vocPattern1Fem({ + entry, + plurals, +}: PatternInput): InflectionsAndVocative { + const shwaEnding = hasShwaEnding(entry); + const hasFemEnding = endsWith([{ p: "ه", f: "a" }], entry) || shwaEnding; + const endAccented = accentIsOnEnd(entry); const base = mapGen( - (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps), + (ps) => + countSyllables(entry) === 1 && !endAccented + ? accentOnNFromEnd(ps, 0) + : ps, hasFemEnding - ? mapPsString((x) => x.slice(0, -1), e) - : makePsString(e.p, e.f) + ? mapPsString((x) => x.slice(0, -1), entry) + : psStringFromEntry(entry) ); if ( endsWith( @@ -242,291 +258,268 @@ function vocPattern1Fem({ e, plurals }: PatternInput): InflectionsAndVocative { { p: "ع", f: "a" }, { p: "ع", f: "a'" }, ], - e + entry ) && !["ا", "ی", "ې"].includes(e.p.at(-2) || "") ) { - const base = applyPsString( + const base2 = applyPsString( { f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1), }, - e + entry ); - if (accentIsOnEnd(e)) { - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}ó` }, - ]; + if (endAccented) { const inflections: T.InflectionSet = [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ې`, f: `${base.f}é` }], - second, + [psStringFromEntry(entry)], + [concatPs(base2, é)], + [concatPs(base2, ó)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}o` }, - ]; const inflections: T.InflectionSet = [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ې`, f: `${base.f}e` }], - second, + [psStringFromEntry(entry)], + [concatPs(base2, e)], + [concatPs(base2, o)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } if ( - endsWith([{ p: "ح", f: "a" }], e) && - !["ا", "ی", "ې"].includes(e.p.at(-2) || "") + endsWith([{ p: "ح", f: "a" }], entry) && + !["ا", "ی", "ې"].includes(entry.p.at(-2) || "") ) { const base = applyPsString( { f: (f) => f.slice(0, -1), }, - e + entry ); - if (accentIsOnEnd(e)) { - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}ó` }, - ]; + if (accentIsOnEnd(entry)) { const inflections: T.InflectionSet = [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ې`, f: `${base.f}é` }], - second, + [psStringFromEntry(entry)], + [concatPs(base, é)], + [concatPs(base, ó)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}o` }, - ]; const inflections: T.InflectionSet = [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ې`, f: `${base.f}e` }], - second, + [psStringFromEntry(entry)], + [concatPs(base, e)], + [concatPs(base, o)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } - if (hasFemEnding && accentIsOnEnd(e)) { - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}ó` }, - ]; + if (hasFemEnding && accentIsOnEnd(entry)) { const inflections: T.InflectionSet = [ - [{ p: `${base.p}ه`, f: `${base.f}á` }], - [{ p: `${base.p}ې`, f: `${base.f}é` }], - second, + [concatPs(base, á)], + [concatPs(base, é)], + [concatPs(base, ó)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } - if (isFemNounEntry(e) && endsInConsonant(e)) { - const baseForInf = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e; - const second: T.ArrayOneOrMore = [ - { p: `${baseForInf.p}و`, f: `${baseForInf.f}o` }, - ]; + if (isFemNounEntry(entry) && endsInConsonant(entry)) { + const baseForInf = + countSyllables(entry) === 1 ? accentOnNFromEnd(entry, 0) : e; const inflections: T.InflectionSet = [ - [{ p: e.p, f: e.f }], - [{ p: `${baseForInf.p}ې`, f: `${baseForInf.f}e` }], - second, + [psStringFromEntry(entry)], + [concatPs(baseForInf, e)], + [concatPs(baseForInf, o)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}o` }, - ]; const inflections: T.InflectionSet = [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - [{ p: `${base.p}ې`, f: `${base.f}e` }], - second, + [concatPs(base, a)], + [concatPs(base, e)], + [concatPs(base, o)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } -function vocPattern2Masc({ e, plurals }: PatternInput): InflectionsAndVocative { - const base = makePsString(e.p.slice(0, -1), e.f.slice(0, -2)); - const second: T.ArrayOneOrMore = [ - { p: `${base.p}یو`, f: `${base.f}iyo` }, - { p: `${base.p}و`, f: `${base.f}o` }, +function pattern2Masc({ + entry, + plurals, +}: PatternInput): InflectionsAndVocative { + const base = makePsString(entry.p.slice(0, -1), entry.f.slice(0, -2)); + const inflections: T.InflectionSet = [ + [psStringFromEntry(entry)], + [concatPs(base, { p: "ي", f: "ee" })], + [concatPs(base, { p: "یو", f: "iyo" }), concatPs(base, { p: "و", f: "o" })], ]; return { - inflections: [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ي`, f: `${base.f}ee` }], - second, - ], + inflections, vocative: [ - [{ p: `${base.p}یه`, f: `${base.f}iya` }], - addPlurals(second, plurals), + [concatPs(base, { p: "یه", f: "iya" })], + addPlurals(inflections[2], plurals), ], }; } -function vocPattern2Fem({ e, plurals }: PatternInput): InflectionsAndVocative { +function pattern2Fem({ entry, plurals }: PatternInput): InflectionsAndVocative { const base = makePsString( - e.p.slice(0, -1), - e.f.slice(0, e.f.endsWith("ay") ? -2 : -1) + entry.p.slice(0, -1), + entry.f.slice(0, entry.f.endsWith("ay") ? -2 : -1) ); - const second: T.ArrayOneOrMore = [ - { p: `${base.p}یو`, f: `${base.f}iyo` }, - { p: `${base.p}و`, f: `${base.f}o` }, - ]; const inflections: T.InflectionSet = [ - [{ p: `${base.p}ې`, f: `${base.f}e` }], - [{ p: `${base.p}ې`, f: `${base.f}e` }], - second, + [concatPs(base, e)], + [concatPs(base, e)], + [concatPs(base, { p: "یو", f: "iyo" }), concatPs(base, o)], ]; return { inflections, - vocative: [inflections[0], addPlurals(second, plurals)], + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } -function vocPattern3Masc({ e, plurals }: PatternInput): InflectionsAndVocative { +function pattern3Masc({ + entry, + plurals, +}: PatternInput): InflectionsAndVocative { const base = makePsString( - e.p.slice(0, -1), + entry.p.slice(0, -1), // shouldn't be accents here but remove just to be sure - removeAccents(e.f.slice(0, -2)) + removeAccents(entry.f.slice(0, -2)) ); const baseSyls = countSyllables(base); - const second: T.ArrayOneOrMore = [ - { p: `${base.p}یو`, f: `${base.f}úyo` }, - { p: `${base.p}و`, f: `${base.f}${baseSyls ? "ó" : "o"}` }, + const inflections: T.InflectionSet = [ + [psStringFromEntry(entry)], + [concatPs(base, { p: `ي`, f: baseSyls ? "ée" : "ee" })], + [ + concatPs(base, { p: "یو", f: "úyo" }), + concatPs(base, { p: "و", f: baseSyls ? "ó" : "o" }), + ], ]; return { - inflections: [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ي`, f: `${base.f}${baseSyls ? "ée" : "ee"}` }], - second, - ], + inflections, vocative: [ - [{ p: `${base.p}یه`, f: `${base.f}úya` }], - addPlurals(second, plurals), + [concatPs(base, { p: "یه", f: "úya" })], + addPlurals(inflections[2], plurals), ], }; } -function vocPattern3Fem({ e, plurals }: PatternInput): InflectionsAndVocative { +function pattern3Fem({ entry, plurals }: PatternInput): InflectionsAndVocative { const base = makePsString( - e.p.slice(0, -1), + entry.p.slice(0, -1), // shouldn't be accents here but remove just to be sure - removeAccents(e.f.slice(0, -2)) + removeAccents(entry.f.slice(0, -2)) ); - const second: T.ArrayOneOrMore = [ - { p: `${base.p}یو`, f: `${base.f}úyo` }, - { p: `${base.p}و`, f: `${base.f}ó` }, - ]; + const baseSyls = countSyllables(base); const plain: T.ArrayOneOrMore = [ - { p: `${base.p}ۍ`, f: `${base.f}úy` }, - ]; - return { - inflections: [plain, plain, second], - vocative: [plain, addPlurals(second, plurals)], - }; -} - -function vocPattern4Masc({ e, plurals }: PatternInput): InflectionsAndVocative { - const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e; - const firstInf = accentOnNFromEnd( - makePsString(e.infap || "", e.infaf || ""), - 0 - ); - const secondBase = makePsString(e.infbp || "", e.infbf || ""); - const second: T.ArrayOneOrMore = [ - { p: `${secondBase.p}و`, f: `${secondBase.f}ó` }, + concatPs(base, { p: "ۍ", f: "úy" }), ]; const inflections: T.InflectionSet = [ - [{ p: e.p, f: e.f }], - [firstInf], - second, + plain, + plain, + [concatPs(base, { p: "یو", f: "úyo" }), concatPs(base, baseSyls ? ó : o)], ]; - if (endsInConsonant(e)) { + return { + inflections, + vocative: [plain, addPlurals(inflections[2], plurals)], + }; +} + +function pattern4Masc({ + entry, + plurals, +}: PatternInput): InflectionsAndVocative { + const base = countSyllables(entry) === 1 ? accentOnNFromEnd(entry, 0) : entry; + const firstInf = accentOnNFromEnd( + makePsString(entry.infap || "", entry.infaf || ""), + 0 + ); + const secondBase = makePsString(entry.infbp || "", entry.infbf || ""); + const inflections: T.InflectionSet = [ + [psStringFromEntry(entry)], + [firstInf], + [concatPs(secondBase, ó)], + ]; + if (endsInConsonant(entry)) { return { inflections, - vocative: [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - addPlurals(second, plurals), - ], + vocative: [[concatPs(base, a)], addPlurals(inflections[2], plurals)], }; } // TODO: is this even possible? - if (hasShwaEnding(e)) { + if (hasShwaEnding(entry)) { return { inflections, vocative: [ - [{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }], - addPlurals(second, plurals), + [ + concatPs( + mapPsString((x) => x.slice(0, -1), base), + á + ), + ], + addPlurals(inflections[2], plurals), ], }; } // exception for مېلمه, کوربه return { inflections, - vocative: [[{ p: e.p, f: e.f }], second], + vocative: [[psStringFromEntry(entry)], inflections[2]], }; } -function vocPattern4Fem({ e, plurals }: PatternInput): InflectionsAndVocative { - const base = makePsString(e.infbp || "", e.infbf || ""); - const second = addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals); +function pattern4Fem({ entry }: PatternInput): InflectionsAndVocative { + const base = makePsString(entry.infbp || "", entry.infbf || ""); const inflections: T.InflectionSet = [ - [{ p: `${base.p}ه`, f: `${base.f}á` }], - [{ p: `${base.p}ې`, f: `${base.f}é` }], - second, + [concatPs(base, á)], + [concatPs(base, é)], + [concatPs(base, ó)], ]; return { inflections, - vocative: [inflections[1], second], + vocative: [inflections[1], inflections[2]], }; } -function vocPattern5Masc({ e, plurals }: PatternInput): InflectionsAndVocative { - const base = makePsString(e.infbp || "", e.infbf || ""); - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}o` }, - ]; - return { - inflections: [ - [{ p: e.p, f: e.f }], - [{ p: `${base.p}ه`, f: `${base.f}u` }], - second, - ], - vocative: [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - addPlurals(second, plurals), - ], - }; -} - -function vocPattern5Fem({ e, plurals }: PatternInput): InflectionsAndVocative { - const base = makePsString(e.infbp || "", e.infbf || ""); - const second: T.ArrayOneOrMore = [ - { p: `${base.p}و`, f: `${base.f}o` }, - ]; +function vocPattern5Masc({ + entry, + plurals, +}: PatternInput): InflectionsAndVocative { + const base = makePsString(entry.infbp || "", entry.infbf || ""); const inflections: T.InflectionSet = [ - [{ p: `${base.p}ه`, f: `${base.f}a` }], - [{ p: `${base.p}ې`, f: `${base.f}e` }], - second, + [psStringFromEntry(entry)], + [concatPs(base, { p: "ه", f: "u" })], + [concatPs(base, o)], ]; return { inflections, - vocative: [inflections[1], addPlurals(second, plurals)], + vocative: [[concatPs(base, a)], addPlurals(inflections[2], plurals)], + }; +} + +function pattern5Fem({ entry, plurals }: PatternInput): InflectionsAndVocative { + const base = makePsString(entry.infbp || "", entry.infbf || ""); + const inflections: T.InflectionSet = [ + [concatPs(base, a)], + [concatPs(base, e)], + [concatPs(base, o)], + ]; + return { + inflections, + vocative: [inflections[1], addPlurals(inflections[2], plurals)], }; } diff --git a/src/lib/src/nouns-plural.ts b/src/lib/src/nouns-plural.ts new file mode 100644 index 0000000..d800a37 --- /dev/null +++ b/src/lib/src/nouns-plural.ts @@ -0,0 +1,320 @@ +import { + concatPsString, + endsInConsonant, + endsInAaOrOo, + addOEnding, + splitPsByVarients, + removeEndTick, + endsWith, + hasShwaEnding, +} from "./p-text-helpers"; +import { makePsString } from "./accent-and-ps-utils"; +import { + accentOnNFromEnd, + countSyllables, + removeAccents, +} from "./accent-helpers"; +import * as T from "../../types"; + +function makePashtoPlural( + word: T.DictionaryEntryNoFVars +): T.PluralInflections | undefined { + if (!(word.ppp && word.ppf)) return undefined; + const base = splitPsByVarients(makePsString(word.ppp, word.ppf)); + function getBaseAndO(): T.PluralInflectionSet { + return [base, base.flatMap(addOEnding) as T.ArrayOneOrMore]; + } + if (word.c?.includes("n. m.")) { + return { masc: getBaseAndO() }; + } + if (word.c?.includes("n. f.")) { + return { fem: getBaseAndO() }; + } + // TODO: handle masculine and unisex + return undefined; +} + +function makeBundledPlural( + word: T.DictionaryEntryNoFVars +): T.PluralInflections | undefined { + if (!endsInConsonant(word) || !word.c?.includes("n.")) { + return undefined; + } + const w = makePsString(word.p, word.f); + const base = countSyllables(w) === 1 ? accentOnNFromEnd(w, 0) : w; + return { + masc: [ + [concatPsString(base, { p: "ه", f: "a" })], + [concatPsString(base, { p: "و", f: "o" })], + ], + }; +} + +function makeArabicPlural( + word: T.DictionaryEntryNoFVars +): T.PluralInflections | undefined { + if (!(word.apf && word.app)) return undefined; + const w = makePsString(word.app, word.apf); + const plural = splitPsByVarients(w); + const end = removeAccents(removeEndTick(word.apf).slice(-1)); + // again typescript being dumb and not letting me use a typed key here + const value = [ + plural, + plural.flatMap(addOEnding) as T.ArrayOneOrMore, + ] as T.PluralInflectionSet; + // feminine words that have arabic plurals stay feminine with the plural - ie مرجع - مراجع + // but masculine words that appear feminine in the plural aren't femening with the Arabic plural - ie. نبي - انبیا + if (["i", "e", "a"].includes(end) && word.c?.includes("n. f.")) { + return { fem: value }; + } + return { masc: value }; +} + +export function makePlural( + w: T.DictionaryEntryNoFVars +): + | { plural: T.PluralInflections; bundledPlural?: T.PluralInflections } + | { arabicPlural: T.PluralInflections; bundledPlural?: T.PluralInflections } + | undefined { + function addSecondInf( + plur: T.ArrayOneOrMore | T.PsString + ): T.PluralInflectionSet { + if (!Array.isArray(plur)) { + return addSecondInf([plur]); + } + return [plur, plur.flatMap(addOEnding) as T.ArrayOneOrMore]; + } + if (w.c && w.c.includes("pl.")) { + const plural = addSecondInf(makePsString(w.p, w.f)); + // Typescript being dumb and not letting me do a typed variable for the key + // could try refactoring with an updated TypeScript dependency + if (w.c.includes("n. m.")) return { plural: { masc: plural } }; + if (w.c.includes("n. f.")) return { plural: { fem: plural } }; + } + // exception for mUláa + if (w.f === "mUláa" || w.f === "mUlaa") { + return { + plural: { + masc: [ + [ + { p: "ملایان", f: "mUlaayáan" }, + { p: "ملاګان", f: "mUlaagáan" }, + ], + [ + { p: "ملایانو", f: "mUlaayáano" }, + { p: "ملاګانو", f: "mUlaagáano" }, + ], + ], + }, + }; + } + const arabicPlural = makeArabicPlural(w); + const pashtoPlural = makePashtoPlural(w); + const bundledPlural = makeBundledPlural(w); + function addMascPluralSuffix( + animate?: boolean, + shortSquish?: boolean + ): T.PluralInflectionSet { + if (shortSquish && (w.infap === undefined || w.infaf === undefined)) { + throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`); + } + const b = removeAccents( + shortSquish + ? makePsString( + (w.infap as string).slice(0, -1), + (w.infaf as string).slice(0, -1) + ) + : w + ); + const base = hasShwaEnding(b) + ? makePsString(b.p.slice(0, -1), b.f.slice(0, -1)) + : b; + return addSecondInf( + concatPsString( + base, + animate && !shortSquish + ? { p: "ان", f: "áan" } + : { p: "ونه", f: "óona" } + ) + ); + } + function addAnimUnisexPluralSuffix(): T.UnisexSet { + const base = removeAccents(w); + return { + masc: addMascPluralSuffix(true), + fem: addSecondInf(concatPsString(base, { p: "انې", f: "áane" })), + }; + } + function addEePluralSuffix(gender: T.Gender): T.PluralInflectionSet { + const b = removeAccents(w); + const base = { + p: b.p.slice(0, -1), + f: b.f.slice(0, -2), + }; + const firstInf: T.ArrayOneOrMore = [ + concatPsString( + base, + { p: "یان", f: "iyáan" }, + gender === "fem" ? { p: "ې", f: "e" } : "" + ), + ]; + return [ + firstInf, + firstInf.flatMap(addOEnding), + // firstInf.map(addOEnding), + ] as T.PluralInflectionSet; + } + function addAnimN3UnisexPluralSuffix(): T.UnisexSet { + const b = removeAccents(w); + const base = { + p: b.p.slice(0, -1), + f: b.f.slice(0, -2), + }; + return { + masc: [ + [concatPsString(base, { p: "یان", f: "iyáan" })], + [concatPsString(base, { p: "یانو", f: "iyáano" })], + // TODO: or use addSecondInf method above? + ], + fem: [ + [concatPsString(base, { p: "یانې", f: "iyáane" })], + [concatPsString(base, { p: "یانو", f: "iyáano" })], + ], + }; + } + function addLongVowelSuffix(gender: "masc" | "fem"): T.PluralInflectionSet { + if (pashtoPlural) { + } + const base = removeEndTick(makePsString(w.p, w.f)); + const baseWOutAccents = removeAccents(base); + const space = + w.p.slice(-1) === "ع" || w.p.slice(-1) === "ه" ? { p: " ", f: " " } : ""; + if (gender === "fem") { + return addSecondInf([ + concatPsString(base, space, { p: "وې", f: "we" }), + concatPsString(baseWOutAccents, space, { p: "ګانې", f: "gáane" }), + ]); + } else { + return addSecondInf([ + concatPsString(baseWOutAccents, space, { p: "ګان", f: "gáan" }), + ]); + } + } + // TODO: This should be possible for words like پلویان but not for words like ترورزامن 🤔 + // function addFemToPashtoPlural(i: T.PluralInflections): T.UnisexSet { + // if ("fem" in i && "masc" in i) return i; + // if (!("masc" in i)) throw new Error("bad pashto plural doesn't even have masculine"); + // if (endsInConsonant(i.masc[0][0])) { + // return { + // ...i, + // fem: [ + // i.masc[0].map((x) => concatPsString(x, { p: "ې", f: "e" })) as T.ArrayOneOrMore, + // i.masc[0].map((x) => concatPsString(x, { p: "و", f: "o" })) as T.ArrayOneOrMore, + // ], + // }; + // } + // return { + // ...i, + // fem: i.masc, + // }; + // } + + const shortSquish = !!w.infap && !w.infap.includes("ا"); + const anim = w.c?.includes("anim."); + const type = w.c?.includes("unisex") + ? "unisex noun" + : w.c?.includes("n. m.") + ? "masc noun" + : w.c?.includes("n. f.") + ? "fem noun" + : "other"; + if (pashtoPlural) { + return { + plural: pashtoPlural, + arabicPlural, + }; + } + if (type === "unisex noun") { + // doesn't need to be labelled anim - because it's only with animate nouns that you get the unisex - I THINK + if (endsInConsonant(w) && !w.infap) { + return { + arabicPlural, + bundledPlural, + plural: addAnimUnisexPluralSuffix(), + }; + } + if (shortSquish && !anim) { + return { + arabicPlural, + plural: { masc: addMascPluralSuffix(anim, shortSquish) }, + }; + } + if (endsWith([{ p: "ی", f: "áy" }, { p: "ي" }], w, true)) { + return { arabicPlural, plural: addAnimN3UnisexPluralSuffix() }; + } + // usually shortSquish nouns would never have arabicPlurals -- so we don't have to worry about catching + // arabic plurals for the animat ones, right? + } + if ( + type === "masc noun" && + (shortSquish || ((endsInConsonant(w) || hasShwaEnding(w)) && !w.infap)) && + w.p.slice(-3) !== "توب" + ) { + return { + arabicPlural, + bundledPlural, + plural: { + masc: addMascPluralSuffix(anim, shortSquish), + }, + }; + } + if (type === "masc noun" && endsWith({ p: "ی", f: "áy" }, w, true) && anim) { + const { masc } = addAnimN3UnisexPluralSuffix(); + return { + arabicPlural, + plural: { + masc, + }, + }; + } + if (type === "masc noun" && endsWith({ p: "ي" }, w)) { + const masc = addEePluralSuffix("masc"); + return { + arabicPlural, + plural: { masc }, + }; + } + // TODO: What about endings in long ee / animate at inanimate + if (type === "masc noun" && endsInAaOrOo(w) && !w.infap) { + return { + arabicPlural, + plural: { + masc: addLongVowelSuffix("masc"), + }, + }; + } + // TODO: What about endings in long ee / animate at inanimate + if (type === "fem noun" && endsInAaOrOo(w) && !w.infap) { + return { + arabicPlural, + plural: { + fem: addLongVowelSuffix("fem"), + }, + }; + } + if ( + type === "fem noun" && + (endsWith({ p: "ي" }, w) || (endsWith({ p: "ۍ" }, w) && anim)) + ) { + return { + arabicPlural, + plural: { + fem: addEePluralSuffix("fem"), + }, + }; + } + if (arabicPlural) { + return { arabicPlural, plural: pashtoPlural, bundledPlural }; + } + return undefined; +} diff --git a/src/lib/src/pashto-inflector.test.ts b/src/lib/src/pashto-inflector.test.ts index 72771c3..490dc98 100644 --- a/src/lib/src/pashto-inflector.test.ts +++ b/src/lib/src/pashto-inflector.test.ts @@ -2114,7 +2114,8 @@ const others: T.DictionaryEntry[] = [ adjectives.forEach((word) => { test(`${word.in.p} should inflect properly`, () => { - expect(inflectWord(word.in)).toEqual(word.out); + const out = inflectWord(word.in); + expect(out).toEqual(word.out); }); }); diff --git a/src/lib/src/pashto-inflector.ts b/src/lib/src/pashto-inflector.ts index 913c425..78742ee 100644 --- a/src/lib/src/pashto-inflector.ts +++ b/src/lib/src/pashto-inflector.ts @@ -10,25 +10,14 @@ import { concatInflections, splitDoubleWord, ensureUnisexInflections, - concatPsString, - endsInConsonant, - endsInAaOrOo, - addOEnding, - splitPsByVarients, - removeEndTick, - endsWith, concatPlurals, - hasShwaEnding, } from "./p-text-helpers"; import { makePsString, removeFVarients } from "./accent-and-ps-utils"; -import { - accentOnNFromEnd, - countSyllables, - removeAccents, -} from "./accent-helpers"; +import { removeAccents } from "./accent-helpers"; import * as T from "../../types"; import { getInfsAndVocative } from "./inflections-and-vocative"; import { fmapSingleOrLengthOpts } from "./fp-ps"; +import { makePlural } from "./nouns-plural"; export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput { // If it's a noun/adj, inflect accordingly @@ -131,309 +120,6 @@ export function inflectRegularShwaEndingUnisex( }; } -function makePashtoPlural( - word: T.DictionaryEntryNoFVars -): T.PluralInflections | undefined { - if (!(word.ppp && word.ppf)) return undefined; - const base = splitPsByVarients(makePsString(word.ppp, word.ppf)); - function getBaseAndO(): T.PluralInflectionSet { - return [base, base.flatMap(addOEnding) as T.ArrayOneOrMore]; - } - if (word.c?.includes("n. m.")) { - return { masc: getBaseAndO() }; - } - if (word.c?.includes("n. f.")) { - return { fem: getBaseAndO() }; - } - // TODO: handle masculine and unisex - return undefined; -} - -function makeBundledPlural( - word: T.DictionaryEntryNoFVars -): T.PluralInflections | undefined { - if (!endsInConsonant(word) || !word.c?.includes("n.")) { - return undefined; - } - const w = makePsString(word.p, word.f); - const base = countSyllables(w) === 1 ? accentOnNFromEnd(w, 0) : w; - return { - masc: [ - [concatPsString(base, { p: "ه", f: "a" })], - [concatPsString(base, { p: "و", f: "o" })], - ], - }; -} - -function makeArabicPlural( - word: T.DictionaryEntryNoFVars -): T.PluralInflections | undefined { - if (!(word.apf && word.app)) return undefined; - const w = makePsString(word.app, word.apf); - const plural = splitPsByVarients(w); - const end = removeAccents(removeEndTick(word.apf).slice(-1)); - // again typescript being dumb and not letting me use a typed key here - const value = [ - plural, - plural.flatMap(addOEnding) as T.ArrayOneOrMore, - ] as T.PluralInflectionSet; - // feminine words that have arabic plurals stay feminine with the plural - ie مرجع - مراجع - // but masculine words that appear feminine in the plural aren't femening with the Arabic plural - ie. نبي - انبیا - if (["i", "e", "a"].includes(end) && word.c?.includes("n. f.")) { - return { fem: value }; - } - return { masc: value }; -} - -function makePlural( - w: T.DictionaryEntryNoFVars -): - | { plural: T.PluralInflections; bundledPlural?: T.PluralInflections } - | { arabicPlural: T.PluralInflections; bundledPlural?: T.PluralInflections } - | undefined { - function addSecondInf( - plur: T.ArrayOneOrMore | T.PsString - ): T.PluralInflectionSet { - if (!Array.isArray(plur)) { - return addSecondInf([plur]); - } - return [plur, plur.flatMap(addOEnding) as T.ArrayOneOrMore]; - } - if (w.c && w.c.includes("pl.")) { - const plural = addSecondInf(makePsString(w.p, w.f)); - // Typescript being dumb and not letting me do a typed variable for the key - // could try refactoring with an updated TypeScript dependency - if (w.c.includes("n. m.")) return { plural: { masc: plural } }; - if (w.c.includes("n. f.")) return { plural: { fem: plural } }; - } - // exception for mUláa - if (w.f === "mUláa" || w.f === "mUlaa") { - return { - plural: { - masc: [ - [ - { p: "ملایان", f: "mUlaayáan" }, - { p: "ملاګان", f: "mUlaagáan" }, - ], - [ - { p: "ملایانو", f: "mUlaayáano" }, - { p: "ملاګانو", f: "mUlaagáano" }, - ], - ], - }, - }; - } - const arabicPlural = makeArabicPlural(w); - const pashtoPlural = makePashtoPlural(w); - const bundledPlural = makeBundledPlural(w); - function addMascPluralSuffix( - animate?: boolean, - shortSquish?: boolean - ): T.PluralInflectionSet { - if (shortSquish && (w.infap === undefined || w.infaf === undefined)) { - throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`); - } - const b = removeAccents( - shortSquish - ? makePsString( - (w.infap as string).slice(0, -1), - (w.infaf as string).slice(0, -1) - ) - : w - ); - const base = hasShwaEnding(b) - ? makePsString(b.p.slice(0, -1), b.f.slice(0, -1)) - : b; - return addSecondInf( - concatPsString( - base, - animate && !shortSquish - ? { p: "ان", f: "áan" } - : { p: "ونه", f: "óona" } - ) - ); - } - function addAnimUnisexPluralSuffix(): T.UnisexSet { - const base = removeAccents(w); - return { - masc: addMascPluralSuffix(true), - fem: addSecondInf(concatPsString(base, { p: "انې", f: "áane" })), - }; - } - function addEePluralSuffix(gender: T.Gender): T.PluralInflectionSet { - const b = removeAccents(w); - const base = { - p: b.p.slice(0, -1), - f: b.f.slice(0, -2), - }; - const firstInf: T.ArrayOneOrMore = [ - concatPsString( - base, - { p: "یان", f: "iyáan" }, - gender === "fem" ? { p: "ې", f: "e" } : "" - ), - ]; - return [ - firstInf, - firstInf.flatMap(addOEnding), - // firstInf.map(addOEnding), - ] as T.PluralInflectionSet; - } - function addAnimN3UnisexPluralSuffix(): T.UnisexSet { - const b = removeAccents(w); - const base = { - p: b.p.slice(0, -1), - f: b.f.slice(0, -2), - }; - return { - masc: [ - [concatPsString(base, { p: "یان", f: "iyáan" })], - [concatPsString(base, { p: "یانو", f: "iyáano" })], - // TODO: or use addSecondInf method above? - ], - fem: [ - [concatPsString(base, { p: "یانې", f: "iyáane" })], - [concatPsString(base, { p: "یانو", f: "iyáano" })], - ], - }; - } - function addLongVowelSuffix(gender: "masc" | "fem"): T.PluralInflectionSet { - if (pashtoPlural) { - } - const base = removeEndTick(makePsString(w.p, w.f)); - const baseWOutAccents = removeAccents(base); - const space = - w.p.slice(-1) === "ع" || w.p.slice(-1) === "ه" ? { p: " ", f: " " } : ""; - if (gender === "fem") { - return addSecondInf([ - concatPsString(base, space, { p: "وې", f: "we" }), - concatPsString(baseWOutAccents, space, { p: "ګانې", f: "gáane" }), - ]); - } else { - return addSecondInf([ - concatPsString(baseWOutAccents, space, { p: "ګان", f: "gáan" }), - ]); - } - } - // TODO: This should be possible for words like پلویان but not for words like ترورزامن 🤔 - // function addFemToPashtoPlural(i: T.PluralInflections): T.UnisexSet { - // if ("fem" in i && "masc" in i) return i; - // if (!("masc" in i)) throw new Error("bad pashto plural doesn't even have masculine"); - // if (endsInConsonant(i.masc[0][0])) { - // return { - // ...i, - // fem: [ - // i.masc[0].map((x) => concatPsString(x, { p: "ې", f: "e" })) as T.ArrayOneOrMore, - // i.masc[0].map((x) => concatPsString(x, { p: "و", f: "o" })) as T.ArrayOneOrMore, - // ], - // }; - // } - // return { - // ...i, - // fem: i.masc, - // }; - // } - - const shortSquish = !!w.infap && !w.infap.includes("ا"); - const anim = w.c?.includes("anim."); - const type = w.c?.includes("unisex") - ? "unisex noun" - : w.c?.includes("n. m.") - ? "masc noun" - : w.c?.includes("n. f.") - ? "fem noun" - : "other"; - if (pashtoPlural) { - return { - plural: pashtoPlural, - arabicPlural, - }; - } - if (type === "unisex noun") { - // doesn't need to be labelled anim - because it's only with animate nouns that you get the unisex - I THINK - if (endsInConsonant(w) && !w.infap) { - return { - arabicPlural, - bundledPlural, - plural: addAnimUnisexPluralSuffix(), - }; - } - if (shortSquish && !anim) { - return { - arabicPlural, - plural: { masc: addMascPluralSuffix(anim, shortSquish) }, - }; - } - if (endsWith([{ p: "ی", f: "áy" }, { p: "ي" }], w, true)) { - return { arabicPlural, plural: addAnimN3UnisexPluralSuffix() }; - } - // usually shortSquish nouns would never have arabicPlurals -- so we don't have to worry about catching - // arabic plurals for the animat ones, right? - } - if ( - type === "masc noun" && - (shortSquish || ((endsInConsonant(w) || hasShwaEnding(w)) && !w.infap)) && - w.p.slice(-3) !== "توب" - ) { - return { - arabicPlural, - bundledPlural, - plural: { - masc: addMascPluralSuffix(anim, shortSquish), - }, - }; - } - if (type === "masc noun" && endsWith({ p: "ی", f: "áy" }, w, true) && anim) { - const { masc } = addAnimN3UnisexPluralSuffix(); - return { - arabicPlural, - plural: { - masc, - }, - }; - } - if (type === "masc noun" && endsWith({ p: "ي" }, w)) { - const masc = addEePluralSuffix("masc"); - return { - arabicPlural, - plural: { masc }, - }; - } - // TODO: What about endings in long ee / animate at inanimate - if (type === "masc noun" && endsInAaOrOo(w) && !w.infap) { - return { - arabicPlural, - plural: { - masc: addLongVowelSuffix("masc"), - }, - }; - } - // TODO: What about endings in long ee / animate at inanimate - if (type === "fem noun" && endsInAaOrOo(w) && !w.infap) { - return { - arabicPlural, - plural: { - fem: addLongVowelSuffix("fem"), - }, - }; - } - if ( - type === "fem noun" && - (endsWith({ p: "ي" }, w) || (endsWith({ p: "ۍ" }, w) && anim)) - ) { - return { - arabicPlural, - plural: { - fem: addEePluralSuffix("fem"), - }, - }; - } - if (arabicPlural) { - return { arabicPlural, plural: pashtoPlural, bundledPlural }; - } - return undefined; -} - export function inflectYay( ps: T.SingleOrLengthOpts ): T.SingleOrLengthOpts {