refactor with more abstraction

This commit is contained in:
adueck 2024-07-27 12:10:32 -04:00
parent 502031b054
commit 46fd6e66e8
4 changed files with 560 additions and 560 deletions

View File

@ -14,6 +14,7 @@ import {
hasShwaEnding,
mapPsString,
endsWith,
psStringFromEntry,
} from "./p-text-helpers";
import { removeDuplicates } from "./phrase-building/vp-tools";
import {
@ -25,6 +26,16 @@ import {
isNounEntry,
isNumberEntry,
} from "./type-predicates";
// Import from the source module rather than the compiled `dist` output, so the
// file does not depend on a previous build being present or up to date.
import { semigroupPsString } from "./fp-ps";

/** Joins two PsStrings using the PsString semigroup's `concat`. */
const concatPs = semigroupPsString.concat;

// Endings appended to word bases below. Each comes as an unaccented and an
// accented pair; only the phonetic (`f`) side differs within a pair.
const o = { p: "و", f: "o" };
const ó = { p: "و", f: "ó" };
const a = { p: "ه", f: "a" };
const á = { p: "ه", f: "á" };
const e = { p: "ې", f: "e" };
const é = { p: "ې", f: "é" };
type Plurals =
| {
@ -33,11 +44,11 @@ type Plurals =
}
| undefined;
const endingInSingleARegex = /[^a]'??[aá]'??$/;
const endingInHayOrAynRegex = /[^ا][هع]$/;
// const endingInSingleARegex = /[^a]'??[aá]'??$/;
// const endingInHayOrAynRegex = /[^ا][هع]$/;
export function getInfsAndVocative(
entry: T.DictionaryEntryNoFVars,
entryR: T.DictionaryEntryNoFVars,
plurals: Plurals
):
| {
@ -45,51 +56,51 @@ export function getInfsAndVocative(
vocative?: T.PluralInflections;
}
| false {
if (!isInflectableEntry(entry)) {
if (!isInflectableEntry(entryR)) {
return false;
}
// @ts-ignore
const e: T.InflectableEntry = entry as T.InflectableEntry;
const pattern = getInflectionPattern(e);
const entry: T.InflectableEntry = entryR as T.InflectableEntry;
const pattern = getInflectionPattern(entry);
if (
pattern === 0 &&
isFemNounEntry(e) &&
isAnimNounEntry(e) &&
endsInConsonant(e)
isFemNounEntry(entry) &&
isAnimNounEntry(entry) &&
endsInConsonant(entry)
) {
return {
vocative: vocFemAnimException({
e,
entry,
plurals: genderPlural("fem", plurals),
}),
};
}
const gender: T.Gender | "unisex" =
isAdjOrUnisexNounEntry(e) || isNumberEntry(e)
isAdjOrUnisexNounEntry(entry) || isNumberEntry(entry)
? "unisex"
: isMascNounEntry(e)
: isMascNounEntry(entry)
? "masc"
: "fem";
if (pattern === 0) {
return false;
}
if (pattern === 6) {
return pattern6({ e, plurals: genderPlural("fem", plurals) });
return pattern6({ entry, plurals: genderPlural("fem", plurals) });
}
const funcs = patternFuncs[pattern];
const masc =
gender === "unisex" || gender === "masc"
? funcs.masc({ e, plurals: genderPlural("masc", plurals) })
? funcs.masc({ entry, plurals: genderPlural("masc", plurals) })
: undefined;
const fem =
gender === "unisex" || gender === "fem"
? funcs.fem({ e, plurals: genderPlural("fem", plurals) })
? funcs.fem({ entry, plurals: genderPlural("fem", plurals) })
: undefined;
return aggregateInfsAndVoc(masc, fem);
}
type PatternInput = {
e: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry;
entry: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry;
plurals: T.PsString[];
};
@ -107,45 +118,45 @@ const patternFuncs: Record<
fem: vocPattern1Fem,
},
2: {
masc: vocPattern2Masc,
fem: vocPattern2Fem,
masc: pattern2Masc,
fem: pattern2Fem,
},
3: {
masc: vocPattern3Masc,
fem: vocPattern3Fem,
masc: pattern3Masc,
fem: pattern3Fem,
},
4: {
masc: vocPattern4Masc,
fem: vocPattern4Fem,
masc: pattern4Masc,
fem: pattern4Fem,
},
5: {
masc: vocPattern5Masc,
fem: vocPattern5Fem,
fem: pattern5Fem,
},
};
function addPlurals(
e: T.ArrayOneOrMore<T.PsString>,
x: T.ArrayOneOrMore<T.PsString>,
plurals: T.PsString[]
): T.ArrayOneOrMore<T.PsString> {
if (!plurals) {
return e;
return x;
}
return removeDuplicates([...e, ...plurals]) as T.ArrayOneOrMore<T.PsString>;
return removeDuplicates([...x, ...plurals]) as T.ArrayOneOrMore<T.PsString>;
}
function pattern6({ e, plurals }: PatternInput): {
function pattern6({ entry, plurals }: PatternInput): {
inflections: T.Inflections;
vocative: T.PluralInflections;
} {
const base = removeAccents({ p: e.p.slice(0, -1), f: e.f.slice(0, -2) });
const base = removeAccents({
p: entry.p.slice(0, -1),
f: entry.f.slice(0, -2),
});
const inflections: T.InflectionSet = [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ۍ`, f: `${base.f}úy` }],
[
{ p: `${base.p}یو`, f: `${base.f}úyo` },
{ p: `${base.p}و`, f: `${base.f}ó` },
],
[psStringFromEntry(entry)],
[concatPs(base, { p: "ۍ", f: "úy" })],
[concatPs(base, { p: "یو", f: "úyo" }), concatPs(base, ó)],
];
return {
inflections: {
@ -158,83 +169,88 @@ function pattern6({ e, plurals }: PatternInput): {
}
function vocFemAnimException({
e,
entry,
plurals,
}: PatternInput): T.PluralInflections {
if (!e.ppp || !e.ppf) {
if (!entry.ppp || !entry.ppf) {
throw new Error(
"plural missing for feminine animate exception noun " + e.p
"plural missing for feminine animate exception noun " + entry.p
);
}
// TODO: HANDLE BETTER WITH PLURALS!
const plurBase = mapPsString(
(x) => x.slice(0, -1),
makePsString(e.ppp, e.ppf)
makePsString(entry.ppp, entry.ppf)
);
const base =
countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f);
countSyllables(entry) === 1
? accentOnNFromEnd(entry, 0)
: psStringFromEntry(entry);
return {
fem: [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
addPlurals([{ p: `${plurBase.p}و`, f: `${plurBase.f}o` }], plurals),
],
fem: [[concatPs(base, e)], addPlurals([concatPs(plurBase, o)], plurals)],
};
}
function vocPattern1Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
if (isNounEntry(e) && endsInTob(e)) {
const base = mapPsString((x) => x.slice(0, -3), e);
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}تبو`, f: `${base.f}tábo` },
function vocPattern1Masc({
entry,
plurals,
}: PatternInput): InflectionsAndVocative {
const plain = psStringFromEntry(entry);
if (isNounEntry(entry) && endsInTob(entry)) {
const base = mapPsString((x) => x.slice(0, -3), entry);
const inflections: T.InflectionSet = [
[plain],
[concatPs(base, { p: "تابه", f: "taabú" })],
[concatPs(base, { p: "تبو", f: "tábo" })],
];
return {
inflections: [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}تابه`, f: `${base.f}taabú` }],
second,
],
vocative: [[{ p: `${e.p}ه`, f: `${e.f}a` }], addPlurals(second, plurals)],
inflections,
vocative: [[concatPs(plain, a)], addPlurals(inflections[2], plurals)],
};
}
const shwaEnding = hasShwaEnding(e);
const shwaEnding = hasShwaEnding(entry);
const base = mapGen(
(ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), e)
(ps) => (countSyllables(entry) === 1 ? accentOnNFromEnd(ps, 0) : ps),
mapPsString((x: string): string => (shwaEnding ? x.slice(0, -1) : x), entry)
);
if (shwaEnding && e.f.endsWith("ú")) {
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}ó` },
if (shwaEnding && entry.f.endsWith("ú")) {
const inflections: T.InflectionSet = [
[plain],
[plain],
[concatPs(base, ó)],
];
return {
inflections: [[{ p: e.p, f: e.f }], [{ p: e.p, f: e.f }], second],
vocative: [
[{ p: `${base.p}ه`, f: `${base.f}á` }],
addPlurals(second, plurals),
],
inflections,
vocative: [[concatPs(base, á)], addPlurals(inflections[2], plurals)],
};
}
// TODO: shouldn't this be accent-sensitive?
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}o` },
];
const inflections: T.InflectionSet = [[plain], [plain], [concatPs(base, o)]];
return {
inflections: [[{ p: e.p, f: e.f }], [{ p: e.p, f: e.f }], second],
inflections,
vocative: [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
addPlurals(second, plurals),
[concatPs(base, { p: "ه", f: "a" })],
addPlurals(inflections[2], plurals),
],
};
}
// TODO this is HUGELY repetitive refactor this!
function vocPattern1Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
const shwaEnding = hasShwaEnding(e);
const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding;
function vocPattern1Fem({
entry,
plurals,
}: PatternInput): InflectionsAndVocative {
const shwaEnding = hasShwaEnding(entry);
const hasFemEnding = endsWith([{ p: "ه", f: "a" }], entry) || shwaEnding;
const endAccented = accentIsOnEnd(entry);
const base = mapGen(
(ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
(ps) =>
countSyllables(entry) === 1 && !endAccented
? accentOnNFromEnd(ps, 0)
: ps,
hasFemEnding
? mapPsString((x) => x.slice(0, -1), e)
: makePsString(e.p, e.f)
? mapPsString((x) => x.slice(0, -1), entry)
: psStringFromEntry(entry)
);
if (
endsWith(
@ -242,291 +258,268 @@ function vocPattern1Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
{ p: "ع", f: "a" },
{ p: "ع", f: "a'" },
],
e
entry
) &&
!["ا", "ی", "ې"].includes(e.p.at(-2) || "")
) {
const base = applyPsString(
const base2 = applyPsString(
{
f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1),
},
e
entry
);
if (accentIsOnEnd(e)) {
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}ó` },
];
if (endAccented) {
const inflections: T.InflectionSet = [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ې`, f: `${base.f}é` }],
second,
[psStringFromEntry(entry)],
[concatPs(base2, é)],
[concatPs(base2, ó)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}o` },
];
const inflections: T.InflectionSet = [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ې`, f: `${base.f}e` }],
second,
[psStringFromEntry(entry)],
[concatPs(base2, e)],
[concatPs(base2, o)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
if (
endsWith([{ p: "ح", f: "a" }], e) &&
!["ا", "ی", "ې"].includes(e.p.at(-2) || "")
endsWith([{ p: "ح", f: "a" }], entry) &&
!["ا", "ی", "ې"].includes(entry.p.at(-2) || "")
) {
const base = applyPsString(
{
f: (f) => f.slice(0, -1),
},
e
entry
);
if (accentIsOnEnd(e)) {
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}ó` },
];
if (accentIsOnEnd(entry)) {
const inflections: T.InflectionSet = [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ې`, f: `${base.f}é` }],
second,
[psStringFromEntry(entry)],
[concatPs(base, é)],
[concatPs(base, ó)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}o` },
];
const inflections: T.InflectionSet = [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ې`, f: `${base.f}e` }],
second,
[psStringFromEntry(entry)],
[concatPs(base, e)],
[concatPs(base, o)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
if (hasFemEnding && accentIsOnEnd(e)) {
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}ó` },
];
if (hasFemEnding && accentIsOnEnd(entry)) {
const inflections: T.InflectionSet = [
[{ p: `${base.p}ه`, f: `${base.f}á` }],
[{ p: `${base.p}ې`, f: `${base.f}é` }],
second,
[concatPs(base, á)],
[concatPs(base, é)],
[concatPs(base, ó)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
if (isFemNounEntry(e) && endsInConsonant(e)) {
const baseForInf = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e;
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${baseForInf.p}و`, f: `${baseForInf.f}o` },
];
if (isFemNounEntry(entry) && endsInConsonant(entry)) {
// Base for the inflected forms: single-syllable words get an accent on their
// only syllable, longer words are used as they are. The fallback must be the
// word itself; the bare identifier `e` is the module-level ې/e ending constant,
// not the dictionary entry.
const baseForInf =
  countSyllables(entry) === 1
    ? accentOnNFromEnd(entry, 0)
    : psStringFromEntry(entry);
const inflections: T.InflectionSet = [
[{ p: e.p, f: e.f }],
[{ p: `${baseForInf.p}ې`, f: `${baseForInf.f}e` }],
second,
[psStringFromEntry(entry)],
[concatPs(baseForInf, e)],
[concatPs(baseForInf, o)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}o` },
];
const inflections: T.InflectionSet = [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
[{ p: `${base.p}ې`, f: `${base.f}e` }],
second,
[concatPs(base, a)],
[concatPs(base, e)],
[concatPs(base, o)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
function vocPattern2Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
const base = makePsString(e.p.slice(0, -1), e.f.slice(0, -2));
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}یو`, f: `${base.f}iyo` },
{ p: `${base.p}و`, f: `${base.f}o` },
function pattern2Masc({
entry,
plurals,
}: PatternInput): InflectionsAndVocative {
const base = makePsString(entry.p.slice(0, -1), entry.f.slice(0, -2));
const inflections: T.InflectionSet = [
[psStringFromEntry(entry)],
[concatPs(base, { p: "ي", f: "ee" })],
[concatPs(base, { p: "یو", f: "iyo" }), concatPs(base, { p: "و", f: "o" })],
];
return {
inflections: [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ي`, f: `${base.f}ee` }],
second,
],
inflections,
vocative: [
[{ p: `${base.p}یه`, f: `${base.f}iya` }],
addPlurals(second, plurals),
[concatPs(base, { p: "یه", f: "iya" })],
addPlurals(inflections[2], plurals),
],
};
}
function vocPattern2Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
function pattern2Fem({ entry, plurals }: PatternInput): InflectionsAndVocative {
const base = makePsString(
e.p.slice(0, -1),
e.f.slice(0, e.f.endsWith("ay") ? -2 : -1)
entry.p.slice(0, -1),
entry.f.slice(0, entry.f.endsWith("ay") ? -2 : -1)
);
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}یو`, f: `${base.f}iyo` },
{ p: `${base.p}و`, f: `${base.f}o` },
];
const inflections: T.InflectionSet = [
[{ p: `${base.p}ې`, f: `${base.f}e` }],
[{ p: `${base.p}ې`, f: `${base.f}e` }],
second,
[concatPs(base, e)],
[concatPs(base, e)],
[concatPs(base, { p: "یو", f: "iyo" }), concatPs(base, o)],
];
return {
inflections,
vocative: [inflections[0], addPlurals(second, plurals)],
vocative: [inflections[1], addPlurals(inflections[2], plurals)],
};
}
function vocPattern3Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
function pattern3Masc({
entry,
plurals,
}: PatternInput): InflectionsAndVocative {
const base = makePsString(
e.p.slice(0, -1),
entry.p.slice(0, -1),
// shouldn't be accents here but remove just to be sure
removeAccents(e.f.slice(0, -2))
removeAccents(entry.f.slice(0, -2))
);
const baseSyls = countSyllables(base);
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}یو`, f: `${base.f}úyo` },
{ p: `${base.p}و`, f: `${base.f}${baseSyls ? "ó" : "o"}` },
const inflections: T.InflectionSet = [
[psStringFromEntry(entry)],
[concatPs(base, { p: `ي`, f: baseSyls ? "ée" : "ee" })],
[
concatPs(base, { p: "یو", f: "úyo" }),
concatPs(base, { p: "و", f: baseSyls ? "ó" : "o" }),
],
];
return {
inflections: [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ي`, f: `${base.f}${baseSyls ? "ée" : "ee"}` }],
second,
],
inflections,
vocative: [
[{ p: `${base.p}یه`, f: `${base.f}úya` }],
addPlurals(second, plurals),
[concatPs(base, { p: "یه", f: "úya" })],
addPlurals(inflections[2], plurals),
],
};
}
function vocPattern3Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
function pattern3Fem({ entry, plurals }: PatternInput): InflectionsAndVocative {
const base = makePsString(
e.p.slice(0, -1),
entry.p.slice(0, -1),
// shouldn't be accents here but remove just to be sure
removeAccents(e.f.slice(0, -2))
removeAccents(entry.f.slice(0, -2))
);
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}یو`, f: `${base.f}úyo` },
{ p: `${base.p}و`, f: `${base.f}ó` },
];
const baseSyls = countSyllables(base);
const plain: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}ۍ`, f: `${base.f}úy` },
];
return {
inflections: [plain, plain, second],
vocative: [plain, addPlurals(second, plurals)],
};
}
function vocPattern4Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e;
const firstInf = accentOnNFromEnd(
makePsString(e.infap || "", e.infaf || ""),
0
);
const secondBase = makePsString(e.infbp || "", e.infbf || "");
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${secondBase.p}و`, f: `${secondBase.f}ó` },
concatPs(base, { p: "ۍ", f: "úy" }),
];
const inflections: T.InflectionSet = [
[{ p: e.p, f: e.f }],
[firstInf],
second,
plain,
plain,
[concatPs(base, { p: "یو", f: "úyo" }), concatPs(base, baseSyls ? ó : o)],
];
if (endsInConsonant(e)) {
return {
inflections,
vocative: [plain, addPlurals(inflections[2], plurals)],
};
}
function pattern4Masc({
entry,
plurals,
}: PatternInput): InflectionsAndVocative {
const base = countSyllables(entry) === 1 ? accentOnNFromEnd(entry, 0) : entry;
const firstInf = accentOnNFromEnd(
makePsString(entry.infap || "", entry.infaf || ""),
0
);
const secondBase = makePsString(entry.infbp || "", entry.infbf || "");
const inflections: T.InflectionSet = [
[psStringFromEntry(entry)],
[firstInf],
[concatPs(secondBase, ó)],
];
if (endsInConsonant(entry)) {
return {
inflections,
vocative: [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
addPlurals(second, plurals),
],
vocative: [[concatPs(base, a)], addPlurals(inflections[2], plurals)],
};
}
// TODO: is this even possible?
if (hasShwaEnding(e)) {
if (hasShwaEnding(entry)) {
return {
inflections,
vocative: [
[{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }],
addPlurals(second, plurals),
[
concatPs(
mapPsString((x) => x.slice(0, -1), base),
á
),
],
addPlurals(inflections[2], plurals),
],
};
}
// exception for مېلمه, کوربه
return {
inflections,
vocative: [[{ p: e.p, f: e.f }], second],
vocative: [[psStringFromEntry(entry)], inflections[2]],
};
}
function vocPattern4Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
const base = makePsString(e.infbp || "", e.infbf || "");
const second = addPlurals([{ p: `${base.p}و`, f: `${base.f}ó` }], plurals);
function pattern4Fem({ entry }: PatternInput): InflectionsAndVocative {
const base = makePsString(entry.infbp || "", entry.infbf || "");
const inflections: T.InflectionSet = [
[{ p: `${base.p}ه`, f: `${base.f}á` }],
[{ p: `${base.p}ې`, f: `${base.f}é` }],
second,
[concatPs(base, á)],
[concatPs(base, é)],
[concatPs(base, ó)],
];
return {
inflections,
vocative: [inflections[1], second],
vocative: [inflections[1], inflections[2]],
};
}
function vocPattern5Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
const base = makePsString(e.infbp || "", e.infbf || "");
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}o` },
];
return {
inflections: [
[{ p: e.p, f: e.f }],
[{ p: `${base.p}ه`, f: `${base.f}u` }],
second,
],
vocative: [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
addPlurals(second, plurals),
],
};
}
function vocPattern5Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
const base = makePsString(e.infbp || "", e.infbf || "");
const second: T.ArrayOneOrMore<T.PsString> = [
{ p: `${base.p}و`, f: `${base.f}o` },
];
function vocPattern5Masc({
entry,
plurals,
}: PatternInput): InflectionsAndVocative {
const base = makePsString(entry.infbp || "", entry.infbf || "");
const inflections: T.InflectionSet = [
[{ p: `${base.p}ه`, f: `${base.f}a` }],
[{ p: `${base.p}ې`, f: `${base.f}e` }],
second,
[psStringFromEntry(entry)],
[concatPs(base, { p: "ه", f: "u" })],
[concatPs(base, o)],
];
return {
inflections,
vocative: [inflections[1], addPlurals(second, plurals)],
vocative: [[concatPs(base, a)], addPlurals(inflections[2], plurals)],
};
}
/**
 * Builds the feminine inflection set and vocative for a pattern 5 entry.
 *
 * The stem is read from the entry's `infbp` / `infbf` fields (an empty string
 * is used for whichever is missing) and the ه/a, ې/e and و/o endings are
 * appended to it to give the three inflection slots.
 *
 * @param entry - the dictionary entry being inflected
 * @param plurals - extra plural forms merged into the second vocative slot
 * @returns the three inflections, plus a vocative made of the first inflection
 *   and the second inflection combined with `plurals`
 */
function pattern5Fem({ entry, plurals }: PatternInput): InflectionsAndVocative {
  const stem = makePsString(entry.infbp || "", entry.infbf || "");
  const withEnding = (ending: T.PsString): T.ArrayOneOrMore<T.PsString> => [
    concatPs(stem, ending),
  ];
  const plainForm = withEnding(a);
  const firstInflection = withEnding(e);
  const secondInflection = withEnding(o);
  const inflections: T.InflectionSet = [
    plainForm,
    firstInflection,
    secondInflection,
  ];
  return {
    inflections,
    // the vocative singular reuses the first inflection as-is
    vocative: [firstInflection, addPlurals(secondInflection, plurals)],
  };
}

320
src/lib/src/nouns-plural.ts Normal file
View File

@ -0,0 +1,320 @@
import {
concatPsString,
endsInConsonant,
endsInAaOrOo,
addOEnding,
splitPsByVarients,
removeEndTick,
endsWith,
hasShwaEnding,
} from "./p-text-helpers";
import { makePsString } from "./accent-and-ps-utils";
import {
accentOnNFromEnd,
countSyllables,
removeAccents,
} from "./accent-helpers";
import * as T from "../../types";
/**
 * Builds the plural inflections from the plural recorded on the entry in its
 * `ppp` / `ppf` fields.
 *
 * @param word - the dictionary entry to read the plural from
 * @returns the plural set keyed by gender (`masc` when the `c` field contains
 *   "n. m.", otherwise `fem` when it contains "n. f."), or `undefined` when
 *   either plural field is missing or neither marker is present
 */
function makePashtoPlural(
  word: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined {
  const { ppp, ppf, c } = word;
  if (!ppp || !ppf) {
    return undefined;
  }
  // the recorded plural may hold several variants; split them up first
  const variants = splitPsByVarients(makePsString(ppp, ppf));
  // first slot: the variants themselves, second slot: each one run through addOEnding
  const buildSet = (): T.PluralInflectionSet => [
    variants,
    variants.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
  ];
  if (c?.includes("n. m.")) {
    return { masc: buildSet() };
  }
  if (c?.includes("n. f.")) {
    return { fem: buildSet() };
  }
  // TODO: handle masculine and unisex
  return undefined;
}
/**
 * Builds the "bundled" plural of a noun: the word with ه/a appended as the
 * first form and و/o appended as the second.
 *
 * @param word - the dictionary entry to pluralize
 * @returns a masculine plural set, or `undefined` unless the word ends in a
 *   consonant and its `c` field contains "n."
 */
function makeBundledPlural(
  word: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined {
  const applies = endsInConsonant(word) && word.c?.includes("n.");
  if (!applies) {
    return undefined;
  }
  const plain = makePsString(word.p, word.f);
  // single-syllable words get an accent on that syllable before the suffix goes on
  const stem = countSyllables(plain) === 1 ? accentOnNFromEnd(plain, 0) : plain;
  const firstForm: T.ArrayOneOrMore<T.PsString> = [
    concatPsString(stem, { p: "ه", f: "a" }),
  ];
  const secondForm: T.ArrayOneOrMore<T.PsString> = [
    concatPsString(stem, { p: "و", f: "o" }),
  ];
  return { masc: [firstForm, secondForm] };
}
/**
 * Builds the plural inflections from the Arabic plural recorded on the entry
 * in its `app` / `apf` fields.
 *
 * @param word - the dictionary entry to read the Arabic plural from
 * @returns the plural set under `fem` or `masc` (see the gender rule in the
 *   body), or `undefined` when either Arabic plural field is missing
 */
function makeArabicPlural(
  word: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined {
  const { app, apf, c } = word;
  if (!apf || !app) {
    return undefined;
  }
  const forms = splitPsByVarients(makePsString(app, apf));
  // last phonetic letter of the plural, ignoring a trailing tick and any accent
  const lastLetter = removeAccents(removeEndTick(apf).slice(-1));
  // asserted as a whole because a typed key could not be used here
  const inflectionSet = [
    forms,
    forms.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
  ] as T.PluralInflectionSet;
  // Feminine words that have Arabic plurals stay feminine in the plural
  // - ie. مرجع - مراجع
  // but masculine words that merely look feminine in the plural are not
  // feminine with the Arabic plural - ie. نبي - انبیا
  const staysFeminine =
    ["i", "e", "a"].includes(lastLetter) && c?.includes("n. f.");
  return staysFeminine ? { fem: inflectionSet } : { masc: inflectionSet };
}
/**
 * Works out the plural forms available for a dictionary entry.
 *
 * The checks run in this order and the first match wins:
 *  1. entries whose `c` field contains "pl." are already plural and are used
 *     as their own plural (when also marked "n. m." or "n. f.")
 *  2. a hard-coded exception for mUláa
 *  3. a plural recorded on the entry (`ppp` / `ppf`)
 *  4. suffix rules chosen by noun type, ending, animacy and irregular
 *     inflection info
 *  5. an Arabic plural on its own, when nothing else applied
 *
 * @param w - the dictionary entry to pluralize
 * @returns an object holding whichever of `plural`, `arabicPlural` and
 *   `bundledPlural` apply (some keys may be present but `undefined`), or
 *   `undefined` when no plural can be made
 * @throws Error when a suffix rule needs the irregular inflection fields
 *   (`infap` / `infaf`) and one of them is `undefined`
 */
export function makePlural(
  w: T.DictionaryEntryNoFVars
):
  | { plural: T.PluralInflections; bundledPlural?: T.PluralInflections }
  | { arabicPlural: T.PluralInflections; bundledPlural?: T.PluralInflections }
  | undefined {
  /**
   * Turns one plural form, or a list of them, into a full plural inflection
   * set: the forms themselves followed by each form run through addOEnding.
   */
  function addSecondInf(
    plur: T.ArrayOneOrMore<T.PsString> | T.PsString
  ): T.PluralInflectionSet {
    if (!Array.isArray(plur)) {
      return addSecondInf([plur]);
    }
    return [plur, plur.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>];
  }

  // words that are already plural are used as their own plural
  if (w.c && w.c.includes("pl.")) {
    const plural = addSecondInf(makePsString(w.p, w.f));
    // Typescript being dumb and not letting me do a typed variable for the key
    // could try refactoring with an updated TypeScript dependency
    if (w.c.includes("n. m.")) return { plural: { masc: plural } };
    if (w.c.includes("n. f.")) return { plural: { fem: plural } };
  }
  // exception for mUláa
  if (w.f === "mUláa" || w.f === "mUlaa") {
    return {
      plural: {
        masc: [
          [
            { p: "ملایان", f: "mUlaayáan" },
            { p: "ملاګان", f: "mUlaagáan" },
          ],
          [
            { p: "ملایانو", f: "mUlaayáano" },
            { p: "ملاګانو", f: "mUlaagáano" },
          ],
        ],
      },
    };
  }
  const arabicPlural = makeArabicPlural(w);
  const pashtoPlural = makePashtoPlural(w);
  const bundledPlural = makeBundledPlural(w);

  /**
   * Adds a masculine plural suffix to the word.
   *
   * With `shortSquish` the base is the irregular inflection (`infap` /
   * `infaf`) minus its last character; otherwise it is the word itself.
   * Accents are removed, and the last character is dropped when the base has
   * a shwa ending. The suffix is ان/áan when `animate` is set and
   * `shortSquish` is not, otherwise ونه/óona.
   *
   * @throws Error when `shortSquish` is set but `infap` or `infaf` is undefined
   */
  function addMascPluralSuffix(
    animate?: boolean,
    shortSquish?: boolean
  ): T.PluralInflectionSet {
    if (shortSquish && (w.infap === undefined || w.infaf === undefined)) {
      throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`);
    }
    const b = removeAccents(
      shortSquish
        ? makePsString(
            // safe to assert: the guard above threw if either was undefined
            (w.infap as string).slice(0, -1),
            (w.infaf as string).slice(0, -1)
          )
        : w
    );
    const base = hasShwaEnding(b)
      ? makePsString(b.p.slice(0, -1), b.f.slice(0, -1))
      : b;
    return addSecondInf(
      concatPsString(
        base,
        animate && !shortSquish
          ? { p: "ان", f: "áan" }
          : { p: "ونه", f: "óona" }
      )
    );
  }

  /**
   * Plural for both genders: the animate masculine suffix for `masc`, and
   * انې/áane on the unaccented word for `fem`.
   */
  function addAnimUnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
    const base = removeAccents(w);
    return {
      masc: addMascPluralSuffix(true),
      fem: addSecondInf(concatPsString(base, { p: "انې", f: "áane" })),
    };
  }

  /**
   * Replaces the ending of the unaccented word (last script letter, last two
   * phonetic letters) with یان/iyáan, plus ې/e for the feminine.
   */
  function addEePluralSuffix(gender: T.Gender): T.PluralInflectionSet {
    const b = removeAccents(w);
    const base = {
      p: b.p.slice(0, -1),
      f: b.f.slice(0, -2),
    };
    const firstInf: T.ArrayOneOrMore<T.PsString> = [
      concatPsString(
        base,
        { p: "یان", f: "iyáan" },
        gender === "fem" ? { p: "ې", f: "e" } : ""
      ),
    ];
    return [
      firstInf,
      firstInf.flatMap(addOEnding),
      // firstInf.map(addOEnding),
    ] as T.PluralInflectionSet;
  }

  /**
   * Plural for both genders built on the same trimmed base as
   * addEePluralSuffix: یان/iyáan (masc) and یانې/iyáane (fem), both taking
   * یانو/iyáano as the second form.
   */
  function addAnimN3UnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
    const b = removeAccents(w);
    const base = {
      p: b.p.slice(0, -1),
      f: b.f.slice(0, -2),
    };
    return {
      masc: [
        [concatPsString(base, { p: "یان", f: "iyáan" })],
        [concatPsString(base, { p: "یانو", f: "iyáano" })],
        // TODO: or use addSecondInf method above?
      ],
      fem: [
        [concatPsString(base, { p: "یانې", f: "iyáane" })],
        [concatPsString(base, { p: "یانو", f: "iyáano" })],
      ],
    };
  }

  /**
   * Plural for words ending in a long vowel: ګان/gáan for the masculine, وې/we
   * and ګانې/gáane for the feminine. A space goes between word and suffix
   * when the word's last script letter is ع or ه.
   */
  function addLongVowelSuffix(gender: "masc" | "fem"): T.PluralInflectionSet {
    const base = removeEndTick(makePsString(w.p, w.f));
    const baseWOutAccents = removeAccents(base);
    const space =
      w.p.slice(-1) === "ع" || w.p.slice(-1) === "ه" ? { p: " ", f: " " } : "";
    if (gender === "fem") {
      return addSecondInf([
        // the وې/we form keeps the word's accent, the ګانې/gáane form does not
        concatPsString(base, space, { p: "وې", f: "we" }),
        concatPsString(baseWOutAccents, space, { p: "ګانې", f: "gáane" }),
      ]);
    } else {
      return addSecondInf([
        concatPsString(baseWOutAccents, space, { p: "ګان", f: "gáan" }),
      ]);
    }
  }

  // TODO: This should be possible for words like پلویان but not for words like ترورزامن 🤔
  // function addFemToPashtoPlural(i: T.PluralInflections): T.UnisexSet<T.PluralInflectionSet> {
  //   if ("fem" in i && "masc" in i) return i;
  //   if (!("masc" in i)) throw new Error("bad pashto plural doesn't even have masculine");
  //   if (endsInConsonant(i.masc[0][0])) {
  //     return {
  //       ...i,
  //       fem: [
  //         i.masc[0].map((x) => concatPsString(x, { p: "ې", f: "e" })) as T.ArrayOneOrMore<T.PsString>,
  //         i.masc[0].map((x) => concatPsString(x, { p: "و", f: "o" })) as T.ArrayOneOrMore<T.PsString>,
  //       ],
  //     };
  //   }
  //   return {
  //     ...i,
  //     fem: i.masc,
  //   };
  // }

  // "short squish": the entry has an irregular first inflection (`infap`) and
  // that inflection contains no ا
  const shortSquish = !!w.infap && !w.infap.includes("ا");
  const anim = w.c?.includes("anim.");
  const type = w.c?.includes("unisex")
    ? "unisex noun"
    : w.c?.includes("n. m.")
    ? "masc noun"
    : w.c?.includes("n. f.")
    ? "fem noun"
    : "other";

  // a plural recorded on the entry takes priority over every suffix rule
  if (pashtoPlural) {
    return {
      plural: pashtoPlural,
      arabicPlural,
    };
  }
  if (type === "unisex noun") {
    // doesn't need to be labelled anim - because it's only with animate nouns that you get the unisex - I THINK
    if (endsInConsonant(w) && !w.infap) {
      return {
        arabicPlural,
        bundledPlural,
        plural: addAnimUnisexPluralSuffix(),
      };
    }
    if (shortSquish && !anim) {
      return {
        arabicPlural,
        plural: { masc: addMascPluralSuffix(anim, shortSquish) },
      };
    }
    if (endsWith([{ p: "ی", f: "áy" }, { p: "ي" }], w, true)) {
      return { arabicPlural, plural: addAnimN3UnisexPluralSuffix() };
    }
    // usually shortSquish nouns would never have arabicPlurals -- so we don't have to worry about catching
    // arabic plurals for the animate ones, right?
  }
  if (
    type === "masc noun" &&
    (shortSquish || ((endsInConsonant(w) || hasShwaEnding(w)) && !w.infap)) &&
    w.p.slice(-3) !== "توب"
  ) {
    return {
      arabicPlural,
      bundledPlural,
      plural: {
        masc: addMascPluralSuffix(anim, shortSquish),
      },
    };
  }
  if (type === "masc noun" && endsWith({ p: "ی", f: "áy" }, w, true) && anim) {
    const { masc } = addAnimN3UnisexPluralSuffix();
    return {
      arabicPlural,
      plural: {
        masc,
      },
    };
  }
  if (type === "masc noun" && endsWith({ p: "ي" }, w)) {
    const masc = addEePluralSuffix("masc");
    return {
      arabicPlural,
      plural: { masc },
    };
  }
  // TODO: What about endings in long ee / animate at inanimate
  if (type === "masc noun" && endsInAaOrOo(w) && !w.infap) {
    return {
      arabicPlural,
      plural: {
        masc: addLongVowelSuffix("masc"),
      },
    };
  }
  // TODO: What about endings in long ee / animate at inanimate
  if (type === "fem noun" && endsInAaOrOo(w) && !w.infap) {
    return {
      arabicPlural,
      plural: {
        fem: addLongVowelSuffix("fem"),
      },
    };
  }
  if (
    type === "fem noun" &&
    (endsWith({ p: "ي" }, w) || (endsWith({ p: "ۍ" }, w) && anim))
  ) {
    return {
      arabicPlural,
      plural: {
        fem: addEePluralSuffix("fem"),
      },
    };
  }
  if (arabicPlural) {
    // pashtoPlural is always undefined by this point (a truthy one returned
    // earlier); the key is kept so the returned object's shape is unchanged
    return { arabicPlural, plural: pashtoPlural, bundledPlural };
  }
  return undefined;
}

View File

@ -2114,7 +2114,8 @@ const others: T.DictionaryEntry[] = [
adjectives.forEach((word) => {
test(`${word.in.p} should inflect properly`, () => {
expect(inflectWord(word.in)).toEqual(word.out);
const out = inflectWord(word.in);
expect(out).toEqual(word.out);
});
});

View File

@ -10,25 +10,14 @@ import {
concatInflections,
splitDoubleWord,
ensureUnisexInflections,
concatPsString,
endsInConsonant,
endsInAaOrOo,
addOEnding,
splitPsByVarients,
removeEndTick,
endsWith,
concatPlurals,
hasShwaEnding,
} from "./p-text-helpers";
import { makePsString, removeFVarients } from "./accent-and-ps-utils";
import {
accentOnNFromEnd,
countSyllables,
removeAccents,
} from "./accent-helpers";
import { removeAccents } from "./accent-helpers";
import * as T from "../../types";
import { getInfsAndVocative } from "./inflections-and-vocative";
import { fmapSingleOrLengthOpts } from "./fp-ps";
import { makePlural } from "./nouns-plural";
export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
// If it's a noun/adj, inflect accordingly
@ -131,309 +120,6 @@ export function inflectRegularShwaEndingUnisex(
};
}
function makePashtoPlural(
word: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined {
if (!(word.ppp && word.ppf)) return undefined;
const base = splitPsByVarients(makePsString(word.ppp, word.ppf));
function getBaseAndO(): T.PluralInflectionSet {
return [base, base.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>];
}
if (word.c?.includes("n. m.")) {
return { masc: getBaseAndO() };
}
if (word.c?.includes("n. f.")) {
return { fem: getBaseAndO() };
}
// TODO: handle masculine and unisex
return undefined;
}
function makeBundledPlural(
word: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined {
if (!endsInConsonant(word) || !word.c?.includes("n.")) {
return undefined;
}
const w = makePsString(word.p, word.f);
const base = countSyllables(w) === 1 ? accentOnNFromEnd(w, 0) : w;
return {
masc: [
[concatPsString(base, { p: "ه", f: "a" })],
[concatPsString(base, { p: "و", f: "o" })],
],
};
}
function makeArabicPlural(
word: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined {
if (!(word.apf && word.app)) return undefined;
const w = makePsString(word.app, word.apf);
const plural = splitPsByVarients(w);
const end = removeAccents(removeEndTick(word.apf).slice(-1));
// again typescript being dumb and not letting me use a typed key here
const value = [
plural,
plural.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
] as T.PluralInflectionSet;
// feminine words that have arabic plurals stay feminine with the plural - ie مرجع - مراجع
// but masculine words that appear feminine in the plural aren't femening with the Arabic plural - ie. نبي - انبیا
if (["i", "e", "a"].includes(end) && word.c?.includes("n. f.")) {
return { fem: value };
}
return { masc: value };
}
/**
 * Works out the plural forms available for a dictionary entry.
 *
 * The rules are tried in this order, and the first one that matches wins:
 *  1. entries whose `c` field contains "pl." together with "n. m." or "n. f."
 *     are used as their own plural
 *  2. a hard-coded exception for "mUláa"
 *  3. whatever `makePashtoPlural` produces, if anything
 *  4. suffix-based rules chosen by noun type (unisex / masc / fem), the
 *     ending of the word, and whether the entry is marked "anim."
 *  5. an Arabic plural on its own, if `makeArabicPlural` produced one
 *
 * @param w - the dictionary entry to pluralize
 * @returns an object carrying `plural` and/or `arabicPlural` (and, for some
 * rules, `bundledPlural`), or `undefined` when no rule applies
 * @throws Error when the entry takes the short-squish path but lacks
 * `infap` / `infaf`
 */
function makePlural(
  w: T.DictionaryEntryNoFVars
):
  | { plural: T.PluralInflections; bundledPlural?: T.PluralInflections }
  | { arabicPlural: T.PluralInflections; bundledPlural?: T.PluralInflections }
  | undefined {
  /**
   * Turns one plural form (or a list of them) into a plural inflection set by
   * pairing it with the forms produced by `addOEnding`.
   */
  function addSecondInf(
    plur: T.ArrayOneOrMore<T.PsString> | T.PsString
  ): T.PluralInflectionSet {
    if (!Array.isArray(plur)) {
      return addSecondInf([plur]);
    }
    return [plur, plur.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>];
  }
  // entries marked "pl." are used as their own plural form
  if (w.c && w.c.includes("pl.")) {
    const plural = addSecondInf(makePsString(w.p, w.f));
    // Typescript being dumb and not letting me do a typed variable for the key
    // could try refactoring with an updated TypeScript dependency
    if (w.c.includes("n. m.")) return { plural: { masc: plural } };
    if (w.c.includes("n. f.")) return { plural: { fem: plural } };
    // neither gender label found: fall through to the regular rules below
  }
  // exception for mUláa
  if (w.f === "mUláa" || w.f === "mUlaa") {
    return {
      plural: {
        masc: [
          [
            { p: "ملایان", f: "mUlaayáan" },
            { p: "ملاګان", f: "mUlaagáan" },
          ],
          [
            { p: "ملایانو", f: "mUlaayáano" },
            { p: "ملاګانو", f: "mUlaagáano" },
          ],
        ],
      },
    };
  }
  const arabicPlural = makeArabicPlural(w);
  const pashtoPlural = makePashtoPlural(w);
  const bundledPlural = makeBundledPlural(w);
  /**
   * Masculine plural made by suffixing the word.
   *
   * With `shortSquish` the base is `infap` / `infaf` minus the last
   * character; otherwise the base is the entry itself. Accents are removed
   * and, if the base has a shwa ending, its last character is dropped too.
   * The suffix is "ان / áan" for animate non-short-squish words and
   * "ونه / óona" in every other case.
   */
  function addMascPluralSuffix(
    animate?: boolean,
    shortSquish?: boolean
  ): T.PluralInflectionSet {
    if (shortSquish && (w.infap === undefined || w.infaf === undefined)) {
      throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`);
    }
    const b = removeAccents(
      shortSquish
        ? makePsString(
            (w.infap as string).slice(0, -1),
            (w.infaf as string).slice(0, -1)
          )
        : w
    );
    const base = hasShwaEnding(b)
      ? makePsString(b.p.slice(0, -1), b.f.slice(0, -1))
      : b;
    return addSecondInf(
      concatPsString(
        base,
        animate && !shortSquish
          ? { p: "ان", f: "áan" }
          : { p: "ونه", f: "óona" }
      )
    );
  }
  /**
   * Animate unisex plural: the animate masculine suffix for `masc` and
   * "انې / áane" on the accent-less word for `fem`.
   */
  function addAnimUnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
    const base = removeAccents(w);
    return {
      masc: addMascPluralSuffix(true),
      fem: addSecondInf(concatPsString(base, { p: "انې", f: "áane" })),
    };
  }
  /**
   * Plural with "یان / iyáan" (plus "ې / e" for feminine). The base is the
   * accent-less word minus its last Pashto letter and its last two phonetic
   * letters.
   */
  function addEePluralSuffix(gender: T.Gender): T.PluralInflectionSet {
    const b = removeAccents(w);
    const base = {
      p: b.p.slice(0, -1),
      f: b.f.slice(0, -2),
    };
    const firstInf: T.ArrayOneOrMore<T.PsString> = [
      concatPsString(
        base,
        { p: "یان", f: "iyáan" },
        gender === "fem" ? { p: "ې", f: "e" } : ""
      ),
    ];
    return [firstInf, firstInf.flatMap(addOEnding)] as T.PluralInflectionSet;
  }
  /**
   * Unisex plural with "یان / iyáan" (masc) and "یانې / iyáane" (fem); both
   * genders share "یانو / iyáano" as the second form. The base is built the
   * same way as in `addEePluralSuffix`.
   */
  function addAnimN3UnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
    const b = removeAccents(w);
    const base = {
      p: b.p.slice(0, -1),
      f: b.f.slice(0, -2),
    };
    return {
      masc: [
        [concatPsString(base, { p: "یان", f: "iyáan" })],
        [concatPsString(base, { p: "یانو", f: "iyáano" })],
        // TODO: or use addSecondInf method above?
      ],
      fem: [
        [concatPsString(base, { p: "یانې", f: "iyáane" })],
        [concatPsString(base, { p: "یانو", f: "iyáano" })],
      ],
    };
  }
  /**
   * Plural for words ending in a long vowel: "ګان / gáan" for masculine;
   * "وې / we" and "ګانې / gáane" for feminine. A space is inserted before
   * the suffix when the Pashto spelling ends in ع or ه.
   */
  function addLongVowelSuffix(gender: "masc" | "fem"): T.PluralInflectionSet {
    const base = removeEndTick(makePsString(w.p, w.f));
    const baseWOutAccents = removeAccents(base);
    const space =
      w.p.slice(-1) === "ع" || w.p.slice(-1) === "ه" ? { p: " ", f: " " } : "";
    if (gender === "fem") {
      return addSecondInf([
        // the "we" form keeps the accents of the original word
        concatPsString(base, space, { p: "وې", f: "we" }),
        concatPsString(baseWOutAccents, space, { p: "ګانې", f: "gáane" }),
      ]);
    } else {
      return addSecondInf([
        concatPsString(baseWOutAccents, space, { p: "ګان", f: "gáan" }),
      ]);
    }
  }
  // TODO: This should be possible for words like پلویان but not for words like ترورزامن 🤔
  // function addFemToPashtoPlural(i: T.PluralInflections): T.UnisexSet<T.PluralInflectionSet> {
  //   if ("fem" in i && "masc" in i) return i;
  //   if (!("masc" in i)) throw new Error("bad pashto plural doesn't even have masculine");
  //   if (endsInConsonant(i.masc[0][0])) {
  //     return {
  //       ...i,
  //       fem: [
  //         i.masc[0].map((x) => concatPsString(x, { p: "ې", f: "e" })) as T.ArrayOneOrMore<T.PsString>,
  //         i.masc[0].map((x) => concatPsString(x, { p: "و", f: "o" })) as T.ArrayOneOrMore<T.PsString>,
  //       ],
  //     };
  //   }
  //   return {
  //     ...i,
  //     fem: i.masc,
  //   };
  // }

  // "short squish": the entry has an irregular inflection form (infap) and
  // that form contains no alef
  const shortSquish = !!w.infap && !w.infap.includes("ا");
  const anim = w.c?.includes("anim.");
  const type = w.c?.includes("unisex")
    ? "unisex noun"
    : w.c?.includes("n. m.")
    ? "masc noun"
    : w.c?.includes("n. f.")
    ? "fem noun"
    : "other";
  // a Pashto plural from makePashtoPlural takes precedence over every
  // suffix-based rule below
  if (pashtoPlural) {
    return {
      plural: pashtoPlural,
      arabicPlural,
    };
  }
  if (type === "unisex noun") {
    // doesn't need to be labelled anim - because it's only with animate nouns that you get the unisex - I THINK
    if (endsInConsonant(w) && !w.infap) {
      return {
        arabicPlural,
        bundledPlural,
        plural: addAnimUnisexPluralSuffix(),
      };
    }
    if (shortSquish && !anim) {
      return {
        arabicPlural,
        plural: { masc: addMascPluralSuffix(anim, shortSquish) },
      };
    }
    if (endsWith([{ p: "ی", f: "áy" }, { p: "ي" }], w, true)) {
      return { arabicPlural, plural: addAnimN3UnisexPluralSuffix() };
    }
    // usually shortSquish nouns would never have arabicPlurals -- so we don't have to worry about catching
    // arabic plurals for the animate ones, right?
  }
  if (
    type === "masc noun" &&
    (shortSquish || ((endsInConsonant(w) || hasShwaEnding(w)) && !w.infap)) &&
    // words ending in توب are excluded from this rule
    w.p.slice(-3) !== "توب"
  ) {
    return {
      arabicPlural,
      bundledPlural,
      plural: {
        masc: addMascPluralSuffix(anim, shortSquish),
      },
    };
  }
  if (type === "masc noun" && endsWith({ p: "ی", f: "áy" }, w, true) && anim) {
    // only the masculine half of the unisex set is used here
    const { masc } = addAnimN3UnisexPluralSuffix();
    return {
      arabicPlural,
      plural: {
        masc,
      },
    };
  }
  if (type === "masc noun" && endsWith({ p: "ي" }, w)) {
    const masc = addEePluralSuffix("masc");
    return {
      arabicPlural,
      plural: { masc },
    };
  }
  // TODO: What about endings in long ee / animate at inanimate
  if (type === "masc noun" && endsInAaOrOo(w) && !w.infap) {
    return {
      arabicPlural,
      plural: {
        masc: addLongVowelSuffix("masc"),
      },
    };
  }
  // TODO: What about endings in long ee / animate at inanimate
  if (type === "fem noun" && endsInAaOrOo(w) && !w.infap) {
    return {
      arabicPlural,
      plural: {
        fem: addLongVowelSuffix("fem"),
      },
    };
  }
  if (
    type === "fem noun" &&
    (endsWith({ p: "ي" }, w) || (endsWith({ p: "ۍ" }, w) && anim))
  ) {
    return {
      arabicPlural,
      plural: {
        fem: addEePluralSuffix("fem"),
      },
    };
  }
  if (arabicPlural) {
    // NOTE: pashtoPlural is always falsy at this point (a truthy value was
    // returned above), so `plural` is present here but carries no value
    return { arabicPlural, plural: pashtoPlural, bundledPlural };
  }
  return undefined;
}
export function inflectYay(
ps: T.SingleOrLengthOpts<T.PsString>
): T.SingleOrLengthOpts<T.UnisexInflections> {