pashto-inflector/src/lib/pashto-inflector.ts

628 lines
19 KiB
TypeScript
Raw Normal View History

2021-03-09 12:39:13 +00:00
/**
* Copyright (c) 2021 lingdocs.com
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
import { pashtoConsonants } from "./pashto-consonants";
2021-05-25 09:47:02 +00:00
import {
concatInflections,
splitDoubleWord,
ensureUnisexInflections,
2021-08-31 09:34:18 +00:00
makePsString,
2021-09-07 11:49:57 +00:00
removeFVarients,
concatPsString,
endsInConsonant,
endsInAaOrOo,
2021-09-14 14:25:04 +00:00
addOEnding,
endsInShwa,
splitPsByVarients,
removeEndTick,
endsWith,
2021-05-25 09:47:02 +00:00
} from "./p-text-helpers";
2021-08-31 09:34:18 +00:00
import {
accentFSylsOnNFromEnd,
2021-09-14 14:25:04 +00:00
hasAccents,
2021-08-31 09:34:18 +00:00
removeAccents,
splitUpSyllables,
2021-08-31 09:34:18 +00:00
} from "./accent-helpers";
2021-03-09 12:39:13 +00:00
import * as T from "../types";
import { splitFIntoPhonemes } from "./phonetics-to-diacritics";
2021-03-09 12:39:13 +00:00
// Matches phonetics that end in "a" or "á" where the character before that vowel
// (ignoring one optional apostrophe) is anything other than a plain "a"; one optional
// apostrophe is also allowed after the vowel.
const endingInSingleARegex = /[^a]'??[aá]'??$/;
// Matches Pashto script that ends in ه or ع where the preceding letter is not ا.
const endingInHeyOrAynRegex = /[^ا][هع]$/;
2021-09-07 11:49:57 +00:00
// const endingInAlefRegex = /اع?$/;
2021-03-09 12:39:13 +00:00
2021-09-07 11:49:57 +00:00
/**
 * Produces the inflections and/or plural forms for a dictionary entry.
 *
 * The f-variants are stripped with removeFVarients first; after that the
 * part-of-speech string `c` decides which handler runs. The checks are made in
 * this order: "doub." (double words), "pl.", then "adj." / "unisex" / "num",
 * then "n. m.", then "n. f.".
 *
 * @param word - the dictionary entry to inflect
 * @returns the chosen handler's output, or `false` when `c` is missing/empty
 *          or matches none of the tags above
 */
export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
  // TODO: What about n. f. / adj. that end in ي ??
  const entry = removeFVarients(word);
  const partOfSpeech = entry.c;
  if (!partOfSpeech) {
    // Without part-of-speech info none of the checks below can match
    return false;
  }
  const tagged = (tag: string): boolean => partOfSpeech.includes(tag);

  if (tagged("doub.")) {
    // Inflect each half separately, then join the two sets of inflections
    const [first, second] = splitDoubleWord(entry).map((half) =>
      ensureUnisexInflections(inflectWord(half), half),
    );
    return {
      inflections: concatInflections(
        first.inflections,
        second.inflections,
      ) as T.UnisexInflections,
    };
  }
  if (tagged("pl.")) {
    return handlePluralNoun(entry);
  }
  if (tagged("adj.") || tagged("unisex") || tagged("num")) {
    return handleUnisexWord(entry);
  }
  if (tagged("n. m.")) {
    return handleMascNoun(entry);
  }
  if (tagged("n. f.")) {
    return handleFemNoun(entry);
  }
  // It's not a noun/adj
  return false;
}
// LEVEL 2 FUNCTIONS
2021-09-07 11:49:57 +00:00
/**
 * Inflects an entry that inflectWord routed here ("adj.", "unisex" or "num").
 *
 * The pattern is chosen from the endings of the Pashto spelling (`p`) and the
 * phonetics (`f`), checked in a fixed order. Whatever makePlural returns for
 * the entry is spread into the result alongside the inflections.
 *
 * @param word - the entry, with f-variants already removed
 * @returns inflections plus plurals, plurals alone when no pattern matches or
 *          `noInf` is set, or `false` when there is nothing at all
 */
function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
  // TODO: !!! Handle weird endings / symbols ' etc.
  const plurals = makePlural(word);
  if (word.noInf) {
    return plurals ? { ...plurals } : false;
  }
  const { p, f } = word;
  const withPlurals = <I>(inflections: I) => ({ inflections, ...plurals });

  // Irregular: both inflection stems are given explicitly on the entry
  if (word.infap && word.infaf && word.infbp && word.infbf) {
    return withPlurals(
      inflectIrregularUnisex(p, f, [
        { p: word.infap, f: word.infaf },
        { p: word.infbp, f: word.infbf },
      ]),
    );
  }

  const lastP = p.slice(-1);
  const lastTwoP = p.slice(-2);
  const lastF = f.slice(-1);
  const lastTwoF = f.slice(-2);

  if (lastP === "ی" && lastTwoF === "ey") {
    return withPlurals(inflectRegularYeyUnisex(p, f));
  }
  // NOTE(review): this reads `word.g`, not `word.f` - presumably an accent-free
  // copy of the phonetics so that both "u" and "ú" match; confirm against the types
  if (lastP === "ه" && word.g.slice(-1) === "u") {
    return withPlurals(inflectRegularShwaEndingUnisex(p, f));
  }
  if (lastP === "ی" && lastTwoF === "éy") {
    return withPlurals(inflectEmphasizedYeyUnisex(p, f));
  }

  const takesConsonantPattern =
    pashtoConsonants.includes(lastP) ||
    lastTwoP === "وی" ||
    lastTwoP === "ای" ||
    lastF === "w" ||
    (lastP === "ه" && lastF === "h");
  if (takesConsonantPattern) {
    return withPlurals(inflectConsonantEndingUnisex(p, f));
  }

  // No inflection pattern applies; fall back to the plurals, if any
  return plurals || false;
}
2021-09-14 14:25:04 +00:00
/**
 * Handles an entry whose part of speech contains "pl.".
 *
 * Only nouns (part of speech containing "n.") are processed. The result is a
 * copy of whatever makePlural produces; no inflections are added.
 *
 * @param w - the entry, with f-variants already removed
 * @returns a copy of the plural info, or `false` if the entry is not a noun or
 *          makePlural yields nothing
 */
function handlePluralNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
  if (!w.c?.includes("n.")) {
    return false;
  }
  const plurals = makePlural(w);
  return plurals ? { ...plurals } : false;
}
2021-09-07 11:49:57 +00:00
/**
 * Inflects an entry that inflectWord routed here as a masculine noun ("n. m.").
 *
 * Patterns are tried in order: irregular (all four inf* fields present),
 * the توب / tob ending, ی with "ey", then ی with "éy". Whatever makePlural
 * returns is spread into the result next to the inflections.
 *
 * @param w - the entry, with f-variants already removed
 * @returns inflections plus plurals, a copy of the plurals alone when no
 *          pattern matches or `noInf` is set, or `false` when there is nothing
 */
function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
  // TODO: !!! Handle weird endings / symbols ' etc.
  const plurals = makePlural(w);
  const pluralsOnly = () => (plurals ? { ...plurals } : false);
  if (w.noInf) {
    return pluralsOnly();
  }
  const { p, f } = w;

  if (w.infap && w.infaf && w.infbp && w.infbf) {
    const irregular = inflectIrregularMasc(p, f, [
      { p: w.infap, f: w.infaf },
      { p: w.infbp, f: w.infbf },
    ]);
    return { inflections: irregular, ...plurals };
  }

  // The word must be longer than the توب ending itself
  const endsInTob =
    p.length > 3 &&
    p.slice(-3) === "توب" &&
    ["tób", "tob"].includes(f.slice(-3));
  if (endsInTob) {
    return { inflections: inflectTobMasc(p, f), ...plurals };
  }

  if (p.slice(-1) === "ی") {
    const lastTwoF = f.slice(-2);
    if (lastTwoF === "ey") {
      return { inflections: inflectRegularYeyMasc(p, f), ...plurals };
    }
    if (lastTwoF === "éy") {
      return { inflections: inflectRegularEmphasizedYeyMasc(p, f), ...plurals };
    }
  }

  return pluralsOnly();
}
2021-09-07 11:49:57 +00:00
/**
 * Inflects an entry that inflectWord routed here as a feminine noun ("n. f.").
 *
 * Patterns are tried in order: ه/ع with a single final "a", ح with a single
 * final "a", consonant (or phonetic "w") ending, ي ending, then ۍ ending.
 * The consonant and ي patterns are skipped for entries marked "anim.".
 * Whatever makePlural returns is spread into the result next to the inflections.
 *
 * @param word - the entry, with f-variants already removed
 * @returns inflections plus plurals, a copy of the plurals alone when no
 *          pattern matches or `noInf` is set, or `false` when there is nothing
 */
function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
  /* istanbul ignore next */ // will always have word.c at this point
  const c = word.c || "";
  const plurals = makePlural(word);
  if (word.noInf) {
    return plurals ? { ...plurals } : false;
  }
  const { p, f } = word;
  const lastP = p.slice(-1);
  const hasSingleFinalA = endingInSingleARegex.test(f);

  if (endingInHeyOrAynRegex.test(p) && hasSingleFinalA) {
    return { inflections: inflectRegularAFem(p, f), ...plurals };
  }
  if (lastP === "ح" && hasSingleFinalA) {
    return { inflections: inflectRegularAWithHimPEnding(p, f), ...plurals };
  }
  if (!c.includes("anim.")) {
    // These two patterns only apply to inanimate nouns
    // TODO: better reusable function to check if something ends with a consonant
    if (pashtoConsonants.includes(lastP) || f.slice(-1) === "w") {
      return { inflections: inflectRegularInanMissingAFem(p, f), ...plurals };
    }
    if (lastP === "ي") {
      return { inflections: inflectRegularInanEeFem(p, f), ...plurals };
    }
  }
  if (lastP === "ۍ") {
    return { inflections: inflectRegularUyFem(p, f), ...plurals };
  }
  // if (endingInAlefRegex.test(word.p)) {
  //   return { inflections: inflectRegularAaFem(word.p, f) };
  // }
  return plurals ? { ...plurals } : false;
}
// LEVEL 3 FUNCTIONS
/**
 * Builds masculine and feminine inflections for an irregular unisex word from
 * its two explicitly supplied inflection forms.
 *
 * The first supplied form becomes the masculine first inflection, with its
 * phonetics run through splitFIntoPhonemes and accentFSylsOnNFromEnd(…, 0).
 * The second supplied form, with accents removed, is the stem that the
 * accented endings (و/ó, ه/á, ې/é) are attached to.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 * @param inflections - the two supplied inflection forms, in order
 */
function inflectIrregularUnisex(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
  const [firstInf, secondInf] = inflections;
  const stem = removeAccents(secondInf);
  const firstInfF = accentFSylsOnNFromEnd(splitFIntoPhonemes(firstInf.f), 0);
  const stemPlus = (pEnding: string, fEnding: string) => ({
    p: `${stem.p}${pEnding}`,
    f: `${stem.f}${fEnding}`,
  });
  return {
    masc: [
      [{ p, f }],
      [{ p: firstInf.p, f: firstInfF }],
      [stemPlus("و", "ó")],
    ],
    fem: [
      [stemPlus("ه", "á")],
      [stemPlus("ې", "é")],
      [stemPlus("و", "ó")],
    ],
  };
}
/**
 * Builds masculine and feminine inflections for a unisex word ending in
 * ی / "ey" (unaccented).
 *
 * The stem is the spelling minus its last character and the phonetics minus
 * their last two characters; fixed endings are appended to that stem.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
export function inflectRegularYeyUnisex(p: string, f: string): T.UnisexInflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const form = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [form("ي", "ee")],
      [form("یو", "iyo"), form("و", "o")],
    ],
    fem: [
      [form("ې", "e")],
      [form("ې", "e")],
      [form("و", "o")],
    ],
  };
}
2021-08-31 09:34:18 +00:00
/**
 * Builds masculine and feminine inflections for a unisex word ending in
 * ه / "u".
 *
 * Accents are removed from the whole word first. If the original phonetics
 * ended in an accented "ú", every ending is given in its accented form
 * (ú, ó, á, é); otherwise the plain endings are used.
 *
 * @param pr - Pashto spelling of the plain form
 * @param fr - phonetics of the plain form (may carry accents)
 */
export function inflectRegularShwaEndingUnisex(pr: string, fr: string): T.UnisexInflections {
  const plain = removeAccents(makePsString(pr, fr));
  const stressedEnding = fr.slice(-1) === "ú";
  const stemP = plain.p.slice(0, -1);
  const stemF = plain.f.slice(0, -1);
  const form = (pEnding: string, unstressed: string, stressed: string) => ({
    p: `${stemP}${pEnding}`,
    f: `${stemF}${stressedEnding ? stressed : unstressed}`,
  });
  return {
    masc: [
      [form("ه", "u", "ú")],
      [form("ه", "u", "ú")],
      [form("و", "o", "ó")],
    ],
    fem: [
      [form("ه", "a", "á")],
      [form("ې", "e", "é")],
      [form("و", "o", "ó")],
    ],
  };
}
2021-03-09 12:39:13 +00:00
/**
 * Builds masculine and feminine inflections for a unisex word ending in
 * ی / "éy" (accented).
 *
 * The stem is the spelling minus its last character and the phonetics minus
 * their last two characters; fixed endings are appended to that stem.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectEmphasizedYeyUnisex(p: string, f: string): T.UnisexInflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const form = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [form("ي", "ée")],
      [form("یو", "iyo"), form("و", "ó")],
    ],
    fem: [
      [form("ۍ", "úy")],
      [form("ۍ", "úy")],
      [form("یو", "úyo"), form("و", "ó")],
    ],
  };
}
/**
 * Builds masculine and feminine inflections for a unisex word that takes the
 * consonant-ending pattern.
 *
 * The first two masculine forms are the word unchanged. Every other form
 * appends an ending to the word; when splitUpSyllables reports exactly one
 * syllable, the phonetics are first passed through
 * accentFSylsOnNFromEnd(…, 0) before the ending is added.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectConsonantEndingUnisex(p: string, f: string): T.UnisexInflections {
  const syllables = splitUpSyllables(f);
  const stemF = syllables.length === 1 ? accentFSylsOnNFromEnd(syllables, 0) : f;
  const stem = makePsString(p, stemF);
  const stemPlus = (pEnding: string, fEnding: string) => ({
    p: `${stem.p}${pEnding}`,
    f: `${stem.f}${fEnding}`,
  });
  return {
    masc: [
      [{ p, f }],
      [{ p, f }],
      [stemPlus("و", "o")],
    ],
    fem: [
      [stemPlus("ه", "a")],
      [stemPlus("ې", "e")],
      [stemPlus("و", "o")],
    ],
  };
}
/**
 * Builds the masculine inflections for a noun ending in ی / "ey" (unaccented).
 *
 * The stem is the spelling minus its last character and the phonetics minus
 * their last two characters. The second inflection has a long and a short form.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectRegularYeyMasc(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const form = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [form("ي", "ee")],
      [form("یو", "iyo"), form("و", "o")],
    ],
  };
}
/**
 * Builds the masculine inflections for a noun ending in توب / "tob".
 *
 * The last three characters are cut from both the spelling and the phonetics,
 * then replaced with تابه / "taabu" (first inflection) and تبو / "tabo"
 * (second inflection).
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectTobMasc(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -3);
  const stemF = f.slice(0, -3);
  const firstInflection = { p: stemP + "تابه", f: stemF + "taabu" };
  const secondInflection = { p: stemP + "تبو", f: stemF + "tabo" };
  return {
    masc: [
      [{ p, f }],
      [firstInflection],
      [secondInflection],
    ],
  };
}
/**
 * Builds the masculine inflections for a noun ending in ی / "éy" (accented).
 *
 * The stem is the spelling minus its last character and the phonetics minus
 * their last two characters. Cutting the "éy" removes the accented vowel, so
 * the short second-inflection form takes an accented "ó", matching what
 * inflectEmphasizedYeyUnisex produces for the same ending.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 * @returns the masculine inflection set: plain form, first inflection, and
 *          the long and short second-inflection forms
 */
function inflectRegularEmphasizedYeyMasc(p: string, f: string): T.Inflections {
  const baseP = p.slice(0, -1);
  const baseF = f.slice(0, -2);
  return {
    masc: [
      [{ p, f }],
      [{ p: `${baseP}ي`, f: `${baseF}ée` }],
      [
        { p: `${baseP}یو`, f: `${baseF}iyo` },
        // accented, like the unisex "éy" pattern; a plain "o" left this form with no accent
        { p: `${baseP}و`, f: `${baseF}ó` },
      ],
    ],
  };
}
/**
 * Builds the masculine inflections for an irregular noun from its two
 * explicitly supplied inflection forms.
 *
 * The first supplied form is the first inflection; if splitUpSyllables finds
 * more than one syllable in its phonetics they are passed through
 * accentFSylsOnNFromEnd(…, 0), otherwise they are used as given. The second
 * supplied form gets و appended, with its phonetics stripped of accents and
 * given an accented "ó".
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 * @param inflections - the two supplied inflection forms, in order
 */
function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
  const [firstInf, secondInf] = inflections;
  const isMultiSyllable = splitUpSyllables(firstInf.f).length > 1;
  const firstInfF = isMultiSyllable
    ? accentFSylsOnNFromEnd(firstInf.f, 0)
    : firstInf.f;
  const secondInflection = {
    p: `${secondInf.p}و`,
    f: `${removeAccents(secondInf.f)}ó`,
  };
  return {
    masc: [
      [{ p, f }],
      [{ p: firstInf.p, f: firstInfF }],
      [secondInflection],
    ],
  };
}
/**
 * Builds the feminine inflections for a noun ending in a single "a" sound.
 *
 * A trailing apostrophe on the phonetics is ignored when finding the final
 * vowel. That vowel is replaced with "e" / "o", accented when hasAccents
 * reports that the original vowel was. In the Pashto spelling the last letter
 * is dropped before the ending is added, unless that letter is ع, which is kept.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectRegularAFem(p: string, f: string): T.Inflections {
  const hasTrailingTick = ["'", ""].includes(f.slice(-1));
  const fWithoutTick = hasTrailingTick ? f.slice(0, -1) : f;
  const finalVowel = fWithoutTick.slice(-1);
  const stressed = hasAccents(finalVowel);
  const stemF = fWithoutTick.slice(0, -1);
  const stemP = p.slice(-1) === "ع" ? p : p.slice(0, -1);
  const firstInflection = { p: `${stemP}ې`, f: `${stemF}${stressed ? "é" : "e"}` };
  const secondInflection = { p: `${stemP}و`, f: `${stemF}${stressed ? "ó" : "o"}` };
  return {
    fem: [
      [{ p, f }],
      [firstInflection],
      [secondInflection],
    ],
  };
}
/**
 * Builds the feminine inflections for a noun spelled with a final ح whose
 * phonetics end in an "a" sound.
 *
 * The Pashto spelling is kept whole and the ending appended to it; in the
 * phonetics the last character is swapped for "e" / "o".
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectRegularAWithHimPEnding(p: string, f: string): T.Inflections {
  const stemF = f.slice(0, -1);
  const firstInflection = { p: p + "ې", f: stemF + "e" };
  const secondInflection = { p: p + "و", f: stemF + "o" };
  return {
    fem: [
      [{ p, f }],
      [firstInflection],
      [secondInflection],
    ],
  };
}
/**
 * Builds the feminine inflections for an inanimate noun that ends in a
 * consonant (i.e. lacks the usual final "a").
 *
 * The endings ې / "e" and و / "o" are appended to the whole word. When
 * splitUpSyllables reports exactly one syllable, the phonetics are first
 * passed through accentFSylsOnNFromEnd(…, 0).
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectRegularInanMissingAFem(p: string, f: string): T.Inflections {
  const isSingleSyllable = splitUpSyllables(f).length === 1;
  const stemF = isSingleSyllable ? accentFSylsOnNFromEnd(f, 0) : f;
  const firstInflection = { p: `${p}ې`, f: `${stemF}e` };
  const secondInflection = { p: `${p}و`, f: `${stemF}o` };
  return {
    fem: [
      [{ p, f }],
      [firstInflection],
      [secondInflection],
    ],
  };
}
/**
 * Builds the feminine inflections for an inanimate noun ending in ي.
 *
 * The stem is the spelling minus its last character and the phonetics minus
 * their last two characters; ۍ / "úy" and یو / "úyo" are appended to it.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectRegularInanEeFem(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const firstInflection = { p: stemP + "ۍ", f: stemF + "úy" };
  const secondInflection = { p: stemP + "یو", f: stemF + "úyo" };
  return {
    fem: [
      [{ p, f }],
      [firstInflection],
      [secondInflection],
    ],
  };
}
/**
 * Builds the feminine inflections for a noun ending in ۍ.
 *
 * The phonetic stem is the phonetics minus their last two characters, with
 * accents removed. The plain form and first inflection both keep the given
 * spelling and use stem + "úy"; the second inflection has a long (یو / "úyo")
 * and a short (و / "o") form built on the spelling minus its last character.
 *
 * @param p - Pashto spelling of the plain form
 * @param f - phonetics of the plain form
 */
function inflectRegularUyFem(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = removeAccents(f.slice(0, -2));
  const accentedF = `${stemF}úy`;
  return {
    fem: [
      [{ p, f: accentedF }],
      [{ p, f: accentedF }],
      [
        { p: `${stemP}یو`, f: `${stemF}úyo` },
        { p: `${stemP}و`, f: `${stemF}o` },
      ],
    ],
  };
}
2021-09-07 11:49:57 +00:00
/**
 * Builds the plural inflection set from the explicit plural fields on an entry
 * (`ppp` / `ppf`).
 *
 * The plural is split into its variants with splitPsByVarients, and the second
 * inflection is made by running addOEnding over each variant. The set is keyed
 * by gender according to the part of speech.
 *
 * @param word - the entry, with f-variants already removed
 * @returns `{ masc }` for "n. m.", `{ fem }` for "n. f.", or `undefined` when
 *          either plural field is missing or the entry is neither
 */
function makePashtoPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
  if (!word.ppp || !word.ppf) {
    return undefined;
  }
  const variants = splitPsByVarients(makePsString(word.ppp, word.ppf));
  // Built lazily so the second inflection is only computed when it will be returned
  const buildSet = (): T.PluralInflectionSet => [
    variants,
    variants.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
  ];
  if (word.c?.includes("n. m.")) {
    return { masc: buildSet() };
  }
  if (word.c?.includes("n. f.")) {
    return { fem: buildSet() };
  }
  // TODO: handle masculine and unisex
  return undefined;
}
2021-09-14 14:25:04 +00:00
/**
 * Builds the plural inflection set from the Arabic plural fields on an entry
 * (`app` / `apf`).
 *
 * The plural is split into its variants with splitPsByVarients, and the second
 * inflection is made by running addOEnding over each variant.
 *
 * @param word - the entry, with f-variants already removed
 * @returns `{ fem }` when the plural's phonetics end in i/e/a (ignoring accents
 *          and an end tick) and the entry is "n. f."; `{ masc }` otherwise;
 *          `undefined` when either Arabic plural field is missing
 */
function makeArabicPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
  if (!word.apf || !word.app) {
    return undefined;
  }
  const variants = splitPsByVarients(makePsString(word.app, word.apf));
  const lastSound = removeAccents(removeEndTick(word.apf).slice(-1));
  const inflectionSet = [
    variants,
    variants.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
  ] as T.PluralInflectionSet;
  // Feminine words keep their gender with an Arabic plural - ie مرجع - مراجع
  // but masculine words whose Arabic plural merely looks feminine stay masculine - ie. نبي - انبیا
  const staysFeminine = ["i", "e", "a"].includes(lastSound) && word.c?.includes("n. f.");
  // Two separate literals: the gender key could not be a typed variable here
  return staysFeminine ? { fem: inflectionSet } : { masc: inflectionSet };
}
/**
 * Works out the plural forms for an entry.
 *
 * Precedence:
 *  1. An entry that is itself plural ("pl.") and a masc/fem noun uses its own form.
 *  2. An explicit Pashto plural (via makePashtoPlural) is used as-is.
 *  3. Otherwise a plural is generated by suffixing, depending on whether the
 *     entry is a unisex, masculine or feminine noun and on how it ends.
 *  4. Failing all that, an Arabic plural alone is returned if there is one.
 * From step 2 onward every result also carries `arabicPlural` (which may be
 * undefined).
 *
 * @param w - the entry, with f-variants already removed
 * @returns the plural info, or `undefined` when no plural can be made
 * @throws Error when a short-squish plural is needed but `infap`/`infaf` is undefined
 */
function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections } | { arabicPlural: T.PluralInflections } | undefined {
  /** Pairs the plural form(s) with their second inflection (addOEnding over each form). */
  const addSecondInf = (plur: T.ArrayOneOrMore<T.PsString> | T.PsString): T.PluralInflectionSet => {
    const forms: T.ArrayOneOrMore<T.PsString> = Array.isArray(plur) ? plur : [plur];
    return [
      forms,
      forms.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
    ];
  };

  const tags = w.c;
  if (tags && tags.includes("pl.")) {
    const ownForm = addSecondInf(makePsString(w.p, w.f));
    // Separate literals because the gender key could not be a typed variable;
    // could try refactoring with an updated TypeScript dependency
    if (tags.includes("n. m.")) return { plural: { masc: ownForm } };
    if (tags.includes("n. f.")) return { plural: { fem: ownForm } };
  }

  const arabicPlural = makeArabicPlural(w);
  const pashtoPlural = makePashtoPlural(w);
  if (pashtoPlural) {
    return {
      plural: pashtoPlural,
      arabicPlural,
    };
  }

  /**
   * Masculine plural by suffix: ان / áan when animate and not squished, else ونه / óona.
   * With `squish` the base is the first irregular inflection minus its last letter.
   */
  const addMascPluralSuffix = (animate?: boolean, squish?: boolean): T.PluralInflectionSet => {
    if (squish && (w.infap === undefined || w.infaf === undefined)) {
      throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`);
    }
    const unaccented = removeAccents(squish
      ? makePsString((w.infap as string).slice(0, -1), (w.infaf as string).slice(0, -1))
      : w
    );
    // A final shwa is dropped before the suffix goes on
    const stem = endsInShwa(unaccented)
      ? makePsString(unaccented.p.slice(0, -1), unaccented.f.slice(0, -1))
      : unaccented;
    const suffix = (animate && !squish)
      ? { p: "ان", f: "áan" }
      : { p: "ونه", f: "óona" };
    return addSecondInf(concatPsString(stem, suffix));
  };

  /** Animate unisex plural: the animate masculine plural plus a feminine form in انې / áane. */
  const addAnimUnisexPluralSuffix = (): T.UnisexSet<T.PluralInflectionSet> => {
    const unaccented = removeAccents(w);
    return {
      masc: addMascPluralSuffix(true),
      fem: addSecondInf(concatPsString(unaccented, { p: "انې", f: "áane" })),
    };
  };

  /** The unaccented word minus its last letter (Pashto) and last two characters (phonetics). */
  const yeyStem = () => {
    const unaccented = removeAccents(w);
    return {
      p: unaccented.p.slice(0, -1),
      f: unaccented.f.slice(0, -2),
    };
  };

  /** Plural in یان / iyáan; feminine adds ې / e and a second variant in یګانې / eegáane. */
  const addEePluralSuffix = (gender: T.Gender): T.PluralInflectionSet => {
    const stem = yeyStem();
    const isFem = gender === "fem";
    const firstInf: T.ArrayOneOrMore<T.PsString> = [
      concatPsString(stem, { p: "یان", f: "iyáan" }, isFem ? { p: "ې", f: "e" } : ""),
      ...isFem
        ? [concatPsString(stem, { p: "یګانې", f: "eegáane" })]
        : [],
    ];
    return [
      firstInf,
      firstInf.flatMap(addOEnding),
      // firstInf.map(addOEnding),
    ] as T.PluralInflectionSet;
  };

  /** Animate unisex plural for yey-ending words: masc in یان, fem in یانې, both with یانو second. */
  const addAnimN3UnisexPluralSuffix = (): T.UnisexSet<T.PluralInflectionSet> => {
    const stem = yeyStem();
    return {
      masc: [
        [concatPsString(stem, { p: "یان", f: "iyáan" })],
        [concatPsString(stem, { p: "یانو", f: "iyáano" })],
        // TODO: or use addSecondInf method above?
      ],
      fem: [
        [concatPsString(stem, { p: "یانې", f: "iyáane" })],
        [concatPsString(stem, { p: "یانو", f: "iyáano" })],
      ],
    };
  };

  /** Feminine plural for long-vowel endings: وې / we and (unaccented base) ګانې / gáane. */
  const addFemLongVowelSuffix = (): T.PluralInflectionSet => {
    const tickless = removeEndTick(makePsString(w.p, w.f));
    const ticklessPlain = removeAccents(tickless);
    // After ع or ه the suffix is written as a separate word in the Pashto script
    const spacer = ["ع", "ه"].includes(w.p.slice(-1)) ? { p: " ", f: "" } : "";
    return addSecondInf([
      concatPsString(tickless, spacer, { p: "وې", f: "we" }),
      concatPsString(ticklessPlain, spacer, { p: "ګانې", f: "gáane" }),
    ]);
  };

  // TODO: This should be possible for words like پلویان but not for words like ترورزامن 🤔
  // function addFemToPashtoPlural(i: T.PluralInflections): T.UnisexSet<T.PluralInflectionSet> {
  //   if ("fem" in i && "masc" in i) return i;
  //   if (!("masc" in i)) throw new Error("bad pashto plural doesn't even have masculine");
  //   if (endsInConsonant(i.masc[0][0])) {
  //     return {
  //       ...i,
  //       fem: [
  //         i.masc[0].map((x) => concatPsString(x, { p: "ې", f: "e" })) as T.ArrayOneOrMore<T.PsString>,
  //         i.masc[0].map((x) => concatPsString(x, { p: "و", f: "o" })) as T.ArrayOneOrMore<T.PsString>,
  //       ],
  //     };
  //   }
  //   return {
  //     ...i,
  //     fem: i.masc,
  //   };
  // }

  // "Short squish": there is an irregular first inflection and it contains no ا
  const shortSquish = !!w.infap && !w.infap.includes("ا");
  const anim = tags?.includes("anim.");
  // "unisex" wins over "n. m.", which wins over "n. f."
  const isUnisexNoun = !!tags?.includes("unisex");
  const isMascNoun = !isUnisexNoun && !!tags?.includes("n. m.");
  const isFemNoun = !isUnisexNoun && !isMascNoun && !!tags?.includes("n. f.");

  if (isUnisexNoun) {
    // doesn't need to be labelled anim - because it's only with animate nouns that you get the unisex - I THINK
    if (endsInConsonant(w) && (!w.infap)) {
      return { arabicPlural, plural: addAnimUnisexPluralSuffix() };
    }
    if (shortSquish && !anim) {
      return { arabicPlural, plural: { masc: addMascPluralSuffix(anim, shortSquish) } };
    }
    if (endsWith([{ p: "ی", f: "éy" }, { p: "ي" }], w, true)) {
      return { arabicPlural, plural: addAnimN3UnisexPluralSuffix() };
    }
    // usually shortSquish nouns would never have arabicPlurals -- so we don't have to worry about catching
    // arabic plurals for the animate ones, right?
  }

  if (isMascNoun) {
    const takesRegularSuffix =
      (shortSquish || ((endsInConsonant(w) || endsInShwa(w)) && (!w.infap))) &&
      (w.p.slice(-3) !== "توب");
    if (takesRegularSuffix) {
      return {
        arabicPlural,
        plural: {
          masc: addMascPluralSuffix(anim, shortSquish),
        },
      };
    }
    if (endsWith({ p: "ی", f: "éy" }, w, true) && anim) {
      const { masc } = addAnimN3UnisexPluralSuffix();
      return {
        arabicPlural,
        plural: {
          masc,
        },
      };
    }
    if (endsWith({ p: "ي" }, w)) {
      const masc = addEePluralSuffix("masc");
      return {
        arabicPlural,
        plural: { masc },
      };
    }
  }

  // TODO: What about endings in long ee / animate at inanimate
  if (isFemNoun) {
    if (endsInAaOrOo(w) && (!w.infap)) {
      return {
        arabicPlural,
        plural: {
          fem: addFemLongVowelSuffix(),
        },
      };
    }
    if (endsWith({ p: "ي" }, w)) {
      return {
        arabicPlural,
        plural: {
          fem: addEePluralSuffix("fem"),
        },
      };
    }
  }

  if (arabicPlural) {
    // pashtoPlural is always undefined by this point; the key is kept so the result shape is unchanged
    return { arabicPlural, plural: pashtoPlural };
  }
  return undefined;
}