2021-03-09 12:39:13 +00:00
|
|
|
|
/**
|
|
|
|
|
* Copyright (c) 2021 lingdocs.com
|
|
|
|
|
*
|
|
|
|
|
* This source code is licensed under the MIT license found in the
|
|
|
|
|
* LICENSE file in the root directory of this source tree.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
import { pashtoConsonants } from "./pashto-consonants";
|
2021-05-25 09:47:02 +00:00
|
|
|
|
import {
|
|
|
|
|
concatInflections,
|
|
|
|
|
splitDoubleWord,
|
|
|
|
|
ensureUnisexInflections,
|
2021-08-31 09:34:18 +00:00
|
|
|
|
makePsString,
|
2021-09-07 11:49:57 +00:00
|
|
|
|
removeFVarients,
|
|
|
|
|
concatPsString,
|
|
|
|
|
endsInConsonant,
|
|
|
|
|
endsInAaOrOo,
|
2021-09-14 14:25:04 +00:00
|
|
|
|
addOEnding,
|
|
|
|
|
endsInShwa,
|
|
|
|
|
splitPsByVarients,
|
|
|
|
|
removeEndTick,
|
2021-10-18 14:41:21 +00:00
|
|
|
|
endsWith,
|
2021-05-25 09:47:02 +00:00
|
|
|
|
} from "./p-text-helpers";
|
2021-08-31 09:34:18 +00:00
|
|
|
|
import {
|
2021-11-01 21:10:23 +00:00
|
|
|
|
accentFSylsOnNFromEnd,
|
2021-09-14 14:25:04 +00:00
|
|
|
|
hasAccents,
|
2021-08-31 09:34:18 +00:00
|
|
|
|
removeAccents,
|
2021-11-01 21:10:23 +00:00
|
|
|
|
splitUpSyllables,
|
2021-08-31 09:34:18 +00:00
|
|
|
|
} from "./accent-helpers";
|
2021-03-09 12:39:13 +00:00
|
|
|
|
import * as T from "../types";
|
2021-11-01 21:10:23 +00:00
|
|
|
|
import { splitFIntoPhonemes } from "./phonetics-to-diacritics";
|
2021-03-09 12:39:13 +00:00
|
|
|
|
|
|
|
|
|
// Matches phonetics that end in "a" or "á" (each optionally followed by the tick marks ' and ’),
// where the character before that final vowel (skipping optional ticks) is not another "a".
const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/;
|
|
|
|
|
// Matches Pashto script that ends in ه or ع, as long as the letter right before it is not ا.
const endingInHeyOrAynRegex = /[^ا][هع]$/;
|
2021-09-07 11:49:57 +00:00
|
|
|
|
// const endingInAlefRegex = /اع?$/;
|
2021-03-09 12:39:13 +00:00
|
|
|
|
|
2021-09-07 11:49:57 +00:00
|
|
|
|
/**
 * Produces the inflections and/or plural forms for a dictionary entry.
 *
 * The phonetic variants are stripped from the entry first; the entry's `c`
 * (category) field then decides which handler runs. Tags are checked in this
 * order: "doub." (double words), "pl." (plural nouns), "adj." / "unisex" /
 * "num", "n. m.", "n. f.".
 *
 * @param word - the dictionary entry to inflect
 * @returns the output of the matching handler, or `false` when the entry has
 *          no category or none of the recognised tags
 */
export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
    // TODO: What about n. f. / adj. that end in ي ??
    const entry = removeFVarients(word);
    const category = entry.c;
    if (!category) {
        return false;
    }
    const tagged = (tag: string): boolean => category.includes(tag);

    if (tagged("doub.")) {
        // inflect both halves separately, then join the two results together
        const [first, second] = splitDoubleWord(entry).map(
            (half) => ensureUnisexInflections(inflectWord(half), half),
        );
        return {
            inflections: concatInflections(
                first.inflections,
                second.inflections,
            ) as T.UnisexInflections,
        };
    }
    if (tagged("pl.")) {
        return handlePluralNoun(entry);
    }
    if (tagged("adj.") || tagged("unisex") || tagged("num")) {
        return handleUnisexWord(entry);
    }
    if (tagged("n. m.")) {
        return handleMascNoun(entry);
    }
    if (tagged("n. f.")) {
        return handleFemNoun(entry);
    }
    // It's not a noun/adj
    return false;
}
|
|
|
|
|
|
|
|
|
|
// LEVEL 2 FUNCTIONS
|
2021-09-07 11:49:57 +00:00
|
|
|
|
/**
 * Handles entries routed here by `inflectWord` (adjectives, unisex nouns, numbers).
 *
 * The inflection pattern is picked from the last Pashto letter(s) and the end
 * of the phonetics. Whatever `makePlural` produces is merged into the result.
 *
 * @param word - entry with phonetic variants already removed
 * @returns inflections merged with plurals; only the plurals when the entry is
 *          flagged `noInf` or no pattern matches; `false` when there is nothing
 */
function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
    // TODO: !!! Handle weird endings / symbols ' etc.
    const lastLetter = word.p.slice(-1);
    const plurals = makePlural(word);
    const withPlurals = (inflections: T.Inflections) => ({ inflections, ...plurals });

    if (word.noInf) {
        return plurals ? { ...plurals } : false;
    }
    // irregular words carry both of their inflected forms in the entry itself
    if (word.infap && word.infaf && word.infbp && word.infbf) {
        const irregular = inflectIrregularUnisex(word.p, word.f, [
            { p: word.infap, f: word.infaf },
            { p: word.infbp, f: word.infbf },
        ]);
        return withPlurals(irregular);
    }
    if (lastLetter === "ی") {
        const fTail = word.f.slice(-2);
        if (fTail === "ey") {
            return withPlurals(inflectRegularYeyUnisex(word.p, word.f));
        }
        if (fTail === "éy") {
            return withPlurals(inflectEmphasizedYeyUnisex(word.p, word.f));
        }
    }
    if (lastLetter === "ه" && word.g.slice(-1) === "u") {
        return withPlurals(inflectRegularShwaEndingUnisex(word.p, word.f));
    }
    const lastTwoLetters = word.p.slice(-2);
    const lastSound = word.f.slice(-1);
    const treatedAsConsonantEnding =
        pashtoConsonants.includes(lastLetter)
        || lastTwoLetters === "وی"
        || lastTwoLetters === "ای"
        || lastSound === "w"
        || (lastLetter === "ه" && lastSound === "h");
    if (treatedAsConsonantEnding) {
        return withPlurals(inflectConsonantEndingUnisex(word.p, word.f));
    }
    // no inflection pattern applies - hand back the plurals alone, if any
    return plurals || false;
}
|
|
|
|
|
|
2021-09-14 14:25:04 +00:00
|
|
|
|
/**
 * Handles entries tagged as plural ("pl.").
 *
 * Only nouns (category containing "n.") are processed. The result is a copy
 * of whatever `makePlural` returns; the entry's `noInf` flag makes no
 * difference here because both paths return the same thing.
 *
 * @param w - entry with phonetic variants already removed
 * @returns a copy of the plural info, or `false` if not a noun / no plural
 */
function handlePluralNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
    const isNoun = !!w.c && w.c.includes("n.");
    if (!isNoun) {
        return false;
    }
    const plurals = makePlural(w);
    return plurals ? { ...plurals } : false;
}
|
|
|
|
|
|
2021-09-07 11:49:57 +00:00
|
|
|
|
/**
 * Handles masculine nouns ("n. m.").
 *
 * Patterns are tried in this order: irregular forms stored on the entry,
 * the توب / "tob" suffix, an unaccented "ey" ending, an accented "éy" ending.
 * Plural info from `makePlural` is merged into whichever result is returned.
 *
 * @param w - entry with phonetic variants already removed
 * @returns inflections merged with plurals; only the plurals when the entry is
 *          flagged `noInf` or no pattern matches; `false` when there is nothing
 */
function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
    // TODO: !!! Handle weird endings / symbols ' etc.
    const plurals = makePlural(w);
    if (w.noInf) {
        return plurals ? { ...plurals } : false;
    }
    const withPlurals = (inflections: T.Inflections) => ({ inflections, ...plurals });
    const lastLetter = w.p.slice(-1);
    const fTail = w.f.slice(-2);

    if (w.infap && w.infaf && w.infbp && w.infbf) {
        const irregular = inflectIrregularMasc(w.p, w.f, [
            { p: w.infap, f: w.infaf },
            { p: w.infbp, f: w.infbf },
        ]);
        return withPlurals(irregular);
    }
    // the word must be longer than the suffix itself to count as a "tob" word
    const hasTobSuffix = w.p.length > 3
        && w.p.slice(-3) === "توب"
        && ["tób", "tob"].includes(w.f.slice(-3));
    if (hasTobSuffix) {
        return withPlurals(inflectTobMasc(w.p, w.f));
    }
    if (lastLetter === "ی") {
        if (fTail === "ey") {
            return withPlurals(inflectRegularYeyMasc(w.p, w.f));
        }
        if (fTail === "éy") {
            return withPlurals(inflectRegularEmphasizedYeyMasc(w.p, w.f));
        }
    }
    return plurals ? { ...plurals } : false;
}
|
|
|
|
|
|
2021-09-07 11:49:57 +00:00
|
|
|
|
/**
 * Handles feminine nouns ("n. f.").
 *
 * Patterns are tried in this order: ه/ع ending with a single final "a",
 * ح ending with a single final "a", inanimate consonant ending (or phonetics
 * ending in "w"), inanimate ي ending, ۍ ending. Plural info from `makePlural`
 * is merged into whichever result is returned.
 *
 * @param word - entry with phonetic variants already removed
 * @returns inflections merged with plurals; only the plurals when the entry is
 *          flagged `noInf` or no pattern matches; `false` when there is nothing
 */
function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
    /* istanbul ignore next */ // will always have word.c at this point
    const category = word.c || "";
    const isAnimate = category.includes("anim.");
    const lastLetter = word.p.slice(-1);

    const plurals = makePlural(word);
    if (word.noInf) {
        return plurals ? { ...plurals } : false;
    }
    const withPlurals = (inflections: T.Inflections) => ({ inflections, ...plurals });
    const endsInSingleA = endingInSingleARegex.test(word.f);

    if (endsInSingleA && endingInHeyOrAynRegex.test(word.p)) {
        return withPlurals(inflectRegularAFem(word.p, word.f));
    }
    if (endsInSingleA && lastLetter === "ح") {
        return withPlurals(inflectRegularAWithHimPEnding(word.p, word.f));
    }
    if (!isAnimate) {
        // TODO: better reusable function to check if something ends with a consonant
        if (pashtoConsonants.includes(lastLetter) || word.f.slice(-1) === "w") {
            return withPlurals(inflectRegularInanMissingAFem(word.p, word.f));
        }
        if (lastLetter === "ي") {
            return withPlurals(inflectRegularInanEeFem(word.p, word.f));
        }
    }
    if (lastLetter === "ۍ") {
        return withPlurals(inflectRegularUyFem(word.p, word.f));
    }
    // if (endingInAlefRegex.test(word.p)) {
    //   return { inflections: inflectRegularAaFem(word.p, f) };
    // }
    return plurals ? { ...plurals } : false;
}
|
|
|
|
|
|
|
|
|
|
// LEVEL 3 FUNCTIONS
|
|
|
|
|
/**
 * Builds masculine and feminine inflections for an irregular unisex word.
 *
 * The first masculine inflection is `inflections[0]` with its phonetics passed
 * through `splitFIntoPhonemes` and `accentFSylsOnNFromEnd(..., 0)`. Every other
 * inflected form is the accent-stripped `inflections[1]` plus an accented ending.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 * @param inflections - the two irregular forms stored on the dictionary entry
 */
function inflectIrregularUnisex(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
    const stem = removeAccents(inflections[1]);
    const firstForm = inflections[0];
    const firstFormF = accentFSylsOnNFromEnd(splitFIntoPhonemes(firstForm.f), 0);
    const onStem = (pEnding: string, fEnding: string) => ({
        p: stem.p + pEnding,
        f: stem.f + fEnding,
    });
    return {
        masc: [
            [{ p, f }],
            [{ p: firstForm.p, f: firstFormF }],
            [onStem("و", "ó")],
        ],
        fem: [
            [onStem("ه", "á")],
            [onStem("ې", "é")],
            [onStem("و", "ó")],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a unisex word ending in ی with unaccented "ey" phonetics.
 *
 * The last Pashto letter and the last two phonetic characters are removed and
 * the endings for each form are attached to what is left.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
export function inflectRegularYeyUnisex(p: string, f: string): T.UnisexInflections {
    const stemP = p.slice(0, -1);
    const stemF = f.slice(0, -2);
    const form = (pEnding: string, fEnding: string) => ({
        p: stemP + pEnding,
        f: stemF + fEnding,
    });
    return {
        masc: [
            [{ p, f }],
            [form("ي", "ee")],
            [form("یو", "iyo"), form("و", "o")],
        ],
        fem: [
            [form("ې", "e")],
            [form("ې", "e")],
            [form("و", "o")],
        ],
    };
}
|
|
|
|
|
|
2021-08-31 09:34:18 +00:00
|
|
|
|
/**
 * Inflections for a unisex word ending in ه whose phonetics end in "u".
 *
 * Accents are stripped from the whole word, then the final letter / final
 * phonetic character is replaced by each ending. The endings are accented
 * only when the original phonetics ended in an accented "ú".
 *
 * @param pr - plain form, Pashto script
 * @param fr - plain form, phonetics
 */
export function inflectRegularShwaEndingUnisex(pr: string, fr: string): T.UnisexInflections {
    const unaccented = removeAccents(makePsString(pr, fr));
    const endingWasAccented = fr.slice(-1) === "ú";
    const stemP = unaccented.p.slice(0, -1);
    const stemF = unaccented.f.slice(0, -1);
    const form = (pEnding: string, accentedF: string, plainF: string) => ({
        p: stemP + pEnding,
        f: stemF + (endingWasAccented ? accentedF : plainF),
    });
    return {
        masc: [
            [form("ه", "ú", "u")],
            [form("ه", "ú", "u")],
            [form("و", "ó", "o")],
        ],
        fem: [
            [form("ه", "á", "a")],
            [form("ې", "é", "e")],
            [form("و", "ó", "o")],
        ],
    };
}
|
|
|
|
|
|
2021-03-09 12:39:13 +00:00
|
|
|
|
/**
 * Inflections for a unisex word ending in ی with accented "éy" phonetics.
 *
 * The last Pashto letter and the last two phonetic characters are removed and
 * the endings for each form are attached to what is left.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
function inflectEmphasizedYeyUnisex(p: string, f: string): T.UnisexInflections {
    const stemP = p.slice(0, -1);
    const stemF = f.slice(0, -2);
    const form = (pEnding: string, fEnding: string) => ({
        p: stemP + pEnding,
        f: stemF + fEnding,
    });
    return {
        masc: [
            [{ p, f }],
            [form("ي", "ée")],
            [form("یو", "iyo"), form("و", "ó")],
        ],
        fem: [
            [form("ۍ", "úy")],
            [form("ۍ", "úy")],
            [form("یو", "úyo"), form("و", "ó")],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a unisex word treated as ending in a consonant.
 *
 * The first two masculine forms are the word unchanged. The remaining forms
 * add an ending to the word; when `splitUpSyllables` reports exactly one
 * syllable, the phonetics are first passed through
 * `accentFSylsOnNFromEnd(..., 0)` before the ending is added.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
function inflectConsonantEndingUnisex(p: string, f: string): T.UnisexInflections {
    const syllables = splitUpSyllables(f);
    const stemF = syllables.length === 1
        ? accentFSylsOnNFromEnd(syllables, 0)
        : f;
    const stem = makePsString(p, stemF);
    const form = (pEnding: string, fEnding: string) => ({
        p: stem.p + pEnding,
        f: stem.f + fEnding,
    });
    return {
        masc: [
            [{ p, f }],
            [{ p, f }],
            [form("و", "o")],
        ],
        fem: [
            [form("ه", "a")],
            [form("ې", "e")],
            [form("و", "o")],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a masculine noun ending in ی with unaccented "ey" phonetics.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 * @returns masculine forms only; the last one has a long and a short variant
 */
function inflectRegularYeyMasc(p: string, f: string): T.Inflections {
    const stemP = p.slice(0, -1);
    const stemF = f.slice(0, -2);
    const form = (pEnding: string, fEnding: string) => ({
        p: stemP + pEnding,
        f: stemF + fEnding,
    });
    return {
        masc: [
            [{ p, f }],
            [form("ي", "ee")],
            [form("یو", "iyo"), form("و", "o")],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a masculine noun carrying the توب suffix.
 *
 * The last three characters of both the script and the phonetics are replaced
 * by the inflected suffix (تابه / "taabu", then تبو / "tabo").
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
function inflectTobMasc(p: string, f: string): T.Inflections {
    const stemP = p.slice(0, -3);
    const stemF = f.slice(0, -3);
    const form = (pEnding: string, fEnding: string) => ({
        p: stemP + pEnding,
        f: stemF + fEnding,
    });
    return {
        masc: [
            [{ p, f }],
            [form("تابه", "taabu")],
            [form("تبو", "tabo")],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a masculine noun ending in ی with accented "éy" phonetics.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 * @returns masculine forms only; the last one has a long and a short variant
 */
function inflectRegularEmphasizedYeyMasc(p: string, f: string): T.Inflections {
    const stemP = p.slice(0, -1);
    const stemF = f.slice(0, -2);
    const form = (pEnding: string, fEnding: string) => ({
        p: stemP + pEnding,
        f: stemF + fEnding,
    });
    return {
        masc: [
            [{ p, f }],
            [form("ي", "ée")],
            [form("یو", "iyo"), form("و", "o")],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the masculine inflections for an irregular masculine noun.
 *
 * The first inflection is `inflections[0]`; when `splitUpSyllables` reports
 * more than one syllable its phonetics are passed through
 * `accentFSylsOnNFromEnd(..., 0)`. The second inflection is `inflections[1]`
 * with و added, its phonetics stripped of accents and given an accented "ó".
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 * @param inflections - the two irregular forms stored on the dictionary entry
 */
function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
    const firstForm = inflections[0];
    const secondBase = inflections[1];
    const isMultiSyllable = splitUpSyllables(firstForm.f).length > 1;
    const firstFormF = isMultiSyllable
        ? accentFSylsOnNFromEnd(firstForm.f, 0)
        : firstForm.f;
    return {
        masc: [
            [{ p, f }],
            [{ p: firstForm.p, f: firstFormF }],
            [{ p: secondBase.p + "و", f: removeAccents(secondBase.f) + "ó" }],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a feminine noun ending in ه or ع with a final "a" sound.
 *
 * A single trailing tick (' or ’) on the phonetics is ignored when locating
 * the final vowel. The vowel is swapped for "e" / "o", keeping an accent if
 * `hasAccents` reports one on the original vowel. In the script, a final ع is
 * kept and the ending added after it; any other final letter is replaced.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
function inflectRegularAFem(p: string, f: string): T.Inflections {
    const lastChar = f.slice(-1);
    const hasTrailingTick = lastChar === "'" || lastChar === "’";
    const trimmedF = hasTrailingTick ? f.slice(0, -1) : f;
    const finalVowelAccented = hasAccents(trimmedF.slice(-1));
    const stemF = trimmedF.slice(0, -1);
    const stemP = p.slice(-1) === "ع" ? p : p.slice(0, -1);
    return {
        fem: [
            [{ p, f }],
            [{ p: stemP + "ې", f: stemF + (finalVowelAccented ? "é" : "e") }],
            [{ p: stemP + "و", f: stemF + (finalVowelAccented ? "ó" : "o") }],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a feminine noun written with a final ح and pronounced with
 * a final "a" sound.
 *
 * The Pashto script keeps the ح and gets the ending appended; in the
 * phonetics the final vowel is replaced by the ending.
 *
 * The caller only checks that the phonetics end in "a" or "á", optionally
 * followed by tick marks (' or ’). So trailing ticks are skipped before the
 * vowel is removed (otherwise the tick would be removed instead and the "a"
 * left in place), and an accent on the final vowel is carried over to the
 * ending, matching what is done for ه / ع endings.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 * @returns feminine forms only: plain, first inflection, second inflection
 */
function inflectRegularAWithHimPEnding(p: string, f: string): T.Inflections {
    // ignore any tick marks sitting after the final vowel
    const withoutTicks = f.replace(/['’]+$/, "");
    const accented = withoutTicks.slice(-1) === "á";
    const baseF = withoutTicks.slice(0, -1);
    return {
        fem: [
            [{p, f}],
            [{p: `${p}ې`, f: `${baseF}${accented ? "é" : "e"}`}],
            [{p: `${p}و`, f: `${baseF}${accented ? "ó" : "o"}`}],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for an inanimate feminine noun that ends in a consonant
 * (i.e. has no final "a" to replace).
 *
 * The endings are appended to the whole word. When `splitUpSyllables` reports
 * exactly one syllable, the phonetics are first passed through
 * `accentFSylsOnNFromEnd(..., 0)`.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
function inflectRegularInanMissingAFem(p: string, f: string): T.Inflections {
    const isSingleSyllable = splitUpSyllables(f).length === 1;
    const stemF = isSingleSyllable ? accentFSylsOnNFromEnd(f, 0) : f;
    return {
        fem: [
            [{ p, f }],
            [{ p: p + "ې", f: stemF + "e" }],
            [{ p: p + "و", f: stemF + "o" }],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for an inanimate feminine noun ending in ي.
 *
 * The last Pashto letter and the last two phonetic characters are replaced:
 * ۍ / "úy" for the first inflection and یو / "úyo" for the second.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
function inflectRegularInanEeFem(p: string, f: string): T.Inflections {
    const stemP = p.slice(0, -1);
    const stemF = f.slice(0, -2);
    return {
        fem: [
            [{ p, f }],
            [{ p: stemP + "ۍ", f: stemF + "úy" }],
            [{ p: stemP + "یو", f: stemF + "úyo" }],
        ],
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Inflections for a feminine noun ending in ۍ.
 *
 * The phonetic stem (everything except the last two characters) has its
 * accents removed; the plain and first-inflection forms then get an accented
 * "úy". The second inflection has a long (یو / "úyo") and a short (و / "o")
 * variant.
 *
 * @param p - plain form, Pashto script
 * @param f - plain form, phonetics
 */
function inflectRegularUyFem(p: string, f: string): T.Inflections {
    const stemP = p.slice(0, -1);
    const stemF = removeAccents(f.slice(0, -2));
    const accentedPlainF = stemF + "úy";
    return {
        fem: [
            [{ p, f: accentedPlainF }],
            [{ p, f: accentedPlainF }],
            [
                { p: stemP + "یو", f: stemF + "úyo" },
                { p: stemP + "و", f: stemF + "o" },
            ],
        ],
    };
}
|
|
|
|
|
|
2021-09-07 11:49:57 +00:00
|
|
|
|
/**
 * Builds plural inflections from the plural form stored on the entry
 * (`ppp` / `ppf`).
 *
 * The stored form is split into its variants, and the second inflection is
 * produced by running every variant through `addOEnding`.
 *
 * @param word - entry with phonetic variants already removed
 * @returns the set under `masc` for "n. m." entries, under `fem` for "n. f."
 *          entries, and `undefined` when the entry has no stored plural or is
 *          neither of those
 */
function makePashtoPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
    const { ppp, ppf, c } = word;
    if (!ppp || !ppf) {
        return undefined;
    }
    const variants = splitPsByVarients(makePsString(ppp, ppf));
    const buildSet = (): T.PluralInflectionSet => [
        variants,
        variants.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
    ];
    if (c?.includes("n. m.")) {
        return { masc: buildSet() };
    }
    if (c?.includes("n. f.")) {
        return { fem: buildSet() };
    }
    // TODO: handle masculine and unisex
    return undefined;
}
|
|
|
|
|
|
2021-09-14 14:25:04 +00:00
|
|
|
|
/**
 * Builds plural inflections from the Arabic plural stored on the entry
 * (`app` / `apf`).
 *
 * The stored form is split into its variants, and the second inflection is
 * produced by running every variant through `addOEnding`.
 *
 * @param word - entry with phonetic variants already removed
 * @returns the set under `fem` when the entry is "n. f." and the plural's
 *          phonetics end in i / e / a; otherwise under `masc`; `undefined`
 *          when the entry has no stored Arabic plural
 */
function makeArabicPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
    const { app, apf } = word;
    if (!(apf && app)) {
        return undefined;
    }
    const variants = splitPsByVarients(makePsString(app, apf));
    // last phonetic character, ignoring an end tick and any accent
    const finalSound = removeAccents(removeEndTick(apf).slice(-1));
    // again typescript being dumb and not letting me use a typed key here
    const inflectionSet = [
        variants,
        variants.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
    ] as T.PluralInflectionSet;
    // feminine words that have arabic plurals stay feminine with the plural - ie مرجع - مراجع
    // but masculine words that appear feminine in the plural aren't feminine with the Arabic plural - ie. نبي - انبیا
    const looksFeminine = ["i", "e", "a"].includes(finalSound);
    if (looksFeminine && word.c?.includes("n. f.")) {
        return { fem: inflectionSet };
    }
    return { masc: inflectionSet };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Works out the plural forms available for a dictionary entry.
 *
 * Order of precedence:
 *  1. An entry that is itself plural ("pl.") and tagged "n. m." or "n. f."
 *     uses its own form as the plural.
 *  2. A plural stored on the entry (see `makePashtoPlural`) is used as-is.
 *  3. Otherwise a plural is derived from the noun type (unisex / masc / fem)
 *     and the word's ending.
 *  4. Failing that, only the Arabic plural (if any) is returned.
 *
 * From step 2 on, the Arabic plural (possibly `undefined`) is included
 * alongside the Pashto plural.
 *
 * @param w - entry with phonetic variants already removed
 * @returns the plural info, or `undefined` when no plural can be made
 * @throws Error when a "short squish" masculine plural is requested but the
 *         entry lacks `infap` / `infaf`
 */
function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections } | { arabicPlural: T.PluralInflections } | undefined {
    /** Pairs plural form(s) with their second inflection, made via `addOEnding`. */
    function addSecondInf(plur: T.ArrayOneOrMore<T.PsString> | T.PsString): T.PluralInflectionSet {
        const forms: T.ArrayOneOrMore<T.PsString> = Array.isArray(plur) ? plur : [plur];
        return [
            forms,
            forms.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
        ];
    }

    const category = w.c;
    if (category && category.includes("pl.")) {
        const ownForms = addSecondInf(makePsString(w.p, w.f));
        // Typescript being dumb and not letting me do a typed variable for the key
        // could try refactoring with an updated TypeScript dependency
        if (category.includes("n. m.")) return { plural: { masc: ownForms } };
        if (category.includes("n. f.")) return { plural: { fem: ownForms } };
    }

    const arabicPlural = makeArabicPlural(w);
    const pashtoPlural = makePashtoPlural(w);

    /**
     * Masculine plural made by suffixing. With `squish` the stem comes from the
     * entry's first irregular inflection minus its last character; otherwise
     * from the word itself. A final shwa is dropped before the suffix goes on.
     */
    function addMascPluralSuffix(isAnimate?: boolean, squish?: boolean): T.PluralInflectionSet {
        if (squish && (w.infap === undefined || w.infaf === undefined)) {
            throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`);
        }
        const unaccented = removeAccents(squish
            ? makePsString((w.infap as string).slice(0, -1), (w.infaf as string).slice(0, -1))
            : w
        );
        const stem = endsInShwa(unaccented)
            ? makePsString(unaccented.p.slice(0, -1), unaccented.f.slice(0, -1))
            : unaccented;
        const suffix = (isAnimate && !squish)
            ? { p: "ان", f: "áan" }
            : { p: "ونه", f: "óona" };
        return addSecondInf(concatPsString(stem, suffix));
    }

    /** Animate unisex plural: masculine via suffixing, feminine with انې on the unaccented word. */
    function addAnimUnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
        const unaccented = removeAccents(w);
        return {
            masc: addMascPluralSuffix(true),
            fem: addSecondInf(concatPsString(unaccented, { p: "انې", f: "áane" })),
        };
    }

    /** The unaccented word minus its last Pashto letter and last two phonetic characters. */
    function stemWithoutFinalYey() {
        const unaccented = removeAccents(w);
        return {
            p: unaccented.p.slice(0, -1),
            f: unaccented.f.slice(0, -2),
        };
    }

    /** Plural for nouns ending in ي; feminine nouns get an extra variant. */
    function addEePluralSuffix(gender: T.Gender): T.PluralInflectionSet {
        const stem = stemWithoutFinalYey();
        const isFem = gender === "fem";
        const forms: T.ArrayOneOrMore<T.PsString> = [
            concatPsString(stem, { p: "یان", f: "iyáan" }, isFem ? { p: "ې", f: "e" } : ""),
        ];
        if (isFem) {
            forms.push(concatPsString(stem, { p: "یګانې", f: "eegáane" }));
        }
        return [
            forms,
            forms.flatMap(addOEnding),
            // forms.map(addOEnding),
        ] as T.PluralInflectionSet;
    }

    /** Masculine and feminine plurals built on یان for animate words ending in a yey. */
    function addAnimN3UnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
        const stem = stemWithoutFinalYey();
        return {
            masc: [
                [concatPsString(stem, { p: "یان", f: "iyáan" })],
                [concatPsString(stem, { p: "یانو", f: "iyáano" })],
                // TODO: or use addSecondInf method above?
            ],
            fem: [
                [concatPsString(stem, { p: "یانې", f: "iyáane" })],
                [concatPsString(stem, { p: "یانو", f: "iyáano" })],
            ],
        };
    }

    /** Feminine plural with two variants (وې and ګانې); a space is inserted in the script after ع or ه. */
    function addFemLongVowelSuffix(): T.PluralInflectionSet {
        const tickless = removeEndTick(makePsString(w.p, w.f));
        const ticklessUnaccented = removeAccents(tickless);
        const separator = ["ع", "ه"].includes(w.p.slice(-1)) ? { p: " ", f: "" } : "";
        return addSecondInf([
            concatPsString(tickless, separator, { p: "وې", f: "we" }),
            concatPsString(ticklessUnaccented, separator, { p: "ګانې", f: "gáane" })
        ]);
    }

    // TODO: This should be possible for words like پلویان but not for words like ترورزامن 🤔
    // function addFemToPashtoPlural(i: T.PluralInflections): T.UnisexSet<T.PluralInflectionSet> {
    //   if ("fem" in i && "masc" in i) return i;
    //   if (!("masc" in i)) throw new Error("bad pashto plural doesn't even have masculine");
    //   if (endsInConsonant(i.masc[0][0])) {
    //     return {
    //       ...i,
    //       fem: [
    //         i.masc[0].map((x) => concatPsString(x, { p: "ې", f: "e" })) as T.ArrayOneOrMore<T.PsString>,
    //         i.masc[0].map((x) => concatPsString(x, { p: "و", f: "o" })) as T.ArrayOneOrMore<T.PsString>,
    //       ],
    //     };
    //   }
    //   return {
    //     ...i,
    //     fem: i.masc,
    //   };
    // }

    /** Noun type read from the category; "unisex" wins over "n. m.", which wins over "n. f.". */
    function nounType(): "unisex noun" | "masc noun" | "fem noun" | "other" {
        if (w.c?.includes("unisex")) return "unisex noun";
        if (w.c?.includes("n. m.")) return "masc noun";
        if (w.c?.includes("n. f.")) return "fem noun";
        return "other";
    }

    // an irregular first inflection without ا in it
    const shortSquish = !!w.infap && !w.infap.includes("ا");
    const anim = w.c?.includes("anim.");
    const type = nounType();

    if (pashtoPlural) {
        return {
            plural: pashtoPlural,
            arabicPlural,
        };
    }

    const withArabic = (plural: T.PluralInflections) => ({ arabicPlural, plural });

    if (type === "unisex noun") {
        // doesn't need to be labelled anim - because it's only with animate nouns that you get the unisex - I THINK
        if (endsInConsonant(w) && (!w.infap)) {
            return withArabic(addAnimUnisexPluralSuffix());
        }
        if (shortSquish && !anim) {
            return withArabic({ masc: addMascPluralSuffix(anim, shortSquish) });
        }
        if (endsWith([{ p: "ی", f: "éy" }, { p: "ي" }], w, true)) {
            return withArabic(addAnimN3UnisexPluralSuffix());
        }
        // usually shortSquish nouns would never have arabicPlurals -- so we don't have to worry about catching
        // arabic plurals for the animate ones, right?
    } else if (type === "masc noun") {
        if (
            (shortSquish || ((endsInConsonant(w) || endsInShwa(w)) && (!w.infap))) &&
            (w.p.slice(-3) !== "توب")
        ) {
            return withArabic({ masc: addMascPluralSuffix(anim, shortSquish) });
        }
        if (endsWith({ p: "ی", f: "éy" }, w, true) && anim) {
            return withArabic({ masc: addAnimN3UnisexPluralSuffix().masc });
        }
        if (endsWith({ p: "ي" }, w)) {
            return withArabic({ masc: addEePluralSuffix("masc") });
        }
    } else if (type === "fem noun") {
        // TODO: What about endings in long ee / animate at inanimate
        if (endsInAaOrOo(w) && (!w.infap)) {
            return withArabic({ fem: addFemLongVowelSuffix() });
        }
        if (endsWith({ p: "ي" }, w)) {
            return withArabic({ fem: addEePluralSuffix("fem") });
        }
    }

    if (arabicPlural) {
        // no Pashto plural could be made; pashtoPlural is undefined at this point
        return { arabicPlural, plural: pashtoPlural };
    }
    return undefined;
}
|