2021-03-09 12:39:13 +00:00
|
|
|
|
/**
|
|
|
|
|
* Copyright (c) 2021 lingdocs.com
|
|
|
|
|
*
|
|
|
|
|
* This source code is licensed under the MIT license found in the
|
|
|
|
|
* LICENSE file in the root directory of this source tree.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
import { pashtoConsonants } from "./pashto-consonants";
|
2021-05-25 09:47:02 +00:00
|
|
|
|
import {
|
|
|
|
|
concatInflections,
|
|
|
|
|
splitDoubleWord,
|
|
|
|
|
ensureUnisexInflections,
|
2021-08-31 09:34:18 +00:00
|
|
|
|
makePsString,
|
2021-05-25 09:47:02 +00:00
|
|
|
|
} from "./p-text-helpers";
|
2021-08-31 09:34:18 +00:00
|
|
|
|
import {
|
|
|
|
|
removeAccents,
|
|
|
|
|
} from "./accent-helpers";
|
2021-03-09 12:39:13 +00:00
|
|
|
|
import * as T from "../types";
|
|
|
|
|
|
|
|
|
|
/**
 * Matches phonetics that end in a single "a" or "á" (the character before it
 * must not be "a"), with optional ' / ’ marks directly before and/or after
 * that final vowel.
 */
const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/;

/** Matches Pashto script ending in ه or ع where the preceding letter is not ا. */
const endingInHeyOrAynRegex = /[^ا][هع]$/;

/** Matches Pashto script ending in ا, optionally followed by a final ع. */
const endingInAlefRegex = /اع?$/;
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the inflections for a dictionary entry when it is an inflectable
 * noun or adjective.
 *
 * @param word - the dictionary entry to inflect
 * @returns the inflections, or `false` when the entry is flagged `noInf`,
 *   is not a noun/adjective, or matches none of the handled ending patterns
 */
export function inflectWord(word: T.DictionaryEntry): T.Inflections | false {
  // If it's a noun/adj, inflect accordingly
  // TODO: What about n. f. / adj. that end in ي ??
  if (word.noInf) {
    return false;
  }

  const category = word.c;
  if (category?.includes("doub.")) {
    // Doubled word: inflect each half on its own, then join the two results
    const [firstHalf, secondHalf] = splitDoubleWord(word).map(
      (part) => ensureUnisexInflections(inflectWord(part), part),
    );
    return concatInflections(firstHalf, secondHalf) as T.UnisexInflections;
  }

  if (!category) {
    // No category information, so it cannot be identified as a noun/adj
    return false;
  }
  if (category.includes("adj.") || category.includes("unisex")) {
    return handleUnisexWord(word);
  }
  if (category.includes("n. m.")) {
    return handleMascNoun(word);
  }
  if (category.includes("n. f.")) {
    return handleFemNoun(word);
  }
  // It's not a noun/adj
  return false;
}
|
|
|
|
|
|
|
|
|
|
// LEVEL 2 FUNCTIONS
|
|
|
|
|
/**
 * Chooses the inflection pattern for an adjective / unisex word from the
 * endings of its Pashto script and phonetics.
 *
 * @param word - the dictionary entry (already known to be adj. / unisex)
 * @returns the inflections, or `false` if no handled pattern applies
 */
function handleUnisexWord(word: T.DictionaryEntry): T.Inflections | false {
  const { p } = word;
  // Only the first of the comma separated phonetics entries is used
  const f = word.f.split(",")[0].trim();
  // TODO: !!! Handle weird endings / symbols ' etc.
  const lastLetter = p.slice(-1);
  const lastTwoLetters = p.slice(-2);
  const phoneticsEnding = f.slice(-2);

  // Explicit irregular inflections on the entry take priority over the patterns
  if (word.infap && word.infaf && word.infbp && word.infbf) {
    return inflectIrregularUnisex(p, f, [
      { p: word.infap, f: word.infaf },
      { p: word.infbp, f: word.infbf },
    ]);
  }
  if (lastLetter === "ی" && phoneticsEnding === "ey") {
    return inflectRegularYeyUnisex(p, f);
  }
  if (lastLetter === "ه" && word.g.slice(-1) === "u") {
    return inflectRegularShwaEndingUnisex(p, f);
  }
  if (lastLetter === "ی" && phoneticsEnding === "éy") {
    return inflectEmphasizedYeyUnisex(p, f);
  }

  const takesConsonantPattern =
    pashtoConsonants.includes(lastLetter) ||
    lastTwoLetters === "وی" ||
    lastTwoLetters === "ای" ||
    (lastLetter === "ه" && f.slice(-1) === "h");
  return takesConsonantPattern ? inflectConsonantEndingUnisex(p, f) : false;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Chooses the inflection pattern for a masculine noun from the endings of its
 * Pashto script and phonetics.
 *
 * @param word - the dictionary entry (already known to be n. m.)
 * @returns the inflections, or `false` if no handled pattern applies
 */
function handleMascNoun(word: T.DictionaryEntry): T.Inflections | false {
  const { p } = word;
  // Only the first of the comma separated phonetics entries is used
  const f = word.f.split(",")[0].trim();
  // TODO: !!! Handle weird endings / symbols ' etc.

  // Explicit irregular inflections on the entry take priority over the patterns
  if (word.infap && word.infaf && word.infbp && word.infbf) {
    return inflectIrregularMasc(p, f, [
      { p: word.infap, f: word.infaf },
      { p: word.infbp, f: word.infbf },
    ]);
  }

  // The توب ending only counts when something comes before it
  const endsInTob =
    p.length > 3 &&
    p.slice(-3) === "توب" &&
    ["tób", "tob"].includes(f.slice(-3));
  if (endsInTob) {
    return inflectTobMasc(p, f);
  }

  if (p.slice(-1) !== "ی") {
    return false;
  }
  switch (f.slice(-2)) {
    case "ey":
      return inflectRegularYeyMasc(p, f);
    case "éy":
      return inflectRegularEmphasizedYeyMasc(p, f);
    default:
      return false;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Chooses the inflection pattern for a feminine noun from the endings of its
 * Pashto script and phonetics, and from whether it is marked "anim.".
 *
 * @param word - the dictionary entry (already known to be n. f.)
 * @returns the inflections, or `false` if no handled pattern applies
 */
function handleFemNoun(word: T.DictionaryEntry): T.Inflections | false {
  const { p } = word;
  // Only the first of the comma separated phonetics entries is used
  const f = word.f.split(",")[0].trim();
  /* istanbul ignore next */ // will always have word.c at this point
  const c = word.c || "";
  const inanimate = !c.includes("anim.");
  const lastLetter = p.slice(-1);
  const endsInSingleA = endingInSingleARegex.test(f);

  if (endsInSingleA && endingInHeyOrAynRegex.test(p)) {
    return inflectRegularAFem(p, f);
  }
  if (endsInSingleA && lastLetter === "ح") {
    return inflectRegularAWithHimPEnding(p, f);
  }
  // These two patterns are only applied to nouns not marked "anim."
  if (inanimate && pashtoConsonants.includes(lastLetter)) {
    return inflectRegularInanMissingAFem(p, f);
  }
  if (inanimate && lastLetter === "ي") {
    return inflectRegularInanEeFem(p, f);
  }
  if (lastLetter === "ۍ") {
    return inflectRegularUyFem(p, f);
  }
  if (endingInAlefRegex.test(p)) {
    return inflectRegularAaFem(p, f);
  }
  return false;
}
|
|
|
|
|
|
|
|
|
|
// LEVEL 3 FUNCTIONS
|
|
|
|
|
/**
 * Builds masculine and feminine forms for a unisex word whose inflections are
 * given explicitly on the dictionary entry.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @param inflections - two entries: the first is used as-is for the second
 *   masculine form; the second is the stem that every other ending is added to
 * @returns three masculine and three feminine forms
 */
function inflectIrregularUnisex(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
  const [given, stem] = inflections;
  const stemPlus = (pEnding: string, fEnding: string) => ({
    p: stem.p + pEnding,
    f: stem.f + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [{ p: given.p, f: given.f }],
      [stemPlus("و", "o")],
    ],
    fem: [
      [stemPlus("ه", "a")],
      [stemPlus("ې", "e")],
      [stemPlus("و", "o")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds masculine and feminine forms for a unisex word ending in ی / "ey".
 *
 * The last Pashto letter and the last two phonetic characters are removed,
 * and the endings are attached to what remains.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three masculine and three feminine forms
 */
export function inflectRegularYeyUnisex(p: string, f: string): T.UnisexInflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const withEnding = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [withEnding("ي", "ee")],
      [withEnding("یو", "iyo"), withEnding("و", "o")],
    ],
    fem: [
      [withEnding("ې", "e")],
      [withEnding("ې", "e")],
      [withEnding("و", "o")],
    ],
  };
}
|
|
|
|
|
|
2021-08-31 09:34:18 +00:00
|
|
|
|
/**
 * Builds masculine and feminine forms for a unisex word ending in ه whose
 * phonetics end in "u".
 *
 * The word is first passed through `removeAccents`; the last character of both
 * the script and the phonetics is then removed and accented endings are
 * attached to what remains.
 *
 * @param pr - the word in Pashto script
 * @param fr - the word in phonetics
 * @returns three masculine and three feminine forms
 */
export function inflectRegularShwaEndingUnisex(pr: string, fr: string): T.UnisexInflections {
  const unaccented = removeAccents(makePsString(pr, fr));
  const stemP = unaccented.p.slice(0, -1);
  const stemF = unaccented.f.slice(0, -1);
  const withEnding = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [withEnding("ه", "ú")],
      [withEnding("ه", "ú")],
      [withEnding("و", "ó")],
    ],
    fem: [
      [withEnding("ه", "á")],
      [withEnding("ې", "é")],
      [withEnding("و", "ó")],
    ],
  };
}
|
|
|
|
|
|
2021-03-09 12:39:13 +00:00
|
|
|
|
/**
 * Builds masculine and feminine forms for a unisex word ending in ی / "éy".
 *
 * The last Pashto letter and the last two phonetic characters are removed,
 * and the endings are attached to what remains.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three masculine and three feminine forms
 */
function inflectEmphasizedYeyUnisex(p: string, f: string): T.UnisexInflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const withEnding = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [withEnding("ي", "ée")],
      [withEnding("یو", "iyo"), withEnding("و", "ó")],
    ],
    fem: [
      [withEnding("ۍ", "úy")],
      [withEnding("ۍ", "úy")],
      [withEnding("یو", "úyo"), withEnding("و", "ó")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds masculine and feminine forms for a unisex word that takes its
 * endings directly on the unchanged word.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three masculine and three feminine forms; the first two masculine
 *   forms are the word itself
 */
function inflectConsonantEndingUnisex(p: string, f: string): T.UnisexInflections {
  const plus = (pEnding: string, fEnding: string) => ({
    p: p + pEnding,
    f: f + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [{ p, f }],
      [plus("و", "o")],
    ],
    fem: [
      [plus("ه", "a")],
      [plus("ې", "e")],
      [plus("و", "o")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the masculine forms for a noun ending in ی / "ey".
 *
 * The last Pashto letter and the last two phonetic characters are removed,
 * and the endings are attached to what remains.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three masculine forms (the last one has two variants)
 */
function inflectRegularYeyMasc(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const withEnding = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [withEnding("ي", "ee")],
      [withEnding("یو", "iyo"), withEnding("و", "o")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the masculine forms for a noun ending in توب.
 *
 * The last three characters of both the script and the phonetics are removed
 * and replaced with the inflected endings.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three masculine forms
 */
function inflectTobMasc(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -3);
  const stemF = f.slice(0, -3);
  const withEnding = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    masc: [
      [{ p, f }],
      [withEnding("تابه", "taabu")],
      [withEnding("تبو", "tabo")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the masculine forms for a noun ending in ی / "éy".
 *
 * The last Pashto letter and the last two phonetic characters (the accented
 * "éy") are removed, and the endings are attached to what remains.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three masculine forms (the last one has two variants)
 */
function inflectRegularEmphasizedYeyMasc(p: string, f: string): T.Inflections {
  const baseP = p.slice(0, -1);
  const baseF = f.slice(0, -2);
  return {
    masc: [
      [{ p, f }],
      [{ p: `${baseP}ي`, f: `${baseF}ée` }],
      [
        { p: `${baseP}یو`, f: `${baseF}iyo` },
        // The accent was on the removed ending, so the short form carries it
        // on its "ó", the same as inflectEmphasizedYeyUnisex produces for
        // this ending.
        { p: `${baseP}و`, f: `${baseF}ó` },
      ],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the masculine forms for a noun whose inflections are given
 * explicitly on the dictionary entry.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @param inflections - two entries: the first is used as-is for the second
 *   form; the second is the stem that و / "o" is added to for the third form
 * @returns three masculine forms
 */
function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
  const [given, stem] = inflections;
  return {
    masc: [
      [{ p, f }],
      [{ p: given.p, f: given.f }],
      [{ p: stem.p + "و", f: stem.f + "o" }],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the feminine forms for a noun whose phonetics end in a single "a".
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three feminine forms
 */
function inflectRegularAFem(p: string, f: string): T.Inflections {
  // When the phonetics end in an apostrophe mark, both that mark and the
  // vowel before it are removed; otherwise only the final vowel is.
  const endsInApostrophe = ["'", "’"].includes(f.slice(-1));
  const stemF = f.slice(0, endsInApostrophe ? -2 : -1);

  // A final ع stays in place; any other final letter is replaced by the ending
  let stemP = p;
  if (p.slice(-1) !== "ع") {
    stemP = p.slice(0, -1);
  }

  return {
    fem: [
      [{ p, f }],
      [{ p: stemP + "ې", f: stemF + "e" }],
      [{ p: stemP + "و", f: stemF + "o" }],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the feminine forms for a noun written with a final ح whose phonetics
 * end in a single "a".
 *
 * The Pashto script keeps every letter and has the ending appended; the
 * phonetics lose their last character before the ending is added.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three feminine forms
 */
function inflectRegularAWithHimPEnding(p: string, f: string): T.Inflections {
  const stemF = f.slice(0, -1);
  return {
    fem: [
      [{ p, f }],
      [{ p: p + "ې", f: stemF + "e" }],
      [{ p: p + "و", f: stemF + "o" }],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the feminine forms for a noun that takes its endings directly on the
 * unchanged word.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three feminine forms
 */
function inflectRegularInanMissingAFem(p: string, f: string): T.Inflections {
  const plus = (pEnding: string, fEnding: string) => ({
    p: p + pEnding,
    f: f + fEnding,
  });
  return {
    fem: [
      [{ p, f }],
      [plus("ې", "e")],
      [plus("و", "o")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the feminine forms for a noun ending in ي.
 *
 * The last Pashto letter and the last two phonetic characters are removed,
 * and the endings are attached to what remains.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three feminine forms
 */
function inflectRegularInanEeFem(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const withEnding = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    fem: [
      [{ p, f }],
      [withEnding("ۍ", "uy")],
      [withEnding("یو", "uyo")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the feminine forms for a noun ending in ۍ.
 *
 * The first two forms are the word itself. For the third, the last Pashto
 * letter and the last two phonetic characters are removed and the endings are
 * attached to what remains.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three feminine forms (the last one has two variants)
 */
function inflectRegularUyFem(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const withEnding = (pEnding: string, fEnding: string) => ({
    p: stemP + pEnding,
    f: stemF + fEnding,
  });
  return {
    fem: [
      [{ p, f }],
      [{ p, f }],
      [withEnding("یو", "uyo"), withEnding("و", "o")],
    ],
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the feminine forms for a noun whose Pashto script ends in ا
 * (optionally followed by ع).
 *
 * The Pashto script keeps every letter and has the endings appended; the
 * phonetics drop a trailing apostrophe mark, if there is one, before the
 * endings are added.
 *
 * @param p - the word in Pashto script
 * @param f - the word in phonetics
 * @returns three feminine forms
 */
function inflectRegularAaFem(p: string, f: string): T.Inflections {
  let stemF = f;
  if (["'", "’"].includes(f.slice(-1))) {
    stemF = f.slice(0, -1);
  }
  return {
    fem: [
      [{ p, f }],
      [{ p: p + "وې", f: stemF + "we" }],
      [{ p: p + "وو", f: stemF + "wo" }],
    ],
  };
}
|