refactoring the word inflector

This commit is contained in:
adueck 2024-07-26 15:15:49 -04:00
parent 0861c03c82
commit 502031b054
10 changed files with 626 additions and 826 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{ {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "7.2.1", "version": "7.2.2",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "7.2.1", "version": "7.2.2",
"hasInstallScript": true, "hasInstallScript": true,
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {

View File

@ -1,6 +1,6 @@
{ {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "7.2.1", "version": "7.2.2",
"author": "lingdocs.com", "author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com", "homepage": "https://verbs.lingdocs.com",

View File

@ -1,12 +1,12 @@
{ {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "7.2.1", "version": "7.2.2",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "7.2.1", "version": "7.2.2",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@formkit/auto-animate": "^1.0.0-beta.3", "@formkit/auto-animate": "^1.0.0-beta.3",

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "7.2.1", "version": "7.2.2",
"description": "Pashto inflector library module with React components", "description": "Pashto inflector library module with React components",
"main": "dist/components/library.js", "main": "dist/components/library.js",
"module": "dist/components/library.js", "module": "dist/components/library.js",

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/inflect", "name": "@lingdocs/inflect",
"version": "7.2.1", "version": "7.2.2",
"description": "Pashto inflector library", "description": "Pashto inflector library",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/lib/library.d.ts", "types": "dist/lib/library.d.ts",

View File

@ -0,0 +1,596 @@
import * as T from "../../types";
import { makePsString } from "./accent-and-ps-utils";
import {
accentIsOnEnd,
accentOnNFromEnd,
countSyllables,
removeAccents,
} from "./accent-helpers";
import { applyPsString, mapGen } from "./fp-ps";
import { getInflectionPattern } from "./inflection-pattern";
import {
endsInConsonant,
endsInTob,
hasShwaEnding,
mapPsString,
endsWith,
} from "./p-text-helpers";
import { removeDuplicates } from "./phrase-building/vp-tools";
import {
isAdjOrUnisexNounEntry,
isAnimNounEntry,
isFemNounEntry,
isInflectableEntry,
isMascNounEntry,
isNounEntry,
isNumberEntry,
} from "./type-predicates";
/**
 * Plural data that may accompany an entry: an optional `plural` set and an
 * optional `arabicPlural` set, or `undefined` when no plural data is supplied.
 */
type Plurals =
| {
plural?: T.PluralInflections;
arabicPlural?: T.PluralInflections;
}
| undefined;
// Matches phonetics ending in a single "a"/"á" (optionally followed or
// preceded by an apostrophe) where the character before it is not "a".
// NOTE(review): neither pattern below is referenced by the code visible in
// this file — confirm whether they are still needed here.
const endingInSingleARegex = /[^a]'??[aá]'??$/;
// Matches Pashto script ending in ه or ع where the preceding letter is not ا.
const endingInHayOrAynRegex = /[^ا][هع]$/;
/**
 * Produces the inflections and vocative forms for a dictionary entry.
 *
 * @param entry - the dictionary entry to process
 * @param plurals - plural data for the entry; the matching gender's forms are
 *   handed to the pattern builders
 * @returns `false` when the entry is not inflectable, or when its pattern is 0
 *   and it is not a feminine animate noun ending in a consonant (that
 *   exception yields a vocative only); otherwise the inflections and vocative
 *   for each gender that applies to the entry
 */
export function getInfsAndVocative(
  entry: T.DictionaryEntryNoFVars,
  plurals: Plurals
):
  | {
      inflections?: T.Inflections;
      vocative?: T.PluralInflections;
    }
  | false {
  if (!isInflectableEntry(entry)) {
    return false;
  }
  // @ts-ignore
  const e: T.InflectableEntry = entry as T.InflectableEntry;
  const pattern = getInflectionPattern(e);

  if (pattern === 0) {
    // pattern 0 has no inflections; only one kind of noun still gets a vocative
    if (isFemNounEntry(e) && isAnimNounEntry(e) && endsInConsonant(e)) {
      const femPlurals = genderPlural("fem", plurals);
      return {
        vocative: vocFemAnimException({ e, plurals: femPlurals }),
      };
    }
    return false;
  }
  if (pattern === 6) {
    return pattern6({ e, plurals: genderPlural("fem", plurals) });
  }

  let gender: T.Gender | "unisex" = "fem";
  if (isAdjOrUnisexNounEntry(e) || isNumberEntry(e)) {
    gender = "unisex";
  } else if (isMascNounEntry(e)) {
    gender = "masc";
  }

  const builders = patternFuncs[pattern];
  const mascResult =
    gender !== "fem"
      ? builders.masc({ e, plurals: genderPlural("masc", plurals) })
      : undefined;
  const femResult =
    gender !== "masc"
      ? builders.fem({ e, plurals: genderPlural("fem", plurals) })
      : undefined;
  return aggregateInfsAndVoc(mascResult, femResult);
}
/** Argument shared by every pattern builder: the entry and its plural forms. */
type PatternInput = {
e: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry;
plurals: T.PsString[];
};
/** Result of a pattern builder for one gender. */
type InflectionsAndVocative = {
inflections: T.InflectionSet;
vocative: T.PluralInflectionSet;
};
/**
 * Dispatch table from inflection pattern number (1–5) and gender to the
 * function that builds the inflections and vocative for that combination.
 */
const patternFuncs: Record<
1 | 2 | 3 | 4 | 5,
Record<T.Gender, (inp: PatternInput) => InflectionsAndVocative>
> = {
1: {
masc: vocPattern1Masc,
fem: vocPattern1Fem,
},
2: {
masc: vocPattern2Masc,
fem: vocPattern2Fem,
},
3: {
masc: vocPattern3Masc,
fem: vocPattern3Fem,
},
4: {
masc: vocPattern4Masc,
fem: vocPattern4Fem,
},
5: {
masc: vocPattern5Masc,
fem: vocPattern5Fem,
},
};
/**
 * Appends the given plural forms to a list of forms and removes duplicates.
 *
 * @param e - the existing (non-empty) list of forms
 * @param plurals - extra plural forms to merge in
 * @returns `e` untouched when `plurals` is falsy, otherwise the de-duplicated
 *   concatenation of both lists
 */
function addPlurals(
  e: T.ArrayOneOrMore<T.PsString>,
  plurals: T.PsString[]
): T.ArrayOneOrMore<T.PsString> {
  if (plurals) {
    const merged = removeDuplicates([...e, ...plurals]);
    return merged as T.ArrayOneOrMore<T.PsString>;
  }
  return e;
}
/**
 * Builds the feminine inflections and vocative for a pattern 6 entry.
 *
 * The stem is the entry with its last Pashto letter and last two phonetic
 * characters removed and its accents stripped.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine-only inflections and vocative
 */
function pattern6({ e, plurals }: PatternInput): {
  inflections: T.Inflections;
  vocative: T.PluralInflections;
} {
  const stem = removeAccents({ p: e.p.slice(0, -1), f: e.f.slice(0, -2) });
  const plain: T.ArrayOneOrMore<T.PsString> = [{ p: e.p, f: e.f }];
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stem.p}یو`, f: `${stem.f}úyo` },
    { p: `${stem.p}و`, f: `${stem.f}ó` },
  ];
  const fem: T.InflectionSet = [
    plain,
    [{ p: `${stem.p}ۍ`, f: `${stem.f}úy` }],
    secondInf,
  ];
  return {
    inflections: { fem },
    vocative: {
      // the vocative singular is the plain form
      fem: [plain, addPlurals(secondInf, plurals)],
    },
  };
}
/**
 * Builds the vocative for a feminine animate noun that has no regular
 * inflection pattern, using the entry's stored plural (`ppp`/`ppf`).
 *
 * @param e - the entry; must have both `ppp` and `ppf`
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine-only vocative forms
 * @throws Error when the entry has no `ppp` or no `ppf`
 */
function vocFemAnimException({
  e,
  plurals,
}: PatternInput): T.PluralInflections {
  if (!e.ppp || !e.ppf) {
    throw new Error(
      "plural missing for feminine animate exception noun " + e.p
    );
  }
  // TODO: HANDLE BETTER WITH PLURALS!
  const dropLastChar = (s: string): string => s.slice(0, -1);
  const pluralStem = mapPsString(dropLastChar, makePsString(e.ppp, e.ppf));
  // one-syllable words get an accent before the ending is attached
  const singularStem =
    countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f);
  const vocSingular: T.ArrayOneOrMore<T.PsString> = [
    { p: `${singularStem.p}ې`, f: `${singularStem.f}e` },
  ];
  const vocPlural = addPlurals(
    [{ p: `${pluralStem.p}و`, f: `${pluralStem.f}o` }],
    plurals
  );
  return { fem: [vocSingular, vocPlural] };
}
/**
 * Builds the masculine inflections and vocative for a pattern 1 entry.
 *
 * Nouns ending in "tob" get their own forms. Otherwise a shwa ending (if any)
 * is dropped from the stem, one-syllable words are accented, and the endings
 * are accented only when the word has a shwa ending and its phonetics end
 * in "ú".
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns masculine inflections and vocative
 */
function vocPattern1Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
  const plain = (): T.ArrayOneOrMore<T.PsString> => [{ p: e.p, f: e.f }];

  if (isNounEntry(e) && endsInTob(e)) {
    const tobStem = mapPsString((x) => x.slice(0, -3), e);
    const tobSecond: T.ArrayOneOrMore<T.PsString> = [
      { p: `${tobStem.p}تبو`, f: `${tobStem.f}tábo` },
    ];
    return {
      inflections: [
        plain(),
        [{ p: `${tobStem.p}تابه`, f: `${tobStem.f}taabú` }],
        tobSecond,
      ],
      vocative: [
        [{ p: `${e.p}ه`, f: `${e.f}a` }],
        addPlurals(tobSecond, plurals),
      ],
    };
  }

  const endsInShwa = hasShwaEnding(e);
  const stem = mapGen(
    (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
    mapPsString((x: string): string => (endsInShwa ? x.slice(0, -1) : x), e)
  );
  // TODO: shouldn't this be accent-sensitive?
  const stressed = endsInShwa && e.f.endsWith("ú");
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stem.p}و`, f: `${stem.f}${stressed ? "ó" : "o"}` },
  ];
  return {
    inflections: [plain(), plain(), secondInf],
    vocative: [
      [{ p: `${stem.p}ه`, f: `${stem.f}${stressed ? "á" : "a"}` }],
      addPlurals(secondInf, plurals),
    ],
  };
}
/**
 * Assembles the result shared by every branch of `vocPattern1Fem`.
 *
 * @param plain - the plain (uninflected) form
 * @param base - the stem the ې / و endings are attached to
 * @param accented - whether the attached endings carry the accent
 * @param plurals - plural forms merged into the vocative plural
 * @returns the inflection set plus a vocative made of the first inflection
 *   and the second inflection merged with `plurals`
 */
function buildPattern1FemForms(
  plain: T.PsString,
  base: T.PsString,
  accented: boolean,
  plurals: T.PsString[]
): InflectionsAndVocative {
  const second: T.ArrayOneOrMore<T.PsString> = [
    { p: `${base.p}و`, f: `${base.f}${accented ? "ó" : "o"}` },
  ];
  const inflections: T.InflectionSet = [
    [plain],
    [{ p: `${base.p}ې`, f: `${base.f}${accented ? "é" : "e"}` }],
    second,
  ];
  return {
    inflections,
    vocative: [inflections[1], addPlurals(second, plurals)],
  };
}

/**
 * Builds the feminine inflections and vocative for a pattern 1 entry.
 *
 * Branches, checked in order:
 * 1. word ending in ع pronounced "a"/"a'" (unless the letter before it is
 *    ا, ی or ې): the Pashto spelling is kept and only the phonetic ending is
 *    replaced;
 * 2. word ending in ح pronounced "a" (same restriction): likewise;
 * 3. word with a feminine ending (ه pronounced "a", or a shwa ending) whose
 *    accent is on the end: accented endings on the trimmed stem;
 * 4. feminine noun ending in a consonant: endings attached to the whole word,
 *    which is accented first when it has one syllable;
 * 5. anything else: unaccented endings on the stem.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine inflections and vocative
 */
function vocPattern1Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
  const shwaEnding = hasShwaEnding(e);
  const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding;
  const base = mapGen(
    (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
    hasFemEnding
      ? mapPsString((x) => x.slice(0, -1), e)
      : makePsString(e.p, e.f)
  );
  const plain: T.PsString = { p: e.p, f: e.f };
  // the ع / ح branches do not apply when one of these letters comes before
  const excludedPenultimate = ["ا", "ی", "ې"].includes(e.p.at(-2) || "");

  if (
    endsWith(
      [
        { p: "ع", f: "a" },
        { p: "ع", f: "a'" },
      ],
      e
    ) &&
    !excludedPenultimate
  ) {
    // drop the final vowel (and a trailing apostrophe) from the phonetics only
    const aynBase = applyPsString(
      {
        f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1),
      },
      e
    );
    return buildPattern1FemForms(plain, aynBase, accentIsOnEnd(e), plurals);
  }

  if (endsWith([{ p: "ح", f: "a" }], e) && !excludedPenultimate) {
    const hayBase = applyPsString(
      {
        f: (f) => f.slice(0, -1),
      },
      e
    );
    return buildPattern1FemForms(plain, hayBase, accentIsOnEnd(e), plurals);
  }

  if (hasFemEnding && accentIsOnEnd(e)) {
    return buildPattern1FemForms(
      { p: `${base.p}ه`, f: `${base.f}á` },
      base,
      true,
      plurals
    );
  }

  if (isFemNounEntry(e) && endsInConsonant(e)) {
    const baseForInf = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e;
    return buildPattern1FemForms(
      plain,
      { p: baseForInf.p, f: baseForInf.f },
      false,
      plurals
    );
  }

  return buildPattern1FemForms(
    { p: `${base.p}ه`, f: `${base.f}a` },
    base,
    false,
    plurals
  );
}
/**
 * Builds the masculine inflections and vocative for a pattern 2 entry.
 *
 * The stem drops the last Pashto letter and the last two phonetic characters.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns masculine inflections and vocative
 */
function vocPattern2Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
  const { p: stemP, f: stemF } = makePsString(
    e.p.slice(0, -1),
    e.f.slice(0, -2)
  );
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stemP}یو`, f: `${stemF}iyo` },
    { p: `${stemP}و`, f: `${stemF}o` },
  ];
  const inflections: T.InflectionSet = [
    [{ p: e.p, f: e.f }],
    [{ p: `${stemP}ي`, f: `${stemF}ee` }],
    secondInf,
  ];
  const vocative: T.PluralInflectionSet = [
    [{ p: `${stemP}یه`, f: `${stemF}iya` }],
    addPlurals(secondInf, plurals),
  ];
  return { inflections, vocative };
}
/**
 * Builds the feminine inflections and vocative for a pattern 2 entry.
 *
 * The stem drops the last Pashto letter and, from the phonetics, two
 * characters when they end in "ay" and one character otherwise.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine inflections and vocative
 */
function vocPattern2Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
  const phoneticCut = e.f.endsWith("ay") ? -2 : -1;
  const { p: stemP, f: stemF } = makePsString(
    e.p.slice(0, -1),
    e.f.slice(0, phoneticCut)
  );
  const femForm = (): T.ArrayOneOrMore<T.PsString> => [
    { p: `${stemP}ې`, f: `${stemF}e` },
  ];
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stemP}یو`, f: `${stemF}iyo` },
    { p: `${stemP}و`, f: `${stemF}o` },
  ];
  const inflections: T.InflectionSet = [femForm(), femForm(), secondInf];
  return {
    inflections,
    // the vocative singular is the plain feminine form
    vocative: [inflections[0], addPlurals(secondInf, plurals)],
  };
}
/**
 * Builds the masculine inflections and vocative for a pattern 3 entry.
 *
 * The stem drops the last Pashto letter and the last two phonetic characters.
 * The short endings are accented only when the stem still has at least one
 * syllable.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns masculine inflections and vocative
 */
function vocPattern3Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
  const stem = makePsString(
    e.p.slice(0, -1),
    // shouldn't be accents here but remove just to be sure
    removeAccents(e.f.slice(0, -2))
  );
  const stemHasSyllables = Boolean(countSyllables(stem));
  const oEnding = stemHasSyllables ? "ó" : "o";
  const eeEnding = stemHasSyllables ? "ée" : "ee";
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stem.p}یو`, f: `${stem.f}úyo` },
    { p: `${stem.p}و`, f: `${stem.f}${oEnding}` },
  ];
  const inflections: T.InflectionSet = [
    [{ p: e.p, f: e.f }],
    [{ p: `${stem.p}ي`, f: `${stem.f}${eeEnding}` }],
    secondInf,
  ];
  const vocative: T.PluralInflectionSet = [
    [{ p: `${stem.p}یه`, f: `${stem.f}úya` }],
    addPlurals(secondInf, plurals),
  ];
  return { inflections, vocative };
}
/**
 * Builds the feminine inflections and vocative for a pattern 3 entry.
 *
 * The plain form, first inflection and vocative singular are all the same
 * ۍ / "úy" form.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine inflections and vocative
 */
function vocPattern3Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
  const { p: stemP, f: stemF } = makePsString(
    e.p.slice(0, -1),
    // shouldn't be accents here but remove just to be sure
    removeAccents(e.f.slice(0, -2))
  );
  const uyForm: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stemP}ۍ`, f: `${stemF}úy` },
  ];
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stemP}یو`, f: `${stemF}úyo` },
    { p: `${stemP}و`, f: `${stemF}ó` },
  ];
  const inflections: T.InflectionSet = [uyForm, uyForm, secondInf];
  const vocative: T.PluralInflectionSet = [
    uyForm,
    addPlurals(secondInf, plurals),
  ];
  return { inflections, vocative };
}
/**
 * Builds the masculine inflections and vocative for a pattern 4 entry, using
 * the inflection stems stored on the entry (`infap`/`infaf`, `infbp`/`infbf`).
 *
 * The vocative singular depends on the word's ending:
 * - consonant ending: the word (accented if one syllable) plus ه / "a";
 * - shwa ending: the last character replaced with ه / "á";
 * - otherwise (e.g. مېلمه, کوربه): the word unchanged.
 *
 * The vocative plural is always the second inflection merged with `plurals`.
 * Previously the last branch returned the second inflection alone and so
 * dropped the supplied plurals, unlike every other branch.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns masculine inflections and vocative
 */
function vocPattern4Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
  const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e;
  const firstInf = accentOnNFromEnd(
    makePsString(e.infap || "", e.infaf || ""),
    0
  );
  const secondBase = makePsString(e.infbp || "", e.infbf || "");
  const second: T.ArrayOneOrMore<T.PsString> = [
    { p: `${secondBase.p}و`, f: `${secondBase.f}ó` },
  ];
  const inflections: T.InflectionSet = [
    [{ p: e.p, f: e.f }],
    [firstInf],
    second,
  ];
  const vocativePlural = addPlurals(second, plurals);
  if (endsInConsonant(e)) {
    return {
      inflections,
      vocative: [[{ p: `${base.p}ه`, f: `${base.f}a` }], vocativePlural],
    };
  }
  // TODO: is this even possible?
  if (hasShwaEnding(e)) {
    return {
      inflections,
      vocative: [
        [{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }],
        vocativePlural,
      ],
    };
  }
  // exception for مېلمه, کوربه: the vocative singular is the word itself
  return {
    inflections,
    vocative: [[{ p: e.p, f: e.f }], vocativePlural],
  };
}
/**
 * Builds the feminine inflections and vocative for a pattern 4 entry from the
 * stem stored on the entry (`infbp`/`infbf`).
 *
 * The supplied plurals are merged into the vocative plural only. Previously
 * they were merged into `second` before it was placed in `inflections`, so
 * plural forms leaked into the second inflection; no other pattern builder in
 * this file does that.
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine inflections and vocative
 */
function vocPattern4Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
  const base = makePsString(e.infbp || "", e.infbf || "");
  const second: T.ArrayOneOrMore<T.PsString> = [
    { p: `${base.p}و`, f: `${base.f}ó` },
  ];
  const inflections: T.InflectionSet = [
    [{ p: `${base.p}ه`, f: `${base.f}á` }],
    [{ p: `${base.p}ې`, f: `${base.f}é` }],
    second,
  ];
  return {
    inflections,
    // the vocative singular is the first inflection
    vocative: [inflections[1], addPlurals(second, plurals)],
  };
}
/**
 * Builds the masculine inflections and vocative for a pattern 5 entry from
 * the stem stored on the entry (`infbp`/`infbf`).
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns masculine inflections and vocative
 */
function vocPattern5Masc({ e, plurals }: PatternInput): InflectionsAndVocative {
  const { p: stemP, f: stemF } = makePsString(e.infbp || "", e.infbf || "");
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stemP}و`, f: `${stemF}o` },
  ];
  const inflections: T.InflectionSet = [
    [{ p: e.p, f: e.f }],
    [{ p: `${stemP}ه`, f: `${stemF}u` }],
    secondInf,
  ];
  const vocative: T.PluralInflectionSet = [
    [{ p: `${stemP}ه`, f: `${stemF}a` }],
    addPlurals(secondInf, plurals),
  ];
  return { inflections, vocative };
}
/**
 * Builds the feminine inflections and vocative for a pattern 5 entry from
 * the stem stored on the entry (`infbp`/`infbf`).
 *
 * @param e - the entry
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine inflections and vocative
 */
function vocPattern5Fem({ e, plurals }: PatternInput): InflectionsAndVocative {
  const { p: stemP, f: stemF } = makePsString(e.infbp || "", e.infbf || "");
  const firstInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stemP}ې`, f: `${stemF}e` },
  ];
  const secondInf: T.ArrayOneOrMore<T.PsString> = [
    { p: `${stemP}و`, f: `${stemF}o` },
  ];
  const inflections: T.InflectionSet = [
    [{ p: `${stemP}ه`, f: `${stemF}a` }],
    firstInf,
    secondInf,
  ];
  return {
    inflections,
    // the vocative singular is the first inflection
    vocative: [firstInf, addPlurals(secondInf, plurals)],
  };
}
/**
 * Combines the per-gender results into one inflections/vocative pair.
 *
 * @param masc - masculine result, or `undefined` when not applicable
 * @param fem - feminine result, or `undefined` when not applicable
 * @returns objects keyed only by the genders that were supplied; both
 *   properties are `undefined` when neither gender was supplied
 */
function aggregateInfsAndVoc(
  masc: InflectionsAndVocative | undefined,
  fem: InflectionsAndVocative | undefined
): {
  inflections?: T.Inflections;
  vocative?: T.PluralInflections;
} {
  if (masc) {
    if (fem) {
      return {
        inflections: { masc: masc.inflections, fem: fem.inflections },
        vocative: { masc: masc.vocative, fem: fem.vocative },
      };
    }
    return {
      inflections: { masc: masc.inflections },
      vocative: { masc: masc.vocative },
    };
  }
  if (fem) {
    return {
      inflections: { fem: fem.inflections },
      vocative: { fem: fem.vocative },
    };
  }
  return { inflections: undefined, vocative: undefined };
}
/**
 * Collects the inflected plural forms (index 1 of each plural set) for one
 * gender, from the regular plural first and the Arabic plural second.
 *
 * @param gender - "masc" selects the masculine sets; anything else selects
 *   the feminine sets
 * @param plurals - plural data, possibly `undefined`
 * @returns the collected forms, or an empty array when there are none
 */
function genderPlural(gender: T.Gender, plurals: Plurals): T.PsString[] {
  if (!plurals) return [];
  const { plural, arabicPlural } = plurals;
  if (gender === "masc") {
    const fromPlural = plural && "masc" in plural ? plural.masc[1] : [];
    const fromArabic =
      arabicPlural && "masc" in arabicPlural ? arabicPlural.masc[1] : [];
    return [...fromPlural, ...fromArabic];
  }
  const fromPlural = plural && "fem" in plural ? plural.fem[1] : [];
  const fromArabic =
    arabicPlural && "fem" in arabicPlural ? arabicPlural.fem[1] : [];
  return [...fromPlural, ...fromArabic];
}

View File

@ -1822,6 +1822,16 @@ const nouns: {
], ],
], ],
}, },
vocative: {
fem: [
[{ p: "دوستي", f: "dostee" }],
[
{ p: "دوستیو", f: "dostúyo" },
{ p: "دوستو", f: "dostó" },
{ p: "دوستیانو", f: "dostiyáano" },
],
],
},
}, },
}, },
// Feminine regular ending in ۍ // Feminine regular ending in ۍ

View File

@ -6,7 +6,6 @@
* *
*/ */
import { pashtoConsonants } from "./pashto-consonants";
import { import {
concatInflections, concatInflections,
splitDoubleWord, splitDoubleWord,
@ -20,30 +19,16 @@ import {
endsWith, endsWith,
concatPlurals, concatPlurals,
hasShwaEnding, hasShwaEnding,
mapPsString,
endsInTob,
} from "./p-text-helpers"; } from "./p-text-helpers";
import { makePsString, removeFVarients } from "./accent-and-ps-utils"; import { makePsString, removeFVarients } from "./accent-and-ps-utils";
import { import {
accentFSylsOnNFromEnd,
accentOnNFromEnd, accentOnNFromEnd,
countSyllables, countSyllables,
hasAccents,
removeAccents, removeAccents,
splitUpSyllables,
} from "./accent-helpers"; } from "./accent-helpers";
import * as T from "../../types"; import * as T from "../../types";
import { applyPsString, fmapSingleOrLengthOpts } from "./fp-ps"; import { getInfsAndVocative } from "./inflections-and-vocative";
import { getVocatives } from "./vocatives"; import { fmapSingleOrLengthOpts } from "./fp-ps";
import {
isAdjectiveEntry,
isNumberEntry,
isPattern1Entry,
} from "./type-predicates";
const endingInSingleARegex = /[^a]'??[aá]'??$/;
const endingInHayOrAynRegex = /[^ا][هع]$/;
// const endingInAlefRegex = /اع?$/;
export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput { export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
// If it's a noun/adj, inflect accordingly // If it's a noun/adj, inflect accordingly
@ -74,79 +59,17 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
if (w.c && w.c.includes("pl.")) { if (w.c && w.c.includes("pl.")) {
return handlePluralNounOrAdj(w); return handlePluralNounOrAdj(w);
} }
if (
w.c &&
(isAdjectiveEntry(word) || w.c.includes("unisex") || isNumberEntry(word))
) {
return handleUnisexWord(w);
}
if (w.c && w.c.includes("n. m.")) {
return handleMascNoun(w);
}
if (w.c && w.c.includes("n. f.")) {
return handleFemNoun(w);
}
// It's not a noun/adj
return false;
}
// LEVEL 2 FUNCTIONS const plurals = makePlural(w);
function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput { const infAndVoc = getInfsAndVocative(w, plurals);
// Get last letter of Pashto and last two letters of phonetics if (!infAndVoc && !plurals) {
// TODO: !!! Handle weird endings / symbols ' etc.
const pEnd = word.p.slice(-1);
const plurals = makePlural(word);
const vocative = getVocatives(word, plurals);
if (word.noInf) {
return !plurals ? false : { ...plurals };
}
if (word.infap && word.infaf && word.infbp && word.infbf) {
return {
inflections: inflectIrregularUnisex(word.p, word.f, [
{ p: word.infap, f: word.infaf },
{ p: word.infbp, f: word.infbf },
]),
vocative,
...plurals,
};
}
if (pEnd === "ی" && word.f.slice(-2) === "ay") {
return {
inflections: inflectRegularYayUnisex(word.p, word.f),
vocative,
...plurals,
};
}
if (pEnd === "ه" && word.g.slice(-1) === "u") {
return {
inflections: inflectRegularShwaEndingUnisex(word.p, word.f),
vocative,
...plurals,
};
}
if (pEnd === "ی" && word.f.slice(-2) === "áy") {
return {
inflections: inflectEmphasizedYayUnisex(word.p, word.f),
vocative,
...plurals,
};
}
if (
pashtoConsonants.includes(pEnd) ||
word.p.slice(-2) === "وی" ||
word.p.slice(-2) === "ای" ||
word.f.slice(-1) === "w" ||
(word.p.slice(-1) === "ه" && word.f.slice(-1) === "h")
) {
return {
inflections: inflectConsonantEndingUnisex(word.p, word.f),
vocative,
...plurals,
};
}
if (plurals) return plurals;
return false; return false;
} }
return {
...plurals,
...infAndVoc,
};
}
function handlePluralNounOrAdj(w: T.DictionaryEntryNoFVars): T.InflectorOutput { function handlePluralNounOrAdj(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
if (!w.c || !w.c.includes("n.")) return false; if (!w.c || !w.c.includes("n.")) return false;
@ -158,158 +81,7 @@ function handlePluralNounOrAdj(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
return { ...plurals }; return { ...plurals };
} }
/**
 * Inflects a masculine noun entry.
 *
 * Checks, in order: `noInf` (plurals only), stored irregular inflection stems
 * (`infap`/`infaf`/`infbp`/`infbf`), a "tob" ending, pattern 1, a ی ending
 * pronounced "áy" (or "ay" in a one-syllable word), and a ی ending pronounced
 * "ay". The first match decides the inflections; the vocative and any plurals
 * are returned alongside them.
 *
 * @param w - the dictionary entry to inflect
 * @returns the inflections, vocative and plurals for the first matching case;
 *   the plurals alone when no case matches; `false` when there are no plurals
 *   either
 */
function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput { // TODO: REMOVE THIS
// Get last letter of Pashto and last two letters of phonetics
// TODO: !!! Handle weird endings / symbols ' etc.
const plurals = makePlural(w);
const vocative = getVocatives(w, plurals);
// entries flagged noInf never get inflections, only plurals (if any)
if (w.noInf) {
return !plurals ? false : { ...plurals };
}
const pEnd = w.p.slice(-1);
const fEnd = w.f.slice(-2);
// irregular: both inflection stems are stored on the entry
if (w.infap && w.infaf && w.infbp && w.infbf) {
return {
inflections: inflectIrregularMasc(w.p, w.f, [
{ p: w.infap, f: w.infaf },
{ p: w.infbp, f: w.infbf },
]),
vocative,
...plurals,
};
}
if (endsInTob(w)) {
return { inflections: inflectTobMasc(w.p, w.f), vocative, ...plurals };
}
// TODO: stopgap before refactoring
// @ts-ignore
if (isPattern1Entry(w)) {
return {
inflections: {
masc: inflectPattern1Masc(
// @ts-ignore
makePsString(w.p, w.f)
),
},
vocative,
...plurals,
};
}
// ی ending with the accent on it, or an unaccented one-syllable word
if (
pEnd === "ی" &&
(fEnd === "áy" || (fEnd === "ay" && countSyllables(w) === 1))
) {
const inflections = inflectRegularEmphasizedYayMasc(w.p, w.f);
return {
inflections,
vocative,
...plurals,
};
}
// unaccented ی ending
if (pEnd === "ی" && fEnd === "ay") {
return {
inflections: inflectRegularYayMasc(w.p, w.f),
vocative,
...plurals,
};
}
// nothing inflects; fall back to the plurals alone
return plurals ? { ...plurals } : false;
}
/**
 * Inflects a feminine noun entry.
 *
 * Checks, in order: `noInf` (plurals only), a ه/ع ending pronounced as a
 * single "a", a ح ending pronounced as a single "a", an inanimate noun ending
 * in a consonant (or phonetic "w"), an inanimate noun ending in ي, and a ۍ
 * ending. The first match decides the inflections.
 *
 * @param word - the dictionary entry to inflect
 * @returns the inflections, vocative and plurals for the first matching case;
 *   whichever of plurals/vocative exist when no case matches; `false` when
 *   neither exists
 */
function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
// The entry's `c` field is searched below for the "anim." marker
/* istanbul ignore next */ // will always have word.c at this point
const c = word.c || "";
const animate = c.includes("anim.");
const pEnd = word.p.slice(-1);
const plurals = makePlural(word);
const vocative = getVocatives(word, plurals);
// entries flagged noInf never get inflections, only plurals (if any)
if (word.noInf) {
return !plurals ? false : { ...plurals };
}
if (endingInHayOrAynRegex.test(word.p) && endingInSingleARegex.test(word.f)) {
return {
inflections: inflectRegularAFem(word.p, word.f),
vocative,
...plurals,
};
}
if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(word.f)) {
return {
vocative,
inflections: inflectRegularAWithHimPEnding(word.p, word.f),
...plurals,
};
}
// TODO: better reusable function to check if something ends with a consonant
if (
(pashtoConsonants.includes(pEnd) || word.f.slice(-1) === "w") &&
!animate
) {
return {
vocative,
inflections: inflectRegularInanMissingAFem(word.p, word.f),
...plurals,
};
}
if (pEnd === "ي" && !animate) {
return {
inflections: inflectRegularInanEeFem(word.p, word.f),
vocative,
...plurals,
};
}
if (pEnd === "ۍ") {
return {
inflections: inflectRegularUyFem(word.p, word.f),
vocative,
...plurals,
};
}
// if (endingInAlefRegex.test(word.p)) {
// return { inflections: inflectRegularAaFem(word.p, f) };
// }
// no inflections: return whatever plurals and vocative exist, else false
return plurals || vocative
? {
...(plurals ? plurals : {}),
...(vocative
? {
vocative,
}
: {}),
}
: false;
}
// LEVEL 3 FUNCTIONS
/**
 * Builds masculine and feminine inflections for an irregular unisex word from
 * its two stored inflection stems.
 *
 * Endings are unaccented when the first stem has exactly one syllable and
 * accented otherwise.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @param inflections - the two stored stems, in order
 * @returns inflections for both genders
 */
function inflectIrregularUnisex(
  p: string,
  f: string,
  inflections: Array<{ p: string; f: string }>
): T.Inflections {
  const secondStem = removeAccents(inflections[1]);
  const firstStem = removeAccents(inflections[0]);
  const monosyllabic = splitUpSyllables(firstStem.f).length === 1;
  const ending = (plain: string, accented: string): string =>
    monosyllabic ? plain : accented;
  return {
    masc: [
      [{ p, f }],
      [
        {
          p: inflections[0].p,
          // the first stem's final vowel is replaced
          f: `${firstStem.f.slice(0, -1)}${ending("u", "ú")}`,
        },
      ],
      [{ p: `${secondStem.p}و`, f: `${secondStem.f}${ending("o", "ó")}` }],
    ],
    fem: [
      [{ p: `${secondStem.p}ه`, f: `${secondStem.f}${ending("a", "á")}` }],
      [{ p: `${secondStem.p}ې`, f: `${secondStem.f}${ending("e", "é")}` }],
      [{ p: `${secondStem.p}و`, f: `${secondStem.f}${ending("o", "ó")}` }],
    ],
  };
}
export function inflectRegularYayUnisex( export function inflectRegularYayUnisex(
p: string, p: string,
f: string f: string
@ -336,6 +108,7 @@ export function inflectRegularYayUnisex(
}; };
} }
// TODO: REMOVE THIS
export function inflectRegularShwaEndingUnisex( export function inflectRegularShwaEndingUnisex(
pr: string, pr: string,
fr: string fr: string
@ -358,214 +131,6 @@ export function inflectRegularShwaEndingUnisex(
}; };
} }
/**
 * Builds both genders' inflections for a unisex word ending in an accented ی.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns inflections for both genders
 */
function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  // fresh arrays per slot, so no two slots share an object
  const secondInf = (): T.ArrayOneOrMore<T.PsString> => [
    { p: `${stemP}یو`, f: `${stemF}úyo` },
    { p: `${stemP}و`, f: `${stemF}ó` },
  ];
  const femForm = (): T.ArrayOneOrMore<T.PsString> => [
    { p: `${stemP}ۍ`, f: `${stemF}úy` },
  ];
  return {
    masc: [[{ p, f }], [{ p: `${stemP}ي`, f: `${stemF}ée` }], secondInf()],
    fem: [femForm(), femForm(), secondInf()],
  };
}
/**
 * Builds the masculine inflection set for a pattern 1 word.
 *
 * A shwa ending (if any) is dropped from the stem and one-syllable words are
 * accented; the second inflection's ending is accented when the phonetics end
 * in "ú".
 *
 * @param e - the word
 * @returns the masculine inflection set
 */
function inflectPattern1Masc(e: T.PsString): T.InflectionSet {
  const endsInShwa = hasShwaEnding(e);
  const trimmed = mapPsString(
    (x: string): string => (endsInShwa ? x.slice(0, -1) : x),
    e
  );
  const stem = applyPsString(
    {
      f: (x) => (countSyllables(e) === 1 ? accentFSylsOnNFromEnd(x, 0) : x),
    },
    trimmed
  );
  const ending = e.f.endsWith("ú") ? "ó" : "o";
  return [[e], [e], [{ p: `${stem.p}و`, f: `${stem.f}${ending}` }]];
}
/**
 * Builds both genders' inflections for a unisex word ending in a consonant.
 *
 * One-syllable words are accented (after stripping existing accents) before
 * the endings are attached.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns inflections for both genders
 */
function inflectConsonantEndingUnisex(
  p: string,
  f: string
): T.UnisexInflections {
  const syllables = splitUpSyllables(removeAccents(f));
  const stem =
    syllables.length !== 1
      ? makePsString(p, f)
      : makePsString(p, accentFSylsOnNFromEnd(syllables, 0));
  const withEnding = (pEnd: string, fEnd: string): T.PsString => ({
    p: `${stem.p}${pEnd}`,
    f: `${stem.f}${fEnd}`,
  });
  return {
    masc: [[{ p, f }], [{ p, f }], [withEnding("و", "o")]],
    fem: [
      [withEnding("ه", "a")],
      [withEnding("ې", "e")],
      [withEnding("و", "o")],
    ],
  };
}
/**
 * Builds the masculine inflections for a word ending in an unaccented ی.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns masculine-only inflections
 */
function inflectRegularYayMasc(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const masc: T.InflectionSet = [
    [{ p, f }],
    [{ p: `${stemP}ي`, f: `${stemF}ee` }],
    [
      { p: `${stemP}یو`, f: `${stemF}iyo` },
      { p: `${stemP}و`, f: `${stemF}o` },
    ],
  ];
  return { masc };
}
/**
 * Builds the masculine inflections for a word ending in "tob".
 *
 * The stem is the word minus its last three characters (in both script and
 * phonetics), with accents removed.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns masculine-only inflections
 */
function inflectTobMasc(p: string, f: string): T.Inflections {
  const withoutTob = mapPsString((x) => x.slice(0, -3), makePsString(p, f));
  const stem = removeAccents(withoutTob);
  const masc: T.InflectionSet = [
    [{ p, f }],
    [{ p: `${stem.p}تابه`, f: `${stem.f}taabú` }],
    [{ p: `${stem.p}تبو`, f: `${stem.f}tábo` }],
  ];
  return { masc };
}
/**
 * Builds the masculine inflections for a word ending in an emphasized ی.
 *
 * One-syllable words take unaccented "ee" / "o" endings; longer words take
 * "ée" / "ó". The "úyo" form is accented in both cases.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns masculine-only inflections
 */
function inflectRegularEmphasizedYayMasc(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const monosyllabic = countSyllables(makePsString(p, f)) === 1;
  const eeEnding = monosyllabic ? "ee" : "ée";
  const oEnding = monosyllabic ? "o" : "ó";
  return {
    masc: [
      [{ p, f }],
      [{ p: `${stemP}ي`, f: `${stemF}${eeEnding}` }],
      [
        { p: `${stemP}یو`, f: `${stemF}úyo` },
        { p: `${stemP}و`, f: `${stemF}${oEnding}` },
      ],
    ],
  };
}
/**
 * Builds the masculine inflections for an irregular word from its two stored
 * inflection stems.
 *
 * Endings are unaccented when the plain form's phonetics have exactly one
 * syllable and accented otherwise.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @param inflections - the two stored stems, in order
 * @returns masculine-only inflections
 */
function inflectIrregularMasc(
  p: string,
  f: string,
  inflections: Array<{ p: string; f: string }>
): T.Inflections {
  const firstStemF = removeAccents(inflections[0].f);
  // note: the syllable count is taken from the plain form, not from the stem
  const monosyllabic = splitUpSyllables(f).length === 1;
  const secondStemF = removeAccents(inflections[1].f);
  const uEnding = monosyllabic ? "u" : "ú";
  const oEnding = monosyllabic ? "o" : "ó";
  const masc: T.InflectionSet = [
    [{ p, f }],
    [{ p: inflections[0].p, f: `${firstStemF.slice(0, -1)}${uEnding}` }],
    [{ p: `${inflections[1].p}و`, f: `${secondStemF}${oEnding}` }],
  ];
  return { masc };
}
/**
 * Builds the feminine inflections for a word whose phonetics end in "a".
 *
 * A trailing apostrophe is dropped from the phonetics first. The endings are
 * accented when the final vowel was accented. A final ع is kept in the
 * script; any other final letter is dropped.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns feminine-only inflections
 */
function inflectRegularAFem(p: string, f: string): T.Inflections {
  const lastChar = f.slice(-1);
  const trimmedF = ["'", ""].includes(lastChar) ? f.slice(0, -1) : f;
  const stressed = hasAccents(trimmedF.slice(-1));
  const stemF = trimmedF.slice(0, -1);
  const stemP = p.slice(-1) === "ع" ? p : p.slice(0, -1);
  const eEnding = stressed ? "é" : "e";
  const oEnding = stressed ? "ó" : "o";
  return {
    fem: [
      [{ p, f }],
      [{ p: `${stemP}ې`, f: `${stemF}${eEnding}` }],
      [{ p: `${stemP}و`, f: `${stemF}${oEnding}` }],
    ],
  };
}
/**
 * Builds the feminine inflections for a word whose script keeps its final
 * letter while the last phonetic character is replaced by the ending.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns feminine-only inflections
 */
function inflectRegularAWithHimPEnding(p: string, f: string): T.Inflections {
  const stemF = f.slice(0, -1);
  const fem: T.InflectionSet = [
    [{ p, f }],
    [{ p: `${p}ې`, f: `${stemF}e` }],
    [{ p: `${p}و`, f: `${stemF}o` }],
  ];
  return { fem };
}
/**
 * Builds the feminine inflections for a word that takes its endings directly
 * on the full word. One-syllable words are accented first.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns feminine-only inflections
 */
function inflectRegularInanMissingAFem(p: string, f: string): T.Inflections {
  const monosyllabic = splitUpSyllables(f).length === 1;
  const stemF = monosyllabic ? accentFSylsOnNFromEnd(f, 0) : f;
  const fem: T.InflectionSet = [
    [{ p, f }],
    [{ p: `${p}ې`, f: `${stemF}e` }],
    [{ p: `${p}و`, f: `${stemF}o` }],
  ];
  return { fem };
}
/**
 * Builds the feminine inflections for a word ending in ي.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns feminine-only inflections
 */
function inflectRegularInanEeFem(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = f.slice(0, -2);
  const fem: T.InflectionSet = [
    [{ p, f }],
    [{ p: `${stemP}ۍ`, f: `${stemF}úy` }],
    [
      { p: `${stemP}یو`, f: `${stemF}úyo` },
      { p: `${stemP}و`, f: `${stemF}ó` },
    ],
  ];
  return { fem };
}
/**
 * Builds the feminine inflections for a word ending in ۍ.
 *
 * The plain form and first inflection keep the original script but get a
 * normalized "úy" phonetic ending.
 *
 * @param p - Pashto script of the plain form
 * @param f - phonetics of the plain form
 * @returns feminine-only inflections
 */
function inflectRegularUyFem(p: string, f: string): T.Inflections {
  const stemP = p.slice(0, -1);
  const stemF = removeAccents(f.slice(0, -2));
  const uyForm = (): T.ArrayOneOrMore<T.PsString> => [
    { p, f: `${stemF}úy` },
  ];
  return {
    fem: [
      uyForm(),
      uyForm(),
      [
        { p: `${stemP}یو`, f: `${stemF}úyo` },
        { p: `${stemP}و`, f: `${stemF}ó` },
      ],
    ],
  };
}
function makePashtoPlural( function makePashtoPlural(
word: T.DictionaryEntryNoFVars word: T.DictionaryEntryNoFVars
): T.PluralInflections | undefined { ): T.PluralInflections | undefined {

View File

@ -98,7 +98,7 @@ export function isMascNounEntry(e: T.InflectableEntry): e is T.MascNounEntry {
return !!e.c && e.c.includes("n. m."); return !!e.c && e.c.includes("n. m.");
} }
export function isFemNounEntry(e: T.InflectableEntry): e is T.FemNounEntry { export function isFemNounEntry(e: T.DictionaryEntry): e is T.FemNounEntry {
return !!e.c && e.c.includes("n. f."); return !!e.c && e.c.includes("n. f.");
} }

View File

@ -1,371 +0,0 @@
import * as T from "../../types";
import { makePsString } from "./accent-and-ps-utils";
import {
accentIsOnEnd,
accentOnNFromEnd,
countSyllables,
removeAccents,
} from "./accent-helpers";
import { applyPsString, mapGen } from "./fp-ps";
import { getInflectionPattern } from "./inflection-pattern";
import {
endsInConsonant,
endsInTob,
hasShwaEnding,
mapPsString,
endsWith,
} from "./p-text-helpers";
import { removeDuplicates } from "./phrase-building/vp-tools";
import {
isAdjOrUnisexNounEntry,
isAnimNounEntry,
isFemNounEntry,
isInflectableEntry,
isMascNounEntry,
isNounEntry,
isNumberEntry,
} from "./type-predicates";
/**
 * Plural data that may accompany an entry: an optional `plural` set and an
 * optional `arabicPlural` set, or `undefined` when no plural data is supplied.
 */
type Plurals =
| {
plural?: T.PluralInflections;
arabicPlural?: T.PluralInflections;
}
| undefined;
/**
 * Produces the vocative forms for a dictionary entry.
 *
 * @param entry - the dictionary entry to process
 * @param plurals - plural data for the entry; the matching gender's forms are
 *   handed to the pattern builders
 * @returns `undefined` when the entry is not inflectable or its pattern is 0
 *   or 6 (apart from feminine animate nouns ending in a consonant with
 *   pattern 0, which get their own vocative); otherwise the vocative for each
 *   gender that applies to the entry
 */
export function getVocatives(
  entry: T.DictionaryEntryNoFVars,
  plurals: Plurals
): T.PluralInflections | undefined {
  if (!isInflectableEntry(entry)) {
    return undefined;
  }
  // @ts-ignore
  const e: T.InflectableEntry = entry as T.InflectableEntry;
  const pattern = getInflectionPattern(e);

  if (pattern === 0) {
    if (isFemNounEntry(e) && isAnimNounEntry(e) && endsInConsonant(e)) {
      return vocFemAnimException({ e, plurals: genderPlural("fem", plurals) });
    }
    return undefined;
  }
  if (pattern === 6) {
    return undefined;
  }

  const builders = patternFuncs[pattern];
  if (isAdjOrUnisexNounEntry(e) || isNumberEntry(e)) {
    // unisex: both genders
    return {
      masc: builders.masc({ e, plurals: genderPlural("masc", plurals) }),
      fem: builders.fem({ e, plurals: genderPlural("fem", plurals) }),
    };
  }
  if (isMascNounEntry(e)) {
    return {
      masc: builders.masc({ e, plurals: genderPlural("masc", plurals) }),
    };
  }
  return {
    fem: builders.fem({ e, plurals: genderPlural("fem", plurals) }),
  };
}
/** Argument shared by every pattern builder: the entry and its plural forms. */
type PatternInput = {
e: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry;
plurals: T.PsString[];
};
/**
 * Dispatch table from inflection pattern number (1–5) and gender to the
 * function that builds the vocative forms for that combination.
 */
const patternFuncs: Record<
1 | 2 | 3 | 4 | 5,
Record<T.Gender, (inp: PatternInput) => T.PluralInflectionSet>
> = {
1: {
masc: vocPattern1Masc,
fem: vocPattern1Fem,
},
2: {
masc: vocPattern2Masc,
fem: vocPattern2Fem,
},
3: {
masc: vocPattern3Masc,
fem: vocPattern3Fem,
},
4: {
masc: vocPattern4Masc,
fem: vocPattern4Fem,
},
5: {
masc: vocPattern5Masc,
fem: vocPattern5Fem,
},
};
/**
 * Appends the given plural forms to a list of forms and removes duplicates.
 *
 * @param e - the existing (non-empty) list of forms
 * @param plurals - extra plural forms to merge in
 * @returns `e` untouched when `plurals` is falsy, otherwise the de-duplicated
 *   concatenation of both lists
 */
function addPlurals(
  e: T.ArrayOneOrMore<T.PsString>,
  plurals: T.PsString[]
): T.ArrayOneOrMore<T.PsString> {
  if (plurals) {
    const merged = removeDuplicates([...e, ...plurals]);
    return merged as T.ArrayOneOrMore<T.PsString>;
  }
  return e;
}
/**
 * Builds the vocative for a feminine animate noun that has no regular
 * inflection pattern, using the entry's stored plural (`ppp`/`ppf`).
 *
 * @param e - the entry; must have both `ppp` and `ppf`
 * @param plurals - plural forms merged into the vocative plural
 * @returns feminine-only vocative forms
 * @throws Error when the entry has no `ppp` or no `ppf`
 */
function vocFemAnimException({
  e,
  plurals,
}: PatternInput): T.PluralInflections {
  if (!e.ppp || !e.ppf) {
    throw new Error(
      "plural missing for feminine animate exception noun " + e.p
    );
  }
  // TODO: HANDLE BETTER WITH PLURALS!
  const dropLastChar = (s: string): string => s.slice(0, -1);
  const pluralStem = mapPsString(dropLastChar, makePsString(e.ppp, e.ppf));
  // one-syllable words get an accent before the ending is attached
  const singularStem =
    countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : makePsString(e.p, e.f);
  const vocSingular: T.ArrayOneOrMore<T.PsString> = [
    { p: `${singularStem.p}ې`, f: `${singularStem.f}e` },
  ];
  const vocPlural = addPlurals(
    [{ p: `${pluralStem.p}و`, f: `${pluralStem.f}o` }],
    plurals
  );
  return { fem: [vocSingular, vocPlural] };
}
/**
 * Pattern 1, masculine builder.
 *
 * - Noun entries for which `endsInTob` holds: first slot is the whole word
 *   plus `ه` / `a`; second slot drops the last three characters and appends
 *   `تبو` / `tábo`.
 * - Otherwise a trailing shwa (per `hasShwaEnding`) is dropped, one-syllable
 *   words are accented, and `ه` / `و` are appended. The phonetic endings are
 *   accented (`á` / `ó`) only when the word has a shwa ending and its
 *   phonetic form ends in `ú`; otherwise they are plain `a` / `o`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern1Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
  if (isNounEntry(e) && endsInTob(e)) {
    const stem = mapPsString((x) => x.slice(0, -3), e);
    return [
      [{ p: `${e.p}ه`, f: `${e.f}a` }],
      addPlurals([{ p: `${stem.p}تبو`, f: `${stem.f}tábo` }], plurals),
    ];
  }
  const shwaEnding = hasShwaEnding(e);
  const dropShwa = (x: string): string => (shwaEnding ? x.slice(0, -1) : x);
  const stem = mapGen(
    (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
    mapPsString(dropShwa, e)
  );
  // pick the phonetic ending vowels once instead of duplicating the return
  const stressed = shwaEnding && e.f.endsWith("ú");
  const firstVowel = stressed ? "á" : "a";
  const secondVowel = stressed ? "ó" : "o";
  return [
    [{ p: `${stem.p}ه`, f: `${stem.f}${firstVowel}` }],
    addPlurals([{ p: `${stem.p}و`, f: `${stem.f}${secondVowel}` }], plurals),
  ];
}
/**
 * Pattern 1, feminine builder.
 *
 * Every branch yields `stem + ې` in the first slot and `stem + و` (merged
 * with `plurals`) in the second; the branches differ only in how the stem
 * is cut and whether the phonetic endings are accented (`é` / `ó`) or
 * plain (`e` / `o`):
 *
 * 1. words ending in `ع` with phonetic `a` or `a'`: only the phonetic side
 *    is trimmed (by 2 when it ends in `'`, else by 1); accented when
 *    `accentIsOnEnd(e)`.
 * 2. words ending in `ح` with phonetic `a`: only the phonetic side loses
 *    its last character; accented when `accentIsOnEnd(e)`.
 * 3. everything else: the last character is dropped from both sides when
 *    the word ends in `ه` / `a` or has a shwa ending, one-syllable words
 *    are accented, and the endings are accented only when such an ending
 *    was present and `accentIsOnEnd(e)` holds.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern1Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
  // single place where the two slots are assembled
  const build = (
    stem: { p: string; f: string },
    stressed: boolean
  ): T.PluralInflectionSet => [
    [{ p: `${stem.p}ې`, f: `${stem.f}${stressed ? "é" : "e"}` }],
    addPlurals(
      [{ p: `${stem.p}و`, f: `${stem.f}${stressed ? "ó" : "o"}` }],
      plurals
    ),
  ];

  const shwaEnding = hasShwaEnding(e);
  const hasFemEnding = endsWith([{ p: "ه", f: "a" }], e) || shwaEnding;
  const regularStem = mapGen(
    (ps) => (countSyllables(e) === 1 ? accentOnNFromEnd(ps, 0) : ps),
    hasFemEnding
      ? mapPsString((x) => x.slice(0, -1), e)
      : makePsString(e.p, e.f)
  );

  const endsInAyn = endsWith(
    [
      { p: "ع", f: "a" },
      { p: "ع", f: "a'" },
    ],
    e
  );
  if (endsInAyn) {
    const aynStem = applyPsString(
      {
        f: (f) => f.slice(0, f.endsWith("'") ? -2 : -1),
      },
      e
    );
    return build(aynStem, accentIsOnEnd(e));
  }

  if (endsWith([{ p: "ح", f: "a" }], e)) {
    const heStem = applyPsString(
      {
        f: (f) => f.slice(0, -1),
      },
      e
    );
    return build(heStem, accentIsOnEnd(e));
  }

  return build(regularStem, hasFemEnding && accentIsOnEnd(e));
}
/**
 * Pattern 2, masculine builder.
 *
 * The stem is the word minus its last script character and minus its last
 * two phonetic characters. First slot: stem + `یه` / `iya`. Second slot:
 * stem + `یو` / `iyo` and stem + `و` / `o`, merged with `plurals`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern2Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
  const stem = makePsString(e.p.slice(0, -1), e.f.slice(0, -2));
  // glue a script/phonetic suffix pair onto the stem
  const withSuffix = (p: string, f: string): T.PsString => ({
    p: `${stem.p}${p}`,
    f: `${stem.f}${f}`,
  });
  return [
    [withSuffix("یه", "iya")],
    addPlurals([withSuffix("یو", "iyo"), withSuffix("و", "o")], plurals),
  ];
}
/**
 * Pattern 2, feminine builder.
 *
 * The stem is the word minus its last script character; on the phonetic
 * side two characters are removed when it ends in `ay`, otherwise one.
 * First slot: stem + `ې` / `e`. Second slot: stem + `یو` / `iyo` and
 * stem + `و` / `o`, merged with `plurals`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern2Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
  const scriptStem = e.p.slice(0, -1);
  const phoneticCut = e.f.endsWith("ay") ? -2 : -1;
  const { p, f } = makePsString(scriptStem, e.f.slice(0, phoneticCut));
  return [
    [{ p: `${p}ې`, f: `${f}e` }],
    addPlurals(
      [
        { p: `${p}یو`, f: `${f}iyo` },
        { p: `${p}و`, f: `${f}o` },
      ],
      plurals
    ),
  ];
}
/**
 * Pattern 3, masculine builder.
 *
 * The stem is the word minus its last script character and minus its last
 * two phonetic characters, with accents stripped. First slot: stem + `یه` /
 * `úya`. Second slot: stem + `یو` / `úyo` and stem + `و` plus `ó` (or plain
 * `o` when `countSyllables` reports zero for the stem), merged with
 * `plurals`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern3Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
  // shouldn't be accents here but remove just to be sure
  const phoneticStem = removeAccents(e.f.slice(0, -2));
  const stem = makePsString(e.p.slice(0, -1), phoneticStem);
  // accented ending unless the stem has no syllables of its own
  const shortEnding = countSyllables(stem) ? "ó" : "o";
  return [
    [{ p: `${stem.p}یه`, f: `${stem.f}úya` }],
    addPlurals(
      [
        { p: `${stem.p}یو`, f: `${stem.f}úyo` },
        { p: `${stem.p}و`, f: `${stem.f}${shortEnding}` },
      ],
      plurals
    ),
  ];
}
/**
 * Pattern 3, feminine builder.
 *
 * The stem is the word minus its last script character and minus its last
 * two phonetic characters, with accents stripped. First slot: stem + `ۍ` /
 * `úy`. Second slot: stem + `یو` / `úyo` and stem + `و` / `ó`, merged with
 * `plurals`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern3Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
  // shouldn't be accents here but remove just to be sure
  const phoneticStem = removeAccents(e.f.slice(0, -2));
  const { p, f } = makePsString(e.p.slice(0, -1), phoneticStem);
  // TODO: This works well for unisex nouns/adjs but would be redundant for fem. nouns?
  return [
    [{ p: `${p}ۍ`, f: `${f}úy` }],
    addPlurals(
      [
        { p: `${p}یو`, f: `${f}úyo` },
        { p: `${p}و`, f: `${f}ó` },
      ],
      plurals
    ),
  ];
}
/**
 * Pattern 4, masculine builder.
 *
 * The second slot is always the entry's inflection base (`infbp` / `infbf`,
 * empty strings when absent) + `و` / `ó`, merged with `plurals`. The first
 * slot depends on the word's ending:
 *
 * - consonant-final: word (accented when one syllable) + `ه` / `a`
 * - shwa-final: last character replaced by `ه` / `á`
 * - anything else (e.g. مېلمه, کوربه): the word unchanged
 *
 * Previously the last case returned the second slot without going through
 * `addPlurals`, silently discarding the `plurals` argument that every other
 * branch honours. The second slot is now built once and shared so all
 * three branches agree.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern4Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
  const base = countSyllables(e) === 1 ? accentOnNFromEnd(e, 0) : e;
  const plurBase = makePsString(e.infbp || "", e.infbf || "");
  // identical for every branch, so build it a single time
  const pluralForms = addPlurals(
    [{ p: `${plurBase.p}و`, f: `${plurBase.f}ó` }],
    plurals
  );
  if (endsInConsonant(e)) {
    return [[{ p: `${base.p}ه`, f: `${base.f}a` }], pluralForms];
  }
  // TODO: is this even possible?
  if (hasShwaEnding(e)) {
    return [
      [{ p: `${base.p.slice(0, -1)}ه`, f: `${base.f.slice(0, -1)}á` }],
      pluralForms,
    ];
  }
  // exception for مېلمه, کوربه
  return [[{ p: e.p, f: e.f }], pluralForms];
}
/**
 * Pattern 4, feminine builder.
 *
 * Both slots hang off the entry's inflection base (`infbp` / `infbf`, empty
 * strings when absent): first slot base + `ې` / `é`, second slot base +
 * `و` / `ó` merged with `plurals`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern4Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
  const { p, f } = makePsString(e.infbp || "", e.infbf || "");
  return [
    [{ p: `${p}ې`, f: `${f}é` }],
    addPlurals([{ p: `${p}و`, f: `${f}ó` }], plurals),
  ];
}
/**
 * Pattern 5, masculine builder.
 *
 * Both slots hang off the entry's inflection base (`infbp` / `infbf`, empty
 * strings when absent): first slot base + `ه` / `a`, second slot base +
 * `و` / `o` merged with `plurals`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern5Masc({ e, plurals }: PatternInput): T.PluralInflectionSet {
  const { p, f } = makePsString(e.infbp || "", e.infbf || "");
  return [
    [{ p: `${p}ه`, f: `${f}a` }],
    addPlurals([{ p: `${p}و`, f: `${f}o` }], plurals),
  ];
}
/**
 * Pattern 5, feminine builder.
 *
 * Both slots hang off the entry's inflection base (`infbp` / `infbf`, empty
 * strings when absent): first slot base + `ې` / `e`, second slot base +
 * `و` / `o` merged with `plurals`.
 *
 * @param e - the entry being inflected
 * @param plurals - extra plural forms merged into the second slot
 * @returns a two-slot form set
 */
function vocPattern5Fem({ e, plurals }: PatternInput): T.PluralInflectionSet {
  const { p, f } = makePsString(e.infbp || "", e.infbf || "");
  return [
    [{ p: `${p}ې`, f: `${f}e` }],
    addPlurals([{ p: `${p}و`, f: `${f}o` }], plurals),
  ];
}
/**
 * Collects the second-slot forms for one gender from a `Plurals` bundle.
 *
 * Looks at `plurals.plural` first and `plurals.arabicPlural` second; each
 * contributes its `[1]` element for the requested gender when that gender
 * key is present, and nothing otherwise.
 *
 * @param gender - `"masc"` selects the masculine sets; any other value
 *   selects the feminine ones
 * @param plurals - the bundle to read from; a falsy value yields `[]`
 * @returns the concatenated forms (regular plural first, then arabic plural)
 */
function genderPlural(gender: T.Gender, plurals: Plurals): T.PsString[] {
  if (!plurals) {
    return [];
  }
  const { plural, arabicPlural } = plurals;
  if (gender === "masc") {
    const regular = plural && "masc" in plural ? plural.masc[1] : [];
    const arabic =
      arabicPlural && "masc" in arabicPlural ? arabicPlural.masc[1] : [];
    return [...regular, ...arabic];
  }
  const regular = plural && "fem" in plural ? plural.fem[1] : [];
  const arabic =
    arabicPlural && "fem" in arabicPlural ? arabicPlural.fem[1] : [];
  return [...regular, ...arabic];
}