getting going on plural-suffixes
This commit is contained in:
parent
a4eddf82d2
commit
47551aa7dd
|
@ -21,6 +21,7 @@ import {
|
|||
psStringEquals,
|
||||
removeRetroflexR,
|
||||
splitDoubleWord,
|
||||
endsInConsonant,
|
||||
} from "./p-text-helpers";
|
||||
import * as T from "../types";
|
||||
import {
|
||||
|
@ -1003,6 +1004,12 @@ test("psStringEquals", () => {
|
|||
expect(
|
||||
psStringEquals({ p: "بور", f: "bor" }, { p: "تور", f: "tor" })
|
||||
).toBe(false);
|
||||
expect(
|
||||
psStringEquals({ p: "ملګری", f: "malgúrey" }, { p: "ملګری", f: "malgurey" })
|
||||
).toBe(false);
|
||||
expect(
|
||||
psStringEquals({ p: "ملګری", f: "malgúrey" }, { p: "ملګری", f: "malgurey" }, true)
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("removeRetroflexR", () => {
|
||||
|
@ -1010,3 +1017,21 @@ test("removeRetroflexR", () => {
|
|||
removeRetroflexR({ p: "وکړ", f: "óokR" }),
|
||||
).toEqual({ p: "وک", f: "óok" });
|
||||
});
|
||||
|
||||
// Checks endsInConsonant against words that should count as consonant-ending
// (including the ooy / aay endings) and words that should not.
test("endsInAConsonant", () => {
    const cases: Array<[T.PsString, boolean]> = [
        // expected to end in a consonant
        [{ p: "پښتون", f: "puxtoon" }, true],
        [{ p: "کور", f: "kor" }, true],
        [{ p: "ګناه", f: "gUnaah" }, true],
        [{ p: "زوی", f: "zooy" }, true],
        [{ p: "ځای", f: "dzaay" }, true],
        // expected NOT to end in a consonant
        [{ p: "بابا", f: "baabaa" }, false],
        [{ p: "قاضي", f: "qaazee" }, false],
        [{ p: "ګناه", f: "gunaa" }, false],
        [{ p: "اطلاع", f: "itlaa" }, false],
    ];
    for (const [word, expected] of cases) {
        expect(endsInConsonant(word)).toBe(expected);
    }
});
|
|
@ -15,6 +15,9 @@ import {
|
|||
getPersonInflectionsKey,
|
||||
} from "./misc-helpers";
|
||||
import * as T from "../types";
|
||||
import { removeAccents } from "./accent-helpers";
|
||||
import { pashtoConsonants, phoneticsConsonants } from "./pashto-consonants";
|
||||
import { simplifyPhonetics } from "./simplify-phonetics";
|
||||
|
||||
// export function concatPsStringWithVars(...items: Array<T.PsString | " " | "">): T.PsString[] {
|
||||
|
||||
|
@ -90,7 +93,7 @@ export function concatPsString(...items: Array<T.PsString | T.LengthOptions<T.Ps
|
|||
* @param w
|
||||
* @returns
|
||||
*/
|
||||
export function splitDoubleWord(w: T.DictionaryEntry): [T.DictionaryEntry, T.DictionaryEntry] {
|
||||
export function splitDoubleWord(w: T.DictionaryEntryNoFVars): [T.DictionaryEntryNoFVars, T.DictionaryEntryNoFVars] {
|
||||
const pSplit = w.p.split(" ");
|
||||
const fSplit = w.f.split(" ");
|
||||
const c = w.c?.replace(" doub.", "");
|
||||
|
@ -176,25 +179,25 @@ export function ensureBaAt(ps: T.FullForm<T.PsString>, pos: number): T.FullForm<
|
|||
return baInserted;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first phonetics value in a comma-separated list
|
||||
*
|
||||
* @param f - a phonetics string
|
||||
*/
|
||||
export function firstPhonetics(f: string): string {
|
||||
return f.split(",")[0];
|
||||
export function removeFVarients(x: T.DictionaryEntry): T.DictionaryEntryNoFVars;
|
||||
export function removeFVarients(x: T.PsString): T.PsStringNoFVars;
|
||||
export function removeFVarients(x: string): T.FStringNoFVars;
|
||||
export function removeFVarients(x: string | T.PsString | T.DictionaryEntry): T.FStringNoFVars | T.PsStringNoFVars | T.DictionaryEntryNoFVars {
|
||||
if (typeof x === "string") {
|
||||
return x.split(",")[0] as T.FStringNoFVars;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns a PsString or DictionaryEntry ensuring only one phonetics variation
|
||||
*
|
||||
* @param ps
|
||||
*/
|
||||
export function removeFVariants(ps: T.PsString): T.PsString {
|
||||
if ("ts" in x) {
|
||||
return {
|
||||
...ps,
|
||||
f: firstPhonetics(ps.f),
|
||||
};
|
||||
...x,
|
||||
f: removeFVarients(x.f),
|
||||
__brand: "name for a dictionary entry with all the phonetics variations removed",
|
||||
} as T.DictionaryEntryNoFVars;
|
||||
}
|
||||
return {
|
||||
...x,
|
||||
f: removeFVarients(x.f),
|
||||
__brand: "name for a ps string with all the phonetics variations removed",
|
||||
} as T.PsStringNoFVars;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -514,13 +517,6 @@ export function yulEndingInfinitive(s: T.PsString): boolean {
|
|||
return ((pEnding === "یل") && (["yul", "yúl"].includes(fEnding)));
|
||||
}
|
||||
|
||||
export function psStringFromEntry(entry: T.DictionaryEntry): T.PsString {
|
||||
return makePsString(
|
||||
entry.p,
|
||||
firstPhonetics(entry.f),
|
||||
);
|
||||
}
|
||||
|
||||
export function allOnePersonInflection(block: T.ImperativeForm, person: T.Person): T.SingleOrLengthOpts<T.ImperativeBlock>;
|
||||
export function allOnePersonInflection(block: T.VerbForm, person: T.Person): T.SingleOrLengthOpts<T.VerbBlock>;
|
||||
export function allOnePersonInflection(block: T.SingleOrLengthOpts<T.UnisexInflections>, person: T.Person): T.SingleOrLengthOpts<T.UnisexInflections>;
|
||||
|
@ -617,8 +613,9 @@ export function complementInflects(inf: T.UnisexInflections): boolean {
|
|||
// );
|
||||
}
|
||||
|
||||
export function psStringEquals(ps1: T.PsString, ps2: T.PsString): boolean {
|
||||
return (ps1.p === ps2.p) && (ps1.f === ps2.f);
|
||||
export function psStringEquals(ps1: T.PsString, ps2: T.PsString, ignoreAccents?: boolean): boolean {
|
||||
const [p1, p2] = ignoreAccents ? [removeAccents(ps1), removeAccents(ps2)] : [ps1, ps2];
|
||||
return (p1.p === p2.p) && (p1.f === p2.f);
|
||||
}
|
||||
|
||||
export function removeRetroflexR(ps: T.PsString): T.PsString {
|
||||
|
@ -754,10 +751,14 @@ export function ensureShortWurShwaShift(ps: T.PsString): T.PsString {
|
|||
return ps;
|
||||
}
|
||||
|
||||
export function ensureUnisexInflections(infs: T.Inflections | false, w: T.DictionaryEntry): T.UnisexInflections {
|
||||
const ps = { p: w.p, f: firstPhonetics(w.f) };
|
||||
if (infs === false) {
|
||||
export function ensureUnisexInflections(infs: T.InflectorOutput, w: T.DictionaryEntryNoFVars): {
|
||||
inflections: T.UnisexInflections,
|
||||
plural?: T.PluralInflections,
|
||||
} {
|
||||
const ps = { p: w.p, f: w.f };
|
||||
if (infs === false || infs.inflections === undefined) {
|
||||
return {
|
||||
inflections: {
|
||||
masc: [
|
||||
[ps],
|
||||
[ps],
|
||||
|
@ -768,19 +769,58 @@ export function ensureUnisexInflections(infs: T.Inflections | false, w: T.Dictio
|
|||
[ps],
|
||||
[ps],
|
||||
],
|
||||
},
|
||||
};
|
||||
}
|
||||
if (!("fem" in infs)) {
|
||||
if (!("fem" in infs.inflections)) {
|
||||
return {
|
||||
...infs,
|
||||
inflections: {
|
||||
...infs.inflections,
|
||||
fem: [[ps], [ps], [ps]],
|
||||
}
|
||||
};
|
||||
}
|
||||
if (!("masc" in infs)) {
|
||||
if (!("masc" in infs.inflections)) {
|
||||
return {
|
||||
...infs,
|
||||
inflections: {
|
||||
...infs.inflections,
|
||||
masc: [[ps], [ps], [ps]],
|
||||
},
|
||||
};
|
||||
}
|
||||
return infs;
|
||||
// for some dumb reason have to do this for type safety
|
||||
return {
|
||||
inflections: infs.inflections,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Tells whether a word ends in و with simplified phonetics ending in "o",
 * or ends in ا with simplified phonetics ending in "aa".
 *
 * @param w - the word to check (Pashto script and phonetics)
 * @returns true if either ending matches, otherwise false
 */
export function endsInAaOrOo(w: T.PsString): boolean {
    // only the last two characters of the simplified phonetics are relevant
    const phoneticTail = simplifyPhonetics(w.f).slice(-2);
    const lastLetter = w.p.slice(-1);
    if (lastLetter === "و") {
        return phoneticTail.endsWith("o");
    }
    if (lastLetter === "ا") {
        return phoneticTail === "aa";
    }
    return false;
}
|
||||
|
||||
|
||||
/**
 * Tells whether a word should be treated as ending in a consonant.
 *
 * A word counts as consonant-ending when it ends in one of the long
 * diphthongs (ای / "aay" or وی / "ooy", compared ignoring accents), or when
 * the last character of its simplified phonetics is in phoneticsConsonants.
 *
 * @param w - the word to check (Pashto script and phonetics)
 * @returns true if the word ends in a consonant or a long diphthong
 */
export function endsInConsonant(w: T.PsString): boolean {
    // TODO: Add reporting back that the plural ending will need a space?

    // last two Pashto letters + last three phonetic characters, for the diphthong check
    const tail = makePsString(w.p.slice(-2), w.f.slice(-3));
    const longDiphthongs: T.PsString[] = [
        { p: "ای", f: "aay" },
        { p: "وی", f: "ooy" },
    ];
    if (longDiphthongs.some((ending) => psStringEquals(tail, ending, true))) {
        return true;
    }

    // otherwise decide purely on the final phonetic character
    const lastSound = simplifyPhonetics(w.f).slice(-1);
    return phoneticsConsonants.includes(lastSound);
}
|
|
@ -7,3 +7,6 @@
|
|||
*/
|
||||
|
||||
/**
 * Pashto-script letters treated as consonants.
 * Each letter appears exactly once ("ل" was previously listed twice).
 */
export const pashtoConsonants = ["ب", "پ", "ت", "ټ", "ث", "ج", "چ", "ح", "خ", "څ", "ځ", "د", "ډ", "ذ", "ر", "ړ", "ز", "ژ", "ږ", "س", "ش", "ښ", "ص", "ض", "ط", "ظ", "غ", "ف", "ق", "ک", "ګ", "گ", "ل", "م", "ن", "ڼ"];

/**
 * Latin phonetic characters treated as consonants.
 * Note that "y" is not in this list.
 */
export const phoneticsConsonants = [
    "b", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "w", "z",
];
|
|
@ -16,7 +16,7 @@ import * as T from "../types";
|
|||
|
||||
const adjectives: Array<{
|
||||
in: T.DictionaryEntry,
|
||||
out: T.Inflections | false,
|
||||
out: T.InflectorOutput,
|
||||
}> = [
|
||||
// irregular adj.
|
||||
{
|
||||
|
@ -34,6 +34,7 @@ const adjectives: Array<{
|
|||
infbf: "zaR",
|
||||
},
|
||||
out: {
|
||||
inflections:{
|
||||
masc: [
|
||||
[{p: "زوړ", f: "zoR"}],
|
||||
[{p: "زاړه", f: "zaaRu"}],
|
||||
|
@ -46,6 +47,7 @@ const adjectives: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// regular adjective ending in ی
|
||||
{
|
||||
in: {
|
||||
|
@ -58,6 +60,7 @@ const adjectives: Array<{
|
|||
i: 6564,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "ستړی", f: "stúRey"}],
|
||||
[{p: "ستړي", f: "stúRee"}],
|
||||
|
@ -68,6 +71,7 @@ const adjectives: Array<{
|
|||
[{p: "ستړې", f: "stúRe"}],
|
||||
[{p: "ستړو", f: "stúRo"}],
|
||||
],
|
||||
}
|
||||
},
|
||||
},
|
||||
// regular adjective ending in ی with stress on the end
|
||||
|
@ -82,6 +86,7 @@ const adjectives: Array<{
|
|||
i: 12026,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "وروستی", f: "wroostéy"}],
|
||||
[{p: "وروستي", f: "wroostée"}],
|
||||
|
@ -92,6 +97,7 @@ const adjectives: Array<{
|
|||
[{p: "وروستۍ", f: "wroostúy"}],
|
||||
[{p: "وروستیو", f: "wroostúyo"}, {p: "وروستو", f: "wroostó"}],
|
||||
],
|
||||
}
|
||||
},
|
||||
},
|
||||
// regular adjective ending in a consonant
|
||||
|
@ -106,6 +112,7 @@ const adjectives: Array<{
|
|||
i: 6502,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "سپک", f: "spuk"}],
|
||||
[{p: "سپک", f: "spuk"}],
|
||||
|
@ -118,6 +125,7 @@ const adjectives: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {
|
||||
ts: 1527812862,
|
||||
|
@ -129,6 +137,7 @@ const adjectives: Array<{
|
|||
i: 9945,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "لوی", f: "looy"}],
|
||||
[{p: "لوی", f: "looy"}],
|
||||
|
@ -141,6 +150,7 @@ const adjectives: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {
|
||||
ts: 1527811469,
|
||||
|
@ -152,6 +162,7 @@ const adjectives: Array<{
|
|||
i: 2430,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "پوه", f: "poh"}],
|
||||
[{p: "پوه", f: "poh"}],
|
||||
|
@ -164,6 +175,7 @@ const adjectives: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// adjective ending in u
|
||||
{
|
||||
in: {
|
||||
|
@ -176,6 +188,7 @@ const adjectives: Array<{
|
|||
i: 1,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "ویده", f: "weedú"}],
|
||||
[{p: "ویده", f: "weedú"}],
|
||||
|
@ -188,6 +201,7 @@ const adjectives: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// adjective non-inflecting
|
||||
{
|
||||
in: {
|
||||
|
@ -225,6 +239,7 @@ const adjectives: Array<{
|
|||
i: 1,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{ p: "ګډ وډ", f: "guD wuD" }],
|
||||
[{ p: "ګډ وډ", f: "guD wuD" }],
|
||||
|
@ -235,13 +250,14 @@ const adjectives: Array<{
|
|||
[{ p: "ګډې وډې", f: "guDe wuDe" }],
|
||||
[{ p: "ګډو وډو", f: "guDo wuDo" }],
|
||||
],
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const nouns: Array<{
|
||||
in: T.DictionaryEntry,
|
||||
out: T.Inflections | false,
|
||||
out: T.InflectorOutput,
|
||||
}> = [
|
||||
// ## UNISEX
|
||||
// Unisex noun irregular
|
||||
|
@ -260,6 +276,7 @@ const nouns: Array<{
|
|||
infbf: "melman",
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "مېلمه", f: "melmá"}],
|
||||
[{p: "مېلمانه", f: "melmaanu"}],
|
||||
|
@ -272,6 +289,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Unisex noun ending with ی
|
||||
{
|
||||
in: {
|
||||
|
@ -284,6 +302,7 @@ const nouns: Array<{
|
|||
i: 10943,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "ملګری", f: "malgúrey"}],
|
||||
[{p: "ملګري", f: "malgúree"}],
|
||||
|
@ -296,6 +315,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Unisex noun ending on ی with emphasis on the end
|
||||
{
|
||||
in: {
|
||||
|
@ -308,6 +328,7 @@ const nouns: Array<{
|
|||
i: 2900,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "ترورزی", f: "trorzéy"}],
|
||||
[{p: "ترورزي", f: "trorzée"}],
|
||||
|
@ -319,6 +340,13 @@ const nouns: Array<{
|
|||
[{p: "ترورزیو", f: "trorzúyo"}, {p: "ترورزو", f: "trorzó"}],
|
||||
],
|
||||
},
|
||||
// plural: {
|
||||
// masc: [
|
||||
// [{ p: "ترورزامن", f: "trorzaamun" }],
|
||||
// [{ p: "ترورزامنو", f: "trorzaamuno" }],
|
||||
// ],
|
||||
// },
|
||||
},
|
||||
},
|
||||
// Unisex noun ending with a consonant
|
||||
{
|
||||
|
@ -328,10 +356,11 @@ const nouns: Array<{
|
|||
f: "churg",
|
||||
g: "",
|
||||
e: "rooster, cock; chicken, poultry",
|
||||
c: "n. m. unisex",
|
||||
c: "n. m. unisex anim.",
|
||||
i: 4101,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "چرګ", f: "churg"}],
|
||||
[{p: "چرګ", f: "churg"}],
|
||||
|
@ -343,6 +372,17 @@ const nouns: Array<{
|
|||
[{p: "چرګو", f: "churgo"}],
|
||||
],
|
||||
},
|
||||
plural: {
|
||||
masc: [
|
||||
[{p: "چرګان", f: "churgáan"}],
|
||||
[{p: "چرګانو", f: "churgáano"}],
|
||||
],
|
||||
fem: [
|
||||
[{p: "چرګانې", f: "churgáane"}],
|
||||
[{p: "چرګانو", f: "churgáano"}],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// ## MASCULINE
|
||||
// Masculine regular ending in ی
|
||||
|
@ -357,11 +397,13 @@ const nouns: Array<{
|
|||
i: 6750,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "سړی", f: "saRey"}],
|
||||
[{p: "سړي", f: "saRee"}],
|
||||
[{p: "سړیو", f: "saRiyo"}, {p: "سړو", f: "saRo"}],
|
||||
],
|
||||
}
|
||||
},
|
||||
},
|
||||
// Masculine regular ending in ی with emphasis on end
|
||||
|
@ -376,6 +418,7 @@ const nouns: Array<{
|
|||
i: 2931,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "ترېلی", f: "treléy"}],
|
||||
[{p: "ترېلي", f: "trelée"}],
|
||||
|
@ -383,6 +426,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Masculine ending in tob
|
||||
{
|
||||
in: {
|
||||
|
@ -395,6 +439,7 @@ const nouns: Array<{
|
|||
c: "n. m.",
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "مشرتوب", f: "mushurtob"}],
|
||||
[{p: "مشرتابه", f: "mushurtaabu"}],
|
||||
|
@ -402,6 +447,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Masculine irregular
|
||||
{
|
||||
in: {
|
||||
|
@ -418,12 +464,20 @@ const nouns: Array<{
|
|||
infbf: "lamandz",
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
masc: [
|
||||
[{p: "لمونځ", f: "lamoondz"}],
|
||||
[{p: "لمانځه", f: "lamaandzu"}],
|
||||
[{p: "لمنځو", f: "lamandzo"}],
|
||||
],
|
||||
},
|
||||
// plural: {
|
||||
// masc: [
|
||||
// [{ p: "لمونځونه", f: "lamoondzóona" }],
|
||||
// [{ p: "لمونځونو", f: "lamoondzóono" }],
|
||||
// ],
|
||||
// },
|
||||
},
|
||||
},
|
||||
// Masculine non-inflecting
|
||||
{
|
||||
|
@ -436,7 +490,14 @@ const nouns: Array<{
|
|||
c: "n. m.",
|
||||
i: 8640,
|
||||
},
|
||||
out: false,
|
||||
out: {
|
||||
plural: {
|
||||
masc: [
|
||||
[{ p: "کتابونه", f: "kitaabóona" }],
|
||||
[{ p: "کتابونو", f: "kitaabóono" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// ## FEMININE
|
||||
// Feminine regular ending in ه
|
||||
|
@ -451,6 +512,7 @@ const nouns: Array<{
|
|||
i: 7444,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "ښځه", f: "xudza"}],
|
||||
[{p: "ښځې", f: "xudze"}],
|
||||
|
@ -458,6 +520,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {
|
||||
ts: 1527821380,
|
||||
|
@ -469,6 +532,7 @@ const nouns: Array<{
|
|||
i: 365,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "اره", f: "ará"}],
|
||||
[{p: "ارې", f: "are"}],
|
||||
|
@ -476,6 +540,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine regular ending in ع - a'
|
||||
{
|
||||
in: {
|
||||
|
@ -490,6 +555,7 @@ const nouns: Array<{
|
|||
apf: "maraají’",
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "مرجع", f: "marja'"}],
|
||||
[{p: "مرجعې", f: "marje"}],
|
||||
|
@ -497,6 +563,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
in: {
|
||||
ts: 1527820212,
|
||||
|
@ -510,6 +577,7 @@ const nouns: Array<{
|
|||
apf: "manaabí",
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "منبع", f: "manbá"}],
|
||||
[{p: "منبعې", f: "manbe"}],
|
||||
|
@ -517,6 +585,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine regular ending in ح - a
|
||||
{
|
||||
in: {
|
||||
|
@ -529,6 +598,7 @@ const nouns: Array<{
|
|||
i: 5813,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "ذبح", f: "zabha"}],
|
||||
[{p: "ذبحې", f: "zabhe"}],
|
||||
|
@ -536,6 +606,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine inanimate regular with missing ه
|
||||
{
|
||||
in: {
|
||||
|
@ -548,6 +619,7 @@ const nouns: Array<{
|
|||
i: 9593,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "لار", f: "laar"}],
|
||||
[{p: "لارې", f: "laare"}],
|
||||
|
@ -555,7 +627,9 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine animate ending in a consonant
|
||||
// TODO: ALLOW FOR MULTIPLE PLURAL POSSIBILITIES میندې, میېنې etc.
|
||||
{
|
||||
in: {
|
||||
ts: 1527812928,
|
||||
|
@ -564,9 +638,18 @@ const nouns: Array<{
|
|||
g: "",
|
||||
e: "mother, mom",
|
||||
c: "n. f. anim.",
|
||||
ppp: "میندې",
|
||||
ppf: "meynde",
|
||||
i: 11113,
|
||||
},
|
||||
out: false,
|
||||
out: {
|
||||
plural: {
|
||||
fem: [
|
||||
[{ p: "میندې", f: "meynde" }],
|
||||
[{ p: "میندو", f: "meyndo" }],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine regular inanimate ending in ي
|
||||
{
|
||||
|
@ -580,6 +663,7 @@ const nouns: Array<{
|
|||
i: 5503,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "دوستي", f: "dostee"}],
|
||||
[{p: "دوستۍ", f: "dostuy"}],
|
||||
|
@ -587,6 +671,7 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine regular ending in ۍ
|
||||
{
|
||||
in: {
|
||||
|
@ -599,6 +684,7 @@ const nouns: Array<{
|
|||
i: 8718,
|
||||
},
|
||||
out: {
|
||||
inflections: {
|
||||
fem: [
|
||||
[{p: "کرسۍ", f: "kUrsuy"}],
|
||||
[{p: "کرسۍ", f: "kUrsuy"}],
|
||||
|
@ -606,25 +692,27 @@ const nouns: Array<{
|
|||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine regular ending in ا
|
||||
{
|
||||
in: {
|
||||
ts: 1527812456,
|
||||
p: "اړتیا",
|
||||
f: "aRtiyaa, aRtyaa",
|
||||
f: "aRtiyáa, aRtyáa",
|
||||
g: "",
|
||||
e: "need, necessity",
|
||||
c: "n. f.",
|
||||
i: 376,
|
||||
},
|
||||
out: {
|
||||
plural: {
|
||||
fem: [
|
||||
[{p: "اړتیا", f: "aRtiyaa"}],
|
||||
[{p: "اړتیاوې", f: "aRtiyaawe"}],
|
||||
[{p: "اړتیاوو", f: "aRtiyaawo"}],
|
||||
[{p: "اړتیاوې", f: "aRtiyáawe"}, { p: "اړتیاګانې", f:"aRtiyaagáane"}],
|
||||
[{p: "اړتیاوو", f: "aRtiyáawo"}, { p: "اړتیاګانو", f:"aRtiyaagáano"}],
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
// Feminine regular ending in اع
|
||||
{
|
||||
in: {
|
||||
|
@ -636,13 +724,15 @@ const nouns: Array<{
|
|||
c: "n. f.",
|
||||
i: 12205,
|
||||
},
|
||||
out: {
|
||||
fem: [
|
||||
[{p: "وداع", f: "widáa'"}],
|
||||
[{p: "وداعوې", f: "widáawe"}],
|
||||
[{p: "وداعوو", f: "widáawo"}],
|
||||
],
|
||||
},
|
||||
out: false,
|
||||
// out: {
|
||||
// plural: {
|
||||
// fem: [
|
||||
// [{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}],
|
||||
// [{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}],
|
||||
// ],
|
||||
// },
|
||||
// },
|
||||
},
|
||||
// Word with no inflections
|
||||
{
|
||||
|
@ -658,6 +748,7 @@ const nouns: Array<{
|
|||
},
|
||||
out: false,
|
||||
},
|
||||
// TODO: WORDS THAT ARE ALREADY PLURAL!
|
||||
];
|
||||
|
||||
const others: T.DictionaryEntry[] = [
|
||||
|
|
|
@ -12,6 +12,10 @@ import {
|
|||
splitDoubleWord,
|
||||
ensureUnisexInflections,
|
||||
makePsString,
|
||||
removeFVarients,
|
||||
concatPsString,
|
||||
endsInConsonant,
|
||||
endsInAaOrOo,
|
||||
} from "./p-text-helpers";
|
||||
import {
|
||||
removeAccents,
|
||||
|
@ -20,95 +24,102 @@ import * as T from "../types";
|
|||
|
||||
const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/;
|
||||
const endingInHeyOrAynRegex = /[^ا][هع]$/;
|
||||
const endingInAlefRegex = /اع?$/;
|
||||
// const endingInAlefRegex = /اع?$/;
|
||||
|
||||
export function inflectWord(word: T.DictionaryEntry): T.Inflections | false {
|
||||
export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
|
||||
// If it's a noun/adj, inflect accordingly
|
||||
// TODO: What about n. f. / adj. that end in ي ??
|
||||
if (word.noInf) {
|
||||
const w = removeFVarients(word);
|
||||
if (w.noInf) {
|
||||
return false;
|
||||
}
|
||||
if (word.c?.includes("doub.")) {
|
||||
const words = splitDoubleWord(word);
|
||||
const inflected = words.map((word) => ensureUnisexInflections(inflectWord(word), word));
|
||||
return concatInflections(
|
||||
inflected[0],
|
||||
inflected[1],
|
||||
) as T.UnisexInflections;
|
||||
if (w.c?.includes("doub.")) {
|
||||
const words = splitDoubleWord(w);
|
||||
const inflected = words.map((x) => ensureUnisexInflections(inflectWord(x), x));
|
||||
return {
|
||||
inflections: concatInflections(
|
||||
inflected[0].inflections,
|
||||
inflected[1].inflections,
|
||||
) as T.UnisexInflections,
|
||||
};
|
||||
}
|
||||
if (word.c && (word.c.includes("adj.") || word.c.includes("unisex"))) {
|
||||
return handleUnisexWord(word);
|
||||
if (w.c && (w.c.includes("adj.") || w.c.includes("unisex"))) {
|
||||
return handleUnisexWord(w);
|
||||
}
|
||||
if (word.c && (word.c.includes("n. m."))) {
|
||||
return handleMascNoun(word);
|
||||
if (w.c && (w.c.includes("n. m."))) {
|
||||
return handleMascNoun(w);
|
||||
}
|
||||
if (word.c && (word.c.includes("n. f."))) {
|
||||
return handleFemNoun(word);
|
||||
if (w.c && (w.c.includes("n. f."))) {
|
||||
return handleFemNoun(w);
|
||||
}
|
||||
// It's not a noun/adj
|
||||
return false;
|
||||
}
|
||||
|
||||
// LEVEL 2 FUNCTIONS
|
||||
function handleUnisexWord(word: T.DictionaryEntry): T.Inflections | false {
|
||||
// Get first of comma-separated phonetics entries
|
||||
const f = word.f.split(",")[0].trim();
|
||||
function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
||||
// Get last letter of Pashto and last two letters of phonetics
|
||||
// TODO: !!! Handle weird endings / symbols ' etc.
|
||||
const pEnd = word.p.slice(-1);
|
||||
const plural = makePlural(word);
|
||||
if (word.infap && word.infaf && word.infbp && word.infbf) {
|
||||
return inflectIrregularUnisex(word.p, f, [
|
||||
return {
|
||||
inflections: inflectIrregularUnisex(word.p, word.f, [
|
||||
{p: word.infap, f: word.infaf},
|
||||
{p: word.infbp, f: word.infbf},
|
||||
]);
|
||||
]),
|
||||
plural,
|
||||
};
|
||||
}
|
||||
if (pEnd === "ی" && f.slice(-2) === "ey") {
|
||||
return inflectRegularYeyUnisex(word.p, f);
|
||||
if (pEnd === "ی" && word.f.slice(-2) === "ey") {
|
||||
return { inflections: inflectRegularYeyUnisex(word.p, word.f), plural };
|
||||
}
|
||||
if (pEnd === "ه" && word.g.slice(-1) === "u") {
|
||||
return inflectRegularShwaEndingUnisex(word.p, f);
|
||||
return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), plural };
|
||||
}
|
||||
if (pEnd === "ی" && f.slice(-2) === "éy") {
|
||||
return inflectEmphasizedYeyUnisex(word.p, f);
|
||||
if (pEnd === "ی" && word.f.slice(-2) === "éy") {
|
||||
return { inflections: inflectEmphasizedYeyUnisex(word.p, word.f), plural };
|
||||
}
|
||||
if (
|
||||
pashtoConsonants.includes(pEnd) ||
|
||||
word.p.slice(-2) === "وی" ||
|
||||
word.p.slice(-2) === "ای" ||
|
||||
(word.p.slice(-1) === "ه" && f.slice(-1) === "h")
|
||||
(word.p.slice(-1) === "ه" && word.f.slice(-1) === "h")
|
||||
) {
|
||||
return inflectConsonantEndingUnisex(word.p, f);
|
||||
return { inflections: inflectConsonantEndingUnisex(word.p, word.f), plural };
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function handleMascNoun(word: T.DictionaryEntry): T.Inflections | false {
|
||||
// Get first of comma-separated phonetics entries
|
||||
const f = word.f.split(",")[0].trim();
|
||||
function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
||||
// Get last letter of Pashto and last two letters of phonetics
|
||||
// TODO: !!! Handle weird endings / symbols ' etc.
|
||||
const pEnd = word.p.slice(-1);
|
||||
const fEnd = f.slice(-2);
|
||||
if (word.infap && word.infaf && word.infbp && word.infbf) {
|
||||
return inflectIrregularMasc(word.p, f, [
|
||||
{p: word.infap, f: word.infaf},
|
||||
{p: word.infbp, f: word.infbf},
|
||||
]);
|
||||
const plural = makePlural(w);
|
||||
const pEnd = w.p.slice(-1);
|
||||
const fEnd = w.f.slice(-2);
|
||||
if (w.infap && w.infaf && w.infbp && w.infbf) {
|
||||
return {
|
||||
inflections: inflectIrregularMasc(w.p, w.f, [
|
||||
{p: w.infap, f: w.infaf},
|
||||
{p: w.infbp, f: w.infbf},
|
||||
]),
|
||||
plural,
|
||||
};
|
||||
}
|
||||
const isTobEnding = (word.p.slice(-3) === "توب" && ["tób", "tob"].includes(f.slice(-3)) && word.p.length > 3);
|
||||
const isTobEnding = (w.p.slice(-3) === "توب" && ["tób", "tob"].includes(w.f.slice(-3)) && w.p.length > 3);
|
||||
if (isTobEnding) {
|
||||
return inflectTobMasc(word.p, f);
|
||||
return { inflections: inflectTobMasc(w.p, w.f), plural };
|
||||
}
|
||||
if (pEnd === "ی" && fEnd === "ey") {
|
||||
return inflectRegularYeyMasc(word.p, f);
|
||||
return { inflections: inflectRegularYeyMasc(w.p, w.f), plural };
|
||||
}
|
||||
if (pEnd === "ی" && fEnd === "éy") {
|
||||
return inflectRegularEmphasizedYeyMasc(word.p, f);
|
||||
return { inflections: inflectRegularEmphasizedYeyMasc(w.p, w.f), plural };
|
||||
}
|
||||
return false;
|
||||
return plural ? { plural } : false
|
||||
}
|
||||
|
||||
function handleFemNoun(word: T.DictionaryEntry): T.Inflections | false {
|
||||
function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
|
||||
// Get first of comma-separated phonetics entries
|
||||
const f = word.f.split(",")[0].trim();
|
||||
/* istanbul ignore next */ // will always have word.c at this point
|
||||
|
@ -116,25 +127,27 @@ function handleFemNoun(word: T.DictionaryEntry): T.Inflections | false {
|
|||
const animate = c.includes("anim.");
|
||||
const pEnd = word.p.slice(-1);
|
||||
|
||||
const plural = makePlural(word);
|
||||
|
||||
if (endingInHeyOrAynRegex.test(word.p) && endingInSingleARegex.test(f)) {
|
||||
return inflectRegularAFem(word.p, f);
|
||||
return { inflections: inflectRegularAFem(word.p, f), plural };
|
||||
}
|
||||
if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(f)) {
|
||||
return inflectRegularAWithHimPEnding(word.p, f);
|
||||
return { inflections: inflectRegularAWithHimPEnding(word.p, f), plural };
|
||||
}
|
||||
if (pashtoConsonants.includes(pEnd) && !animate) {
|
||||
return inflectRegularInanMissingAFem(word.p, f);
|
||||
return { inflections: inflectRegularInanMissingAFem(word.p, f), plural };
|
||||
}
|
||||
if (pEnd === "ي" && (!animate)) {
|
||||
return inflectRegularInanEeFem(word.p, f);
|
||||
return { inflections: inflectRegularInanEeFem(word.p, f), plural };
|
||||
}
|
||||
if (pEnd === "ۍ") {
|
||||
return inflectRegularUyFem(word.p, f);
|
||||
return { inflections: inflectRegularUyFem(word.p, f), plural };
|
||||
}
|
||||
if (endingInAlefRegex.test(word.p)) {
|
||||
return inflectRegularAaFem(word.p, f);
|
||||
}
|
||||
return false;
|
||||
// if (endingInAlefRegex.test(word.p)) {
|
||||
// return { inflections: inflectRegularAaFem(word.p, f) };
|
||||
// }
|
||||
return plural ? { plural } : false;
|
||||
}
|
||||
|
||||
// LEVEL 3 FUNCTIONS
|
||||
|
@ -341,13 +354,77 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections {
|
|||
};
|
||||
}
|
||||
|
||||
function inflectRegularAaFem(p: string, f: string): T.Inflections {
|
||||
const baseF = ["'", "’"].includes(f.slice(-1)) ? f.slice(0, -1) : f;
|
||||
function makePashtoPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
|
||||
if (!(word.ppp && word.ppf)) return undefined;
|
||||
const base = makePsString(word.ppp, word.ppf);
|
||||
// TODO: Add male Pashto plural
|
||||
if (word.c?.includes("n. f.")) {
|
||||
return {
|
||||
fem: [
|
||||
[{p, f}],
|
||||
[{p: `${p}وې`, f: `${baseF}we`}],
|
||||
[{p: `${p}وو`, f: `${baseF}wo`}],
|
||||
[base],
|
||||
// todo: function to add و ending automatically
|
||||
[concatPsString(
|
||||
makePsString(base.p.slice(0, -1), base.f.slice(0, -1)),
|
||||
{ p: "و", f: "o" },
|
||||
)],
|
||||
],
|
||||
}
|
||||
}
|
||||
// TODO: handle masculine and unisex
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Builds the plural forms for a noun entry, or returns undefined when no rule applies.
 *
 * A plural returned by makePashtoPlural takes precedence. Otherwise a suffix rule
 * is picked from the part-of-speech text in w.c:
 *  - "unisex" + "anim." nouns ending in a consonant get masculine and feminine suffixes
 *  - "n. m." nouns ending in a consonant (but not in توب) get a masculine suffix,
 *    chosen by whether w.c contains "anim."
 *  - "n. f." nouns for which endsInAaOrOo is true get two alternative feminine suffixes
 * All three suffix rules are skipped when the entry has an infap value.
 *
 * @param w - dictionary entry with phonetic variations already removed
 * @returns the plural inflections, or undefined if none could be made
 */
function makePlural(w: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
    // TODO: Include the Pashto plural thing here
    const fromEntry = makePashtoPlural(w);
    if (fromEntry) {
        return fromEntry;
    }

    // masculine plural + its inflected form; the suffix carries the accent, so the stem loses its own
    const mascPlural = (animate?: boolean): T.PluralInflectionSet => {
        const stem = removeAccents(w);
        const plainSuffix = animate ? { p: "ان", f: "áan" } : { p: "ونه", f: "óona" };
        const inflectedSuffix = animate ? { p: "انو", f: "áano" } : { p: "ونو", f: "óono" };
        return [
            [concatPsString(stem, plainSuffix)],
            [concatPsString(stem, inflectedSuffix)],
        ];
    };

    // animate unisex nouns: masculine animate plural plus the feminine counterpart
    const animUnisexPlural = (): T.UnisexSet<T.PluralInflectionSet> => {
        const stem = removeAccents(w);
        return {
            masc: mascPlural(true),
            fem: [
                [concatPsString(stem, { p: "انې", f: "áane" })],
                [concatPsString(stem, { p: "انو", f: "áano" })],
            ],
        };
    };

    // feminine nouns with a long-vowel ending: two alternatives per form
    // (the first keeps the word's accents, the second is built on the accent-free stem)
    const femLongVowelPlural = (): T.PluralInflectionSet => {
        const accented = makePsString(w.p, w.f);
        const unaccented = removeAccents(accented);
        return [
            [
                concatPsString(accented, { p: "وې", f: "we" }),
                concatPsString(unaccented, { p: "ګانې", f: "gáane" }),
            ],
            [
                concatPsString(accented, { p: "وو", f: "wo" }),
                concatPsString(unaccented, { p: "ګانو", f: "gáano" }),
            ],
        ];
    };

    const isAnimate = w.c?.includes("anim.");
    // "unisex" is checked first, so an entry marked "n. m. unisex" is classed as unisex
    const category = ((): "unisex noun" | "masc noun" | "fem noun" | "other" => {
        if (w.c?.includes("unisex")) return "unisex noun";
        if (w.c?.includes("n. m.")) return "masc noun";
        if (w.c?.includes("n. f.")) return "fem noun";
        return "other";
    })();

    switch (category) {
        case "unisex noun":
            if (endsInConsonant(w) && !w.infap && isAnimate) {
                return animUnisexPlural();
            }
            return undefined;
        case "masc noun":
            if (endsInConsonant(w) && !w.infap && w.p.slice(-3) !== "توب") {
                return { masc: mascPlural(isAnimate) };
            }
            return undefined;
        case "fem noun":
            // TODO: What about endings in long ee / animate at inanimate
            if (endsInAaOrOo(w) && !w.infap) {
                return { fem: femLongVowelPlural() };
            }
            return undefined;
        default:
            return undefined;
    }
}
|
||||
|
|
|
@ -121,7 +121,7 @@ export function getVerbInfo(
|
|||
return getGenerativeStativeCompoundVerbInfo(entry, complement as T.DictionaryEntry);
|
||||
}
|
||||
}
|
||||
const comp = complement ? ensureUnisexInflections(complement) : undefined;
|
||||
const comp = complement ? ensureUnisexInf(complement) : undefined;
|
||||
const root = getVerbRoots(entry, transitivity, comp);
|
||||
const stem = getVerbStems(entry, root, transitivity, comp);
|
||||
const infinitive = "mascSing" in root.imperfective ? root.imperfective.mascSing.long : root.imperfective.long;
|
||||
|
@ -877,7 +877,7 @@ function addOoPrefix(
|
|||
};
|
||||
}
|
||||
|
||||
function ensureUnisexInflections(complement: T.DictionaryEntry): T.UnisexInflections {
|
||||
function ensureUnisexInf(complement: T.DictionaryEntry): T.UnisexInflections {
|
||||
const inflected = inflectWord(complement);
|
||||
const isUnisex = inflected && (("masc" in inflected) && ("fem" in inflected));
|
||||
if (isUnisex) {
|
||||
|
|
23
src/types.ts
23
src/types.ts
|
@ -114,6 +114,10 @@ export type DictionaryEntry = {
|
|||
ep?: string;
|
||||
}
|
||||
|
||||
/** A DictionaryEntry branded to mark that its phonetics variations have been removed. */
export type DictionaryEntryNoFVars = DictionaryEntry & { __brand: "name for a dictionary entry with all the phonetics variations removed" };
|
||||
/** A PsString branded to mark that its phonetics variations have been removed. */
export type PsStringNoFVars = PsString & { __brand: "name for a ps string with all the phonetics variations removed" };
|
||||
/** A phonetics string branded to mark that its phonetics variations have been removed. */
export type FStringNoFVars = string & { __brand: "name for a phonetics string with all the phonetics variations removed" };
|
||||
|
||||
export type DictionaryEntryTextField = "p" | "f" | "e" | "c" | "infap" | "infaf" | "infbp" | "infbf" | "app" | "apf" | "ppp" | "ppf" | "psp" | "psf" | "ssp" | "ssf" | "prp" | "prf" | "pprtp" | "pprtf" | "tppp" | "tppf" | "ec" | "ep";
|
||||
export type DictionaryEntryBooleanField = "noInf" | "shortIntrans" | "noOo" | "sepOo" | "diacExcept";
|
||||
export type DictionaryEntryNumberField = "ts" | "i" | "l" | "separationAtP" | "separationAtF";
|
||||
|
@ -334,12 +338,25 @@ export type PerfectContent = {
|
|||
// Plain, 1st, and 2nd Inflection
|
||||
export type InflectionSet = ArrayFixed<ArrayOneOrMore<PsString>, 3>;
|
||||
|
||||
// Plural and Second Inflection
|
||||
export type PluralInflectionSet = ArrayFixed<ArrayOneOrMore<PsString>, 2>
|
||||
|
||||
export type Gender = "masc" | "fem";
|
||||
|
||||
export type UnisexInflections = Record<Gender, InflectionSet>;
|
||||
export type UnisexSet<T> = Record<Gender, T>;
|
||||
export type GenderedSet<T> = UnisexSet<T> | Omit<UnisexSet<T>, "fem"> | Omit<UnisexSet<T>, "masc">;
|
||||
export type UnisexInflections = UnisexSet<InflectionSet>;
|
||||
|
||||
export type Inflections = UnisexInflections
|
||||
| Omit<UnisexInflections, "fem"> | Omit<UnisexInflections, "masc">;
|
||||
export type Inflections = GenderedSet<InflectionSet>;
|
||||
|
||||
export type PluralInflections = GenderedSet<PluralInflectionSet>;
|
||||
|
||||
/**
 * Output of the inflector: either false, or an object that carries
 * plural forms (optionally together with inflections), or inflections alone.
 */
export type InflectorOutput = {
|
||||
plural: PluralInflections,
|
||||
inflections?: Inflections,
|
||||
} | {
|
||||
inflections: Inflections,
|
||||
} | false;
|
||||
|
||||
export type PersonLine = [
|
||||
/** singular form of person */
|
||||
|
|
Loading…
Reference in New Issue