From 47551aa7ddf4922eb1ae728a26a521f588ddaed6 Mon Sep 17 00:00:00 2001 From: lingdocs <71590811+lingdocs@users.noreply.github.com> Date: Tue, 7 Sep 2021 15:49:57 +0400 Subject: [PATCH] getting going on plural-suffixes --- src/lib/p-text-helpers.test.ts | 27 +- src/lib/p-text-helpers.ts | 136 ++++++--- src/lib/pashto-consonants.ts | 3 + src/lib/pashto-inflector.test.ts | 491 ++++++++++++++++++------------- src/lib/pashto-inflector.ts | 205 +++++++++---- src/lib/verb-info.ts | 4 +- src/types.ts | 23 +- 7 files changed, 571 insertions(+), 318 deletions(-) diff --git a/src/lib/p-text-helpers.test.ts b/src/lib/p-text-helpers.test.ts index 95e75be..7bb69f8 100644 --- a/src/lib/p-text-helpers.test.ts +++ b/src/lib/p-text-helpers.test.ts @@ -21,6 +21,7 @@ import { psStringEquals, removeRetroflexR, splitDoubleWord, + endsInConsonant, } from "./p-text-helpers"; import * as T from "../types"; import { @@ -1003,10 +1004,34 @@ test("psStringEquals", () => { expect( psStringEquals({ p: "بور", f: "bor" }, { p: "تور", f: "tor" }) ).toBe(false); + expect( + psStringEquals({ p: "ملګری", f: "malgúrey" }, { p: "ملګری", f: "malgurey" }) + ).toBe(false); + expect( + psStringEquals({ p: "ملګری", f: "malgúrey" }, { p: "ملګری", f: "malgurey" }, true) + ).toBe(true); }); test("removeRetroflexR", () => { expect( removeRetroflexR({ p: "وکړ", f: "óokR" }), ).toEqual({ p: "وک", f: "óok" }); -}); \ No newline at end of file +}); + +test("endsInAConsonant", () => { + const does: T.PsString[] = [ + { p: "پښتون", f: "puxtoon" }, + { p: "کور", f: "kor" }, + { p: "ګناه", f: "gUnaah" }, + { p: "زوی", f: "zooy" }, + { p: "ځای", f: "dzaay" }, + ]; + const doesnt: T.PsString[] = [ + { p: "بابا", f: "baabaa" }, + { p: "قاضي", f: "qaazee" }, + { p: "ګناه", f: "gunaa" }, + { p: "اطلاع", f: "itlaa" }, + ]; + does.forEach((x) => expect(endsInConsonant(x)).toBe(true)); + doesnt.forEach((x) => expect(endsInConsonant(x)).toBe(false)); +}) \ No newline at end of file diff --git a/src/lib/p-text-helpers.ts b/src/lib/p-text-helpers.ts index 1472635..b205f0a 100644 --- a/src/lib/p-text-helpers.ts +++ b/src/lib/p-text-helpers.ts @@ -15,6 +15,9 @@ import { getPersonInflectionsKey, } from "./misc-helpers"; import * as T from "../types"; +import { removeAccents } from "./accent-helpers"; +import { pashtoConsonants, phoneticsConsonants } from "./pashto-consonants"; +import { simplifyPhonetics } from "./simplify-phonetics"; // export function concatPsStringWithVars(...items: Array): T.PsString[] { @@ -90,7 +93,7 @@ export function concatPsString(...items: Array, pos: number): T.FullForm< return baInserted; } -/** - * Returns the first phonetics value in a comma-seperated list - * - * @param f - a phonetics string - */ -export function firstPhonetics(f: string): string { - return f.split(",")[0]; -} - -/** - * returs a PsString or DictionaryEntry ensuring only one phonetics variation - * - * @param ps - */ -export function removeFVariants(ps: T.PsString): T.PsString { +export function removeFVarients(x: T.DictionaryEntry): T.DictionaryEntryNoFVars; +export function removeFVarients(x: T.PsString): T.PsStringNoFVars; +export function removeFVarients(x: string): T.FStringNoFVars; +export function removeFVarients(x: string | T.PsString | T.DictionaryEntry): T.FStringNoFVars | T.PsStringNoFVars | T.DictionaryEntryNoFVars { + if (typeof x === "string") { + return x.split(",")[0] as T.FStringNoFVars; + } + if ("ts" in x) { + return { + ...x, + f: removeFVarients(x.f), + __brand: "name for a dictionary entry with all the phonetics variations removed", + } as T.DictionaryEntryNoFVars; + } return { - ...ps, - f: firstPhonetics(ps.f), - }; + ...x, + f: removeFVarients(x.f), + __brand: "name for a ps string with all the phonetics variations removed", + } as T.PsStringNoFVars; } /** @@ -514,13 +517,6 @@ export function yulEndingInfinitive(s: T.PsString): boolean { return ((pEnding === "یل") && (["yul", "yúl"].includes(fEnding))); } -export function psStringFromEntry(entry: T.DictionaryEntry): T.PsString { - return makePsString( - entry.p, - firstPhonetics(entry.f), - ); -} - export function allOnePersonInflection(block: T.ImperativeForm, person: T.Person): T.SingleOrLengthOpts; export function allOnePersonInflection(block: T.VerbForm, person: T.Person): T.SingleOrLengthOpts; export function allOnePersonInflection(block: T.SingleOrLengthOpts, person: T.Person): T.SingleOrLengthOpts; @@ -617,8 +613,9 @@ export function complementInflects(inf: T.UnisexInflections): boolean { // ); } -export function psStringEquals(ps1: T.PsString, ps2: T.PsString): boolean { - return (ps1.p === ps2.p) && (ps1.f === ps2.f); +export function psStringEquals(ps1: T.PsString, ps2: T.PsString, ignoreAccents?: boolean): boolean { + const [p1, p2] = ignoreAccents ? [removeAccents(ps1), removeAccents(ps2)] : [ps1, ps2]; + return (p1.p === p2.p) && (p1.f === p2.f); } export function removeRetroflexR(ps: T.PsString): T.PsString { @@ -754,33 +751,76 @@ export function ensureShortWurShwaShift(ps: T.PsString): T.PsString { return ps; } -export function ensureUnisexInflections(infs: T.Inflections | false, w: T.DictionaryEntry): T.UnisexInflections { - const ps = { p: w.p, f: firstPhonetics(w.f) }; - if (infs === false) { +export function ensureUnisexInflections(infs: T.InflectorOutput, w: T.DictionaryEntryNoFVars): { + inflections: T.UnisexInflections, + plural?: T.PluralInflections, +} { + const ps = { p: w.p, f: w.f }; + if (infs === false || infs.inflections === undefined) { return { - masc: [ - [ps], - [ps], - [ps], - ], - fem: [ - [ps], - [ps], - [ps], - ], + inflections: { + masc: [ + [ps], + [ps], + [ps], + ], + fem: [ + [ps], + [ps], + [ps], + ], + }, }; } - if (!("fem" in infs)) { + if (!("fem" in infs.inflections)) { return { - ...infs, - fem: [[ps], [ps], [ps]], + inflections: { + ...infs.inflections, + fem: [[ps], [ps], [ps]], + } }; } - if (!("masc" in infs)) { + if (!("masc" in infs.inflections)) { return { - ...infs, - masc: [[ps], [ps], [ps]], + inflections: { + ...infs.inflections, + masc: [[ps], [ps], [ps]], + }, }; } - return infs; + // for some dumb reason have to do this for type safety + return { + inflections: infs.inflections, + }; +} + +export function endsInAaOrOo(w: T.PsString): boolean { + const fEnd = simplifyPhonetics(w.f).slice(-2); + const pEnd = w.p.slice(-1); + return ( + pEnd === "و" && fEnd.endsWith("o") + || + pEnd === "ا" && fEnd === "aa" + ); +} + + +export function endsInConsonant(w: T.PsString): boolean { + // TODO: Add reporting back that the plural ending will need a space? + + function endsInLongDipthong(w: T.PsString): boolean { + function isLongDipthong(end: T.PsString): boolean { + return (psStringEquals(end, { p: "ای", f: "aay" }, true) || psStringEquals(end, { p: "وی", f: "ooy" }, true)); + } + const end = makePsString( + w.p.slice(-2), + w.f.slice(-3), + ); + return isLongDipthong(end); + } + + if (endsInLongDipthong(w)) return true; + // const pCons = pashtoConsonants.includes(w.p.slice(-1)); + const fCons = phoneticsConsonants.includes(simplifyPhonetics(w.f).slice(-1)); + return fCons; } \ No newline at end of file diff --git a/src/lib/pashto-consonants.ts b/src/lib/pashto-consonants.ts index a290d07..156fdcb 100644 --- a/src/lib/pashto-consonants.ts +++ b/src/lib/pashto-consonants.ts @@ -7,3 +7,6 @@ */ export const pashtoConsonants = ["ب", "پ", "ت", "ټ", "ث", "ج", "چ", "ح", "خ", "څ", "ځ", "د", "ډ", "ذ", "ر", "ړ", "ز", "ژ", "ږ", "س", "ش", "ښ", "ص", "ض", "ط", "ظ", "غ", "ف", "ق", "ک", "ګ", "گ", "ل", "ل", "م", "ن", "ڼ"]; +export const phoneticsConsonants = [ + "b", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "w", "z", +]; \ No newline at end of file diff --git a/src/lib/pashto-inflector.test.ts b/src/lib/pashto-inflector.test.ts index 51b71c5..7ff68cf 100644 --- a/src/lib/pashto-inflector.test.ts +++ b/src/lib/pashto-inflector.test.ts @@ -16,7 +16,7 @@ import * as T from "../types"; const adjectives: Array<{ in: T.DictionaryEntry, - out: T.Inflections | false, + out: T.InflectorOutput, }> = [ // irregular adj. { @@ -34,16 +34,18 @@ const adjectives: Array<{ infbf: "zaR", }, out: { - masc: [ - [{p: "زوړ", f: "zoR"}], - [{p: "زاړه", f: "zaaRu"}], - [{p: "زړو", f: "zaRo"}], - ], - fem: [ - [{p: "زړه", f: "zaRa"}], - [{p: "زړې", f: "zaRe"}], - [{p: "زړو", f: "zaRo"}], - ], + inflections:{ + masc: [ + [{p: "زوړ", f: "zoR"}], + [{p: "زاړه", f: "zaaRu"}], + [{p: "زړو", f: "zaRo"}], + ], + fem: [ + [{p: "زړه", f: "zaRa"}], + [{p: "زړې", f: "zaRe"}], + [{p: "زړو", f: "zaRo"}], + ], + }, }, }, // regular adjective ending in ی @@ -58,16 +60,18 @@ const adjectives: Array<{ i: 6564, }, out: { - masc: [ - [{p: "ستړی", f: "stúRey"}], - [{p: "ستړي", f: "stúRee"}], - [{p: "ستړیو", f: "stúRiyo"}, {p: "ستړو", f: "stúRo"}], - ], - fem: [ - [{p: "ستړې", f: "stúRe"}], - [{p: "ستړې", f: "stúRe"}], - [{p: "ستړو", f: "stúRo"}], - ], + inflections: { + masc: [ + [{p: "ستړی", f: "stúRey"}], + [{p: "ستړي", f: "stúRee"}], + [{p: "ستړیو", f: "stúRiyo"}, {p: "ستړو", f: "stúRo"}], + ], + fem: [ + [{p: "ستړې", f: "stúRe"}], + [{p: "ستړې", f: "stúRe"}], + [{p: "ستړو", f: "stúRo"}], + ], + } }, }, // regular adjective ending in ی with stress on the end @@ -82,16 +86,18 @@ const adjectives: Array<{ i: 12026, }, out: { - masc: [ - [{p: "وروستی", f: "wroostéy"}], - [{p: "وروستي", f: "wroostée"}], - [{p: "وروستیو", f: "wroostiyo"}, {p: "وروستو", f: "wroostó"}], - ], - fem: [ - [{p: "وروستۍ", f: "wroostúy"}], - [{p: "وروستۍ", f: "wroostúy"}], - [{p: "وروستیو", f: "wroostúyo"}, {p: "وروستو", f: "wroostó"}], - ], + inflections: { + masc: [ + [{p: "وروستی", f: "wroostéy"}], + [{p: "وروستي", f: "wroostée"}], + [{p: "وروستیو", f: "wroostiyo"}, {p: "وروستو", f: "wroostó"}], + ], + fem: [ + [{p: "وروستۍ", f: "wroostúy"}], + [{p: "وروستۍ", f: "wroostúy"}], + [{p: "وروستیو", f: "wroostúyo"}, {p: "وروستو", f: "wroostó"}], + ], + } }, }, // regular adjective ending in a consonant @@ -106,16 +112,18 @@ const adjectives: Array<{ i: 6502, }, out: { - masc: [ - [{p: "سپک", f: "spuk"}], - [{p: "سپک", f: "spuk"}], - [{p: "سپکو", f: "spuko"}], - ], - fem: [ - [{p: "سپکه", f: "spuka"}], - [{p: "سپکې", f: "spuke"}], - [{p: "سپکو", f: "spuko"}], - ], + inflections: { + masc: [ + [{p: "سپک", f: "spuk"}], + [{p: "سپک", f: "spuk"}], + [{p: "سپکو", f: "spuko"}], + ], + fem: [ + [{p: "سپکه", f: "spuka"}], + [{p: "سپکې", f: "spuke"}], + [{p: "سپکو", f: "spuko"}], + ], + }, }, }, { @@ -129,16 +137,18 @@ const adjectives: Array<{ i: 9945, }, out: { - masc: [ - [{p: "لوی", f: "looy"}], - [{p: "لوی", f: "looy"}], - [{p: "لویو", f: "looyo"}], - ], - fem: [ - [{p: "لویه", f: "looya"}], - [{p: "لویې", f: "looye"}], - [{p: "لویو", f: "looyo"}], - ], + inflections: { + masc: [ + [{p: "لوی", f: "looy"}], + [{p: "لوی", f: "looy"}], + [{p: "لویو", f: "looyo"}], + ], + fem: [ + [{p: "لویه", f: "looya"}], + [{p: "لویې", f: "looye"}], + [{p: "لویو", f: "looyo"}], + ], + }, }, }, { @@ -152,16 +162,18 @@ const adjectives: Array<{ i: 2430, }, out: { - masc: [ - [{p: "پوه", f: "poh"}], - [{p: "پوه", f: "poh"}], - [{p: "پوهو", f: "poho"}], - ], - fem: [ - [{p: "پوهه", f: "poha"}], - [{p: "پوهې", f: "pohe"}], - [{p: "پوهو", f: "poho"}], - ], + inflections: { + masc: [ + [{p: "پوه", f: "poh"}], + [{p: "پوه", f: "poh"}], + [{p: "پوهو", f: "poho"}], + ], + fem: [ + [{p: "پوهه", f: "poha"}], + [{p: "پوهې", f: "pohe"}], + [{p: "پوهو", f: "poho"}], + ], + }, }, }, // adjective ending in u @@ -176,16 +188,18 @@ const adjectives: Array<{ i: 1, }, out: { - masc: [ - [{p: "ویده", f: "weedú"}], - [{p: "ویده", f: "weedú"}], - [{p: "ویدو", f: "weedó"}], - ], - fem: [ - [{p: "ویده", f: "weedá"}], - [{p: "ویدې", f: "weedé"}], - [{p: "ویدو", f: "weedó"}], - ], + inflections: { + masc: [ + [{p: "ویده", f: "weedú"}], + [{p: "ویده", f: "weedú"}], + [{p: "ویدو", f: "weedó"}], + ], + fem: [ + [{p: "ویده", f: "weedá"}], + [{p: "ویدې", f: "weedé"}], + [{p: "ویدو", f: "weedó"}], + ], + }, }, }, // adjective non-inflecting @@ -225,23 +239,25 @@ const adjectives: Array<{ i: 1, }, out: { - masc: [ - [{ p: "ګډ وډ", f: "guD wuD" }], - [{ p: "ګډ وډ", f: "guD wuD" }], - [{ p: "ګډو وډو", f: "guDo wuDo" }], - ], - fem: [ - [{ p: "ګډه وډه", f: "guDa wuDa" }], - [{ p: "ګډې وډې", f: "guDe wuDe" }], - [{ p: "ګډو وډو", f: "guDo wuDo" }], - ], - } - } + inflections: { + masc: [ + [{ p: "ګډ وډ", f: "guD wuD" }], + [{ p: "ګډ وډ", f: "guD wuD" }], + [{ p: "ګډو وډو", f: "guDo wuDo" }], + ], + fem: [ + [{ p: "ګډه وډه", f: "guDa wuDa" }], + [{ p: "ګډې وډې", f: "guDe wuDe" }], + [{ p: "ګډو وډو", f: "guDo wuDo" }], + ], + }, + }, + }, ]; const nouns: Array<{ in: T.DictionaryEntry, - out: T.Inflections | false, + out: T.InflectorOutput, }> = [ // ## UNISEX // Unisex noun irregular @@ -260,16 +276,18 @@ const nouns: Array<{ infbf: "melman", }, out: { - masc: [ - [{p: "مېلمه", f: "melmá"}], - [{p: "مېلمانه", f: "melmaanu"}], - [{p: "مېلمنو", f: "melmano"}], - ], - fem: [ - [{p: "مېلمنه", f: "melmana"}], - [{p: "مېلمنې", f: "melmane"}], - [{p: "مېلمنو", f: "melmano"}], - ], + inflections: { + masc: [ + [{p: "مېلمه", f: "melmá"}], + [{p: "مېلمانه", f: "melmaanu"}], + [{p: "مېلمنو", f: "melmano"}], + ], + fem: [ + [{p: "مېلمنه", f: "melmana"}], + [{p: "مېلمنې", f: "melmane"}], + [{p: "مېلمنو", f: "melmano"}], + ], + }, }, }, // Unisex noun ending with ی @@ -284,16 +302,18 @@ const nouns: Array<{ i: 10943, }, out: { - masc: [ - [{p: "ملګری", f: "malgúrey"}], - [{p: "ملګري", f: "malgúree"}], - [{p: "ملګریو", f: "malgúriyo"}, {p: "ملګرو", f: "malgúro"}], - ], - fem: [ - [{p: "ملګرې", f: "malgúre"}], - [{p: "ملګرې", f: "malgúre"}], - [{p: "ملګرو", f: "malgúro"}], - ], + inflections: { + masc: [ + [{p: "ملګری", f: "malgúrey"}], + [{p: "ملګري", f: "malgúree"}], + [{p: "ملګریو", f: "malgúriyo"}, {p: "ملګرو", f: "malgúro"}], + ], + fem: [ + [{p: "ملګرې", f: "malgúre"}], + [{p: "ملګرې", f: "malgúre"}], + [{p: "ملګرو", f: "malgúro"}], + ], + }, }, }, // Unisex noun ending on ی with emphasis on the end @@ -308,16 +328,24 @@ const nouns: Array<{ i: 2900, }, out: { - masc: [ - [{p: "ترورزی", f: "trorzéy"}], - [{p: "ترورزي", f: "trorzée"}], - [{p: "ترورزیو", f: "trorziyo"}, {p: "ترورزو", f: "trorzó"}], - ], - fem: [ - [{p: "ترورزۍ", f: "trorzúy"}], - [{p: "ترورزۍ", f: "trorzúy"}], - [{p: "ترورزیو", f: "trorzúyo"}, {p: "ترورزو", f: "trorzó"}], - ], + inflections: { + masc: [ + [{p: "ترورزی", f: "trorzéy"}], + [{p: "ترورزي", f: "trorzée"}], + [{p: "ترورزیو", f: "trorziyo"}, {p: "ترورزو", f: "trorzó"}], + ], + fem: [ + [{p: "ترورزۍ", f: "trorzúy"}], + [{p: "ترورزۍ", f: "trorzúy"}], + [{p: "ترورزیو", f: "trorzúyo"}, {p: "ترورزو", f: "trorzó"}], + ], + }, + // plural: { + // masc: [ + // [{ p: "ترورزامن", f: "trorzaamun" }], + // [{ p: "ترورزامنو", f: "trorzaamuno" }], + // ], + // }, }, }, // Unisex noun ending with a consanant @@ -328,20 +356,32 @@ const nouns: Array<{ f: "churg", g: "", e: "rooster, cock; chicken, poultry", - c: "n. m. unisex", + c: "n. m. unisex anim.", i: 4101, }, out: { - masc: [ - [{p: "چرګ", f: "churg"}], - [{p: "چرګ", f: "churg"}], - [{p: "چرګو", f: "churgo"}], - ], - fem: [ - [{p: "چرګه", f: "churga"}], - [{p: "چرګې", f: "churge"}], - [{p: "چرګو", f: "churgo"}], - ], + inflections: { + masc: [ + [{p: "چرګ", f: "churg"}], + [{p: "چرګ", f: "churg"}], + [{p: "چرګو", f: "churgo"}], + ], + fem: [ + [{p: "چرګه", f: "churga"}], + [{p: "چرګې", f: "churge"}], + [{p: "چرګو", f: "churgo"}], + ], + }, + plural: { + masc: [ + [{p: "چرګان", f: "churgáan"}], + [{p: "چرګانو", f: "churgáano"}], + ], + fem: [ + [{p: "چرګانې", f: "churgáane"}], + [{p: "چرګانو", f: "churgáano"}], + ], + }, }, }, // ## MASCULINE @@ -357,11 +397,13 @@ const nouns: Array<{ i: 6750, }, out: { - masc: [ - [{p: "سړی", f: "saRey"}], - [{p: "سړي", f: "saRee"}], - [{p: "سړیو", f: "saRiyo"}, {p: "سړو", f: "saRo"}], - ], + inflections: { + masc: [ + [{p: "سړی", f: "saRey"}], + [{p: "سړي", f: "saRee"}], + [{p: "سړیو", f: "saRiyo"}, {p: "سړو", f: "saRo"}], + ], + } }, }, // Masculine regular ending in ی with emphasis on end @@ -376,11 +418,13 @@ const nouns: Array<{ i: 2931, }, out: { - masc: [ - [{p: "ترېلی", f: "treléy"}], - [{p: "ترېلي", f: "trelée"}], - [{p: "ترېلیو", f: "treliyo"}, {p: "ترېلو", f: "trelo"}], - ], + inflections: { + masc: [ + [{p: "ترېلی", f: "treléy"}], + [{p: "ترېلي", f: "trelée"}], + [{p: "ترېلیو", f: "treliyo"}, {p: "ترېلو", f: "trelo"}], + ], + }, }, }, // Masculine ending in tob @@ -395,11 +439,13 @@ const nouns: Array<{ c: "n. m.", }, out: { - masc: [ - [{p: "مشرتوب", f: "mushurtob"}], - [{p: "مشرتابه", f: "mushurtaabu"}], - [{p: "مشرتبو", f: "mushurtabo"}], - ], + inflections: { + masc: [ + [{p: "مشرتوب", f: "mushurtob"}], + [{p: "مشرتابه", f: "mushurtaabu"}], + [{p: "مشرتبو", f: "mushurtabo"}], + ], + }, }, }, // Masculine irregular @@ -418,11 +464,19 @@ const nouns: Array<{ infbf: "lamandz", }, out: { - masc: [ - [{p: "لمونځ", f: "lamoondz"}], - [{p: "لمانځه", f: "lamaandzu"}], - [{p: "لمنځو", f: "lamandzo"}], - ], + inflections: { + masc: [ + [{p: "لمونځ", f: "lamoondz"}], + [{p: "لمانځه", f: "lamaandzu"}], + [{p: "لمنځو", f: "lamandzo"}], + ], + }, + // plural: { + // masc: [ + // [{ p: "لمونځونه", f: "lamoondzóona" }], + // [{ p: "لمونځونو", f: "lamoondzóono" }], + // ], + // }, }, }, // Masculine non-inflecting @@ -436,7 +490,14 @@ const nouns: Array<{ c: "n. m.", i: 8640, }, - out: false, + out: { + plural: { + masc: [ + [{ p: "کتابونه", f: "kitaabóona" }], + [{ p: "کتابونو", f: "kitaabóono" }], + ], + }, + }, }, // ## FEMININE // Feminine regular ending in ه @@ -451,11 +512,13 @@ const nouns: Array<{ i: 7444, }, out: { - fem: [ - [{p: "ښځه", f: "xudza"}], - [{p: "ښځې", f: "xudze"}], - [{p: "ښځو", f: "xudzo"}], - ], + inflections: { + fem: [ + [{p: "ښځه", f: "xudza"}], + [{p: "ښځې", f: "xudze"}], + [{p: "ښځو", f: "xudzo"}], + ], + }, }, }, { @@ -469,11 +532,13 @@ const nouns: Array<{ i: 365, }, out: { - fem: [ - [{p: "اره", f: "ará"}], - [{p: "ارې", f: "are"}], - [{p: "ارو", f: "aro"}], - ], + inflections: { + fem: [ + [{p: "اره", f: "ará"}], + [{p: "ارې", f: "are"}], + [{p: "ارو", f: "aro"}], + ], + }, }, }, // Feminine regular ending in ع - a' @@ -490,11 +555,13 @@ const nouns: Array<{ apf: "maraají’", }, out: { - fem: [ - [{p: "مرجع", f: "marja'"}], - [{p: "مرجعې", f: "marje"}], - [{p: "مرجعو", f: "marjo"}], - ], + inflections: { + fem: [ + [{p: "مرجع", f: "marja'"}], + [{p: "مرجعې", f: "marje"}], + [{p: "مرجعو", f: "marjo"}], + ], + }, }, }, { @@ -510,11 +577,13 @@ const nouns: Array<{ apf: "manaabí", }, out: { - fem: [ - [{p: "منبع", f: "manbá"}], - [{p: "منبعې", f: "manbe"}], - [{p: "منبعو", f: "manbo"}], - ], + inflections: { + fem: [ + [{p: "منبع", f: "manbá"}], + [{p: "منبعې", f: "manbe"}], + [{p: "منبعو", f: "manbo"}], + ], + }, }, }, // Feminine regular ending in ح - a @@ -529,11 +598,13 @@ const nouns: Array<{ i: 5813, }, out: { - fem: [ - [{p: "ذبح", f: "zabha"}], - [{p: "ذبحې", f: "zabhe"}], - [{p: "ذبحو", f: "zabho"}], - ], + inflections: { + fem: [ + [{p: "ذبح", f: "zabha"}], + [{p: "ذبحې", f: "zabhe"}], + [{p: "ذبحو", f: "zabho"}], + ], + }, }, }, // Feminine inanimate regular with missing ه @@ -548,14 +619,17 @@ const nouns: Array<{ i: 9593, }, out: { - fem: [ - [{p: "لار", f: "laar"}], - [{p: "لارې", f: "laare"}], - [{p: "لارو", f: "laaro"}], - ], + inflections: { + fem: [ + [{p: "لار", f: "laar"}], + [{p: "لارې", f: "laare"}], + [{p: "لارو", f: "laaro"}], + ], + }, }, }, // Feminine animate ending in a consonant + // TODO: ALLOW FOR MULTIPLE PLURAL POSSIBILITIES میندې, میېنې etc. { in: { ts: 1527812928, @@ -564,9 +638,18 @@ const nouns: Array<{ g: "", e: "mother, mom", c: "n. f. anim.", + ppp: "میندې", + ppf: "meynde", i: 11113, }, - out: false, + out: { + plural: { + fem: [ + [{ p: "میندې", f: "meynde" }], + [{ p: "میندو", f: "meyndo" }], + ], + }, + }, }, // Feminine regular inanimate ending in ي { @@ -580,11 +663,13 @@ const nouns: Array<{ i: 5503, }, out: { - fem: [ - [{p: "دوستي", f: "dostee"}], - [{p: "دوستۍ", f: "dostuy"}], - [{p: "دوستیو", f: "dostuyo"}], - ], + inflections: { + fem: [ + [{p: "دوستي", f: "dostee"}], + [{p: "دوستۍ", f: "dostuy"}], + [{p: "دوستیو", f: "dostuyo"}], + ], + }, }, }, // Feminine regular ending in ۍ @@ -599,11 +684,13 @@ const nouns: Array<{ i: 8718, }, out: { - fem: [ - [{p: "کرسۍ", f: "kUrsuy"}], - [{p: "کرسۍ", f: "kUrsuy"}], - [{p: "کرسیو", f: "kUrsuyo"}, { p: "کرسو", f: "kUrso"}], - ], + inflections: { + fem: [ + [{p: "کرسۍ", f: "kUrsuy"}], + [{p: "کرسۍ", f: "kUrsuy"}], + [{p: "کرسیو", f: "kUrsuyo"}, { p: "کرسو", f: "kUrso"}], + ], + }, }, }, // Feminine regular ending in ا @@ -611,18 +698,19 @@ const nouns: Array<{ in: { ts: 1527812456, p: "اړتیا", - f: "aRtiyaa, aRtyaa", + f: "aRtiyáa, aRtyáa", g: "", e: "need, necessity", c: "n. f.", i: 376, }, out: { - fem: [ - [{p: "اړتیا", f: "aRtiyaa"}], - [{p: "اړتیاوې", f: "aRtiyaawe"}], - [{p: "اړتیاوو", f: "aRtiyaawo"}], - ], + plural: { + fem: [ + [{p: "اړتیاوې", f: "aRtiyáawe"}, { p: "اړتیاګانې", f:"aRtiyaagáane"}], + [{p: "اړتیاوو", f: "aRtiyáawo"}, { p: "اړتیاګانو", f:"aRtiyaagáano"}], + ], + }, }, }, // Feminine regular ending in اع @@ -636,13 +724,15 @@ const nouns: Array<{ c: "n. f.", i: 12205, }, - out: { - fem: [ - [{p: "وداع", f: "widáa'"}], - [{p: "وداعوې", f: "widáawe"}], - [{p: "وداعوو", f: "widáawo"}], - ], - }, + out: false, + // out: { + // plural: { + // fem: [ + // [{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}], + // [{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}], + // ], + // }, + // }, }, // Word with no inflections { @@ -658,6 +748,7 @@ const nouns: Array<{ }, out: false, }, + // TODO: WORDS THAT ARE ALREADY PLURAL! ]; const others: T.DictionaryEntry[] = [ diff --git a/src/lib/pashto-inflector.ts b/src/lib/pashto-inflector.ts index c3a8a67..189edbb 100644 --- a/src/lib/pashto-inflector.ts +++ b/src/lib/pashto-inflector.ts @@ -12,6 +12,10 @@ import { splitDoubleWord, ensureUnisexInflections, makePsString, + removeFVarients, + concatPsString, + endsInConsonant, + endsInAaOrOo, } from "./p-text-helpers"; import { removeAccents, @@ -20,95 +24,102 @@ import * as T from "../types"; const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; const endingInHeyOrAynRegex = /[^ا][هع]$/; -const endingInAlefRegex = /اع?$/; +// const endingInAlefRegex = /اع?$/; -export function inflectWord(word: T.DictionaryEntry): T.Inflections | false { +export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput { // If it's a noun/adj, inflect accordingly // TODO: What about n. f. / adj. that end in ي ?? - if (word.noInf) { + const w = removeFVarients(word); + if (w.noInf) { return false; } - if (word.c?.includes("doub.")) { - const words = splitDoubleWord(word); - const inflected = words.map((word) => ensureUnisexInflections(inflectWord(word), word)); - return concatInflections( - inflected[0], - inflected[1], - ) as T.UnisexInflections; + if (w.c?.includes("doub.")) { + const words = splitDoubleWord(w); + const inflected = words.map((x) => ensureUnisexInflections(inflectWord(x), x)); + return { + inflections: concatInflections( + inflected[0].inflections, + inflected[1].inflections, + ) as T.UnisexInflections, + }; } - if (word.c && (word.c.includes("adj.") || word.c.includes("unisex"))) { - return handleUnisexWord(word); + if (w.c && (w.c.includes("adj.") || w.c.includes("unisex"))) { + return handleUnisexWord(w); } - if (word.c && (word.c.includes("n. m."))) { - return handleMascNoun(word); + if (w.c && (w.c.includes("n. m."))) { + return handleMascNoun(w); } - if (word.c && (word.c.includes("n. f."))) { - return handleFemNoun(word); + if (w.c && (w.c.includes("n. f."))) { + return handleFemNoun(w); } // It's not a noun/adj return false; } // LEVEL 2 FUNCTIONS -function handleUnisexWord(word: T.DictionaryEntry): T.Inflections | false { - // Get first of comma seperated phonetics entries - const f = word.f.split(",")[0].trim(); +function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput { // Get last letter of Pashto and last two letters of phonetics // TODO: !!! Handle weird endings / symbols ' etc. const pEnd = word.p.slice(-1); + const plural = makePlural(word); if (word.infap && word.infaf && word.infbp && word.infbf) { - return inflectIrregularUnisex(word.p, f, [ - {p: word.infap, f: word.infaf}, - {p: word.infbp, f: word.infbf}, - ]); + return { + inflections: inflectIrregularUnisex(word.p, word.f, [ + {p: word.infap, f: word.infaf}, + {p: word.infbp, f: word.infbf}, + ]), + plural, + }; } - if (pEnd === "ی" && f.slice(-2) === "ey") { - return inflectRegularYeyUnisex(word.p, f); + if (pEnd === "ی" && word.f.slice(-2) === "ey") { + return { inflections: inflectRegularYeyUnisex(word.p, word.f), plural }; } if (pEnd === "ه" && word.g.slice(-1) === "u") { - return inflectRegularShwaEndingUnisex(word.p, f); + return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), plural }; } - if (pEnd === "ی" && f.slice(-2) === "éy") { - return inflectEmphasizedYeyUnisex(word.p, f); + if (pEnd === "ی" && word.f.slice(-2) === "éy") { + return { inflections: inflectEmphasizedYeyUnisex(word.p, word.f), plural }; } if ( pashtoConsonants.includes(pEnd) || word.p.slice(-2) === "وی" || word.p.slice(-2) === "ای" || - (word.p.slice(-1) === "ه" && f.slice(-1) === "h") + (word.p.slice(-1) === "ه" && word.f.slice(-1) === "h") ) { - return inflectConsonantEndingUnisex(word.p, f); + return { inflections: inflectConsonantEndingUnisex(word.p, word.f), plural }; } return false; } -function handleMascNoun(word: T.DictionaryEntry): T.Inflections | false { - // Get first of comma seperated phonetics entries - const f = word.f.split(",")[0].trim(); +function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput { // Get last letter of Pashto and last two letters of phonetics // TODO: !!! Handle weird endings / symbols ' etc. - const pEnd = word.p.slice(-1); - const fEnd = f.slice(-2); - if (word.infap && word.infaf && word.infbp && word.infbf) { - return inflectIrregularMasc(word.p, f, [ - {p: word.infap, f: word.infaf}, - {p: word.infbp, f: word.infbf}, - ]); + const plural = makePlural(w); + const pEnd = w.p.slice(-1); + const fEnd = w.f.slice(-2); + if (w.infap && w.infaf && w.infbp && w.infbf) { + return { + inflections: inflectIrregularMasc(w.p, w.f, [ + {p: w.infap, f: w.infaf}, + {p: w.infbp, f: w.infbf}, + ]), + plural, + }; } - const isTobEnding = (word.p.slice(-3) === "توب" && ["tób", "tob"].includes(f.slice(-3)) && word.p.length > 3); + const isTobEnding = (w.p.slice(-3) === "توب" && ["tób", "tob"].includes(w.f.slice(-3)) && w.p.length > 3); if (isTobEnding) { - return inflectTobMasc(word.p, f); + return { inflections: inflectTobMasc(w.p, w.f), plural }; } if (pEnd === "ی" && fEnd === "ey") { - return inflectRegularYeyMasc(word.p, f); + return { inflections: inflectRegularYeyMasc(w.p, w.f), plural }; } if (pEnd === "ی" && fEnd === "éy") { - return inflectRegularEmphasizedYeyMasc(word.p, f); + return { inflections: inflectRegularEmphasizedYeyMasc(w.p, w.f), plural }; } - return false; + return plural ? { plural } : false } -function handleFemNoun(word: T.DictionaryEntry): T.Inflections | false { +function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput { // Get first of comma seperated phonetics entries const f = word.f.split(",")[0].trim(); /* istanbul ignore next */ // will always have word.c at this point @@ -116,25 +127,27 @@ function handleFemNoun(word: T.DictionaryEntry): T.Inflections | false { const animate = c.includes("anim."); const pEnd = word.p.slice(-1); + const plural = makePlural(word); + if (endingInHeyOrAynRegex.test(word.p) && endingInSingleARegex.test(f)) { - return inflectRegularAFem(word.p, f); + return { inflections: inflectRegularAFem(word.p, f), plural }; } if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(f)) { - return inflectRegularAWithHimPEnding(word.p, f); + return { inflections: inflectRegularAWithHimPEnding(word.p, f), plural }; } if (pashtoConsonants.includes(pEnd) && !animate) { - return inflectRegularInanMissingAFem(word.p, f); + return { inflections: inflectRegularInanMissingAFem(word.p, f), plural }; } if (pEnd === "ي" && (!animate)) { - return inflectRegularInanEeFem(word.p, f); + return { inflections: inflectRegularInanEeFem(word.p, f), plural }; } if (pEnd === "ۍ") { - return inflectRegularUyFem(word.p, f); + return { inflections: inflectRegularUyFem(word.p, f), plural }; } - if (endingInAlefRegex.test(word.p)) { - return inflectRegularAaFem(word.p, f); - } - return false; + // if (endingInAlefRegex.test(word.p)) { + // return { inflections: inflectRegularAaFem(word.p, f) }; + // } + return plural ? { plural } : false; } // LEVEL 3 FUNCTIONS @@ -341,13 +354,77 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections { }; } -function inflectRegularAaFem(p: string, f: string): T.Inflections { - const baseF = ["'", "’"].includes(f.slice(-1)) ? f.slice(0, -1) : f; - return { - fem: [ - [{p, f}], - [{p: `${p}وې`, f: `${baseF}we`}], - [{p: `${p}وو`, f: `${baseF}wo`}], - ], - }; +function makePashtoPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined { + if (!(word.ppp && word.ppf)) return undefined; + const base = makePsString(word.ppp, word.ppf); + // TODO: Add male Pashto plural + if (word.c?.includes("n. f.")) { + return { + fem: [ + [base], + // todo: function to add و ending automatically + [concatPsString( + makePsString(base.p.slice(0, -1), base.f.slice(0, -1)), + { p: "و", f: "o" }, + )], + ], + } + } + // TODO: handle masculine and unisex + return undefined; +} + +function makePlural(w: T.DictionaryEntryNoFVars): T.PluralInflections | undefined { + // TODO: Include the Pashto plural thing here + const pashtoPlural = makePashtoPlural(w); + if (pashtoPlural) return pashtoPlural; + function addMascPluralSuffix(animate?: boolean): T.PluralInflectionSet { + const base = removeAccents(w); + return [ + [concatPsString(base, animate ? { p: "ان", f: "áan" } : { p: "ونه", f: "óona" })], + [concatPsString(base, animate ? { p: "انو", f: "áano" } : { p: "ونو", f: "óono" })], + ]; + } + function addAnimUnisexPluralSuffix(): T.UnisexSet { + const base = removeAccents(w); + return { + masc: addMascPluralSuffix(true), + fem: [ + [concatPsString(base, { p: "انې", f: "áane" })], + [concatPsString(base, { p: "انو", f: "áano" })], + ], + }; + } + function addFemLongVowelSuffix(): T.PluralInflectionSet { + const base = makePsString(w.p, w.f); + const baseWOutAccents = removeAccents(base); + return [ + [concatPsString(base, { p: "وې", f: "we" }), concatPsString(baseWOutAccents, { p: "ګانې", f: "gáane" })], + [concatPsString(base, { p: "وو", f: "wo" }), concatPsString(baseWOutAccents, { p: "ګانو", f: "gáano" })], + ]; + } + + const anim = w.c?.includes("anim."); + const type = (w.c?.includes("unisex")) + ? "unisex noun" + : (w.c?.includes("n. m.")) + ? "masc noun" + : (w.c?.includes("n. f.")) + ? "fem noun" + : "other"; + if (type === "unisex noun" && endsInConsonant(w) && (!w.infap) && anim) { + return addAnimUnisexPluralSuffix(); + } + if (type === "masc noun" && endsInConsonant(w) && (!w.infap) && (w.p.slice(-3) !== "توب")) { + return { + masc: addMascPluralSuffix(anim), + }; + } + // TODO: What about endings in long ee / animate at inanimate + if (type === "fem noun" && endsInAaOrOo(w) && (!w.infap)) { + return { + fem: addFemLongVowelSuffix(), + }; + } + return undefined; } diff --git a/src/lib/verb-info.ts b/src/lib/verb-info.ts index 51e2e83..5a309a1 100644 --- a/src/lib/verb-info.ts +++ b/src/lib/verb-info.ts @@ -121,7 +121,7 @@ export function getVerbInfo( return getGenerativeStativeCompoundVerbInfo(entry, complement as T.DictionaryEntry); } } - const comp = complement ? ensureUnisexInflections(complement) : undefined; + const comp = complement ? ensureUnisexInf(complement) : undefined; const root = getVerbRoots(entry, transitivity, comp); const stem = getVerbStems(entry, root, transitivity, comp); const infinitive = "mascSing" in root.imperfective ? root.imperfective.mascSing.long : root.imperfective.long; @@ -877,7 +877,7 @@ function addOoPrefix( }; } -function ensureUnisexInflections(complement: T.DictionaryEntry): T.UnisexInflections { +function ensureUnisexInf(complement: T.DictionaryEntry): T.UnisexInflections { const inflected = inflectWord(complement); const isUnisex = inflected && (("masc" in inflected) && ("fem" in inflected)); if (isUnisex) { diff --git a/src/types.ts b/src/types.ts index 7f37892..8702713 100644 --- a/src/types.ts +++ b/src/types.ts @@ -114,6 +114,10 @@ export type DictionaryEntry = { ep?: string; } +export type DictionaryEntryNoFVars = DictionaryEntry & { __brand: "name for a dictionary entry with all the phonetics variations removed" }; +export type PsStringNoFVars = PsString & { __brand: "name for a ps string with all the phonetics variations removed" }; +export type FStringNoFVars = string & { __brand: "name for a phonetics string with all the phonetics variations removed" }; + export type DictionaryEntryTextField = "p" | "f" | "e" | "c" | "infap" | "infaf" | "infbp" | "infbf" | "app" | "apf" | "ppp" | "ppf" | "psp" | "psf" | "ssp" | "ssf" | "prp" | "prf" | "pprtp" | "pprtf" | "tppp" | "tppf" | "ec" | "ep"; export type DictionaryEntryBooleanField = "noInf" | "shortIntrans" | "noOo" | "sepOo" | "diacExcept"; export type DictionaryEntryNumberField = "ts" | "i" | "l" | "separationAtP" | "separationAtF"; @@ -334,12 +338,25 @@ export type PerfectContent = { // Plain, 1st, and 2nd Inflection export type InflectionSet = ArrayFixed, 3>; +// Plural and Second Inflection +export type PluralInflectionSet = ArrayFixed, 2> + export type Gender = "masc" | "fem"; -export type UnisexInflections = Record; +export type UnisexSet = Record; +export type GenderedSet = UnisexSet | Omit, "fem"> | Omit, "masc">; +export type UnisexInflections = UnisexSet; -export type Inflections = UnisexInflections - | Omit | Omit; +export type Inflections = GenderedSet; + +export type PluralInflections = GenderedSet; + +export type InflectorOutput = { + plural: PluralInflections, + inflections?: Inflections, +} | { + inflections: Inflections, +} | false; export type PersonLine = [ /** singular form of person */