inflector with plurals beta working!

2021-09-14 18:25:04 +04:00 · 2021-09-14 18:25:04 +04:00 · 916bc24487
parent 9baa2d5e58
commit 916bc24487
8 changed files with 505 additions and 108 deletions
--- a/src/lib/accent-helpers.ts
+++ b/src/lib/accent-helpers.ts
@ -118,6 +118,7 @@ export function removeAccents(s: T.PsString | string): T.PsString | string {
 * 
 * @param s a string of Pashto phonetics
 */
-export function hasAccents(s: string): boolean {
+export function hasAccents(s: string | T.PsString): boolean {
+    if (typeof s !== "string") return hasAccents(s.f);
    return accentReplacer.some((x) => s.includes(x.accented));
 }
--- a/src/lib/p-text-helpers.test.ts
+++ b/src/lib/p-text-helpers.test.ts
@ -8,7 +8,6 @@

 import {
    concatPsString,
-    firstPhonetics,
    makePsString,
    removeEndingL,
    yulEndingInfinitive,
@ -22,6 +21,11 @@ import {
    removeRetroflexR,
    splitDoubleWord,
    endsInConsonant,
+    addOEnding,
+    removeFVarients,
+    endsInShwa,
+    removeAynEnding,
+    splitPsByVarients,
 } from "./p-text-helpers";
 import * as T from "../types";
 import {
@ -617,9 +621,11 @@ test(`complementInflects`, () => {
    })).toBe(false);
 });

-test(`firstPhonetics should work`, () => {
-    expect(firstPhonetics("ist'imaal, istimaal")).toBe("ist'imaal");
-    expect(firstPhonetics("kor")).toBe("kor");
+test(`removeFVarients`, () => {
+    expect(removeFVarients("ist'imaal, istimaal")).toBe("ist'imaal");
+    expect(removeFVarients({ p: "معالوم", f: "ma'aalóom, maalóom" }))
+        .toEqual({ p: "معالوم", f: "ma'aalóom" });
+    expect(removeFVarients("kor")).toBe("kor");
 });

 test(`makePsString should work`, () => {
@ -1034,4 +1040,86 @@ test("endsInAConsonant", () => {
    ];
    does.forEach((x) => expect(endsInConsonant(x)).toBe(true));
    doesnt.forEach((x) => expect(endsInConsonant(x)).toBe(false));
+})
+
+test("addOEnding", () => {
+    const tests: { in: T.PsString, out: T.PsString[] }[] = [
+        {
+            in: { p: "کتابونه", f: "kitaabóona" },
+            out: [{ p: "کتابونو", f: "kitaabóono" }],
+        },
+        {
+            in: { p: "کارغان", f: "kaargháan" },
+            out: [{ p: "کارغانو", f: "kaargháano" }],
+        },
+        {
+            in: { p: "کارغانې", f: "kaargháane" },
+            out: [{ p: "کارغانو", f: "kaargháano" }],
+        },
+        {
+            in: { p: "ښځې", f: "xúdze" },
+            out: [{ p: "ښځو", f: "xúdzo" }],
+        },
+        // TODO: Make this last thing accented??
+        {
+            in: { p: "کور", f: "kor" },
+            out: [{ p: "کورو", f: "koro" }],
+        },
+        {
+            in: { p: "سړی", f: "saRéy" },
+            out: [{ p: "سړیو", f: "saRíyo" }, { p: "سړو", f: "saRó"}], 
+        },
+        {
+            in: { p: "افغانۍ", f: "afghaanúy" },
+            out: [{ p: "افغانیو", f: "afghaanúyo" }],
+        },
+        {
+            in: { p: "اوبه", f: "oobú" },
+            out: [{ p: "اوبو", f: "oobó" }],
+        },
+        {
+            in: { p: "شودې", f: "shoodé" },
+            out: [{ p: "شودو", f: "shoodó" }],
+        },
+        {
+            in: { p: "منابع", f: "manaabí" },
+            out: [{ p: "منابو", f: "manaabó" }],
+        },
+        {
+            in: { p: "انبیا", f: "ambiyáa" },
+            out: [{ p: "انبیاوو", f: "ambiyáawo" }],
+        },
+        {
+            in: { p: "مراجع", f: "maraají'" },
+            out: [{ p: "مراجو", f: "maraajó" }],
+        },
+        {
+            in: { p: "اتباع", f: "atbaa" },
+            out: [{ p: "اتباعوو", f: "atbaawo" }],
+        },
+        {
+            in: { p: "اتباع", "f": "atbáa'" },
+            out: [{ p: "اتباعوو", f: "atbáawo" }],
+        },
+    ];
+    tests.forEach((t) => {
+        expect(addOEnding(t.in)).toEqual(t.out);
+    });
+});
+
+test("endsInShwa", () => {
+    expect(endsInShwa({ p: "ښایسته", f: "xaaystú" })).toBe(true);
+    expect(endsInShwa({ p: "ښایسته", f: "xaaystu" })).toBe(true);
+    expect(endsInShwa({ p: "ښایسته", f: "xaaysta" })).toBe(false);
+    expect(endsInShwa({ p: "کور", f: "kor" })).toBe(false);
+});
+
+test("splitPsByVarients", () => {
+    expect(splitPsByVarients({ p: "حوادث, حادثات", f: "hawáadis, haadisáat" }))
+        .toEqual([{ p: "حوادث", f: "hawáadis" }, { p: "حادثات", f: "haadisáat" }]);
+    // should work with Pashto comma too
+    expect(splitPsByVarients({ p: "حوادث، حادثات", f: "hawáadis, haadisáat" }))
+        .toEqual([{ p: "حوادث", f: "hawáadis" }, { p: "حادثات", f: "haadisáat" }]);
+    expect(splitPsByVarients({ p: "کور", f: "kor" }))
+        .toEqual([{ p: "کور", f: "kor" }]);
 })
--- a/src/lib/p-text-helpers.ts
+++ b/src/lib/p-text-helpers.ts
@ -15,8 +15,8 @@ import {
    getPersonInflectionsKey,
 } from "./misc-helpers";
 import * as T from "../types";
-import { removeAccents } from "./accent-helpers";
-import { pashtoConsonants, phoneticsConsonants } from "./pashto-consonants";
+import { hasAccents, removeAccents } from "./accent-helpers";
+import { phoneticsConsonants } from "./pashto-consonants";
 import { simplifyPhonetics } from "./simplify-phonetics";

 // export function concatPsStringWithVars(...items: Array<T.PsString | " " | "">): T.PsString[] {
@ -190,14 +190,12 @@ export function removeFVarients(x: string | T.PsString | T.DictionaryEntry): T.F
        return {
            ...x,
            f: removeFVarients(x.f),
-            __brand: "name for a dictionary entry with all the phonetics variations removed",
-        } as T.DictionaryEntryNoFVars;
+        } as unknown as T.DictionaryEntryNoFVars;
    }
    return {
        ...x,
        f: removeFVarients(x.f),
-        __brand: "name for a ps string with all the phonetics variations removed",
-    } as T.PsStringNoFVars;
+    } as unknown as T.PsStringNoFVars;
 }

 /**
@ -796,7 +794,7 @@ export function ensureUnisexInflections(infs: T.InflectorOutput, w: T.Dictionary

 export function endsInAaOrOo(w: T.PsString): boolean {
    const fEnd = simplifyPhonetics(w.f).slice(-2);
-    const pEnd = w.p.slice(-1);
+    const pEnd = w.p.slice(-1) === "ع" ? w.p.slice(-2, -1) : w.p.slice(-1);
    return (
        pEnd === "و" && fEnd.endsWith("o")
        ||
@ -804,7 +802,6 @@ export function endsInAaOrOo(w: T.PsString): boolean {
    );
 }

-
 export function endsInConsonant(w: T.PsString): boolean {
    // TODO: Add reporting back that the plural ending will need a space?

@ -823,4 +820,106 @@ export function endsInConsonant(w: T.PsString): boolean {
    // const pCons = pashtoConsonants.includes(w.p.slice(-1));
    const fCons = phoneticsConsonants.includes(simplifyPhonetics(w.f).slice(-1));
    return fCons;
+}
+
+/**
+ * adds a و - o ending (used in plurals 2nd inflection) to a given PsString
+ * It will wipe out a final ه / ع vowel or ې - e, and the o is accented (ó) if that last letter was
+ * (ی - ey gives two forms, ۍ gives یو, ا / اع take وو - wo; a trailing ' is dropped first)
+ * @param ps the word (usually an already plural form) to put the و - o ending on
+ * @returns one or more variants of the word with the و - o ending
+ */
+export function addOEnding(ps: T.PsString): T.ArrayOneOrMore<T.PsString> {
+    const w = removeEndTick(ps);
+    const lastLetter = makePsString(
+        w.p.slice(-1),
+        w.f.slice(-1),
+    );
+    const hasEyEnding = (lastLetter.p === "ی") && ["ey", "éy"].includes(w.f.slice(-2));
+    if (hasEyEnding) {
+        const base = makePsString(w.p.slice(0, -1), w.f.slice(0, -2));
+        const endHadAccent = w.f.slice(-2) === "éy";
+        return [
+            concatPsString(base, { p: "یو", f: endHadAccent ? "íyo" : "iyo" }),
+            concatPsString(base, { p: "و", f: endHadAccent ? "ó" : "o" }),
+        ];
+    }
+    if (lastLetter.p === "ۍ") {
+        const base = makePsString(w.p.slice(0, -1), w.f.slice(0, -2));
+        const endHadAccent = w.f.slice(-2) === "úy";
+        return [
+            concatPsString(base, { p: "یو", f: endHadAccent ? "úyo" : "uyo" }),
+        ];
+    }
+    if (lastLetter.p === "ا" || (w.p.slice(-2) === "اع")) {
+        return [concatPsString(w, { p: "وو", f: "wo" })];
+    }
+    const base = (
+        (["ه", "ع"].includes(lastLetter.p) && /[auiUáúíÚ]/.test(lastLetter.f)) ||
+        (lastLetter.p === "ې" && ["e", "é"].includes(lastLetter.f))
+    ) ? makePsString(
+        w.p.slice(0, -1),
+        w.f.slice(0, -1),
+    ) : w;
+    return [concatPsString(
+        base,
+        makePsString(
+            "و",
+            hasAccents(lastLetter.f) ? "ó" : "o",
+        ),
+    )];
+}
+
+/**
+ * Tells whether a word ends in a shwa: Pashto ه written as u / ú in the phonetics
+ *
+ * @param w the PsString to check (only the last character of p and of f is looked at)
+ */
+export function endsInShwa(w: T.PsString): boolean {
+    if (w.p.slice(-1) !== "ه") return false;
+    const lastF = w.f.slice(-1);
+    return lastF === "u" || lastF === "ú";
+}
+
+/**
+ * Runs the given function over the p and over the f of a PsString
+ * and hands back both results under the same two keys
+ */
+export function mapPsString<U>(ps: T.PsString, f: (s: string) => U): { p: U, f: U } {
+    // p is mapped before f, same order as the fields
+    const p = f(ps.p);
+    const mappedF = f(ps.f);
+    return { p, f: mappedF };
+}
+
+/**
+ * splits up a given PsString by comma-separated variants
+ * (the nth p variant is paired with the nth f variant)
+ * @param w a PsString with variants separated by , or ، in p and in f
+ * @returns one trimmed PsString per variant found in p
+ * @throws Error when a p variant has no (non-empty) f variant at the same position
+ */
+export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore<T.PsString> {
+    function cut(s: string) {
+        // "|" inside [...] is a literal pipe, not "or" - so only the two commas are listed
+        return s.split(/[,،]/).map((x) => x.trim());
+    }
+    const ps = mapPsString(w, cut);
+    return ps.p.map((p, i) => {
+        const f = ps.f[i];
+        if (!f) throw new Error("uneven comma seperated ps varients: " + JSON.stringify(w));
+        return makePsString(p, f);
+    }) as T.ArrayOneOrMore<T.PsString>;
+}
+
+/** Drops a single trailing ' tick from a string, or from the f of a PsString (p is passed along as is). */
+export function removeEndTick(w: T.PsString): T.PsString;
+export function removeEndTick(w: string): string;
+export function removeEndTick(w: T.PsString | string): T.PsString | string {
+    if (typeof w === "string") {
+        const endsWithTick = w.slice(-1) === "'";
+        return endsWithTick ? w.slice(0, -1) : w;
+    }
+    const trimmedF = removeEndTick(w.f);
+    return makePsString(w.p, trimmedF);
 }
--- a/src/lib/pashto-inflector.test.ts
+++ b/src/lib/pashto-inflector.test.ts
@ -442,19 +442,7 @@ const nouns: Array<{
    },
    // Masculine irregular
    {
-        in: {
-            ts: 1527813809,
-            p: "لمونځ",
-            f: "lamoondz",
-            g: "",
-            e: "Muslim ritual prayers (namaz, salah, salat)",
-            c: "n. m. irreg.",
-            i: 9835,
-            infap: "لمانځه",
-            infaf: "lamaandzu",
-            infbp: "لمنځ",
-            infbf: "lamandz",
-        },
+        in: {"ts":1527813809,"i":11318,"p":"لمونځ","f":"lamoondz","g":"lamoondz","e":"Muslim ritual prayers (namaz, salah, salat)","c":"n. m. irreg.","infap":"لمانځه","infaf":"lamaandzu","infbp":"لمنځ","infbf":"lamandz","ppp":"لمونځونه","ppf":"lamoondzóona"},
        out: {
            inflections: {
                masc: [
@ -463,17 +451,17 @@ const nouns: Array<{
                    [{p: "لمنځو", f: "lamandzo"}],
                ],
            },
-            // plural: {
-            //     masc: [
-            //         [{ p: "لمونځونه", f: "lamoondzóona" }],
-            //         [{ p: "لمونځونو", f: "lamoondzóono" }],
-            //     ],
-            // },
+            plural: {
+                masc: [
+                    [{ p: "لمونځونه", f: "lamoondzóona" }],
+                    [{ p: "لمونځونو", f: "lamoondzóono" }],
+                ],
+            },
        },
    },
    // Masculine short squish
    {
-        in: {"i":9049,"ts":1527813593,"p":"غر","f":"ghar, ghur","g":"ghar,ghur","e":"mountain","c":"n. m.","infap":"غره","infaf":"ghru","infbp":"غرو","infbf":"ghro"},
+        in: {"i":9049,"ts":1527813593,"p":"غر","f":"ghar, ghur","g":"ghar,ghur","e":"mountain","c":"n. m.","infap":"غره","infaf":"ghru","infbp":"غر","infbf":"ghr"},
        out: {
            inflections: {
                masc: [
@ -500,6 +488,11 @@ const nouns: Array<{
                    [{ p: "خره", f: "khru" }],
                    [{ p: "خرو", f: "khro" }],
                ],
+                fem: [
+                    [{ p: "خره", f: "khra" }],
+                    [{ p: "خرې", f: "khre" }],
+                    [{ p: "خرو", f: "khro" }],
+                ],
            },
            plural: {
                masc: [
@ -587,12 +580,12 @@ const nouns: Array<{
        },
    },
    {
-        in: {"ts":1527815394,"i":13991,"p":"واده","f":"waadú","g":"waadu","e":"wedding, marriage","c":"n. m."},
+        in: {"ts":1527815394,"i":13991,"p":"واده","f":"waadú","g":"waadu","e":"wedding, marriage","c":"n. m.","ppp":"ودونه","ppf":"wadóona"},
        out: {
            plural: {
                masc: [
-                    [{ p: "وادونه", f: "waadóona" }],
-                    [{ p: "وادونو", f: "waadóono" }],
+                    [{ p: "ودونه", f: "wadóona" }],
+                    [{ p: "ودونو", f: "wadóono" }],
                ],
            },
        },
@ -655,8 +648,8 @@ const nouns: Array<{
            inflections: {
                fem: [
                    [{p: "اره", f: "ará"}],
-                    [{p: "ارې", f: "are"}],
-                    [{p: "ارو", f: "aro"}],
+                    [{p: "ارې", f: "aré"}],
+                    [{p: "ارو", f: "aró"}],
                ],
            },
        },
@ -672,7 +665,7 @@ const nouns: Array<{
            c: "n. f.",
            i: 10661,
            app: "مراجع",
-            apf: "maraají’",
+            apf: "maraají'",
        },
        out: {
            inflections: {
@ -682,6 +675,12 @@ const nouns: Array<{
                    [{p: "مرجعو", f: "marjo"}],
                ],
            },
+            arabicPlural: {
+                fem: [
+                    [{ p: "مراجع", f: "maraají'" }],
+                    [{ p: "مراجو", f: "maraajó" }],
+                ],
+            },
        },
    },
    {
@ -700,8 +699,128 @@ const nouns: Array<{
            inflections: {
                fem: [
                    [{p: "منبع", f: "manbá"}],
-                    [{p: "منبعې", f: "manbe"}],
-                    [{p: "منبعو", f: "manbo"}],
+                    [{p: "منبعې", f: "manbé"}],
+                    [{p: "منبعو", f: "manbó"}],
+                ],
+            },
+            arabicPlural: {
+                fem: [
+                    [{ p: "منابع", f: "manaabí" }],
+                    [{ p: "منابو", f: "manaabó" }],
+                ],
+            },
+        },
+    },
+    {
+        in: {"ts":1527823093,"i":13207,"p":"نبي","f":"nabee","g":"nabee","e":"prophet","c":"n. m. anim.","app":"انبیا","apf":"ambiyáa"},
+        out: {
+            arabicPlural: {
+                masc: [
+                    [{ p: "انبیا", f: "ambiyáa" }],
+                    [{ p: "انبیاوو", f: "ambiyáawo" }],
+                ],
+            },
+        }
+    },
+    {
+        in: {"ts":1527819536,"i":3063,"p":"تبع","f":"taba'","g":"taba","e":"follower, adherent, supporter, subject, national","c":"n. m. unisex anim.","app":"اتباع","apf":"atbaa"},
+        out: {
+            arabicPlural: {
+                masc: [
+                    [{ p: "اتباع", f: "atbaa" }],
+                    [{ p: "اتباعوو", f: "atbaawo" }],
+                ],
+            },
+        },
+    },
+    {
+        in: {"ts":1527816113,"i":3072,"p":"تبلیغ","f":"tableegh","g":"tableegh","e":"propaganda; preaching, evangelism","c":"n. m.","app":"تبلیغات","apf":"tableegháat"},
+        out: {
+            plural: {
+                masc: [
+                    [{ p: "تبلیغونه", f: "tableeghóona" }],
+                    [{ p: "تبلیغونو", f: "tableeghóono" }],
+                ],
+            },
+            arabicPlural: {
+                masc: [
+                    [{ p: "تبلیغات", f: "tableegháat" }],
+                    [{ p: "تبلیغاتو", f: "tableegháato" }],
+                ],
+            },
+        },
+    },
+    {
+        in: {"ts":1527815921,"i":3844,"p":"توقع","f":"tawaqqU","g":"tawakkU","e":"expectation, hope, anticipation","c":"n. f.","app":"توقعات","apf":"tawaqqUaat"},
+        out: {
+            arabicPlural: {
+                masc: [
+                    [{ p: "توقعات", f: "tawaqqUaat" }],
+                    [{ p: "توقعاتو", f: "tawaqqUaato" }],
+                ],
+            },
+        },
+    },
+    {
+        in: {"ts":1527815820,"i":5177,"p":"حادثه","f":"haadisá","g":"haadisa","e":"accident, event","c":"n. f.","app":"حوادث, حادثات","apf":"hawaadis, haadisaat"},
+        out: {
+            inflections: {
+                fem: [
+                    [{ p: "حادثه", f: "haadisá" }],
+                    [{ p: "حادثې", f: "haadisé" }],
+                    [{ p: "حادثو", f: "haadisó" }],
+                ],
+            },
+            arabicPlural: {
+                masc: [
+                    [{ p: "حوادث", f: "hawaadis"}, { p: "حادثات", f: "haadisaat" }],
+                    [{ p: "حوادثو", f: "hawaadiso"}, { p: "حادثاتو", f: "haadisaato" }],
+                ],
+            },
+        },
+    },
+    {
+        in: {"ts":1527815329,"i":3097,"p":"تجربه","f":"tajrabá, tajribá","g":"tajraba,tajriba","e":"experience","c":"n. f.","app":"تجارب","apf":"tajaarib"},
+        out: {
+            inflections: {
+                fem: [
+                    [{ p: "تجربه", f: "tajrabá" }],
+                    [{ p: "تجربې", f: "tajrabé" }],
+                    [{ p: "تجربو", f: "tajrabó" }],
+                ],
+            },
+            arabicPlural: {
+                masc: [
+                    [{ p: "تجارب", f: "tajaarib"}],
+                    [{ p: "تجاربو", f: "tajaaribo"}],
+                ],
+            },
+        },
+    },
+    {
+        in: {"ts":1527814069,"i":5194,"p":"حال","f":"haal","g":"haal","e":"state, condition, circumstance","c":"n. m.","app":"احوال","apf":"ahwáal"},
+        out: {
+            plural: {
+                masc: [
+                    [{ p: "حالونه", f: "haalóona" }],
+                    [{ p: "حالونو", f: "haalóono" }],
+                ],
+            },
+            arabicPlural: {
+                masc: [
+                    [{ p: "احوال", f: "ahwáal" }],
+                    [{ p: "احوالو", f: "ahwáalo" }],
+                ],
+            },
+        },
+    },
+    {
+        in: {"ts":1527819536,"i":3063,"p":"تبع","f":"taba'","g":"taba","e":"follower, adherent, supporter, subject, national","c":"n. m. unisex anim.","app":"اتباع","apf":"atbáa'"},
+        out: {
+            arabicPlural: {
+                masc: [
+                    [{ p: "اتباع", f: "atbáa'" }],
+                    [{ p: "اتباعوو", f: "atbáawo" }],
                ],
            },
        },
@ -856,15 +975,14 @@ const nouns: Array<{
            c: "n. f.",
            i: 12205,
        },
-        out: false,
-        // out: {
-        //     plural: {
-        //         fem: [
-        //             [{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}],
-        //             [{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}],
-        //         ],
-        //     },
-        // },
+        out: {
+            plural: {
+                fem: [
+                    [{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}],
+                    [{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}],
+                ],
+            },
+        },
    },
    // TODO: Plaar plaroona paaraan - wrooNa
    // Word with no inflections
@ -881,7 +999,6 @@ const nouns: Array<{
        },
        out: false,
    },
-    // TODO: WORDS THAT ARE ALREADY PLURAL!
 ];

 const others: T.DictionaryEntry[] = [
@ -912,6 +1029,7 @@ adjectives.forEach((word) => {
 });

 nouns.forEach((word) => {
+    // if (word.in.p !== "نبي") return;
    test(`${word.in.p} should inflect properly`, () => {
        expect(inflectWord(word.in)).toEqual(word.out);
    });
@ -936,4 +1054,4 @@ test(`inflectRegularYeyUnisex should work`, () => {
            [{p: "لیدونکو", f: "leedóonko"}],
        ],
    });
-})
+});
--- a/src/lib/pashto-inflector.ts
+++ b/src/lib/pashto-inflector.ts
@ -16,8 +16,13 @@ import {
  concatPsString,
  endsInConsonant,
  endsInAaOrOo,
+  addOEnding,
+  endsInShwa,
+  splitPsByVarients,
+  removeEndTick,
 } from "./p-text-helpers";
 import {
+  hasAccents,
  removeAccents,
 } from "./accent-helpers";
 import * as T from "../types";
@ -43,6 +48,9 @@ export function inflectWord(word: T.DictionaryEntry): T.InflectorOutput {
      ) as T.UnisexInflections,
    };
  }
+  if (w.c && w.c.includes("pl.")) {
+    return handlePluralNoun(w);
+  }
  if (w.c && (w.c.includes("adj.") || w.c.includes("unisex"))) {
    return handleUnisexWord(w);
  }
@ -61,24 +69,24 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
  // Get last letter of Pashto and last two letters of phonetics
  // TODO: !!! Handle weird endings / symbols ' etc.
  const pEnd = word.p.slice(-1);
-  const plural = makePlural(word);
+  const plurals = makePlural(word);
  if (word.infap && word.infaf && word.infbp && word.infbf) {
    return {
      inflections: inflectIrregularUnisex(word.p, word.f, [
        {p: word.infap, f: word.infaf},
        {p: word.infbp, f: word.infbf},
      ]),
-      plural,
+      ...plurals,
    };
  }
  if (pEnd === "ی" && word.f.slice(-2) === "ey") {
-    return { inflections: inflectRegularYeyUnisex(word.p, word.f), plural };
+    return { inflections: inflectRegularYeyUnisex(word.p, word.f), ...plurals };
  }
  if (pEnd === "ه" && word.g.slice(-1) === "u") {
-    return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), plural };
+    return { inflections: inflectRegularShwaEndingUnisex(word.p, word.f), ...plurals };
  }
  if (pEnd === "ی" && word.f.slice(-2) === "éy") {
-    return { inflections: inflectEmphasizedYeyUnisex(word.p, word.f), plural };
+    return { inflections: inflectEmphasizedYeyUnisex(word.p, word.f), ...plurals };
  }
  if (
    pashtoConsonants.includes(pEnd) ||
@ -86,15 +94,23 @@ function handleUnisexWord(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
    word.p.slice(-2) === "ای" ||
    (word.p.slice(-1) === "ه" && word.f.slice(-1) === "h")
  ) {
-    return { inflections: inflectConsonantEndingUnisex(word.p, word.f), plural };
+    return { inflections: inflectConsonantEndingUnisex(word.p, word.f), ...plurals };
  }
+  if (plurals) return plurals;
  return false;
 }

+// output for an entry that is itself a plural noun: just the plural forms, or false when there are none
+function handlePluralNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
+  const isNoun = !!w.c && w.c.includes("n.");
+  const plurals = isNoun ? makePlural(w) : undefined;
+  return plurals ? { ...plurals } : false;
+}
+
 function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
  // Get last letter of Pashto and last two letters of phonetics
  // TODO: !!! Handle weird endings / symbols ' etc.
-  const plural = makePlural(w);
+  const plurals = makePlural(w);
  const pEnd = w.p.slice(-1);
  const fEnd = w.f.slice(-2);
  if (w.infap && w.infaf && w.infbp && w.infbf) {
@ -103,20 +119,20 @@ function handleMascNoun(w: T.DictionaryEntryNoFVars): T.InflectorOutput {
        {p: w.infap, f: w.infaf},
        {p: w.infbp, f: w.infbf},
      ]),
-      plural,
+      ...plurals,
    };
  }
  const isTobEnding = (w.p.slice(-3) === "توب" && ["tób", "tob"].includes(w.f.slice(-3)) && w.p.length > 3);
  if (isTobEnding) {
-    return { inflections: inflectTobMasc(w.p, w.f), plural };
+    return { inflections: inflectTobMasc(w.p, w.f), ...plurals };
  }
  if (pEnd === "ی" && fEnd === "ey") {
-    return { inflections: inflectRegularYeyMasc(w.p, w.f), plural };
+    return { inflections: inflectRegularYeyMasc(w.p, w.f), ...plurals };
  }
  if (pEnd === "ی" && fEnd === "éy") {
-    return { inflections: inflectRegularEmphasizedYeyMasc(w.p, w.f), plural };
+    return { inflections: inflectRegularEmphasizedYeyMasc(w.p, w.f), ...plurals };
  }
-  return plural ? { plural } : false
+  return plurals ? { ...plurals } : false
 }

 function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
@ -126,27 +142,27 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
  const animate = c.includes("anim.");
  const pEnd = word.p.slice(-1);

-  const plural = makePlural(word);
+  const plurals = makePlural(word);

  if (endingInHeyOrAynRegex.test(word.p) && endingInSingleARegex.test(word.f)) {
-    return { inflections: inflectRegularAFem(word.p, word.f), plural };
+    return { inflections: inflectRegularAFem(word.p, word.f), ...plurals };
  }
  if (word.p.slice(-1) === "ح" && endingInSingleARegex.test(word.f)) {
-    return { inflections: inflectRegularAWithHimPEnding(word.p, word.f), plural };
+    return { inflections: inflectRegularAWithHimPEnding(word.p, word.f), ...plurals };
  }
  if (pashtoConsonants.includes(pEnd) && !animate) {
-    return { inflections: inflectRegularInanMissingAFem(word.p, word.f), plural };
+    return { inflections: inflectRegularInanMissingAFem(word.p, word.f), ...plurals };
  }
  if (pEnd === "ي" && (!animate)) {
-    return { inflections: inflectRegularInanEeFem(word.p, word.f), plural };
+    return { inflections: inflectRegularInanEeFem(word.p, word.f), ...plurals };
  }
  if (pEnd === "ۍ") {
-    return { inflections: inflectRegularUyFem(word.p, word.f), plural };
+    return { inflections: inflectRegularUyFem(word.p, word.f), ...plurals };
  }
  // if (endingInAlefRegex.test(word.p)) {
  //   return { inflections: inflectRegularAaFem(word.p, f) };
  // }
-  return plural ? { plural } : false;
+  return plurals ? { ...plurals } : false;
 }

 // LEVEL 3 FUNCTIONS
@ -294,13 +310,15 @@ function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: strin
 }

 function inflectRegularAFem(p: string, f: string): T.Inflections {
-  const baseF = ["'", "’"].includes(f.slice(-1)) ? f.slice(0, -2) : f.slice(0, -1);
+  const withoutTrailingComma = ["'", "’"].includes(f.slice(-1)) ? f.slice(0, -1) : f;
+  const accentLast = hasAccents(withoutTrailingComma.slice(-1));
+  const baseF = withoutTrailingComma.slice(0, -1);
  const baseP = p.slice(-1) === "ع" ? p : p.slice(0, -1);
  return {
    fem: [
      [{p, f}],
-      [{p: `${baseP}ې`, f: `${baseF}e`}],
-      [{p: `${baseP}و`, f: `${baseF}o`}],
+      [{p: `${baseP}ې`, f: `${baseF}${accentLast ? "é" : "e"}`}],
+      [{p: `${baseP}و`, f: `${baseF}${accentLast ? "ó" : "o"}`}],
    ],
  };
 }
@ -356,53 +374,91 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections {
 function makePashtoPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
  if (!(word.ppp && word.ppf)) return undefined;
  const base = makePsString(word.ppp, word.ppf);
-  // TODO: Add male Pashto plural
+  function getBaseAndO(): T.PluralInflectionSet {
+    return [[base], addOEnding(base)];
+  }
+  if (word.c?.includes("n. m.")) {
+    return { masc: getBaseAndO() };
+  }
  if (word.c?.includes("n. f.")) {
-    return {
-      fem: [
-        [base],
-        // todo: function to add و ending automatically
-        [concatPsString(
-          makePsString(base.p.slice(0, -1), base.f.slice(0, -1)),
-          { p: "و", f: "o" },
-        )],
-      ],
-    }
+    return { fem: getBaseAndO() };
  }
  // TODO: handle masculine and unisex
  return undefined;
 }

-function makePlural(w: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
-  // TODO: Include the Pashto plural thing here
-  const pashtoPlural = makePashtoPlural(w);
-  if (pashtoPlural) return pashtoPlural;
-  function addMascPluralSuffix(animate?: boolean): T.PluralInflectionSet {
-    const base = removeAccents(w);
+function makeArabicPlural(word: T.DictionaryEntryNoFVars): T.PluralInflections | undefined {
+  if (!(word.apf && word.app)) return undefined;
+  const w = makePsString(word.app, word.apf);
+  const plural = splitPsByVarients(w);
+  const end = removeAccents(removeEndTick(word.apf).slice(-1));
+  // value = [the plural variant(s), the same with the و - o ending]; cast because TS won't take a typed key here
+  const value = [
+    plural,
+    plural.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
+  ] as T.PluralInflectionSet;
+  // feminine words that have arabic plurals stay feminine with the plural - ie مرجع - مراجع
+  // but masculine words that appear feminine in the plural aren't feminine with the Arabic plural - ie. نبي - انبیا
+  if (["i", "e", "a"].includes(end) && word.c?.includes("n. f.")) {
+    return { fem: value };
+  }
+  return { masc: value };
+}
+
+function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections } | { arabicPlural: T.PluralInflections } | undefined {
+  function addSecondInf(plur: T.ArrayOneOrMore<T.PsString> | T.PsString): T.PluralInflectionSet {
+    if (!Array.isArray(plur)) {
+      return addSecondInf([plur]);
+    }
    return [
-      [concatPsString(base, animate ? { p: "ان", f: "áan" } : { p: "ونه", f: "óona" })],
-      [concatPsString(base, animate ? { p: "انو", f: "áano" } : { p: "ونو", f: "óono" })],
+      plur,
+      plur.flatMap(addOEnding) as T.ArrayOneOrMore<T.PsString>,
    ];
+  }
+  if (w.c && w.c.includes("pl.")) {
+    const plural = addSecondInf(makePsString(w.p, w.f));
+    // Typescript being dumb and not letting me do a typed variable for the key
+    // could try refactoring with an updated TypeScript dependency
+    if (w.c.includes("n. m.")) return { plural: { masc: plural }};
+    if (w.c.includes("n. f.")) return { plural: { fem: plural }};
+  }
+  // the Arabic plural (if the entry has app / apf) is made here and returned alongside any Pashto plural below
+  const arabicPlural = makeArabicPlural(w);
+  const pashtoPlural = makePashtoPlural(w);
+  if (pashtoPlural) return { plural: pashtoPlural, arabicPlural }; 
+  function addMascPluralSuffix(animate?: boolean, shortSquish?: boolean): T.PluralInflectionSet {
+    if (shortSquish && (w.infap == undefined || w.infaf === undefined)) {
+      throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`);
+    }
+    const b = removeAccents(shortSquish
+      ? makePsString((w.infap as string).slice(0, -1), (w.infaf as string).slice(0, -1))
+      : w
+    );
+    const base = endsInShwa(b)
+      ? makePsString(b.p.slice(0, -1), b.f.slice(0, -1))
+      : b;
+    return addSecondInf(
+      concatPsString(base, (animate && !shortSquish) ? { p: "ان", f: "áan" } : { p: "ونه", f: "óona" }),
+    );
  } 
  function addAnimUnisexPluralSuffix(): T.UnisexSet<T.PluralInflectionSet> {
    const base = removeAccents(w);
    return {
      masc: addMascPluralSuffix(true),
-      fem: [
-        [concatPsString(base, { p: "انې", f: "áane" })],
-        [concatPsString(base, { p: "انو", f: "áano" })],
-      ],
+      fem: addSecondInf(concatPsString(base, { p: "انې", f: "áane" })),
    };
  }
  function addFemLongVowelSuffix(): T.PluralInflectionSet {
-    const base = makePsString(w.p, w.f);
+    const base = removeEndTick(makePsString(w.p, w.f));
    const baseWOutAccents = removeAccents(base);
-    return [
-      [concatPsString(base, { p: "وې", f: "we" }), concatPsString(baseWOutAccents, { p: "ګانې", f: "gáane" })],
-      [concatPsString(base, { p: "وو", f: "wo" }), concatPsString(baseWOutAccents, { p: "ګانو", f: "gáano" })],
-    ];
+    const space = (w.p.slice(-1) === "ع" || w.p.slice(-1) === "ه") ? { p: " ", f: "" } : "";
+    return addSecondInf([
+      concatPsString(base, space, { p: "وې", f: "we" }),
+      concatPsString(baseWOutAccents, space, { p: "ګانې", f: "gáane" })
+    ]);
  }

+  const shortSquish = !!w.infap && !w.infap.includes("ا");
  const anim = w.c?.includes("anim.");
  const type = (w.c?.includes("unisex"))
    ? "unisex noun"
@ -411,19 +467,33 @@ function makePlural(w: T.DictionaryEntryNoFVars): T.PluralInflections | undefine
    : (w.c?.includes("n. f."))
    ? "fem noun"
    : "other";
-  if (type === "unisex noun" && endsInConsonant(w) && (!w.infap) && anim) {
-    return addAnimUnisexPluralSuffix();
+  if (type === "unisex noun") {
+    if (endsInConsonant(w) && (!w.infap) && anim) {
+      return { arabicPlural, plural: addAnimUnisexPluralSuffix() };
+    }
+    if (shortSquish) {
+      return { arabicPlural, plural: { masc: addMascPluralSuffix(anim, shortSquish) }};
+    }
  }
-  if (type === "masc noun" && endsInConsonant(w) && (!w.infap) && (w.p.slice(-3) !== "توب")) {
+  if (type === "masc noun" && (shortSquish || (endsInConsonant(w) || endsInShwa(w) && (!w.infap))) && (w.p.slice(-3) !== "توب")) {
    return {
-      masc: addMascPluralSuffix(anim),
+      arabicPlural,
+      plural: {
+        masc: addMascPluralSuffix(anim, shortSquish),
+      },
    };
  }
  // TODO: What about endings in long ee / animate at inanimate
  if (type === "fem noun" && endsInAaOrOo(w) && (!w.infap)) {
    return {
-      fem: addFemLongVowelSuffix(),
+      arabicPlural,
+      plural: {
+        fem: addFemLongVowelSuffix(),
+      },
    };
  }
+  if (arabicPlural) {
+    return { arabicPlural, plural: pashtoPlural };
+  }
  return undefined;
 }
--- a/src/lib/standardize-pashto.test.ts
+++ b/src/lib/standardize-pashto.test.ts
@ -6,7 +6,7 @@
 *
 */

-import { standardizePashto } from "./standardize-pashto";
+import { standardizePashto, standardizePhonetics } from "./standardize-pashto";

 const testPairs = [
  ["گوگل", "ګوګل"],
@ -31,3 +31,14 @@ testPairs.forEach((pair) => {
    expect(result).toBe(pair[1]);
  });
 });
+
+test("standardizePhonetics", () => {
+  // curly single quotes (’ and ‘) should come out as a straight '
+  // and an already straight ' should be left untouched
+  const pairs = [
+    ["ma’aaloom", "ma'aaloom"],
+    ["ma‘aaloom", "ma'aaloom"],
+    ["ma'aaloom", "ma'aaloom"],
+  ];
+  pairs.forEach((x) => expect(standardizePhonetics(x[0])).toBe(x[1]));
+});
--- a/src/lib/standardize-pashto.ts
+++ b/src/lib/standardize-pashto.ts
@ -20,3 +20,8 @@ export function standardizePashto(input: string): string {
    // Replace آ two character version with combined آ character
    .replace(/آ/g, "آ");
 }
+
+export function standardizePhonetics(input: string): string {
+  // turns curly quotes ‘ ’ into a straight ' and nothing else (TODO: check that these are the only kinds of smart quotes)
+  return input.replace(/[‘’]/g, "'");
+}
--- a/src/types.ts
+++ b/src/types.ts
@ -352,7 +352,12 @@ export type Inflections = GenderedSet<InflectionSet>;
 export type PluralInflections = GenderedSet<PluralInflectionSet>;

 export type InflectorOutput = {
+    arabicPlural: PluralInflections,
+    plural?: PluralInflections,
+    inflections?: Inflections,
+} | {
    plural: PluralInflections,
+    arabicPlural?: PluralInflections,
    inflections?: Inflections,
 } | {
    inflections: Inflections,