more coming

2021-06-03 18:22:14 +04:30 · 2021-06-03 18:22:14 +04:30 · 1a0480a9d3
parent cf01df5c6d
commit 1a0480a9d3
3 changed files with 550 additions and 203 deletions
--- a/src/lib/diacritics-helpers.ts
+++ b/src/lib/diacritics-helpers.ts
@ -236,7 +236,6 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
        endingMatches: ["ه"],
        beginningMatches: ["ا", "ع"],
        // canComeAfterHeyEnding: true,
        // canBeFirstPartOfFathahanEnding: true,
    },
    "u": {
        diacritic: zwarakey,
@ -311,13 +310,162 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
    }
    return result;
 }
 export enum PhonemeStatus {
    LeadingLongVowel,
    LeadingConsonantOrShortVowel,
    DoubleConsonantTashdeed,
    EndingWithHeyHim,
    DirectMatch,
    DirectMatchAfterSukun,
    EndingWithHeyHimFromSukun,
    ShortVowel,
    PersianSilentWWithAa,
    ArabicWasla,
    Izafe,
    EndOfDuParticle,
    ShortAEndingAfterHeem,
    AlefDaggarEnding,
    AinWithLongAAtBeginning,
    LongAinVowelMissingComma,
    ShortAinVowelMissingComma,
    ShortAinVowelMissingCommaAfterAlefStart,
    AinBeginningAfterShortVowel,
    AlefWithHamza,
    AlefWithHamzaWithGlottalStop,
    WoEndingO,
    ShortAForAlefBeforeFathatan,
    NOnFathatan,
 }
 export function stateInfo({ state, i, phonemes, phoneme }: {
    state: DiacriticsAccumulator,
    i: number,
    phonemes: Phoneme[],
    phoneme: Phoneme,
 }) {
    const isOutOfWord = (char: string) => !char || char === " ";
    const prevPLetter = last(state.pOut);
    const currentPLetter = state.pIn[0];
    const nextPLetter = state.pIn[1];
    const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
    const isEndOfWord = isOutOfWord(nextPLetter);
    const phonemeInfo = phonemeTable[phoneme];
    const nextPhoneme = phonemes[i+1];
    const previousPhoneme = i > 0 && phonemes[i-1];
    const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
    // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
    // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
    const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
    const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
    const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
    const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
    const diacritic = useAinBlendDiacritics
    ? phonemeInfo.ainBlendDiacritic
    : isEndOfWord 
    ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
    const lastWordEndedW = (char: string) => ((prevPLetter === char && !currentPLetter) || (prevPLetter === " " && last(state.pOut, 2) === char));
    function getPhonemeState(): PhonemeStatus {
        if (isBeginningOfWord && phoneme === "aa" && phonemeInfo.beginningMatches?.includes(currentPLetter)) {
            return PhonemeStatus.DirectMatch;
        }
        if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
            if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
                throw Error("phonetics error - needs alef prefix");
            }
            return PhonemeStatus.LeadingLongVowel;
        }
        if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
            return PhonemeStatus.LeadingConsonantOrShortVowel;
        }
        if (isBeginningOfWord && phoneme === "aa" && currentPLetter === "ع" && nextPLetter === "ا") {
            return PhonemeStatus.AinWithLongAAtBeginning;
        }
        // console.log("------");
        // console.log("phoneme", phoneme);
        // console.log("state", state);
        // console.log("prevPLetter is space", prevPLetter === " ");
        // console.log("------");
        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
            return PhonemeStatus.EndOfDuParticle
        }
        if (phoneme === "a" && currentPLetter === "ا" && nextPLetter === fathahan) {
            return PhonemeStatus.ShortAForAlefBeforeFathatan;
        }
        if (phoneme === "'" && last(state.pOut, 2) === "ع" && isOutOfWord(last(state.pOut, 3))) {
            return PhonemeStatus.AinBeginningAfterShortVowel;
        }
        if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
            return PhonemeStatus.PersianSilentWWithAa;
        }
        if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
            return PhonemeStatus.ArabicWasla;
        }
        if (phoneme === "-i-" && isBeginningOfWord) {
            return PhonemeStatus.Izafe;
        }
        if (phoneme === "a" && currentPLetter === "أ") {
            return PhonemeStatus.AlefWithHamza;
        }
        if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
            return PhonemeStatus.AlefWithHamzaWithGlottalStop;
        }
        if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'") {
            if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
                return PhonemeStatus.ShortAinVowelMissingComma;
            }
            if ((last(state.pOut, 2) === "ا") && isOutOfWord(last(state.pOut, 3))) {
                return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
            }
        }
        if (useAinBlendDiacritics) {
            return PhonemeStatus.LongAinVowelMissingComma;
        }
        if (needsTashdeed) {
            return PhonemeStatus.DoubleConsonantTashdeed;
        }
        if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
            return PhonemeStatus.AlefDaggarEnding;
        }
        if (phoneme === "a" && lastWordEndedW("ح")) {
            return PhonemeStatus.ShortAEndingAfterHeem;
        }
        if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
            return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
        }
        if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
            return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
        }
        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {  
            return PhonemeStatus.ShortVowel;
        }
        if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
            return PhonemeStatus.WoEndingO;
        }
        if (isEndOfWord && phoneme === "n" && currentPLetter === fathahan && prevPLetter === "ا") {
            return PhonemeStatus.NOnFathatan;
        }
        console.log(state);
        // console.log("bad phoneme is ", phoneme);
        throw new Error("phonetics error - no status found for phoneme: " + phoneme);
    }
    const phs = getPhonemeState();
    return {
        phs, phonemeInfo, diacritic, prevPLetter,
    };
 };
 /**
- * returns the last character of a string
+ * returns the nth last character of a string
 * 
 * @param s 
 */
-export function last(s: string) {
+export function last(s: string, n = 1) {
-    return s[s.length - 1];
+    return s[s.length - n];
 }
 export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
@ -110,6 +110,21 @@ const diacriticsSections: {
                },
                out: "پَسْتَه",
            },
            // working with ئ as vowel at end
            {
                in: {
                    p: "شئ",
                    f: "sheyy",
                },
                out: "شئ",
            },
            {
                in: {
                    p: "کار کوئ چې لاړ شئ",
                    f: "kaar kawéyy che laaR sheyy",
                },
                out: "کار کَوئ چې لاړ شئ",
            },
            // working with وs
            {
                in: {
@ -209,6 +224,41 @@ const diacriticsSections: {
                },
                out: "لِیک",
            },
            {
                in: {
                    p: "ماضی",
                    f: "maazee",
                },
                out: null,
            },
            {
                in: {
                    p: "وسېدل",
                    f: "osedul",
                },
                out: null,
            },
            {
                in: {
                    p: "يست",
                    f: "eest",
                },
                out: null,
            },
            {
                in: {
                    p: "ست",
                    f: "ist",
                },
                out: null,
            },
            {
                in: {
                    p: "haca",
                    f: "هځه",
                },
                out: null,
            },
            {
                in: {
                    p: "رغېدل",
@ -458,6 +508,13 @@ const diacriticsSections: {
                },
                out: "پَتَّه تُور",      
            },
            {
                in: {
                    p: "لکۍ وال",
                    f: "lakuy waal",
                },
                out: "لَکۍ وال",
            },
            // avoid false double consonant
            {
                in: {
@ -466,6 +523,107 @@ const diacriticsSections: {
                },
                out: "اَزَل لِیک",
            },
            {
                in: {
                    p: "سه",
                    f: "si",
                },
                out: "سِه",
            },
            {
                in: {
                    p: "سه شنبه",
                    f: "sishamba",
                },
                out: "سِه شَنْبَه",
            },
            {
                in: {
                    p: "توجه",
                    f: "tawajÚ",
                },
                out: "تَوَجُه",
            },
            {
                in: {
                    p: "توجه کول",
                    f: "tawajU kawul",
                },
                out: "تَوَجُه کَو" + zwarakey + "ل",
            },
            {
                in: {
                    p: "با استعداد",
                    f: "baa isti'dáad",
                },
                out: "با اِسْتِعْداد",
            },
            {
                in: {
                    p: "آدم",
                    f: "aadam",
                },
                out: "آدَم",
            },
            {
                in: {
                    p: "آسان",
                    f: "aasáan",
                },
                out: "آسان",
            },
            {
                in: {
                    p: "آسان",
                    f: "asáan",
                },
                out: null,
            },
            {
                in: {
                    p: "یدام",
                    f: "aadam",
                },
                out: null,
            },
        ],
    },
    {
        describe: "ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی",
        tests: [
            {
                in: {
                    p: "پتېیل",
                    f: "pateyúl",
                },
                out: null,
            },
            {
                in: {
                    p: "پتېیل",
                    f: "pate`yúl",
                },
                out: "پَتېی" + zwarakey + "ل",
            },
            {
                in: {
                    p: "درېیم",
                    f: "dre`yum",
                },
                out: "دْرېی" + zwarakey + "م",
            },
        ],
    },
    {
        describe: "handle circumpositions",
        tests: [
            {
                in: {
                    p: "تر ... پورې",
                    f: "tur ... pore",
                },
                out: "ت" + zwarakey + "ر ... پورې",
            },
        ],
    },
    {
@ -480,6 +638,25 @@ const diacriticsSections: {
            },
        ],
    },
    {
        describe: "excetption for و - wo",
        tests: [
            {
                in: {
                    p: "و",
                    f: "wo",
                },
                out: "و",
            },
            {
                in: {
                    p: "سړی و",
                    f: "saRey wo",
                },
                out: "سَړی و",
            },
        ],
    },
    {
        describe: "alef with hamza above",
        tests: [
@ -593,43 +770,105 @@ const diacriticsSections: {
                },
                out: "طَمَع اِسْتِعمال",
            },
-            // {
+            {
-            //     in: {
+                in: {
-            //         p: "اعتصاب شکن",
+                    p: "مربع",
-            //         f: "itisaab shakan",
+                    f: "mUraba'",
-            //     },
+                },
-            //     out: "اِعتِصاب شَکَن",
+                out: "مُرَبَع",
-            // },
+            },
-            // {
+            {
-            //     in: {
+                in: {
-            //         p: "عادل",
+                    p: "مربع جذر",
-            //         f: "aadíl",
+                    f: "mUraba' jazúr",
-            //     },
+                },
-            //     out: "عادل",
+                out: "مُرَبَع جَذ" + zwarakey + "ر",
-            // },
+            },
-            // // starting with ع
+            {
-            // {
+                in: {
-            //     in: {
+                    p: "عام",
-            //         p: "عزت",
+                    f: "'aam",
-            //         f: "izzat",
+                },
-            //     },
+                out: "عام",
-            //     out: "عِزَّت",
+            },
-            // },
+            {
-            // {
+                in: {
-            //     in: {
+                    p: "قتل عام",
-            //         p: "عزت",
+                    f: "qatl-i-aam",
-            //         f: "i'zzat",
+                },
-            //     },
+                out: "قَتْلِ عام",
-            //     out: "عِزَّت",
+            },
-            // },
+            {
-            // // middle ع
+                in: {
-            // {
+                    p: "توقع",
-            //     in: {
+                    f: "tawaqqÚ",
-            //         p: "معنا",
+                },
-            //         f: "ma'anaa",
+                out: "تَوَقّعُ",
-            //     },
+            },
-            //     out: "مَعَنا",
+        ],
-            // },
+    },
    {
        describe: "ayn at the beginning",
        tests: [
            // as a short vowel at the beginning
            {
                in: {
                    p: "عزت",
                    f: "izzat",
                },
                out: "عِزَّت",
            },
            {
                in: {
                    p: "عزت",
                    f: "i'zzat",
                },
                out: "عِْزَّت",
            },
            {
                in: {
                    p: "عذر",
                    f: "Uzar",
                },
                out: "عُذَر",
            },
            {
                in: {
                    p: "عذر",
                    f: "U'zar",
                },
                out: "عُْذَر",
            },
            // as a short i with an alef
            {
                in: {
                    p: "اعتصاب شکن",
                    f: "itisaab shakan",
                },
                out: "اِعتِصاب شَکَن",
            },
            {
                in: {
                    p: "اعتصاب شکن",
                    f: "i'tisaab shakan",
                },
                out: "اِعْتِصاب شَکَن",
            },
            // as a long aa at beginning
            {
                in: {
                    p: "عادل",
                    f: "aadíl",
                },
                out: "عادِل",
            },
            {
                in: {
                    p: "عید",
                    f: "eed",
                },
                out: "عِید",
            },
        ],
    },
    {
@ -687,6 +926,25 @@ const diacriticsSections: {
            },
        ],
    },
    {
        describe: "joiner و",
        tests: [
            {
                in: {
                    p: "کار و بار",
                    f: "kaar-U-baar",
                },
                out: "کار و بار",
            },
            {
                in: {
                    p: "کاروبار",
                    f: "kaar-U-baar",
                },
                out: "کاروبار",
            },
        ],
    },
    {
        describe: "special behaviour with د",
        tests: [
@ -716,13 +974,13 @@ const diacriticsSections: {
    {
        describe: "ha ending with ح",
        tests: [
-            // {
+            {
-            //     in: {
+                in: {
-            //         p: "ذبح",
+                    p: "ذبح",
-            //         f: "zabha",
+                    f: "zabha",
-            //     },
+                },
-            //     out: "ذَبْحَ",
+                out: "ذَبْحَ",
-            // },
+            },
            {
                in: {
                    p: "ذبح کول",
@ -764,10 +1022,42 @@ const diacriticsSections: {
                out: "مَعَنیٰ",
            },
        ],
-    }
+    },
    {
        describe: "require fathatan on words ending in اً ",
        tests: [
            {
                in: {
                    p: "دقیقا",
                    f: "daqeeqan",
                },
                out: null,
            },
            {
                in: {
                    p: "دقیقاً",
                    f: "daqeeqan",
                },
                out: "دَقِیقاً",
            },
        ],
    },
    {
        describe: "Ua ؤ",
        tests: [
            {
                in: {
                    p: "مودب",
                    f: "mUaddab",
                },
                out: "مؤدَّب",
            },
        ],
    },
 ];
 diacriticsSections.forEach((section) => {
    // if (!section.describe.includes("require fathatan")) return;
    describe(section.describe, () => {
        section.tests.forEach((t) => {
            if (t.out) {
@ -785,34 +1075,34 @@ diacriticsSections.forEach((section) => {
 // ERRORS
-const brokenDiacritics = [
+// const brokenDiacritics = [
-    {
+//     {
-        p: "تشناب",
+//         p: "تشناب",
-        f: "peshnaab",
+//         f: "peshnaab",
-    },
+//     },
-    {
+//     {
-        p: "وسېدل",
+//         p: "وسېدل",
-        f: "osedul",
+//         f: "osedul",
-    },
+//     },
-];
+// ];
-test("ending with left over Pashto script will throw an error", () => {
+// test("ending with left over Pashto script will throw an error", () => {
-    expect(() => {
+//     expect(() => {
-        addDiacritics({ p: "کور ته", f: "kor" });
+//         addDiacritics({ p: "کور ته", f: "kor" });
-    }).toThrow(`phonetics error - phonetics shorter than pashto script`);
+//     }).toThrow(`phonetics error - phonetics shorter than pashto script`);
-});
+// });
-test("ending with left over phonetics will throw an error", () => {
+// test("ending with left over phonetics will throw an error", () => {
-    expect(() => {
+//     expect(() => {
-        addDiacritics({ p: "کار", f: "kaar kawul" });
+//         addDiacritics({ p: "کار", f: "kaar kawul" });
-    }).toThrow();
+//     }).toThrow();
-});
+// });
-test("adding diacritics errors when phonetecs and pashto do not line up", () => {
+// test("adding diacritics errors when phonetecs and pashto do not line up", () => {
-    brokenDiacritics.forEach((t) => {
+//     brokenDiacritics.forEach((t) => {
-        expect(() => {
+//         expect(() => {
-            addDiacritics(t);
+//             addDiacritics(t);
-        }).toThrow();
+//         }).toThrow();
-    });
+//     });
-});
+// });
--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@ -21,15 +21,15 @@ import {
    wasla,
    daggerAlif,
    fathahan,
    lastNonWhitespace,
    addP,
    last,
    advanceP,
    reverseP,
    overwriteP,
    advanceForHamza,
    advanceForHamzaMid,
    DiacriticsAccumulator,
    stateInfo,
    PhonemeStatus,
 } from "./diacritics-helpers";
 import { firstPhonetics } from "./p-text-helpers";
@ -51,27 +51,6 @@ import { pipe } from "rambda";
    };
 }
 enum PhonemeStatus {
    LeadingLongVowel,
    LeadingConsonantOrShortVowel,
    DoubleConsonantTashdeed,
    EndingWithHeyHim,
    DirectMatch,
    DirectMatchAfterSukun,
    EndingWithHeyHimFromSukun,
    ShortVowel,
    PersianSilentWWithAa,
    ArabicWasla,
    Izafe,
    EndOfDuParticle,
    HaEndingWithHeem,
    AlefDaggarEnding,
    LongAinVowelMissingComma,
    ShortAinVowelMissingComma,
    AlefWithHamza,
    AlefWithHamzaWithGlottalStop,
 }
 function processPhoneme(
    acc: DiacriticsAccumulator,
    phoneme: Phoneme,
@ -96,6 +75,7 @@ function processPhoneme(
        phonemeInfo,
        diacritic,
        phs,
        prevPLetter,
    } = stateInfo({ state, i, phoneme, phonemes });
    // console.log("phoneme", phoneme);
@ -154,10 +134,9 @@ function processPhoneme(
                reverseP,
                addP(zwarakey),
            )(state)
-        : (phs === PhonemeStatus.HaEndingWithHeem) ?
+        : (phs === PhonemeStatus.ShortAEndingAfterHeem) ?
            pipe(
-                reverseP,
+                prevPLetter === " " ? reverseP : addP(""),
                // prevPLetter === " " ? reverseP ,
                addP(zwar),
            )(state)
        : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
@ -181,114 +160,44 @@ function processPhoneme(
                addP(diacritic),
                advanceP,
            )(state)
        : (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
            pipe(
                advanceP,
                advanceP,
            )(state)
        : (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
            pipe(
               advanceP,
               advanceP, 
            )(state)
        : (phs === PhonemeStatus.AlefWithHamza) ?
            pipe(
                advanceP,
            )(state)
-        : (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ?
+        : (phs === PhonemeStatus.ShortVowel) ?
            state
        :
        // phs === PhonemeState.ShortVowel
            pipe(
                advanceForHamzaMid,
                addP(phonemeInfo.diacritic),
                // TODO THIS?
                advanceForHamza,
-            )(state);
+            )(state)
        : (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
            pipe(
                advanceP,
            )(state)
        : (phs === PhonemeStatus.NOnFathatan) ?
            pipe(
                advanceP,
            )(state)
        : state;
        // (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ?
        //    state
        // : (phs === PhonemeStatus.AinBeginningAfterShortVowel) ?
        //    state
        //: (phs === PhonemeStatus.WoEndingO) ?
        //    state
        // :
        // 
 }
 function stateInfo({ state, i, phonemes, phoneme }: {
    state: DiacriticsAccumulator,
    i: number,
    phonemes: Phoneme[],
    phoneme: Phoneme,
 }) {
    const prevPLetter = last(state.pOut);
    const currentPLetter = state.pIn[0];
    const nextPLetter = state.pIn[1];
    const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
    const isEndOfWord = !nextPLetter || nextPLetter === " ";
    const phonemeInfo = phonemeTable[phoneme];
    const nextPhoneme = phonemes[i+1];
    const previousPhoneme = i > 0 && phonemes[i-1];
    const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
    // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
    // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
    const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
    const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
    const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
    const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
    const diacritic = useAinBlendDiacritics
        ? phonemeInfo.ainBlendDiacritic
        : isEndOfWord 
        ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
    function getPhonemeState(): PhonemeStatus {
        if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
            if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
                throw Error("phonetics error - needs alef prefix");
            }
            return PhonemeStatus.LeadingLongVowel;
        }
        if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
            return PhonemeStatus.LeadingConsonantOrShortVowel;
        }
        // console.log("------");
        // console.log("phoneme", phoneme);
        // console.log("state", state);
        // console.log("prevPLetter is space", prevPLetter === " ");
        // console.log("------");
        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
            return PhonemeStatus.EndOfDuParticle
        }
        if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
            return PhonemeStatus.PersianSilentWWithAa;
        }
        if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
            return PhonemeStatus.ArabicWasla;
        }
        if (phoneme === "-i-" && isBeginningOfWord) {
            return PhonemeStatus.Izafe;
        }
        if (phoneme === "a" && currentPLetter === "أ") {
            return PhonemeStatus.AlefWithHamza;
        }
        if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
            return PhonemeStatus.AlefWithHamzaWithGlottalStop;
        }
        if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'" && phonemeInfo.diacritic && !phonemeInfo.longVowel) {
            return PhonemeStatus.ShortAinVowelMissingComma;
        }
        if (useAinBlendDiacritics) {
            return PhonemeStatus.LongAinVowelMissingComma;
        }
        if (needsTashdeed) {
            return PhonemeStatus.DoubleConsonantTashdeed;
        }
        if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
            return PhonemeStatus.AlefDaggarEnding;
        }
        if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") {
            return PhonemeStatus.HaEndingWithHeem;
        }
        if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
            return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
        }
        if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
            return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
        }
        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {  
            return PhonemeStatus.ShortVowel;
        }
        // console.log("bad phoneme is ", phoneme);
        throw new Error("phonetics error - no status found for phoneme: " + phoneme);
    }
    const phs = getPhonemeState();
    return {
        phs, phonemeInfo, diacritic,
    };
 };