more coming

2021-06-03 18:22:14 +04:30 · 2021-06-03 18:22:14 +04:30 · 1a0480a9d3
parent cf01df5c6d
commit 1a0480a9d3
3 changed files with 550 additions and 203 deletions
--- a/src/lib/diacritics-helpers.ts
+++ b/src/lib/diacritics-helpers.ts
@ -236,7 +236,6 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
        endingMatches: ["ه"],
        beginningMatches: ["ا", "ع"],
        // canComeAfterHeyEnding: true,
-        // canBeFirstPartOfFathahanEnding: true,
    },
    "u": {
        diacritic: zwarakey,
@ -311,13 +310,162 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
    }
    return result;
 }
+export enum PhonemeStatus {
+    LeadingLongVowel,
+    LeadingConsonantOrShortVowel,
+    DoubleConsonantTashdeed,
+    EndingWithHeyHim,
+    DirectMatch,
+    DirectMatchAfterSukun,
+    EndingWithHeyHimFromSukun,
+    ShortVowel,
+    PersianSilentWWithAa,
+    ArabicWasla,
+    Izafe,
+    EndOfDuParticle,
+    ShortAEndingAfterHeem,
+    AlefDaggarEnding,
+    AinWithLongAAtBeginning,
+    LongAinVowelMissingComma,
+    ShortAinVowelMissingComma,
+    ShortAinVowelMissingCommaAfterAlefStart,
+    AinBeginningAfterShortVowel,
+    AlefWithHamza,
+    AlefWithHamzaWithGlottalStop,
+    WoEndingO,
+    ShortAForAlefBeforeFathatan,
+    NOnFathatan,
+}
+
+export function stateInfo({ state, i, phonemes, phoneme }: {
+    state: DiacriticsAccumulator,
+    i: number,
+    phonemes: Phoneme[],
+    phoneme: Phoneme,
+}) {
+    const isOutOfWord = (char: string) => !char || char === " ";
+    const prevPLetter = last(state.pOut);
+    const currentPLetter = state.pIn[0];
+    const nextPLetter = state.pIn[1];
+    const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
+    const isEndOfWord = isOutOfWord(nextPLetter);
+    const phonemeInfo = phonemeTable[phoneme];
+    const nextPhoneme = phonemes[i+1];
+    const previousPhoneme = i > 0 && phonemes[i-1];
+    const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
+    // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
+    // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
+    const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
+    const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
+    const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
+    const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
+    const diacritic = useAinBlendDiacritics
+    ? phonemeInfo.ainBlendDiacritic
+    : isEndOfWord 
+    ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
+    
+    const lastWordEndedW = (char: string) => ((prevPLetter === char && !currentPLetter) || (prevPLetter === " " && last(state.pOut, 2) === char));
+
+    function getPhonemeState(): PhonemeStatus {
+        if (isBeginningOfWord && phoneme === "aa" && phonemeInfo.beginningMatches?.includes(currentPLetter)) {
+            return PhonemeStatus.DirectMatch;
+        }
+        if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
+            if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
+                throw Error("phonetics error - needs alef prefix");
+            }
+            return PhonemeStatus.LeadingLongVowel;
+        }
+        if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
+            return PhonemeStatus.LeadingConsonantOrShortVowel;
+        }
+        if (isBeginningOfWord && phoneme === "aa" && currentPLetter === "ع" && nextPLetter === "ا") {
+            return PhonemeStatus.AinWithLongAAtBeginning;
+        }
+        // console.log("------");
+        // console.log("phoneme", phoneme);
+        // console.log("state", state);
+        // console.log("prevPLetter is space", prevPLetter === " ");
+        // console.log("------");
+        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
+            return PhonemeStatus.EndOfDuParticle
+        }
+        if (phoneme === "a" && currentPLetter === "ا" && nextPLetter === fathahan) {
+            return PhonemeStatus.ShortAForAlefBeforeFathatan;
+        }
+        if (phoneme === "'" && last(state.pOut, 2) === "ع" && isOutOfWord(last(state.pOut, 3))) {
+            return PhonemeStatus.AinBeginningAfterShortVowel;
+        }
+        if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
+            return PhonemeStatus.PersianSilentWWithAa;
+        }
+        if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
+            return PhonemeStatus.ArabicWasla;
+        }
+        if (phoneme === "-i-" && isBeginningOfWord) {
+            return PhonemeStatus.Izafe;
+        }
+        if (phoneme === "a" && currentPLetter === "أ") {
+            return PhonemeStatus.AlefWithHamza;
+        }
+        if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
+            return PhonemeStatus.AlefWithHamzaWithGlottalStop;
+        }
+        if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'") {
+            if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
+                return PhonemeStatus.ShortAinVowelMissingComma;
+            }
+            if ((last(state.pOut, 2) === "ا") && isOutOfWord(last(state.pOut, 3))) {
+                return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
+            }
+        }
+        if (useAinBlendDiacritics) {
+            return PhonemeStatus.LongAinVowelMissingComma;
+        }
+        if (needsTashdeed) {
+            return PhonemeStatus.DoubleConsonantTashdeed;
+        }
+        if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
+            return PhonemeStatus.AlefDaggarEnding;
+        }
+        if (phoneme === "a" && lastWordEndedW("ح")) {
+            return PhonemeStatus.ShortAEndingAfterHeem;
+        }
+        if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
+            return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
+        }
+        if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
+            return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
+        }
+        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {  
+            return PhonemeStatus.ShortVowel;
+        }
+        if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
+            return PhonemeStatus.WoEndingO;
+        }
+        if (isEndOfWord && phoneme === "n" && currentPLetter === fathahan && prevPLetter === "ا") {
+            return PhonemeStatus.NOnFathatan;
+        }
+        console.log(state);
+        // console.log("bad phoneme is ", phoneme);
+        throw new Error("phonetics error - no status found for phoneme: " + phoneme);
+    }
+
+    const phs = getPhonemeState();
+
+    return {
+        phs, phonemeInfo, diacritic, prevPLetter,
+    };
+};
+
+
 /**
- * returns the last character of a string
+ * returns the nth last character of a string
 * 
 * @param s 
 */
-export function last(s: string) {
-    return s[s.length - 1];
+export function last(s: string, n = 1) {
+    return s[s.length - n];
 }

 export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
@ -110,6 +110,21 @@ const diacriticsSections: {
                },
                out: "پَسْتَه",
            },
+            // working with ئ as vowel at end
+            {
+                in: {
+                    p: "شئ",
+                    f: "sheyy",
+                },
+                out: "شئ",
+            },
+            {
+                in: {
+                    p: "کار کوئ چې لاړ شئ",
+                    f: "kaar kawéyy che laaR sheyy",
+                },
+                out: "کار کَوئ چې لاړ شئ",
+            },
            // working with وs
            {
                in: {
@ -209,6 +224,41 @@ const diacriticsSections: {
                },
                out: "لِیک",
            },
+            {
+                in: {
+                    p: "ماضی",
+                    f: "maazee",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "وسېدل",
+                    f: "osedul",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "يست",
+                    f: "eest",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "ست",
+                    f: "ist",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "haca",
+                    f: "هځه",
+                },
+                out: null,
+            },
            {
                in: {
                    p: "رغېدل",
@ -458,6 +508,13 @@ const diacriticsSections: {
                },
                out: "پَتَّه تُور",      
            },
+            {
+                in: {
+                    p: "لکۍ وال",
+                    f: "lakuy waal",
+                },
+                out: "لَکۍ وال",
+            },
            // avoid false double consonant
            {
                in: {
@ -466,6 +523,107 @@ const diacriticsSections: {
                },
                out: "اَزَل لِیک",
            },
+            {
+                in: {
+                    p: "سه",
+                    f: "si",
+                },
+                out: "سِه",
+            },
+            {
+                in: {
+                    p: "سه شنبه",
+                    f: "sishamba",
+                },
+                out: "سِه شَنْبَه",
+            },
+            {
+                in: {
+                    p: "توجه",
+                    f: "tawajÚ",
+                },
+                out: "تَوَجُه",
+            },
+            {
+                in: {
+                    p: "توجه کول",
+                    f: "tawajU kawul",
+                },
+                out: "تَوَجُه کَو" + zwarakey + "ل",
+            },
+            {
+                in: {
+                    p: "با استعداد",
+                    f: "baa isti'dáad",
+                },
+                out: "با اِسْتِعْداد",
+            },
+            {
+                in: {
+                    p: "آدم",
+                    f: "aadam",
+                },
+                out: "آدَم",
+            },
+            {
+                in: {
+                    p: "آسان",
+                    f: "aasáan",
+                },
+                out: "آسان",
+            },
+            {
+                in: {
+                    p: "آسان",
+                    f: "asáan",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "یدام",
+                    f: "aadam",
+                },
+                out: null,
+            },
+        ],
+    },
+    {
+        describe: "ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی",
+        tests: [
+            {
+                in: {
+                    p: "پتېیل",
+                    f: "pateyúl",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "پتېیل",
+                    f: "pate`yúl",
+                },
+                out: "پَتېی" + zwarakey + "ل",
+            },
+            {
+                in: {
+                    p: "درېیم",
+                    f: "dre`yum",
+                },
+                out: "دْرېی" + zwarakey + "م",
+            },
+        ],
+    },
+    {
+        describe: "handle circumpositions",
+        tests: [
+            {
+                in: {
+                    p: "تر ... پورې",
+                    f: "tur ... pore",
+                },
+                out: "ت" + zwarakey + "ر ... پورې",
+            },
        ],
    },
    {
@ -480,6 +638,25 @@ const diacriticsSections: {
            },
        ],
    },
+    {
+        describe: "excetption for و - wo",
+        tests: [
+            {
+                in: {
+                    p: "و",
+                    f: "wo",
+                },
+                out: "و",
+            },
+            {
+                in: {
+                    p: "سړی و",
+                    f: "saRey wo",
+                },
+                out: "سَړی و",
+            },
+        ],
+    },
    {
        describe: "alef with hamza above",
        tests: [
@ -593,43 +770,105 @@ const diacriticsSections: {
                },
                out: "طَمَع اِسْتِعمال",
            },
-            // {
-            //     in: {
-            //         p: "اعتصاب شکن",
-            //         f: "itisaab shakan",
-            //     },
-            //     out: "اِعتِصاب شَکَن",
-            // },
-            // {
-            //     in: {
-            //         p: "عادل",
-            //         f: "aadíl",
-            //     },
-            //     out: "عادل",
-            // },
-            // // starting with ع
-            // {
-            //     in: {
-            //         p: "عزت",
-            //         f: "izzat",
-            //     },
-            //     out: "عِزَّت",
-            // },
-            // {
-            //     in: {
-            //         p: "عزت",
-            //         f: "i'zzat",
-            //     },
-            //     out: "عِزَّت",
-            // },
-            // // middle ع
-            // {
-            //     in: {
-            //         p: "معنا",
-            //         f: "ma'anaa",
-            //     },
-            //     out: "مَعَنا",
-            // },
+            {
+                in: {
+                    p: "مربع",
+                    f: "mUraba'",
+                },
+                out: "مُرَبَع",
+            },
+            {
+                in: {
+                    p: "مربع جذر",
+                    f: "mUraba' jazúr",
+                },
+                out: "مُرَبَع جَذ" + zwarakey + "ر",
+            },
+            {
+                in: {
+                    p: "عام",
+                    f: "'aam",
+                },
+                out: "عام",
+            },
+            {
+                in: {
+                    p: "قتل عام",
+                    f: "qatl-i-aam",
+                },
+                out: "قَتْلِ عام",
+            },
+            {
+                in: {
+                    p: "توقع",
+                    f: "tawaqqÚ",
+                },
+                out: "تَوَقّعُ",
+            },
+        ],
+    },
+    {
+        describe: "ayn at the beginning",
+        tests: [
+            // as a short vowel at the beginning
+            {
+                in: {
+                    p: "عزت",
+                    f: "izzat",
+                },
+                out: "عِزَّت",
+            },
+            {
+                in: {
+                    p: "عزت",
+                    f: "i'zzat",
+                },
+                out: "عِْزَّت",
+            },
+            {
+                in: {
+                    p: "عذر",
+                    f: "Uzar",
+                },
+                out: "عُذَر",
+            },
+            {
+                in: {
+                    p: "عذر",
+                    f: "U'zar",
+                },
+                out: "عُْذَر",
+            },
+            // as a short i with an alef
+            {
+                in: {
+                    p: "اعتصاب شکن",
+                    f: "itisaab shakan",
+                },
+                out: "اِعتِصاب شَکَن",
+            },
+            {
+                in: {
+                    p: "اعتصاب شکن",
+                    f: "i'tisaab shakan",
+                },
+                out: "اِعْتِصاب شَکَن",
+            },
+            // as a long aa at beginning
+            {
+                in: {
+                    p: "عادل",
+                    f: "aadíl",
+                },
+                out: "عادِل",
+            },
+            {
+                in: {
+                    p: "عید",
+                    f: "eed",
+                },
+                out: "عِید",
+            },
        ],
    },
    {
@ -687,6 +926,25 @@ const diacriticsSections: {
            },
        ],
    },
+    {
+        describe: "joiner و",
+        tests: [
+            {
+                in: {
+                    p: "کار و بار",
+                    f: "kaar-U-baar",
+                },
+                out: "کار و بار",
+            },
+            {
+                in: {
+                    p: "کاروبار",
+                    f: "kaar-U-baar",
+                },
+                out: "کاروبار",
+            },
+        ],
+    },
    {
        describe: "special behaviour with د",
        tests: [
@ -716,13 +974,13 @@ const diacriticsSections: {
    {
        describe: "ha ending with ح",
        tests: [
-            // {
-            //     in: {
-            //         p: "ذبح",
-            //         f: "zabha",
-            //     },
-            //     out: "ذَبْحَ",
-            // },
+            {
+                in: {
+                    p: "ذبح",
+                    f: "zabha",
+                },
+                out: "ذَبْحَ",
+            },
            {
                in: {
                    p: "ذبح کول",
@ -764,10 +1022,42 @@ const diacriticsSections: {
                out: "مَعَنیٰ",
            },
        ],
-    }
+    },
+    {
+        describe: "require fathatan on words ending in اً ",
+        tests: [
+            {
+                in: {
+                    p: "دقیقا",
+                    f: "daqeeqan",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "دقیقاً",
+                    f: "daqeeqan",
+                },
+                out: "دَقِیقاً",
+            },
+        ],
+    },
+    {
+        describe: "Ua ؤ",
+        tests: [
+            {
+                in: {
+                    p: "مودب",
+                    f: "mUaddab",
+                },
+                out: "مؤدَّب",
+            },
+        ],
+    },
 ];

 diacriticsSections.forEach((section) => {
+    // if (!section.describe.includes("require fathatan")) return;
    describe(section.describe, () => {
        section.tests.forEach((t) => {
            if (t.out) {
@ -785,34 +1075,34 @@ diacriticsSections.forEach((section) => {

 // ERRORS

-const brokenDiacritics = [
-    {
-        p: "تشناب",
-        f: "peshnaab",
-    },
-    {
-        p: "وسېدل",
-        f: "osedul",
-    },
-];
+// const brokenDiacritics = [
+//     {
+//         p: "تشناب",
+//         f: "peshnaab",
+//     },
+//     {
+//         p: "وسېدل",
+//         f: "osedul",
+//     },
+// ];

-test("ending with left over Pashto script will throw an error", () => {
-    expect(() => {
-        addDiacritics({ p: "کور ته", f: "kor" });
-    }).toThrow(`phonetics error - phonetics shorter than pashto script`);
-});
+// test("ending with left over Pashto script will throw an error", () => {
+//     expect(() => {
+//         addDiacritics({ p: "کور ته", f: "kor" });
+//     }).toThrow(`phonetics error - phonetics shorter than pashto script`);
+// });

-test("ending with left over phonetics will throw an error", () => {
-    expect(() => {
-        addDiacritics({ p: "کار", f: "kaar kawul" });
-    }).toThrow();
-});
+// test("ending with left over phonetics will throw an error", () => {
+//     expect(() => {
+//         addDiacritics({ p: "کار", f: "kaar kawul" });
+//     }).toThrow();
+// });

-test("adding diacritics errors when phonetecs and pashto do not line up", () => {
-    brokenDiacritics.forEach((t) => {
-        expect(() => {
-            addDiacritics(t);
-        }).toThrow();
-    });
-});
+// test("adding diacritics errors when phonetecs and pashto do not line up", () => {
+//     brokenDiacritics.forEach((t) => {
+//         expect(() => {
+//             addDiacritics(t);
+//         }).toThrow();
+//     });
+// });

--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@ -21,15 +21,15 @@ import {
    wasla,
    daggerAlif,
    fathahan,
-    lastNonWhitespace,
    addP,
-    last,
    advanceP,
    reverseP,
    overwriteP,
    advanceForHamza,
    advanceForHamzaMid,
    DiacriticsAccumulator,
+    stateInfo,
+    PhonemeStatus,
 } from "./diacritics-helpers";

 import { firstPhonetics } from "./p-text-helpers";
@ -51,27 +51,6 @@ import { pipe } from "rambda";
    };
 }

-enum PhonemeStatus {
-    LeadingLongVowel,
-    LeadingConsonantOrShortVowel,
-    DoubleConsonantTashdeed,
-    EndingWithHeyHim,
-    DirectMatch,
-    DirectMatchAfterSukun,
-    EndingWithHeyHimFromSukun,
-    ShortVowel,
-    PersianSilentWWithAa,
-    ArabicWasla,
-    Izafe,
-    EndOfDuParticle,
-    HaEndingWithHeem,
-    AlefDaggarEnding,
-    LongAinVowelMissingComma,
-    ShortAinVowelMissingComma,
-    AlefWithHamza,
-    AlefWithHamzaWithGlottalStop,
-}
-
 function processPhoneme(
    acc: DiacriticsAccumulator,
    phoneme: Phoneme,
@ -96,6 +75,7 @@ function processPhoneme(
        phonemeInfo,
        diacritic,
        phs,
+        prevPLetter,
    } = stateInfo({ state, i, phoneme, phonemes });

    // console.log("phoneme", phoneme);
@ -154,10 +134,9 @@ function processPhoneme(
                reverseP,
                addP(zwarakey),
            )(state)
-        : (phs === PhonemeStatus.HaEndingWithHeem) ?
+        : (phs === PhonemeStatus.ShortAEndingAfterHeem) ?
            pipe(
-                reverseP,
-                // prevPLetter === " " ? reverseP ,
+                prevPLetter === " " ? reverseP : addP(""),
                addP(zwar),
            )(state)
        : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
@ -181,114 +160,44 @@ function processPhoneme(
                addP(diacritic),
                advanceP,
            )(state)
+        : (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
+            pipe(
+                advanceP,
+                advanceP,
+            )(state)
+        : (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
+            pipe(
+               advanceP,
+               advanceP, 
+            )(state)
        : (phs === PhonemeStatus.AlefWithHamza) ?
            pipe(
                advanceP,
            )(state)
-        : (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ?
-            state
-        :
-        // phs === PhonemeState.ShortVowel
+        : (phs === PhonemeStatus.ShortVowel) ?
            pipe(
                advanceForHamzaMid,
                addP(phonemeInfo.diacritic),
                // TODO THIS?
                advanceForHamza,
-            )(state);
+            )(state)
+        : (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
+            pipe(
+                advanceP,
+            )(state)
+        : (phs === PhonemeStatus.NOnFathatan) ?
+            pipe(
+                advanceP,
+            )(state)
+        : state;
+        
+        // (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ?
+        //    state
+        // : (phs === PhonemeStatus.AinBeginningAfterShortVowel) ?
+        //    state
+        //: (phs === PhonemeStatus.WoEndingO) ?
+        //    state
+        // :
+        // 
+
 }
-
-
-
-function stateInfo({ state, i, phonemes, phoneme }: {
-    state: DiacriticsAccumulator,
-    i: number,
-    phonemes: Phoneme[],
-    phoneme: Phoneme,
-}) {
-    const prevPLetter = last(state.pOut);
-    const currentPLetter = state.pIn[0];
-    const nextPLetter = state.pIn[1];
-    const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
-    const isEndOfWord = !nextPLetter || nextPLetter === " ";
-    const phonemeInfo = phonemeTable[phoneme];
-    const nextPhoneme = phonemes[i+1];
-    const previousPhoneme = i > 0 && phonemes[i-1];
-    const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
-    // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
-    // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
-    const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
-    const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
-    const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
-    const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
-    const diacritic = useAinBlendDiacritics
-        ? phonemeInfo.ainBlendDiacritic
-        : isEndOfWord 
-        ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
-
-    function getPhonemeState(): PhonemeStatus {
-        if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
-            if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
-                throw Error("phonetics error - needs alef prefix");
-            }
-            return PhonemeStatus.LeadingLongVowel;
-        }
-        if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
-            return PhonemeStatus.LeadingConsonantOrShortVowel;
-        }
-        // console.log("------");
-        // console.log("phoneme", phoneme);
-        // console.log("state", state);
-        // console.log("prevPLetter is space", prevPLetter === " ");
-        // console.log("------");
-        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
-            return PhonemeStatus.EndOfDuParticle
-        }
-        if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
-            return PhonemeStatus.PersianSilentWWithAa;
-        }
-        if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
-            return PhonemeStatus.ArabicWasla;
-        }
-        if (phoneme === "-i-" && isBeginningOfWord) {
-            return PhonemeStatus.Izafe;
-        }
-        if (phoneme === "a" && currentPLetter === "أ") {
-            return PhonemeStatus.AlefWithHamza;
-        }
-        if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
-            return PhonemeStatus.AlefWithHamzaWithGlottalStop;
-        }
-        if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'" && phonemeInfo.diacritic && !phonemeInfo.longVowel) {
-            return PhonemeStatus.ShortAinVowelMissingComma;
-        }
-        if (useAinBlendDiacritics) {
-            return PhonemeStatus.LongAinVowelMissingComma;
-        }
-        if (needsTashdeed) {
-            return PhonemeStatus.DoubleConsonantTashdeed;
-        }
-        if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
-            return PhonemeStatus.AlefDaggarEnding;
-        }
-        if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") {
-            return PhonemeStatus.HaEndingWithHeem;
-        }
-        if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
-            return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
-        }
-        if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
-            return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
-        }
-        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {  
-            return PhonemeStatus.ShortVowel;
-        }
-        // console.log("bad phoneme is ", phoneme);
-        throw new Error("phonetics error - no status found for phoneme: " + phoneme);
-    }
-
-    const phs = getPhonemeState();
-
-    return {
-        phs, phonemeInfo, diacritic,
-    };
-};