more work failing on phonetics

2021-05-24 22:06:03 +04:30 · 2021-05-24 22:06:03 +04:30 · fb71efd51d
parent 5d41d953a0
commit fb71efd51d
2 changed files with 121 additions and 24 deletions
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
@ -103,6 +103,13 @@ const diacriticsSections: {
                },
                out: "تَشْناب",
            },
            {
                in: {
                    p: "پسته",
                    f: "pasta",
                },
                out: "پَسْتَه",
            },
            // working with وs
            {
                in: {
@ -498,6 +505,14 @@ const diacriticsSections: {
                },
                out: "عِزَّت",
            },
            // middle ع
            {
                in: {
                    p: "معنا",
                    f: "ma'anaa",
                },
                out: "مَعَنا",
            },
            // ending with ayn
            {
                in: {
@ -596,6 +611,58 @@ const diacriticsSections: {
            },
        ],
    },
    {
        describe: "ha ending with ح",
        tests: [
            {
                in: {
                    p: "ذبح",
                    f: "zabha",
                },
                out: "ذَبْحَ",
            },
            {
                in: {
                    p: "ذبح کول",
                    f: "zabha kawul",
                },
                out: "ذَبْحَ کَو" + zwarakey + "ل",
            },
        ],
    },
    {
        describe: "require dagger alif on words ending with یٰ",
        tests: [
            {
                in: {
                    p: "یحیی",
                    f: "yahyaa",
                },
                out: null,
            },
            {
                in: {
                    p: "یحییٰ",
                    f: "yahyaa",
                },
                out: "یَحْییٰ",
            },
            {
                in: {
                    p: "یحییٰ چېرته",
                    f: "yahyaa cherta",
                },
                out: "یَحْییٰ چېرْتَه",
            },
            {
                in: {
                    p: "معنیٰ",
                    f: "ma'anaa",
                },
                out: "مَعَنیٰ",
            },
        ],
    }
 ];
 diacriticsSections.forEach((section) => {
@ -627,23 +694,23 @@ const brokenDiacritics = [
    },
 ];
-// test("ending with left over Pashto script will throw an error", () => {
+test("ending with left over Pashto script will throw an error", () => {
-//     expect(() => {
+    expect(() => {
-//         addDiacritics({ p: "کور ته", f: "kor" });
+        addDiacritics({ p: "کور ته", f: "kor" });
-//     }).toThrow(`phonetics error - phonetics shorter than pashto script`);
+    }).toThrow(`phonetics error - phonetics shorter than pashto script`);
-// });
+});
-// test("ending with left over phonetics will throw an error", () => {
+test("ending with left over phonetics will throw an error", () => {
-//     expect(() => {
+    expect(() => {
-//         addDiacritics({ p: "کار", f: "kaar kawul" });
+        addDiacritics({ p: "کار", f: "kaar kawul" });
-//     }).toThrow();
+    }).toThrow();
-// });
+});
-// test("adding diacritics errors when phonetecs and pashto do not line up", () => {
+test("adding diacritics errors when phonetecs and pashto do not line up", () => {
-//     brokenDiacritics.forEach((t) => {
+    brokenDiacritics.forEach((t) => {
-//         expect(() => {
+        expect(() => {
-//             addDiacritics(t);
+            addDiacritics(t);
-//         }).toThrow();
+        }).toThrow();
-//     });
+    });
-// });
+});
--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@ -58,11 +58,15 @@ enum PhonemeStatus {
    DoubleConsonantTashdeed,
    EndingWithHeyHim,
    DirectMatch,
    DirectMatchAfterSukun,
    EndingWithHeyHimFromSukun,
    ShortVowel,
    PersianSilentWWithAa,
    ArabicWasla,
    Izafe,
    EndOfDuParticle,
    HaEndingWithHeem,
    AlefDaggarEnding,
 }
 function processPhoneme(
@ -87,8 +91,9 @@ function processPhoneme(
    const {
        phonemeInfo,
-        sukunOrDiacritic,
+        diacritic,
        phs,
        prevPLetter,
    } = stateInfo({ state, i, phoneme, phonemes });
    return (phs === PhonemeStatus.LeadingLongVowel) ?
@ -100,7 +105,7 @@ function processPhoneme(
        : (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
            pipe(
                advanceP,
-                addP(sukunOrDiacritic),
+                addP(diacritic),
                advanceForAin,
            )(state)
        : (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
@ -114,7 +119,12 @@ function processPhoneme(
            )(state)
        : (phs === PhonemeStatus.DirectMatch) ?
            pipe(
-                addP(sukunOrDiacritic),
+                addP(diacritic),
                advanceP,
            )(state)
        : (phs === PhonemeStatus.DirectMatchAfterSukun) ?
            pipe(
                addP(sukun),
                advanceP,
            )(state)
        : (phs === PhonemeStatus.PersianSilentWWithAa) ?
@ -139,6 +149,21 @@ function processPhoneme(
                reverseP,
                addP(zwarakey),
            )(state)
        : (phs === PhonemeStatus.HaEndingWithHeem) ?
            pipe(
                prevPLetter === " " ? reverseP : (s: any) => s,
                addP(zwar),
            )(state)
        : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
            pipe(
                addP(sukun),
                advanceP,
            )(state)
        : (phs === PhonemeStatus.AlefDaggarEnding) ?
            pipe(
                advanceP,
                advanceP,
            )(state)
        :
        // phs === PhonemeStatus.ShortVowel
            pipe(
@ -168,7 +193,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
    const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
    const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
    const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
    const sukunOrDiacritic = (needsSukun ? sukun : diacritic);
    function getPhonemeState(): PhonemeStatus {
        if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
@ -200,11 +224,17 @@ function stateInfo({ state, i, phonemes, phoneme }: {
        if (needsTashdeed) {
            return PhonemeStatus.DoubleConsonantTashdeed;
        }
        if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
            return PhonemeStatus.AlefDaggarEnding;
        }
        if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") {
            return PhonemeStatus.HaEndingWithHeem;
        }
        if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
-            return PhonemeStatus.EndingWithHeyHim;
+            return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
        }
        if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
-            return PhonemeStatus.DirectMatch;
+            return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
        }
        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
            return PhonemeStatus.ShortVowel;
@ -216,6 +246,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
    const phs = getPhonemeState();
    return {
-        phs, phonemeInfo, sukunOrDiacritic,
+        phs, phonemeInfo, diacritic, prevPLetter,
    };
 };