more work failing on phonetics

2021-05-24 22:06:03 +04:30 · 2021-05-24 22:06:03 +04:30 · fb71efd51d
parent 5d41d953a0
commit fb71efd51d
2 changed files with 121 additions and 24 deletions
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
@ -103,6 +103,13 @@ const diacriticsSections: {
                },
                out: "تَشْناب",
            },
+            {
+                in: {
+                    p: "پسته",
+                    f: "pasta",
+                },
+                out: "پَسْتَه",
+            },
            // working with وs
            {
                in: {
@ -498,6 +505,14 @@ const diacriticsSections: {
                },
                out: "عِزَّت",
            },
+            // middle ع
+            {
+                in: {
+                    p: "معنا",
+                    f: "ma'anaa",
+                },
+                out: "مَعَنا",
+            },
            // ending with ayn
            {
                in: {
@ -596,6 +611,58 @@ const diacriticsSections: {
            },
        ],
    },
+    {
+        describe: "ha ending with ح",
+        tests: [
+            {
+                in: {
+                    p: "ذبح",
+                    f: "zabha",
+                },
+                out: "ذَبْحَ",
+            },
+            {
+                in: {
+                    p: "ذبح کول",
+                    f: "zabha kawul",
+                },
+                out: "ذَبْحَ کَو" + zwarakey + "ل",
+            },
+        ],
+    },
+    {
+        describe: "require dagger alif on words ending with یٰ",
+        tests: [
+            {
+                in: {
+                    p: "یحیی",
+                    f: "yahyaa",
+                },
+                out: null,
+            },
+            {
+                in: {
+                    p: "یحییٰ",
+                    f: "yahyaa",
+                },
+                out: "یَحْییٰ",
+            },
+            {
+                in: {
+                    p: "یحییٰ چېرته",
+                    f: "yahyaa cherta",
+                },
+                out: "یَحْییٰ چېرْتَه",
+            },
+            {
+                in: {
+                    p: "معنیٰ",
+                    f: "ma'anaa",
+                },
+                out: "مَعَنیٰ",
+            },
+        ],
+    }
 ];

 diacriticsSections.forEach((section) => {
@ -627,23 +694,23 @@ const brokenDiacritics = [
    },
 ];

-// test("ending with left over Pashto script will throw an error", () => {
-//     expect(() => {
-//         addDiacritics({ p: "کور ته", f: "kor" });
-//     }).toThrow(`phonetics error - phonetics shorter than pashto script`);
-// });
+test("ending with left over Pashto script will throw an error", () => {
+    expect(() => {
+        addDiacritics({ p: "کور ته", f: "kor" });
+    }).toThrow(`phonetics error - phonetics shorter than pashto script`);
+});

-// test("ending with left over phonetics will throw an error", () => {
-//     expect(() => {
-//         addDiacritics({ p: "کار", f: "kaar kawul" });
-//     }).toThrow();
-// });
+test("ending with left over phonetics will throw an error", () => {
+    expect(() => {
+        addDiacritics({ p: "کار", f: "kaar kawul" });
+    }).toThrow();
+});

-// test("adding diacritics errors when phonetecs and pashto do not line up", () => {
-//     brokenDiacritics.forEach((t) => {
-//         expect(() => {
-//             addDiacritics(t);
-//         }).toThrow();
-//     });
-// });
+test("adding diacritics errors when phonetecs and pashto do not line up", () => {
+    brokenDiacritics.forEach((t) => {
+        expect(() => {
+            addDiacritics(t);
+        }).toThrow();
+    });
+});

--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@ -58,11 +58,15 @@ enum PhonemeStatus {
    DoubleConsonantTashdeed,
    EndingWithHeyHim,
    DirectMatch,
+    DirectMatchAfterSukun,
+    EndingWithHeyHimFromSukun,
    ShortVowel,
    PersianSilentWWithAa,
    ArabicWasla,
    Izafe,
    EndOfDuParticle,
+    HaEndingWithHeem,
+    AlefDaggarEnding,
 }

 function processPhoneme(
@ -87,8 +91,9 @@ function processPhoneme(

    const {
        phonemeInfo,
-        sukunOrDiacritic,
+        diacritic,
        phs,
+        prevPLetter,
    } = stateInfo({ state, i, phoneme, phonemes });

    return (phs === PhonemeStatus.LeadingLongVowel) ?
@ -100,7 +105,7 @@ function processPhoneme(
        : (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
            pipe(
                advanceP,
-                addP(sukunOrDiacritic),
+                addP(diacritic),
                advanceForAin,
            )(state)
        : (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
@ -114,7 +119,12 @@ function processPhoneme(
            )(state)
        : (phs === PhonemeStatus.DirectMatch) ?
            pipe(
-                addP(sukunOrDiacritic),
+                addP(diacritic),
+                advanceP,
+            )(state)
+        : (phs === PhonemeStatus.DirectMatchAfterSukun) ?
+            pipe(
+                addP(sukun),
                advanceP,
            )(state)
        : (phs === PhonemeStatus.PersianSilentWWithAa) ?
@ -139,6 +149,21 @@ function processPhoneme(
                reverseP,
                addP(zwarakey),
            )(state)
+        : (phs === PhonemeStatus.HaEndingWithHeem) ?
+            pipe(
+                prevPLetter === " " ? reverseP : (s: any) => s,
+                addP(zwar),
+            )(state)
+        : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
+            pipe(
+                addP(sukun),
+                advanceP,
+            )(state)
+        : (phs === PhonemeStatus.AlefDaggarEnding) ?
+            pipe(
+                advanceP,
+                advanceP,
+            )(state)
        :
        // phs === PhonemeStatus.ShortVowel
            pipe(
@ -168,7 +193,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
    const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
    const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
    const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
-    const sukunOrDiacritic = (needsSukun ? sukun : diacritic);

    function getPhonemeState(): PhonemeStatus {
        if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
@ -200,11 +224,17 @@ function stateInfo({ state, i, phonemes, phoneme }: {
        if (needsTashdeed) {
            return PhonemeStatus.DoubleConsonantTashdeed;
        }
+        if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
+            return PhonemeStatus.AlefDaggarEnding;
+        }
+        if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") {
+            return PhonemeStatus.HaEndingWithHeem;
+        }
        if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
-            return PhonemeStatus.EndingWithHeyHim;
+            return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
        }
        if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
-            return PhonemeStatus.DirectMatch;
+            return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
        }
        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
            return PhonemeStatus.ShortVowel;
@ -216,6 +246,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
    const phs = getPhonemeState();

    return {
-        phs, phonemeInfo, sukunOrDiacritic,
+        phs, phonemeInfo, diacritic, prevPLetter,
    };
 };