working with special du behaviour

2021-05-16 18:00:05 +03:00 · 2021-05-16 18:00:05 +03:00 · 5d41d953a0
parent 73f786890e
commit 5d41d953a0
4 changed files with 62 additions and 37 deletions
--- a/src/lib/diacritics-helpers.test.ts
+++ b/src/lib/diacritics-helpers.test.ts
@ -2,7 +2,7 @@ import {
    splitFIntoPhonemes,
    last,
    addP,
-    prev2Chars,
+    lastNonWhitespace,
    advanceP,
    reverseP,
    overwriteP,
@ -97,8 +97,32 @@ test("addP should work", () => {
    });
 });

-test("prev2Chars should work", () => {
-    expect(prev2Chars("تورن")).toBe("رن");
-    expect(prev2Chars("وست .. ")).toBe("ست");
-    expect(prev2Chars("دَ ... ")).toBe("دَ");
+test("lastNonWhiteSpace should work", () => {
+    expect(lastNonWhitespace("تورن")).toBe("ن");
+    expect(lastNonWhitespace("وست .. ")).toBe("ت");
+    expect(lastNonWhitespace("د ... ")).toBe("د");
 });
+
+test("reverseP should work", () => {
+    expect(reverseP({
+        pIn: "کور",
+        pOut: "تور ",
+    })).toEqual({
+        pIn: " کور",
+        pOut: "تور",
+    });
+    expect(reverseP({
+        pIn: "کور",
+        pOut: "تور ... ",
+    })).toEqual({
+        pIn: " ... کور",
+        pOut: "تور",
+    });
+    expect(reverseP({
+        pIn: "کور",
+        pOut: "تور . ",
+    })).toEqual({
+        pIn: " . کور",
+        pOut: "تور",
+    });
+})
--- a/src/lib/diacritics-helpers.ts
+++ b/src/lib/diacritics-helpers.ts
@ -353,19 +353,16 @@ export const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): D
 };

 /**
- * returns the last two character in a string that was not a space or a dote
+ * returns the last character that is not a space (" ") or a dot ("."), skipping any trailing ones
 * 
 * @param s 
 * @returns 
 */
-export function prev2Chars(s: string): string {
-    // console.log("looking at pOut", s);
+export function lastNonWhitespace(s: string): string {
    const reversed = [...s].reverse();
-    // console.log(reversed.join("-"));
    const lastIndex = reversed.findIndex((c) => ![" ", "."].includes(c));
-    const last2 = reversed[lastIndex + 1] + reversed[lastIndex];
-    // console.log("last2", last2);
-    return last2;
+    const penultimateChar = reversed[lastIndex];
+    return penultimateChar;
 }

 export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
@ -587,13 +587,13 @@ const diacriticsSections: {
                },
                out: "د" + zwarakey + " لاس",
            },
-            // {
-            //     in: {
-            //         p: "د ... په شان",
-            //         f: "du ... pu shaan",
-            //     },
-            //     out: "د" + zwarakey + "... پهٔ شان",
-            // },
+            {
+                in: {
+                    p: "د ... په شان",
+                    f: "du ... pu shaan",
+                },
+                out: "د" + zwarakey + " ... پهٔ شان",
+            },
        ],
    },
 ];
@ -601,7 +601,6 @@ const diacriticsSections: {
 diacriticsSections.forEach((section) => {
    describe(section.describe, () => {
        section.tests.forEach((t) => {
-            if (section.describe === "special behaviour with د") {
            if (t.out) {
                test(`diacritics should work for ${t.in.p} - ${t.in.f}`, () => {
                    expect(addDiacritics(t.in)).toEqual({ p: t.out, f: t.in.f });
@ -611,7 +610,6 @@ diacriticsSections.forEach((section) => {
                    expect(addDiacritics(t.in)).toThrowError();
                });
            }
-            }
        });
    });
 });
--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@ -21,7 +21,7 @@ import {
    wasla,
    daggerAlif,
    fathahan,
-    prev2Chars,
+    lastNonWhitespace,
    addP,
    last,
    advanceP,
@ -75,7 +75,10 @@ function processPhoneme(
    // console.log("space coming up", acc.pIn[0] === " ");
    // console.log("state", acc);
    // Prep state
-    const state = acc.pIn[0] === " "
+    // TODO: replace this with a cleaner helper function that jumps to the next char
+    const state = acc.pIn.slice(0, 5) === " ... "
+        ? advanceP(acc, 5)
+        : acc.pIn[0] === " "
        ? advanceP(acc)
        : acc;
    // console.log("AFTER SPACE PREP", phoneme);
@ -132,10 +135,10 @@ function processPhoneme(
                addP(zer),
            )(state)
        : (phs === PhonemeStatus.EndOfDuParticle) ?
-            (console.log("here"), pipe(
+            pipe(
                reverseP,
                addP(zwarakey),
-            )(state))
+            )(state)
        :
        // phs === PhonemeState.ShortVowel
            pipe(
@ -177,9 +180,12 @@ function stateInfo({ state, i, phonemes, phoneme }: {
        if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
            return PhonemeStatus.LeadingConsonantOrShortVowel;
        }
-        console.log(phoneme, phonemes, prev2Chars(state.pOut))
-        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && prev2Chars(state.pOut) === ("د" + zwarakey)) {
-            // console.log("du here", phoneme, phonemes);
+        // console.log("------");
+        // console.log("phoneme", phoneme);
+        // console.log("state", state);
+        // console.log("prevPLetter is space", prevPLetter === " ");
+        // console.log("------");
+        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
            return PhonemeStatus.EndOfDuParticle
        }
        if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {