More work on the upcoming diacritics engine; refactor processPhoneme to a functional style (rambda pipe, curried addP)

2021-05-07 14:48:33 +03:00 · 2021-05-07 14:48:33 +03:00 · 6053d11bc0
parent 3aaee3b6f2
commit 6053d11bc0
4 changed files with 97 additions and 21 deletions
--- a/package.json
+++ b/package.json
@@ -24,7 +24,8 @@
  },
  "dependencies": {
    "classnames": "^2.2.6",
-    "pbf": "^3.2.1"
+    "pbf": "^3.2.1",
+    "rambda": "^6.7.0"
  },
  "devDependencies": {
    "@fortawesome/fontawesome-free": "^5.15.2",
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
@@ -258,7 +258,7 @@ const diacriticsTest: Array<{
        },
        out: "اِیسار",
    },
-    // double consonant
+    // double consonant / tashdeed
    {
        in: {
            p: "بتن",
@@ -266,6 +266,50 @@ const diacriticsTest: Array<{
        },
        out: "ب" + zwar + "ت" + tashdeed + zwar + "ن",
    },
+    {
+        in: {
+            p: "بتطن",
+            f: "battan",
+        },
+        out: "ب" + zwar + "ت" + sukun + "ط" + zwar + "ن",
+    },
+    // vowel endings working
+    {
+        in: {
+            p: "بته",
+            f: "bata",
+        },
+        out: "بَتَه",
+    },
+    {
+        in: {
+            p: "بته",
+            f: "bati",
+        },
+        out: "بَتِه",
+    },
+    {
+        in: {
+            p: "پرمختیا",
+            f: "parmakhtyaa",
+        },
+        out: "پَرْمَخْتْیا",
+    },
+    // {
+    //     in: {
+    //         p: "پته",
+    //         f: "patta",
+    //     },
+    //     out: "پَتّه",
+    // },
+    // get ayn stuff working
+    // {
+    //     in: {
+    //         p: "اعتصاب شکن",
+    //         f: "itisaabshikan",
+    //     },
+    //     out: "اِعتِصاب شِکَن",
+    // },
    // avoid false double consonant
    {
        in: {
--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@@ -9,6 +9,7 @@
 import * as T from "../types";
 import { removeAccents } from "./accent-helpers";
 import { firstPhonetics } from "./p-text-helpers";
+import { pipe } from "rambda";

 const zwar = "َ";
 const zwarakey = "ٙ";
@@ -341,36 +342,54 @@ function processPhoneme(
    const currentPLetter = state.pIn[0];
    const nextPLetter = state.pIn[1];
    const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
+    // const isEndOfWord = !nextPLetter || nextPLetter === " ";
    const phonemeInfo = phonemeTable[phoneme];
    const previousPhoneme = i > 0 && phonemes[i-1];
    const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
+    // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
+    // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
    const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
-    const needsTashdeed = doubleConsonant && (previousPhoneme === phoneme);
-    const needsSukun = doubleConsonant && (previousPhoneme !== phoneme);
+    const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
+    const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
+    const sukunOrDiacritic = (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : "");

-    if (needsTashdeed) {
-        return addP(state, tashdeed);
-    }
+    // if it's not an exception (TODO)
+    // it must be one of the following 5 possibilities

+    // 1. beginning a word with a long vowel
    if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
        if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
            throw Error("phonetics error - needs alef prefix");
        }
-        const ns = advanceP(state);
-        const ns2 = phonemeInfo.diacritic ? addP(ns, phonemeInfo.diacritic) : ns;
-        return advanceP(ns2);
+        return pipe(
+            advanceP,
+            addP(phonemeInfo.diacritic),
+            advanceP,
+        )(state);
+    // 2. beginning a word with something else
    } else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
-        const ns = advanceP(state);
-        return addP(ns, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""));
+        return pipe(
+            advanceP,
+            addP(sukunOrDiacritic),
+        )(state);
+    // 3. double consonant to be marked with tashdeed
+    } else if (needsTashdeed) {
+        return addP(tashdeed)(state);
+    // 4. direct match of phoneme / P letter
    } else if (phonemeInfo.matches?.includes(currentPLetter)) {
-        const ns = addP(state, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""));
-        return advanceP(ns);
-    }
-
-    if (phonemeInfo.diacritic) {
-        return addP(state, phonemeInfo.diacritic);
+        return pipe(
+            addP(sukunOrDiacritic),
+            advanceP,
+        )(state);
+    // 5. just a diacritic for short vowel
+    } else if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
+        return pipe(
+            addP(phonemeInfo.diacritic),
+            advanceIfReachedEndingHamza,
+        )(state);
    }

+    // anything that gets to this point is a failure/error
    // console.log(state);
    throw new Error("phonetics error");
 }
@@ -391,9 +410,16 @@ function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumu
    }
 }

-function addP(state: DiacriticsAccumulator, toAdd: string): DiacriticsAccumulator {
+const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
    return {
        ...state,
-        pOut: state.pOut + toAdd,
+        pOut: toAdd ? (state.pOut + toAdd) : state.pOut,
    };
-}
+}
+
+function advanceIfReachedEndingHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
+    if (state.pIn[0] === "ه" && (!state.pIn[1] || state.pIn[1] === " ")) {
+        return advanceP(state);
+    }
+    return state;
+}
--- a/yarn.lock
+++ b/yarn.lock
@@ -9112,6 +9112,11 @@ raf@^3.4.1:
  dependencies:
    performance-now "^2.1.0"

+rambda@^6.7.0:
+  version "6.7.0"
+  resolved "https://registry.yarnpkg.com/rambda/-/rambda-6.7.0.tgz#50322efdd23a108b61eb6ac4e0868d10dd95b4aa"
+  integrity sha512-qg2atEwhAS4ipYoNfggkIP7qBUbY2OqdW17n25VqZIz5YC1MIwSpIToQ7XacvqSCZz16efM8Y8QKLx+Js1Sybg==
+
 randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0:
  version "2.1.0"
  resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"