more

2021-06-03 20:42:07 +04:30 · 2021-06-03 20:42:07 +04:30 · a62ab986ba
parent b1aac313f9
commit a62ab986ba
4 changed files with 73 additions and 5 deletions
--- a/src/lib/diacritics-helpers.ts
+++ b/src/lib/diacritics-helpers.ts
@ -274,7 +274,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
    const willIgnore = ["?", " ", "`", ".", "…", ","];
    const result: Phoneme[] = [];
-    const f = removeAccents(fIn);
+    const f = removeAccents(fIn).replace(/ă/g, "a");
    let index = 0;
    while (index < f.length) {
        const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
@ -336,6 +336,7 @@ export enum PhonemeStatus {
    ShortAForAlefBeforeFathatan,
    NOnFathatan,
    HamzaOnWow,
+    ArabicDefiniteArticleUl,
 }
 export function stateInfo({ state, i, phonemes, phoneme }: {
@ -348,11 +349,11 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
    const prevPLetter = last(state.pOut);
    const currentPLetter = state.pIn[0];
    const nextPLetter = state.pIn[1];
-    const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
-    const isEndOfWord = isOutOfWord(nextPLetter);
-    const phonemeInfo = phonemeTable[phoneme];
    const nextPhoneme = phonemes[i+1];
    const previousPhoneme = i > 0 && phonemes[i-1];
+    const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل");
+    const isEndOfWord = isOutOfWord(nextPLetter);
+    const phonemeInfo = phonemeTable[phoneme];
    const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
    // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
    // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
@ -391,6 +392,9 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
            return PhonemeStatus.EndOfDuParticle
        }
+        if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") {
+            return PhonemeStatus.ArabicDefiniteArticleUl;
+        }
        if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
            return PhonemeStatus.HamzaOnWow;
        }
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
@ -586,6 +586,13 @@ const diacriticsSections: {
                },
                out: null,
            },
            {
                in: {
                    p: "سختسری",
                    f: "sakht sărey",
                },
                out: "سَخْتْسَری",
            },
        ],
    },
    {
@ -805,6 +812,20 @@ const diacriticsSections: {
                },
                out: "تَوَقُّع",
            },
            {
                in: {
                    p: "راجع کېدل",
                    f: "raaji kedul",
                },
                out: "راجِع کېد" + zwarakey + "ل",
            },
            {
                in: {
                    p: "ربیع",
                    f: "rabee'",
                },
                out: "رَبِیع",
            },
        ],
    },
    {
@ -1054,6 +1075,44 @@ const diacriticsSections: {
            },
        ],
    },
    {
        describe: "With Arabic definate article -Ul- ال",
        tests: [
            {
                in: {
                    p: "حق الاجاره",
                    f: "haq-Ul-ijaara",
                },
                out: "حَق اُلاِجارَه",
            },
            {
                in: {
                    p: "دار العلوم",
                    f: "daar-Ul-Ulóom",
                },
                out: "دار اُلعُلُوم",
            },
        ],
    },
    // {
    //     describe: "double consonants on end of words",
    //     tests: [
    //         {
    //             in: {
    //                 p: "حق",
    //                 f: "haqq",
    //             },
    //             out: "حَقّ",
    //         },
    //         {
    //             in: {
    //                 p: "حق پر",
    //                 f: "haqq par",
    //             },
    //             out: "حَقّ پَر",
    //         },
    //     ],
    // },
 ];
 diacriticsSections.forEach((section) => {
--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@ -195,6 +195,12 @@ function processPhoneme(
                addP(hamzaAbove),
                addP(diacritic),
            )(state)
+        : (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
+            pipe(
+                advanceP,
+                addP(pesh),
+                advanceP,
+            )(state)
        : state;
--- a/src/lib/phonetics-to-diacritics.test.ts
+++ b/src/lib/phonetics-to-diacritics.test.ts
@ -970,7 +970,6 @@ const toTest: Array<{
        },
        out: "حَقّ پَر",
    },
    // TODO: Allow ' in there
    {
        in: {
            p: "راجع کېدل",