some more refactoring, getting stuck on the du behaviour 😒

2021-05-16 17:13:42 +03:00 · 2021-05-16 17:13:42 +03:00 · 73f786890e
parent c5c9ea86d2
commit 73f786890e
4 changed files with 1147 additions and 960 deletions
--- a/src/lib/diacritics-helpers.test.ts
+++ b/src/lib/diacritics-helpers.test.ts
@ -0,0 +1,104 @@
 import {
    splitFIntoPhonemes,
    last,
    addP,
    prev2Chars,
    advanceP,
    reverseP,
    overwriteP,
    advanceForAin,
    advanceForAinOrHamza,
    advanceForHamzaMid,
 } from "./diacritics-helpers";
 // Table of phonetics strings and the phoneme sequence each one is expected
 // to split into. One jest test is registered per row.
 const phonemeSplits: Array<{
    in: string,
    out: string[],
 }> = [
    { in: "kor", out: ["k", "o", "r"] },
    { in: "raaghey", out: ["r", "aa", "gh", "ey"] },
    { in: "hatsa", out: ["h", "a", "ts", "a"] },
    { in: "ba", out: ["b", "a"] },
    // accented input: the expected output carries no accent
    { in: "peydáa", out: ["p", "ey", "d", "aa"] },
    // spaces are dropped from the output
    { in: "be kaar", out: ["b", "e", "k", "aa", "r"] },
    { in: "raadzeyy", out: ["r", "aa", "dz", "eyy"] },
    // trailing punctuation is dropped, "uy" before a space is one phoneme
    { in: "badanuy ??", out: ["b", "a", "d", "a", "n", "uy"] },
    { in: "tur ... pore", out: ["t", "u", "r", "p", "o", "r", "e"] },
    { in: "daar-Ul-iqaama", out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"] },
 ];
 for (const { in: phonetics, out: expected } of phonemeSplits) {
    test(`${phonetics} should split properly`, () => {
        expect(splitFIntoPhonemes(phonetics)).toEqual(expected);
    });
 }
 // Phonetics strings containing a character that is not a legal phoneme,
 // together with the offending character expected in the error message.
 const badPhonetics: Array<{
    in: string,
    problem: string,
 }> = [
    { in: "acar", problem: "c" },
    { in: "a7am", problem: "7" },
 ];
 test("bad phonetic characters should throw an error", () => {
    for (const { in: phonetics, problem } of badPhonetics) {
        const attemptSplit = () => {
            splitFIntoPhonemes(phonetics);
        };
        expect(attemptSplit).toThrow(`illegal phonetic character: ${problem}`);
    }
 });
 // last: returns the final character of a string
 test("last should work", () => {
    const finalChar = last("this");
    expect(finalChar).toBe("s");
 });
 // addP: appends text to pOut and leaves pIn untouched
 test("addP should work", () => {
    const append = addP("ت");
    const before = { pIn: "", pOut: "کر" };
    expect(append(before)).toEqual({ pIn: "", pOut: "کرت" });
 });
 // prev2Chars: trailing spaces and dots are skipped before taking two characters
 test("prev2Chars should work", () => {
    const cases: Array<[string, string]> = [
        ["تورن", "رن"],
        ["وست .. ", "ست"],
        ["دَ ... ", "دَ"],
    ];
    for (const [input, expected] of cases) {
        expect(prev2Chars(input)).toBe(expected);
    }
 });
--- a/src/lib/diacritics-helpers.ts
+++ b/src/lib/diacritics-helpers.ts
@ -0,0 +1,401 @@
 /**
 * Copyright (c) 2021 lingdocs.com
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *
 */
 import { removeAccents } from "./accent-helpers";
 /**
 * Working state threaded through the helper functions in this file:
 * characters are moved from the front of pIn onto the end of pOut.
 */
 export type DiacriticsAccumulator = {
    pIn: string,
    pOut: string,
 };
 /** Phonetic symbols classed as consonants */
 type Consonant =
    | "b" | "p" | "t" | "T" | "s" | "j" | "ch" | "kh" | "ts" | "dz"
    | "d" | "D" | "r" | "R" | "z" | "jz" | "G" | "sh" | "x" | "gh"
    | "f" | "q" | "k" | "g" | "l" | "m" | "n" | "N" | "h" | "w" | "y";
 /** The apostrophe symbol */
 type Ain = "'";
 /** Hyphen-wrapped symbols that join two words */
 type JoiningVowel =
    | "-i-"
    | "-U-"
    | "-Ul-";
 type LongVowel =
    | "aa" | "ee" | "e" | "oo"
    | "o" | "ey" | "uy" | "eyy";
 type ShortVowel =
    | "a"
    | "i"
    | "u"
    | "U";
 /** Any single phonetic unit that a phonetics string can be split into */
 export type Phoneme =
    | Consonant
    | Ain
    | LongVowel
    | ShortVowel
    | JoiningVowel;
 /**
 * Per-phoneme metadata. Every field is optional; flag fields are either
 * `true` or absent.
 */
 type PhonemeInfo = {
    // script strings for the phoneme: general / word-beginning / word-ending
    matches?: string[],
    beginningMatches?: string[],
    endingMatches?: string[],
    consonant?: true,
    // diacritic mark associated with the phoneme
    diacritic?: string,
    endingOnly?: true,
    takesSukunOnEnding?: true,
    longVowel?: true,
    canStartWithAynBefore?: true,
    useEndingDiacritic?: true,
 };
 // Single-character constants for the diacritic marks (plus the wasla letter)
 // referenced by phonemeTable and by the diacritics code that imports them.
 // NOTE(review): the trailing names are the usual Arabic-script names for the
 // characters as they appear here — confirm against the actual code points.
 export const zwar = "َ"; // fatha
 export const zwarakey = "ٙ"; // zwarakay
 export const zer = "ِ"; // kasra
 export const pesh = "ُ"; // damma
 export const sukun = "ْ"; // sukun
 export const hamzaAbove = "ٔ"; // hamza above
 export const tashdeed = "ّ"; // shadda
 export const wasla = "ٱ"; // alef wasla
 export const daggerAlif = "ٰ"; // superscript (dagger) alef
 export const fathahan = "ً"; // fathatan
 /**
 * Lookup table with one PhonemeInfo entry for every Phoneme.
 *
 * `matches`, `beginningMatches` and `endingMatches` list the script strings
 * recorded for the phoneme; `diacritic` holds one of the diacritic constants
 * (zwar, zwarakey, zer, pesh). The remaining fields are boolean flags that
 * are either `true` or left out.
 */
 export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
    // Consonants
    "b": {
        matches: ["ب"],
        consonant: true,
    },
    "p": {
        matches: ["پ"],
        consonant: true,
    },
    "t": {
        matches: ["ت", "ط"],
        consonant: true,
    },
    "T": {
        matches: ["ټ"],
        consonant: true,
    },
    "s": {
        matches: ["س", "ص", "ث"],
        consonant: true,
    },
    "j": {
        matches: ["ج"],
        consonant: true,
    },
    "ch": {
        matches: ["چ"],
        consonant: true,
    },
    "kh": {
        matches: ["خ"],
        consonant: true,
    },
    "ts": {
        matches: ["څ"],
        consonant: true,
    },
    "dz": {
        matches: ["ځ"],
        consonant: true,
    },
    "d": {
        matches: ["د"],
        consonant: true,
    },
    "D": {
        matches: ["ډ"],
        consonant: true,
    },
    "r": {
        matches: ["ر"],
        consonant: true,
    },
    "R": {
        matches: ["ړ"],
        consonant: true,
    },
    "z": {
        matches: ["ز", "ذ", "ظ", "ض"],
        consonant: true,
    },
    "jz": {
        matches: ["ژ"],
        consonant: true,
    },
    "G": {
        matches: ["ږ"],
        consonant: true,
    },
    "sh": {
        matches: ["ش"],
        consonant: true,
    },
    "x": {
        matches: ["ښ"],
        consonant: true,
    },
    "gh": {
        matches: ["غ"],
        consonant: true,
    },
    "f": {
        matches: ["ف"],
        consonant: true,
    },
    "q": {
        matches: ["ق"],
        consonant: true,
    },
    "k": {
        matches: ["ک"],
        consonant: true,
    },
    "g": {
        matches: ["ګ"],
        consonant: true,
    },
    "l": {
        matches: ["ل"],
        consonant: true,
    },
    "m": {
        matches: ["م"],
        consonant: true,
    },
    "n": {
        matches: ["ن"],
        consonant: true,
    },
    "N": {
        matches: ["ڼ"],
        consonant: true,
    },
    "h": {
        matches: ["ه", "ح"],
        consonant: true,
        takesSukunOnEnding: true,
    },
    "w": {
        matches: ["و"],
        consonant: true,
    },
    "y": {
        matches: ["ی"],
        consonant: true,
    },
    // Ain
    "'": {
        matches: ["ع", "ئ"],
        consonant: true,
    },
    // Joining Vowels
    // "-i-" has no entry data: no script letters and no diacritic are listed for it
    "-i-": {
    },
    "-U-": {
        matches: [" و ", "و"],
    },
    "-Ul-": {
        matches: ["ال"],
    },
    // Long Vowels
    "aa": {
        matches: ["ا"],
        beginningMatches: ["آ", "ا"],
        endingMatches: ["ا", "یٰ"],
        longVowel: true,
    },
    "ee": {
        matches: ["ی"],
        longVowel: true,
        endingMatches: ["ي"],
        diacritic: zer,
        canStartWithAynBefore: true
    },
    "e": {
        matches: ["ې"],
        longVowel: true,
    },
    "o": {
        matches: ["و"],
        longVowel: true,
    },
    "oo": {
        matches: ["و"],
        longVowel: true,
        // alsoCanBePrefix: true,
        diacritic: pesh,
        useEndingDiacritic: true,
    },
    "ey": {
        matches: ["ی"],
        longVowel: true,
        endingMatches: ["ی"],
    },
    "uy": {
        matches: ["ۍ"],
        longVowel: true,
        endingOnly: true,
    },
    "eyy": {
        matches: ["ئ"],
        longVowel: true,
        endingOnly: true,
    },
    // Short Vowels
    // (these carry a diacritic and no general `matches` list)
    "a": {
        diacritic: zwar,
        endingMatches: ["ه"],
        beginningMatches: ["ا", "ع"],
        // canComeAfterHeyEnding: true,
        // canBeFirstPartOfFathahanEnding: true,
    },
    "u": {
        diacritic: zwarakey,
        endingMatches: ["ه"],
    },
    "i": {
        diacritic: zer,
        endingMatches: ["ه"],
        beginningMatches: ["ا", "ع"],
        // takesDiacriticBeforeGurdaHeyEnding: true,
        // canBeWasla: true,
    },
    "U": {
        diacritic: pesh,
        endingMatches: ["ه"],
        // takesDiacriticBeforeGurdaHeyEnding: true,
        beginningMatches: ["ا", "ع"],
    },
 }
 /**
 * Breaks a phonetics string up into an array of Phonemes.
 *
 * Accents are removed first (via removeAccents). At every position the
 * longest candidate is tried first: four letters, then three, then two and
 * finally a single letter. The two-letter phoneme "uy" is only accepted when
 * it ends the string or is directly followed by a space. Characters listed
 * in `skipped` are dropped without producing a phoneme.
 *
 * @param fIn a phonetics string
 * @returns the phonemes in order of appearance
 * @throws Error if a character is neither a known phoneme nor skippable
 */
 export function splitFIntoPhonemes(fIn: string): Phoneme[] {
    const fourLetter: Phoneme[] = ["-Ul-"];
    const threeLetter: Phoneme[] = ["eyy", "-i-", "-U-"];
    const twoLetter: Phoneme[] = ["aa", "ee", "ey", "oo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
    const twoLetterWordFinal: Phoneme[] = ["uy"];
    const oneLetter: Phoneme[] = ["a", "i", "u", "o", "e", "U", "b", "p", "t", "T", "s", "j", "d", "D", "r", "R", "z", "G", "x", "f", "q", "k", "g", "l", "m", "n", "N", "h", "w", "y"];
    const skipped = ["?", " ", "`", ".", "…", ",", "'"];
    const text = removeAccents(fIn);
    const chunkAt = (start: number, size: number) => text.slice(start, start + size) as Phoneme;
    const phonemes: Phoneme[] = [];
    let pos = 0;
    while (pos < text.length) {
        const quad = chunkAt(pos, 4);
        if (fourLetter.includes(quad)) {
            phonemes.push(quad);
            pos += 4;
            continue;
        }
        const triple = chunkAt(pos, 3);
        if (threeLetter.includes(triple)) {
            phonemes.push(triple);
            pos += 3;
            continue;
        }
        const pair = chunkAt(pos, 2);
        // the pair closes a word when nothing, or a space, comes right after it
        const atWordEnd = pos === text.length - 2 || text[pos + 2] === " ";
        const isPair = twoLetter.includes(pair) || (atWordEnd && twoLetterWordFinal.includes(pair));
        if (isPair) {
            phonemes.push(pair);
            pos += 2;
            continue;
        }
        const single = chunkAt(pos, 1);
        pos += 1;
        if (skipped.includes(single)) {
            continue;
        }
        if (!oneLetter.includes(single)) {
            throw new Error(`illegal phonetic character: ${single}`);
        }
        phonemes.push(single);
    }
    return phonemes;
 }
 /**
 * Gives back the final character of a string
 * (undefined when the string is empty).
 *
 * @param s the string to look at
 */
 export function last(s: string) {
    const finalIndex = s.length - 1;
    return s[finalIndex];
 }
 /**
 * Moves the first n characters of pIn onto the end of pOut.
 *
 * @param state the current accumulator
 * @param n how many characters to move (defaults to 1)
 * @returns a new accumulator; the one passed in is not modified
 */
 export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
    const { pIn, pOut } = state;
    const consumed = pIn.slice(0, n);
    const remaining = pIn.slice(n);
    return { pIn: remaining, pOut: pOut + consumed };
 }
 /**
 * Moves back to the last character that wasn't a " " or ".": every trailing
 * space or dot at the end of pOut is handed back to the front of pIn, so that
 * pOut ends on its last "real" character.
 *
 * If pOut has no trailing spaces/dots the state comes back unchanged, and if
 * pOut consists only of spaces/dots (or is empty) all of it is handed back.
 *
 * @param state the current accumulator
 * @returns a new accumulator; the one passed in is not modified
 */
 export function reverseP(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const { pIn, pOut } = state;
    // Walk back from the end of pOut over trailing spaces and dots.
    // An explicit cut index is used instead of negative slice offsets because
    // slice(-0) is slice(0), which would hand back the whole of pOut.
    let cut = pOut.length;
    while (cut > 0 && (pOut[cut - 1] === " " || pOut[cut - 1] === ".")) {
        cut--;
    }
    return {
        pIn: pOut.slice(cut) + pIn,
        pOut: pOut.slice(0, cut),
    };
 }
 /**
 * Builds a step that appends `toAdd` to pOut and leaves everything else in
 * the state alone. When `toAdd` is undefined or empty the step returns a
 * copy of the state with pOut unchanged.
 */
 export const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
    if (!toAdd) {
        return { ...state };
    }
    return { ...state, pOut: state.pOut + toAdd };
 };
 /**
 * Builds a step that drops the first character of pIn and appends `toWrite`
 * to pOut in its place.
 */
 export const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
    const remaining = state.pIn.slice(1);
    const written = state.pOut + toWrite;
    return { pIn: remaining, pOut: written };
 };
 /**
 * Returns the last two characters of a string, ignoring any spaces or dots
 * at the very end of it.
 *
 * If fewer than two characters are left after dropping the trailing
 * spaces/dots, only what is available is returned (possibly an empty
 * string) — never "undefined" or NaN.
 *
 * @param s the string to look at
 * @returns up to two characters
 */
 export function prev2Chars(s: string): string {
    // spread so that characters are counted per code point
    const chars = [...s];
    let end = chars.length;
    while (end > 0 && (chars[end - 1] === " " || chars[end - 1] === ".")) {
        end--;
    }
    return chars.slice(Math.max(0, end - 2), end).join("");
 }
 /**
 * Peeks at the first two characters of pIn without consuming them.
 * Either value is undefined when pIn is too short.
 */
 export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
    const { pIn } = state;
    const current = pIn[0];
    const next = pIn[1];
    return { current, next };
 }
 /**
 * Steps over one character when the next character of pIn is "ع";
 * otherwise hands the state back untouched.
 */
 export function advanceForAin(state: DiacriticsAccumulator): DiacriticsAccumulator {
    if (getCurrentNext(state).current !== "ع") {
        return state;
    }
    return advanceP(state);
 }
 /**
 * Steps over one character when pIn starts with "ئ" and that "ئ" is followed
 * by some other character (i.e. it is neither the last character of pIn nor
 * doubled); otherwise hands the state back untouched.
 */
 export function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const upcoming = getCurrentNext(state);
    const isMidHamza = upcoming.current === "ئ" && upcoming.next && upcoming.next !== "ئ";
    return isMidHamza ? advanceP(state) : state;
 }
 /**
 * Steps over one character in either of two situations:
 *  - pIn starts with "ه" and that letter is followed by nothing or a space
 *  - pIn starts with "ع"
 * In every other case the state is handed back untouched.
 *
 * NOTE(review): the function name mentions hamza, but the first check is
 * made against "ه" as written here — confirm that is the intended letter.
 */
 export function advanceForAinOrHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const { current, next } = getCurrentNext(state);
    const wordFinalLetter = current === "ه" && (!next || next === " ");
    const shouldAdvance = wordFinalLetter || current === "ع";
    return shouldAdvance ? advanceP(state) : state;
 }
--- a/src/lib/diacritics.test.ts
+++ b/src/lib/diacritics.test.ts
--- a/src/lib/diacritics.ts
+++ b/src/lib/diacritics.ts
@ -7,255 +7,35 @@
 */
 import * as T from "../types";
-import { removeAccents } from "./accent-helpers";
+import {
    splitFIntoPhonemes,
    Phoneme,
    phonemeTable,
    zwar,
    zwarakey,
    zer,
    pesh,
    sukun,
    hamzaAbove,
    tashdeed,
    wasla,
    daggerAlif,
    fathahan,
    prev2Chars,
    addP,
    last,
    advanceP,
    reverseP,
    overwriteP,
    advanceForAin,
    advanceForAinOrHamza,
    advanceForHamzaMid,
    DiacriticsAccumulator,
 } from "./diacritics-helpers";
 import { firstPhonetics } from "./p-text-helpers";
 import { pipe } from "rambda";
 // Single-character constants for the diacritic marks (plus the wasla letter)
 // referenced by phonemeTable and the diacritics code in this file.
 // NOTE(review): the trailing names are the usual Arabic-script names for the
 // characters as they appear here — confirm against the actual code points.
 const zwar = "َ"; // fatha
 const zwarakey = "ٙ"; // zwarakay
 const zer = "ِ"; // kasra
 const pesh = "ُ"; // damma
 const sukun = "ْ"; // sukun
 const hamzaAbove = "ٔ"; // hamza above
 const tashdeed = "ّ"; // shadda
 const wasla = "ٱ"; // alef wasla
 const daggerAlif = "ٰ"; // superscript (dagger) alef
 const fathahan = "ً"; // fathatan
 /** Phonetic symbols classed as consonants */
 type Consonant =
    | "b" | "p" | "t" | "T" | "s" | "j" | "ch" | "kh" | "ts" | "dz"
    | "d" | "D" | "r" | "R" | "z" | "jz" | "G" | "sh" | "x" | "gh"
    | "f" | "q" | "k" | "g" | "l" | "m" | "n" | "N" | "h" | "w" | "y";
 /** The apostrophe symbol */
 type Ain = "'";
 /** Hyphen-wrapped symbols that join two words */
 type JoiningVowel =
    | "-i-"
    | "-U-"
    | "-Ul-";
 type LongVowel =
    | "aa" | "ee" | "e" | "oo"
    | "o" | "ey" | "uy" | "eyy";
 type ShortVowel =
    | "a"
    | "i"
    | "u"
    | "U";
 /** Any single phonetic unit that a phonetics string can be split into */
 type Phoneme =
    | Consonant
    | Ain
    | LongVowel
    | ShortVowel
    | JoiningVowel;
 /**
 * Working state threaded through the processing helpers: characters are
 * moved from the front of pIn onto the end of pOut.
 */
 type DiacriticsAccumulator = {
    pIn: string,
    pOut: string,
 };
 /**
 * Per-phoneme metadata. Every field is optional; flag fields are either
 * `true` or absent.
 */
 type PhonemeInfo = {
    // script strings for the phoneme: general / word-beginning / word-ending
    matches?: string[],
    beginningMatches?: string[],
    endingMatches?: string[],
    consonant?: true,
    // diacritic mark associated with the phoneme
    diacritic?: string,
    endingOnly?: true,
    takesSukunOnEnding?: true,
    longVowel?: true,
    canStartWithAynBefore?: true,
    useEndingDiacritic?: true,
 };
 /**
 * Lookup table with one PhonemeInfo entry for every Phoneme.
 *
 * `matches`, `beginningMatches` and `endingMatches` list the script strings
 * recorded for the phoneme; `diacritic` holds one of the diacritic constants
 * (zwar, zwarakey, zer, pesh). The remaining fields are boolean flags that
 * are either `true` or left out.
 */
 const phonemeTable: Record<Phoneme, PhonemeInfo> = {
    // Consonants
    "b": {
        matches: ["ب"],
        consonant: true,
    },
    "p": {
        matches: ["پ"],
        consonant: true,
    },
    "t": {
        matches: ["ت", "ط"],
        consonant: true,
    },
    "T": {
        matches: ["ټ"],
        consonant: true,
    },
    "s": {
        matches: ["س", "ص", "ث"],
        consonant: true,
    },
    "j": {
        matches: ["ج"],
        consonant: true,
    },
    "ch": {
        matches: ["چ"],
        consonant: true,
    },
    "kh": {
        matches: ["خ"],
        consonant: true,
    },
    "ts": {
        matches: ["څ"],
        consonant: true,
    },
    "dz": {
        matches: ["ځ"],
        consonant: true,
    },
    "d": {
        matches: ["د"],
        consonant: true,
    },
    "D": {
        matches: ["ډ"],
        consonant: true,
    },
    "r": {
        matches: ["ر"],
        consonant: true,
    },
    "R": {
        matches: ["ړ"],
        consonant: true,
    },
    "z": {
        matches: ["ز", "ذ", "ظ", "ض"],
        consonant: true,
    },
    "jz": {
        matches: ["ژ"],
        consonant: true,
    },
    "G": {
        matches: ["ږ"],
        consonant: true,
    },
    "sh": {
        matches: ["ش"],
        consonant: true,
    },
    "x": {
        matches: ["ښ"],
        consonant: true,
    },
    "gh": {
        matches: ["غ"],
        consonant: true,
    },
    "f": {
        matches: ["ف"],
        consonant: true,
    },
    "q": {
        matches: ["ق"],
        consonant: true,
    },
    "k": {
        matches: ["ک"],
        consonant: true,
    },
    "g": {
        matches: ["ګ"],
        consonant: true,
    },
    "l": {
        matches: ["ل"],
        consonant: true,
    },
    "m": {
        matches: ["م"],
        consonant: true,
    },
    "n": {
        matches: ["ن"],
        consonant: true,
    },
    "N": {
        matches: ["ڼ"],
        consonant: true,
    },
    "h": {
        matches: ["ه", "ح"],
        consonant: true,
        takesSukunOnEnding: true,
    },
    "w": {
        matches: ["و"],
        consonant: true,
    },
    "y": {
        matches: ["ی"],
        consonant: true,
    },
    // Ain
    "'": {
        matches: ["ع", "ئ"],
        consonant: true,
    },
    // Joining Vowels
    // "-i-" has no entry data: no script letters and no diacritic are listed for it
    "-i-": {
    },
    "-U-": {
        matches: [" و ", "و"],
    },
    "-Ul-": {
        matches: ["ال"],
    },
    // Long Vowels
    "aa": {
        matches: ["ا"],
        beginningMatches: ["آ", "ا"],
        endingMatches: ["ا", "یٰ"],
        longVowel: true,
    },
    "ee": {
        matches: ["ی"],
        longVowel: true,
        endingMatches: ["ي"],
        diacritic: zer,
        canStartWithAynBefore: true
    },
    "e": {
        matches: ["ې"],
        longVowel: true,
    },
    "o": {
        matches: ["و"],
        longVowel: true,
    },
    "oo": {
        matches: ["و"],
        longVowel: true,
        // alsoCanBePrefix: true,
        diacritic: pesh,
        useEndingDiacritic: true,
    },
    "ey": {
        matches: ["ی"],
        longVowel: true,
        endingMatches: ["ی"],
    },
    "uy": {
        matches: ["ۍ"],
        longVowel: true,
        endingOnly: true,
    },
    "eyy": {
        matches: ["ئ"],
        longVowel: true,
        endingOnly: true,
    },
    // Short Vowels
    // (these carry a diacritic and no general `matches` list)
    "a": {
        diacritic: zwar,
        endingMatches: ["ه"],
        beginningMatches: ["ا", "ع"],
        // canComeAfterHeyEnding: true,
        // canBeFirstPartOfFathahanEnding: true,
    },
    "u": {
        diacritic: zwarakey,
        endingMatches: ["ه"],
    },
    "i": {
        diacritic: zer,
        endingMatches: ["ه"],
        beginningMatches: ["ا", "ع"],
        // takesDiacriticBeforeGurdaHeyEnding: true,
        // canBeWasla: true,
    },
    "U": {
        diacritic: pesh,
        endingMatches: ["ه"],
        // takesDiacriticBeforeGurdaHeyEnding: true,
        beginningMatches: ["ا", "ع"],
    },
 }
 /**
 * Adds diacritics to a given PsString.
 * Errors if the phonetics and script don't line up.
@ -272,61 +52,6 @@ const phonemeTable: Record<Phoneme, PhonemeInfo> = {
    };
 }
 /**
 * Breaks a phonetics string up into an array of Phonemes.
 *
 * Accents are removed first (via removeAccents). At every position the
 * longest candidate is tried first: four letters, then three, then two and
 * finally a single letter. The two-letter phoneme "uy" is only accepted when
 * it ends the string or is directly followed by a space. Characters listed
 * in `skipped` are dropped without producing a phoneme.
 *
 * @param fIn a phonetics string
 * @returns the phonemes in order of appearance
 * @throws Error if a character is neither a known phoneme nor skippable
 */
 export function splitFIntoPhonemes(fIn: string): Phoneme[] {
    const fourLetter: Phoneme[] = ["-Ul-"];
    const threeLetter: Phoneme[] = ["eyy", "-i-", "-U-"];
    const twoLetter: Phoneme[] = ["aa", "ee", "ey", "oo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
    const twoLetterWordFinal: Phoneme[] = ["uy"];
    const oneLetter: Phoneme[] = ["a", "i", "u", "o", "e", "U", "b", "p", "t", "T", "s", "j", "d", "D", "r", "R", "z", "G", "x", "f", "q", "k", "g", "l", "m", "n", "N", "h", "w", "y"];
    const skipped = ["?", " ", "`", ".", "…", ",", "'"];
    const text = removeAccents(fIn);
    const chunkAt = (start: number, size: number) => text.slice(start, start + size) as Phoneme;
    const phonemes: Phoneme[] = [];
    let pos = 0;
    while (pos < text.length) {
        const quad = chunkAt(pos, 4);
        if (fourLetter.includes(quad)) {
            phonemes.push(quad);
            pos += 4;
            continue;
        }
        const triple = chunkAt(pos, 3);
        if (threeLetter.includes(triple)) {
            phonemes.push(triple);
            pos += 3;
            continue;
        }
        const pair = chunkAt(pos, 2);
        // the pair closes a word when nothing, or a space, comes right after it
        const atWordEnd = pos === text.length - 2 || text[pos + 2] === " ";
        const isPair = twoLetter.includes(pair) || (atWordEnd && twoLetterWordFinal.includes(pair));
        if (isPair) {
            phonemes.push(pair);
            pos += 2;
            continue;
        }
        const single = chunkAt(pos, 1);
        pos += 1;
        if (skipped.includes(single)) {
            continue;
        }
        if (!oneLetter.includes(single)) {
            throw new Error(`illegal phonetic character: ${single}`);
        }
        phonemes.push(single);
    }
    return phonemes;
 }
 enum PhonemeStatus {
    LeadingLongVowel,
    LeadingConsonantOrShortVowel,
@ -337,6 +62,7 @@ enum PhonemeStatus {
    PersianSilentWWithAa,
    ArabicWasla,
    Izafe,
    EndOfDuParticle,
 }
 function processPhoneme(
@ -349,7 +75,9 @@ function processPhoneme(
    // console.log("space coming up", acc.pIn[0] === " ");
    // console.log("state", acc);
    // Prep state
-    const state = acc.pIn[0] === " " ? advanceP(acc) : acc;
+    const state = acc.pIn[0] === " "
        ? advanceP(acc)
        : acc;
    // console.log("AFTER SPACE PREP", phoneme);
    // console.log("state", state);
    // WARNING: Do not use acc after this point!
@ -403,6 +131,11 @@ function processPhoneme(
                reverseP,
                addP(zer),
            )(state)
        : (phs === PhonemeStatus.EndOfDuParticle) ?
            (console.log("here"), pipe(
                reverseP,
                addP(zwarakey),
            )(state))
        :
        // phs === PhonemeState.ShortVowel
            pipe(
@ -444,6 +177,11 @@ function stateInfo({ state, i, phonemes, phoneme }: {
        if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
            return PhonemeStatus.LeadingConsonantOrShortVowel;
        }
        console.log(phoneme, phonemes, prev2Chars(state.pOut))
        if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && prev2Chars(state.pOut) === ("د" + zwarakey)) {
            // console.log("du here", phoneme, phonemes);
            return PhonemeStatus.EndOfDuParticle
        }
        if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
            return PhonemeStatus.PersianSilentWWithAa;
        }
@ -465,6 +203,7 @@ function stateInfo({ state, i, phonemes, phoneme }: {
        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
            return PhonemeStatus.ShortVowel;
        }
        // console.log("bad phoneme is ", phoneme);
        throw new Error("phonetics error - no status found for phoneme: " + phoneme);
    }
@ -474,70 +213,3 @@ function stateInfo({ state, i, phonemes, phoneme }: {
        phs, phonemeInfo, sukunOrDiacritic,
    };
 };
 /**
 * Gives back the final character of a string
 * (undefined when the string is empty).
 *
 * @param s the string to look at
 */
 function last(s: string) {
    const finalIndex = s.length - 1;
    return s[finalIndex];
 }
 /**
 * Moves the first n characters of pIn onto the end of pOut.
 *
 * @param state the current accumulator
 * @param n how many characters to move (defaults to 1)
 * @returns a new accumulator; the one passed in is not modified
 */
 function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
    const { pIn, pOut } = state;
    const consumed = pIn.slice(0, n);
    const remaining = pIn.slice(n);
    return { pIn: remaining, pOut: pOut + consumed };
 }
 /**
 * Takes the last character of pOut and puts it back at the front of pIn
 * (the inverse of advancing by one character).
 */
 function reverseP(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const { pIn, pOut } = state;
    const handedBack = pOut.slice(-1);
    const kept = pOut.slice(0, -1);
    return { pIn: handedBack + pIn, pOut: kept };
 }
 /**
 * Builds a step that appends `toAdd` to pOut and leaves everything else in
 * the state alone. When `toAdd` is undefined or empty the step returns a
 * copy of the state with pOut unchanged.
 */
 const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
    if (!toAdd) {
        return { ...state };
    }
    return { ...state, pOut: state.pOut + toAdd };
 };
 /**
 * Builds a step that drops the first character of pIn and appends `toWrite`
 * to pOut in its place.
 */
 const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
    const remaining = state.pIn.slice(1);
    const written = state.pOut + toWrite;
    return { pIn: remaining, pOut: written };
 };
 /**
 * Peeks at the first two characters of pIn without consuming them.
 * Either value is undefined when pIn is too short.
 */
 function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
    const { pIn } = state;
    const current = pIn[0];
    const next = pIn[1];
    return { current, next };
 }
 /**
 * Steps over one character when the next character of pIn is "ع";
 * otherwise hands the state back untouched.
 */
 function advanceForAin(state: DiacriticsAccumulator): DiacriticsAccumulator {
    if (getCurrentNext(state).current !== "ع") {
        return state;
    }
    return advanceP(state);
 }
 /**
 * Steps over one character when pIn starts with "ئ" and that "ئ" is followed
 * by some other character (i.e. it is neither the last character of pIn nor
 * doubled); otherwise hands the state back untouched.
 */
 function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const upcoming = getCurrentNext(state);
    const isMidHamza = upcoming.current === "ئ" && upcoming.next && upcoming.next !== "ئ";
    return isMidHamza ? advanceP(state) : state;
 }
 /**
 * Steps over one character in either of two situations:
 *  - pIn starts with "ه" and that letter is followed by nothing or a space
 *  - pIn starts with "ع"
 * In every other case the state is handed back untouched.
 *
 * NOTE(review): the function name mentions hamza, but the first check is
 * made against "ه" as written here — confirm that is the intended letter.
 */
 function advanceForAinOrHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const { current, next } = getCurrentNext(state);
    const wordFinalLetter = current === "ه" && (!next || next === " ");
    const shouldAdvance = wordFinalLetter || current === "ع";
    return shouldAdvance ? advanceP(state) : state;
 }