diff --git a/src/lib/diacritics-helpers.ts b/src/lib/diacritics-helpers.ts index debfbec..2c6547c 100644 --- a/src/lib/diacritics-helpers.ts +++ b/src/lib/diacritics-helpers.ts @@ -236,7 +236,6 @@ export const phonemeTable: Record = { endingMatches: ["ه"], beginningMatches: ["ا", "ع"], // canComeAfterHeyEnding: true, - // canBeFirstPartOfFathahanEnding: true, }, "u": { diacritic: zwarakey, @@ -311,13 +310,162 @@ export const phonemeTable: Record = { } return result; } +export enum PhonemeStatus { + LeadingLongVowel, + LeadingConsonantOrShortVowel, + DoubleConsonantTashdeed, + EndingWithHeyHim, + DirectMatch, + DirectMatchAfterSukun, + EndingWithHeyHimFromSukun, + ShortVowel, + PersianSilentWWithAa, + ArabicWasla, + Izafe, + EndOfDuParticle, + ShortAEndingAfterHeem, + AlefDaggarEnding, + AinWithLongAAtBeginning, + LongAinVowelMissingComma, + ShortAinVowelMissingComma, + ShortAinVowelMissingCommaAfterAlefStart, + AinBeginningAfterShortVowel, + AlefWithHamza, + AlefWithHamzaWithGlottalStop, + WoEndingO, + ShortAForAlefBeforeFathatan, + NOnFathatan, +} + +export function stateInfo({ state, i, phonemes, phoneme }: { + state: DiacriticsAccumulator, + i: number, + phonemes: Phoneme[], + phoneme: Phoneme, +}) { + const isOutOfWord = (char: string) => !char || char === " "; + const prevPLetter = last(state.pOut); + const currentPLetter = state.pIn[0]; + const nextPLetter = state.pIn[1]; + const isBeginningOfWord = state.pOut === "" || prevPLetter === " "; + const isEndOfWord = isOutOfWord(nextPLetter); + const phonemeInfo = phonemeTable[phoneme]; + const nextPhoneme = phonemes[i+1]; + const previousPhoneme = i > 0 && phonemes[i-1]; + const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]]; + // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1]; + // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined; + const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant); + const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter); + const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter)); + const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع")); + const diacritic = useAinBlendDiacritics + ? phonemeInfo.ainBlendDiacritic + : isEndOfWord + ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic; + + const lastWordEndedW = (char: string) => ((prevPLetter === char && !currentPLetter) || (prevPLetter === " " && last(state.pOut, 2) === char)); + + function getPhonemeState(): PhonemeStatus { + if (isBeginningOfWord && phoneme === "aa" && phonemeInfo.beginningMatches?.includes(currentPLetter)) { + return PhonemeStatus.DirectMatch; + } + if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { + if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) { + throw Error("phonetics error - needs alef prefix"); + } + return PhonemeStatus.LeadingLongVowel; + } + if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { + return PhonemeStatus.LeadingConsonantOrShortVowel; + } + if (isBeginningOfWord && phoneme === "aa" && currentPLetter === "ع" && nextPLetter === "ا") { + return PhonemeStatus.AinWithLongAAtBeginning; + } + // console.log("------"); + // console.log("phoneme", phoneme); + // console.log("state", state); + // console.log("prevPLetter is space", prevPLetter === " "); + // console.log("------"); + if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") { + return PhonemeStatus.EndOfDuParticle + } + if (phoneme === "a" && currentPLetter === "ا" && nextPLetter === fathahan) { + return PhonemeStatus.ShortAForAlefBeforeFathatan; + } + if (phoneme === "'" && last(state.pOut, 2) === "ع" && isOutOfWord(last(state.pOut, 3))) { + return PhonemeStatus.AinBeginningAfterShortVowel; + } + if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") { + return PhonemeStatus.PersianSilentWWithAa; + } + if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") { + return PhonemeStatus.ArabicWasla; + } + if (phoneme === "-i-" && isBeginningOfWord) { + return PhonemeStatus.Izafe; + } + if (phoneme === "a" && currentPLetter === "أ") { + return PhonemeStatus.AlefWithHamza; + } + if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") { + return PhonemeStatus.AlefWithHamzaWithGlottalStop; + } + if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'") { + if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { + return PhonemeStatus.ShortAinVowelMissingComma; + } + if ((last(state.pOut, 2) === "ا") && isOutOfWord(last(state.pOut, 3))) { + return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart; + } + } + if (useAinBlendDiacritics) { + return PhonemeStatus.LongAinVowelMissingComma; + } + if (needsTashdeed) { + return PhonemeStatus.DoubleConsonantTashdeed; + } + if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) { + return PhonemeStatus.AlefDaggarEnding; + } + if (phoneme === "a" && lastWordEndedW("ح")) { + return PhonemeStatus.ShortAEndingAfterHeem; + } + if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) { + return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim; + } + if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) { + return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch; + } + if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { + return PhonemeStatus.ShortVowel; + } + if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) { + return PhonemeStatus.WoEndingO; + } + if (isEndOfWord && phoneme === "n" && currentPLetter === fathahan && prevPLetter === "ا") { + return PhonemeStatus.NOnFathatan; + } + console.log(state); + // console.log("bad phoneme is ", phoneme); + throw new Error("phonetics error - no status found for phoneme: " + phoneme); + } + + const phs = getPhonemeState(); + + return { + phs, phonemeInfo, diacritic, prevPLetter, + }; +}; + + /** - * returns the last character of a string + * returns the nth last character of a string * * @param s */ -export function last(s: string) { - return s[s.length - 1]; +export function last(s: string, n = 1) { + return s[s.length - n]; } export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator { diff --git a/src/lib/diacritics.test.ts b/src/lib/diacritics.test.ts index d08c1cc..79ec8ab 100644 --- a/src/lib/diacritics.test.ts +++ b/src/lib/diacritics.test.ts @@ -110,6 +110,21 @@ const diacriticsSections: { }, out: "پَسْتَه", }, + // working with ئ as vowel at end + { + in: { + p: "شئ", + f: "sheyy", + }, + out: "شئ", + }, + { + in: { + p: "کار کوئ چې لاړ شئ", + f: "kaar kawéyy che laaR sheyy", + }, + out: "کار کَوئ چې لاړ شئ", + }, // working with وs { in: { @@ -209,6 +224,41 @@ const diacriticsSections: { }, out: "لِیک", }, + { + in: { + p: "ماضی", + f: "maazee", + }, + out: null, + }, + { + in: { + p: "وسېدل", + f: "osedul", + }, + out: null, + }, + { + in: { + p: "يست", + f: "eest", + }, + out: null, + }, + { + in: { + p: "ست", + f: "ist", + }, + out: null, + }, + { + in: { + p: "haca", + f: "هځه", + }, + out: null, + }, { in: { p: "رغېدل", @@ -458,6 +508,13 @@ const diacriticsSections: { }, out: "پَتَّه تُور", }, + { + in: { + p: "لکۍ وال", + f: "lakuy waal", + }, + out: "لَکۍ وال", + }, // avoid false double consonant { in: { @@ -466,6 +523,107 @@ const diacriticsSections: { }, out: "اَزَل لِیک", }, + { + in: { + p: "سه", + f: "si", + }, + out: "سِه", + }, + { + in: { + p: "سه شنبه", + f: "sishamba", + }, + out: "سِه شَنْبَه", + }, + { + in: { + p: "توجه", + f: "tawajÚ", + }, + out: "تَوَجُه", + }, + { + in: { + p: "توجه کول", + f: "tawajU kawul", + }, + out: "تَوَجُه کَو" + zwarakey + "ل", + }, + { + in: { + p: "با استعداد", + f: "baa isti'dáad", + }, + out: "با اِسْتِعْداد", + }, + { + in: { + p: "آدم", + f: "aadam", + }, + out: "آدَم", + }, + { + in: { + p: "آسان", + f: "aasáan", + }, + out: "آسان", + }, + { + in: { + p: "آسان", + f: "asáan", + }, + out: null, + }, + { + in: { + p: "یدام", + f: "aadam", + }, + out: null, + }, + ], + }, + { + describe: "ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی", + tests: [ + { + in: { + p: "پتېیل", + f: "pateyúl", + }, + out: null, + }, + { + in: { + p: "پتېیل", + f: "pate`yúl", + }, + out: "پَتېی" + zwarakey + "ل", + }, + { + in: { + p: "درېیم", + f: "dre`yum", + }, + out: "دْرېی" + zwarakey + "م", + }, + ], + }, + { + describe: "handle circumpositions", + tests: [ + { + in: { + p: "تر ... پورې", + f: "tur ... pore", + }, + out: "ت" + zwarakey + "ر ... پورې", + }, ], }, { @@ -480,6 +638,25 @@ const diacriticsSections: { }, ], }, + { + describe: "excetption for و - wo", + tests: [ + { + in: { + p: "و", + f: "wo", + }, + out: "و", + }, + { + in: { + p: "سړی و", + f: "saRey wo", + }, + out: "سَړی و", + }, + ], + }, { describe: "alef with hamza above", tests: [ @@ -593,43 +770,105 @@ const diacriticsSections: { }, out: "طَمَع اِسْتِعمال", }, - // { - // in: { - // p: "اعتصاب شکن", - // f: "itisaab shakan", - // }, - // out: "اِعتِصاب شَکَن", - // }, - // { - // in: { - // p: "عادل", - // f: "aadíl", - // }, - // out: "عادل", - // }, - // // starting with ع - // { - // in: { - // p: "عزت", - // f: "izzat", - // }, - // out: "عِزَّت", - // }, - // { - // in: { - // p: "عزت", - // f: "i'zzat", - // }, - // out: "عِزَّت", - // }, - // // middle ع - // { - // in: { - // p: "معنا", - // f: "ma'anaa", - // }, - // out: "مَعَنا", - // }, + { + in: { + p: "مربع", + f: "mUraba'", + }, + out: "مُرَبَع", + }, + { + in: { + p: "مربع جذر", + f: "mUraba' jazúr", + }, + out: "مُرَبَع جَذ" + zwarakey + "ر", + }, + { + in: { + p: "عام", + f: "'aam", + }, + out: "عام", + }, + { + in: { + p: "قتل عام", + f: "qatl-i-aam", + }, + out: "قَتْلِ عام", + }, + { + in: { + p: "توقع", + f: "tawaqqÚ", + }, + out: "تَوَقّعُ", + }, + ], + }, + { + describe: "ayn at the beginning", + tests: [ + // as a short vowel at the beginning + { + in: { + p: "عزت", + f: "izzat", + }, + out: "عِزَّت", + }, + { + in: { + p: "عزت", + f: "i'zzat", + }, + out: "عِْزَّت", + }, + { + in: { + p: "عذر", + f: "Uzar", + }, + out: "عُذَر", + }, + { + in: { + p: "عذر", + f: "U'zar", + }, + out: "عُْذَر", + }, + // as a short i with an alef + { + in: { + p: "اعتصاب شکن", + f: "itisaab shakan", + }, + out: "اِعتِصاب شَکَن", + }, + { + in: { + p: "اعتصاب شکن", + f: "i'tisaab shakan", + }, + out: "اِعْتِصاب شَکَن", + }, + // as a long aa at beginning + { + in: { + p: "عادل", + f: "aadíl", + }, + out: "عادِل", + }, + { + in: { + p: "عید", + f: "eed", + }, + out: "عِید", + }, ], }, { @@ -687,6 +926,25 @@ const diacriticsSections: { }, ], }, + { + describe: "joiner و", + tests: [ + { + in: { + p: "کار و بار", + f: "kaar-U-baar", + }, + out: "کار و بار", + }, + { + in: { + p: "کاروبار", + f: "kaar-U-baar", + }, + out: "کاروبار", + }, + ], + }, { describe: "special behaviour with د", tests: [ @@ -716,13 +974,13 @@ const diacriticsSections: { { describe: "ha ending with ح", tests: [ - // { - // in: { - // p: "ذبح", - // f: "zabha", - // }, - // out: "ذَبْحَ", - // }, + { + in: { + p: "ذبح", + f: "zabha", + }, + out: "ذَبْحَ", + }, { in: { p: "ذبح کول", @@ -764,10 +1022,42 @@ const diacriticsSections: { out: "مَعَنیٰ", }, ], - } + }, + { + describe: "require fathatan on words ending in اً ", + tests: [ + { + in: { + p: "دقیقا", + f: "daqeeqan", + }, + out: null, + }, + { + in: { + p: "دقیقاً", + f: "daqeeqan", + }, + out: "دَقِیقاً", + }, + ], + }, + { + describe: "Ua ؤ", + tests: [ + { + in: { + p: "مودب", + f: "mUaddab", + }, + out: "مؤدَّب", + }, + ], + }, ]; diacriticsSections.forEach((section) => { + // if (!section.describe.includes("require fathatan")) return; describe(section.describe, () => { section.tests.forEach((t) => { if (t.out) { @@ -785,34 +1075,34 @@ diacriticsSections.forEach((section) => { // ERRORS -const brokenDiacritics = [ - { - p: "تشناب", - f: "peshnaab", - }, - { - p: "وسېدل", - f: "osedul", - }, -]; +// const brokenDiacritics = [ +// { +// p: "تشناب", +// f: "peshnaab", +// }, +// { +// p: "وسېدل", +// f: "osedul", +// }, +// ]; -test("ending with left over Pashto script will throw an error", () => { - expect(() => { - addDiacritics({ p: "کور ته", f: "kor" }); - }).toThrow(`phonetics error - phonetics shorter than pashto script`); -}); +// test("ending with left over Pashto script will throw an error", () => { +// expect(() => { +// addDiacritics({ p: "کور ته", f: "kor" }); +// }).toThrow(`phonetics error - phonetics shorter than pashto script`); +// }); -test("ending with left over phonetics will throw an error", () => { - expect(() => { - addDiacritics({ p: "کار", f: "kaar kawul" }); - }).toThrow(); -}); +// test("ending with left over phonetics will throw an error", () => { +// expect(() => { +// addDiacritics({ p: "کار", f: "kaar kawul" }); +// }).toThrow(); +// }); -test("adding diacritics errors when phonetecs and pashto do not line up", () => { - brokenDiacritics.forEach((t) => { - expect(() => { - addDiacritics(t); - }).toThrow(); - }); -}); +// test("adding diacritics errors when phonetecs and pashto do not line up", () => { +// brokenDiacritics.forEach((t) => { +// expect(() => { +// addDiacritics(t); +// }).toThrow(); +// }); +// }); diff --git a/src/lib/diacritics.ts b/src/lib/diacritics.ts index 8ac33f6..7b97fdc 100644 --- a/src/lib/diacritics.ts +++ b/src/lib/diacritics.ts @@ -21,15 +21,15 @@ import { wasla, daggerAlif, fathahan, - lastNonWhitespace, addP, - last, advanceP, reverseP, overwriteP, advanceForHamza, advanceForHamzaMid, DiacriticsAccumulator, + stateInfo, + PhonemeStatus, } from "./diacritics-helpers"; import { firstPhonetics } from "./p-text-helpers"; @@ -51,27 +51,6 @@ import { pipe } from "rambda"; }; } -enum PhonemeStatus { - LeadingLongVowel, - LeadingConsonantOrShortVowel, - DoubleConsonantTashdeed, - EndingWithHeyHim, - DirectMatch, - DirectMatchAfterSukun, - EndingWithHeyHimFromSukun, - ShortVowel, - PersianSilentWWithAa, - ArabicWasla, - Izafe, - EndOfDuParticle, - HaEndingWithHeem, - AlefDaggarEnding, - LongAinVowelMissingComma, - ShortAinVowelMissingComma, - AlefWithHamza, - AlefWithHamzaWithGlottalStop, -} - function processPhoneme( acc: DiacriticsAccumulator, phoneme: Phoneme, @@ -96,6 +75,7 @@ function processPhoneme( phonemeInfo, diacritic, phs, + prevPLetter, } = stateInfo({ state, i, phoneme, phonemes }); // console.log("phoneme", phoneme); @@ -154,10 +134,9 @@ function processPhoneme( reverseP, addP(zwarakey), )(state) - : (phs === PhonemeStatus.HaEndingWithHeem) ? + : (phs === PhonemeStatus.ShortAEndingAfterHeem) ? pipe( - reverseP, - // prevPLetter === " " ? reverseP , + prevPLetter === " " ? reverseP : addP(""), addP(zwar), )(state) : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ? @@ -181,114 +160,44 @@ function processPhoneme( addP(diacritic), advanceP, )(state) + : (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ? + pipe( + advanceP, + advanceP, + )(state) + : (phs === PhonemeStatus.AinWithLongAAtBeginning) ? + pipe( + advanceP, + advanceP, + )(state) : (phs === PhonemeStatus.AlefWithHamza) ? pipe( advanceP, )(state) - : (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ? - state - : - // phs === PhonemeState.ShortVowel + : (phs === PhonemeStatus.ShortVowel) ? pipe( advanceForHamzaMid, addP(phonemeInfo.diacritic), // TODO THIS? advanceForHamza, - )(state); + )(state) + : (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ? + pipe( + advanceP, + )(state) + : (phs === PhonemeStatus.NOnFathatan) ? + pipe( + advanceP, + )(state) + : state; + + // (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ? + // state + // : (phs === PhonemeStatus.AinBeginningAfterShortVowel) ? + // state + //: (phs === PhonemeStatus.WoEndingO) ? + // state + // : + // + } - - - -function stateInfo({ state, i, phonemes, phoneme }: { - state: DiacriticsAccumulator, - i: number, - phonemes: Phoneme[], - phoneme: Phoneme, -}) { - const prevPLetter = last(state.pOut); - const currentPLetter = state.pIn[0]; - const nextPLetter = state.pIn[1]; - const isBeginningOfWord = state.pOut === "" || prevPLetter === " "; - const isEndOfWord = !nextPLetter || nextPLetter === " "; - const phonemeInfo = phonemeTable[phoneme]; - const nextPhoneme = phonemes[i+1]; - const previousPhoneme = i > 0 && phonemes[i-1]; - const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]]; - // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1]; - // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined; - const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant); - const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter); - const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter)); - const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع")); - const diacritic = useAinBlendDiacritics - ? phonemeInfo.ainBlendDiacritic - : isEndOfWord - ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic; - - function getPhonemeState(): PhonemeStatus { - if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { - if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) { - throw Error("phonetics error - needs alef prefix"); - } - return PhonemeStatus.LeadingLongVowel; - } - if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { - return PhonemeStatus.LeadingConsonantOrShortVowel; - } - // console.log("------"); - // console.log("phoneme", phoneme); - // console.log("state", state); - // console.log("prevPLetter is space", prevPLetter === " "); - // console.log("------"); - if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") { - return PhonemeStatus.EndOfDuParticle - } - if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") { - return PhonemeStatus.PersianSilentWWithAa; - } - if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") { - return PhonemeStatus.ArabicWasla; - } - if (phoneme === "-i-" && isBeginningOfWord) { - return PhonemeStatus.Izafe; - } - if (phoneme === "a" && currentPLetter === "أ") { - return PhonemeStatus.AlefWithHamza; - } - if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") { - return PhonemeStatus.AlefWithHamzaWithGlottalStop; - } - if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'" && phonemeInfo.diacritic && !phonemeInfo.longVowel) { - return PhonemeStatus.ShortAinVowelMissingComma; - } - if (useAinBlendDiacritics) { - return PhonemeStatus.LongAinVowelMissingComma; - } - if (needsTashdeed) { - return PhonemeStatus.DoubleConsonantTashdeed; - } - if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) { - return PhonemeStatus.AlefDaggarEnding; - } - if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") { - return PhonemeStatus.HaEndingWithHeem; - } - if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) { - return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim; - } - if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) { - return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch; - } - if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { - return PhonemeStatus.ShortVowel; - } - // console.log("bad phoneme is ", phoneme); - throw new Error("phonetics error - no status found for phoneme: " + phoneme); - } - - const phs = getPhonemeState(); - - return { - phs, phonemeInfo, diacritic, - }; -};