more coming

This commit is contained in:
Bill D 2021-06-03 18:22:14 +04:30
parent cf01df5c6d
commit 1a0480a9d3
3 changed files with 550 additions and 203 deletions

View File

@ -236,7 +236,6 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
endingMatches: ["ه"], endingMatches: ["ه"],
beginningMatches: ["ا", "ع"], beginningMatches: ["ا", "ع"],
// canComeAfterHeyEnding: true, // canComeAfterHeyEnding: true,
// canBeFirstPartOfFathahanEnding: true,
}, },
"u": { "u": {
diacritic: zwarakey, diacritic: zwarakey,
@ -311,13 +310,162 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
} }
return result; return result;
} }
/**
 * Every way a phoneme can line up with the Pashto script, as decided by
 * `getPhonemeState` inside `stateInfo`.
 *
 * The numeric values are spelled out explicitly; they are the same values the
 * compiler would assign automatically (0 upwards, in declaration order).
 */
export enum PhonemeStatus {
  // word-initial matches
  LeadingLongVowel = 0,
  LeadingConsonantOrShortVowel = 1,
  // consonant and vowel matches inside a word
  DoubleConsonantTashdeed = 2,
  EndingWithHeyHim = 3,
  DirectMatch = 4,
  DirectMatchAfterSukun = 5,
  EndingWithHeyHimFromSukun = 6,
  ShortVowel = 7,
  // special spellings and particles
  PersianSilentWWithAa = 8,
  ArabicWasla = 9,
  Izafe = 10,
  EndOfDuParticle = 11,
  ShortAEndingAfterHeem = 12,
  AlefDaggarEnding = 13,
  // cases where the current or a preceding letter is ع
  AinWithLongAAtBeginning = 14,
  LongAinVowelMissingComma = 15,
  ShortAinVowelMissingComma = 16,
  ShortAinVowelMissingCommaAfterAlefStart = 17,
  AinBeginningAfterShortVowel = 18,
  // alef carrying a hamza (أ)
  AlefWithHamza = 19,
  AlefWithHamzaWithGlottalStop = 20,
  // "o" after "w" when the last word ended with و
  WoEndingO = 21,
  // endings written with fathatan
  ShortAForAlefBeforeFathatan = 22,
  NOnFathatan = 23,
}
/**
 * Works out how the current phoneme lines up with the Pashto script.
 *
 * The last character of `state.pOut` is treated as the previous letter and the
 * first two characters of `state.pIn` as the current and next letters. The
 * rules in `getPhonemeState` are tried strictly in order and the first one
 * that applies wins, so their order must not be changed.
 *
 * @param state - the diacritics accumulator (`pOut` and `pIn` are read)
 * @param i - index of `phoneme` within `phonemes`
 * @param phonemes - all of the phonemes being processed
 * @param phoneme - the phoneme at index `i`
 * @returns the matched status (`phs`), the `phonemeTable` entry for the
 *   phoneme, the diacritic selected for it (may be undefined) and the
 *   previous letter
 * @throws Error "phonetics error - needs alef prefix" when a word starts with
 *   a long vowel other than "aa" and the script has neither an alef nor a
 *   matching letter in the expected place
 * @throws Error "phonetics error - no status found for phoneme: ..." when no
 *   rule applies
 */
export function stateInfo({ state, i, phonemes, phoneme }: {
  state: DiacriticsAccumulator,
  i: number,
  phonemes: Phoneme[],
  phoneme: Phoneme,
}) {
  // a missing character or a space counts as being outside of a word
  const isOutOfWord = (char: string) => !char || char === " ";
  const prevPLetter = last(state.pOut);
  const currentPLetter = state.pIn[0];
  const nextPLetter = state.pIn[1];
  const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
  const isEndOfWord = isOutOfWord(nextPLetter);
  const phonemeInfo = phonemeTable[phoneme];
  const nextPhoneme = phonemes[i+1];
  const previousPhoneme = i > 0 && phonemes[i-1];
  // the previous phoneme is only looked up when we are inside a word
  const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
  const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
  // same consonant twice with no matching letter left for the second one
  const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
  // two consonants in a row where the current one does have its own letter
  const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
  const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
  // at the end of a word a long vowel only gets a diacritic when useEndingDiacritic is set
  const diacritic = useAinBlendDiacritics
    ? phonemeInfo.ainBlendDiacritic
    : isEndOfWord
      ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined)
      : phonemeInfo.diacritic;
  // true when the most recently finished word ended with `char`: either the
  // script is used up right after it, or only a space has been written since
  const lastWordEndedW = (char: string) => ((prevPLetter === char && !currentPLetter) || (prevPLetter === " " && last(state.pOut, 2) === char));

  function getPhonemeState(): PhonemeStatus {
    if (isBeginningOfWord && phoneme === "aa" && phonemeInfo.beginningMatches?.includes(currentPLetter)) {
      return PhonemeStatus.DirectMatch;
    }
    if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
      if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
        throw new Error("phonetics error - needs alef prefix");
      }
      return PhonemeStatus.LeadingLongVowel;
    }
    if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
      return PhonemeStatus.LeadingConsonantOrShortVowel;
    }
    if (isBeginningOfWord && phoneme === "aa" && currentPLetter === "ع" && nextPLetter === "ا") {
      return PhonemeStatus.AinWithLongAAtBeginning;
    }
    if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
      return PhonemeStatus.EndOfDuParticle
    }
    if (phoneme === "a" && currentPLetter === "ا" && nextPLetter === fathahan) {
      return PhonemeStatus.ShortAForAlefBeforeFathatan;
    }
    if (phoneme === "'" && last(state.pOut, 2) === "ع" && isOutOfWord(last(state.pOut, 3))) {
      return PhonemeStatus.AinBeginningAfterShortVowel;
    }
    if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
      return PhonemeStatus.PersianSilentWWithAa;
    }
    if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
      return PhonemeStatus.ArabicWasla;
    }
    if (phoneme === "-i-" && isBeginningOfWord) {
      return PhonemeStatus.Izafe;
    }
    if (phoneme === "a" && currentPLetter === "أ") {
      return PhonemeStatus.AlefWithHamza;
    }
    if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
      return PhonemeStatus.AlefWithHamzaWithGlottalStop;
    }
    // ع in the script while the phonetics carry no ' for it here or next
    if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'") {
      if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
        return PhonemeStatus.ShortAinVowelMissingComma;
      }
      if ((last(state.pOut, 2) === "ا") && isOutOfWord(last(state.pOut, 3))) {
        return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
      }
    }
    if (useAinBlendDiacritics) {
      return PhonemeStatus.LongAinVowelMissingComma;
    }
    if (needsTashdeed) {
      return PhonemeStatus.DoubleConsonantTashdeed;
    }
    if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
      return PhonemeStatus.AlefDaggarEnding;
    }
    if (phoneme === "a" && lastWordEndedW("ح")) {
      return PhonemeStatus.ShortAEndingAfterHeem;
    }
    if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
      return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
    }
    // the last alternative lets an "m" phoneme match a ن that is followed by ب
    if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
      return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
    }
    if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
      return PhonemeStatus.ShortVowel;
    }
    if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
      return PhonemeStatus.WoEndingO;
    }
    if (isEndOfWord && phoneme === "n" && currentPLetter === fathahan && prevPLetter === "ا") {
      return PhonemeStatus.NOnFathatan;
    }
    // nothing applied: the phonetics and the script do not line up
    throw new Error("phonetics error - no status found for phoneme: " + phoneme);
  }

  const phs = getPhonemeState();
  return {
    phs, phonemeInfo, diacritic, prevPLetter,
  };
}
/**
 * returns the nth last character of a string
 *
 * @param s - the string to read from
 * @param n - position counted from the end, starting at 1 (1 is the last
 *   character); defaults to 1
 * @returns the character at that position; at runtime this is `undefined`
 *   when the position falls outside of the string
 */
export function last(s: string, n = 1) {
  return s[s.length - n];
}
export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator { export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {

View File

@ -110,6 +110,21 @@ const diacriticsSections: {
}, },
out: "پَسْتَه", out: "پَسْتَه",
}, },
// working with ئ as vowel at end
{
in: {
p: "شئ",
f: "sheyy",
},
out: "شئ",
},
{
in: {
p: "کار کوئ چې لاړ شئ",
f: "kaar kawéyy che laaR sheyy",
},
out: "کار کَوئ چې لاړ شئ",
},
// working with وs // working with وs
{ {
in: { in: {
@ -209,6 +224,41 @@ const diacriticsSections: {
}, },
out: "لِیک", out: "لِیک",
}, },
{
in: {
p: "ماضی",
f: "maazee",
},
out: null,
},
{
in: {
p: "وسېدل",
f: "osedul",
},
out: null,
},
{
in: {
p: "يست",
f: "eest",
},
out: null,
},
{
in: {
p: "ست",
f: "ist",
},
out: null,
},
{
in: {
p: "haca",
f: "هځه",
},
out: null,
},
{ {
in: { in: {
p: "رغېدل", p: "رغېدل",
@ -458,6 +508,13 @@ const diacriticsSections: {
}, },
out: "پَتَّه تُور", out: "پَتَّه تُور",
}, },
{
in: {
p: "لکۍ وال",
f: "lakuy waal",
},
out: "لَکۍ وال",
},
// avoid false double consonant // avoid false double consonant
{ {
in: { in: {
@ -466,6 +523,107 @@ const diacriticsSections: {
}, },
out: "اَزَل لِیک", out: "اَزَل لِیک",
}, },
{
in: {
p: "سه",
f: "si",
},
out: "سِه",
},
{
in: {
p: "سه شنبه",
f: "sishamba",
},
out: "سِه شَنْبَه",
},
{
in: {
p: "توجه",
f: "tawajÚ",
},
out: "تَوَجُه",
},
{
in: {
p: "توجه کول",
f: "tawajU kawul",
},
out: "تَوَجُه کَو" + zwarakey + "ل",
},
{
in: {
p: "با استعداد",
f: "baa isti'dáad",
},
out: "با اِسْتِعْداد",
},
{
in: {
p: "آدم",
f: "aadam",
},
out: "آدَم",
},
{
in: {
p: "آسان",
f: "aasáan",
},
out: "آسان",
},
{
in: {
p: "آسان",
f: "asáan",
},
out: null,
},
{
in: {
p: "یدام",
f: "aadam",
},
out: null,
},
],
},
{
describe: "ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی",
tests: [
{
in: {
p: "پتېیل",
f: "pateyúl",
},
out: null,
},
{
in: {
p: "پتېیل",
f: "pate`yúl",
},
out: "پَتېی" + zwarakey + "ل",
},
{
in: {
p: "درېیم",
f: "dre`yum",
},
out: "دْرېی" + zwarakey + "م",
},
],
},
{
describe: "handle circumpositions",
tests: [
{
in: {
p: "تر ... پورې",
f: "tur ... pore",
},
out: "ت" + zwarakey + "ر ... پورې",
},
], ],
}, },
{ {
@ -480,6 +638,25 @@ const diacriticsSections: {
}, },
], ],
}, },
{
describe: "excetption for و - wo",
tests: [
{
in: {
p: "و",
f: "wo",
},
out: "و",
},
{
in: {
p: "سړی و",
f: "saRey wo",
},
out: "سَړی و",
},
],
},
{ {
describe: "alef with hamza above", describe: "alef with hamza above",
tests: [ tests: [
@ -593,43 +770,105 @@ const diacriticsSections: {
}, },
out: "طَمَع اِسْتِعمال", out: "طَمَع اِسْتِعمال",
}, },
// { {
// in: { in: {
// p: "اعتصاب شکن", p: "مربع",
// f: "itisaab shakan", f: "mUraba'",
// }, },
// out: "اِعتِصاب شَکَن", out: "مُرَبَع",
// }, },
// { {
// in: { in: {
// p: "عادل", p: "مربع جذر",
// f: "aadíl", f: "mUraba' jazúr",
// }, },
// out: "عادل", out: "مُرَبَع جَذ" + zwarakey + "ر",
// }, },
// // starting with ع {
// { in: {
// in: { p: "عام",
// p: "عزت", f: "'aam",
// f: "izzat", },
// }, out: "عام",
// out: "عِزَّت", },
// }, {
// { in: {
// in: { p: "قتل عام",
// p: "عزت", f: "qatl-i-aam",
// f: "i'zzat", },
// }, out: "قَتْلِ عام",
// out: "عِزَّت", },
// }, {
// // middle ع in: {
// { p: "توقع",
// in: { f: "tawaqqÚ",
// p: "معنا", },
// f: "ma'anaa", out: "تَوَقّعُ",
// }, },
// out: "مَعَنا", ],
// }, },
{
describe: "ayn at the beginning",
tests: [
// as a short vowel at the beginning
{
in: {
p: "عزت",
f: "izzat",
},
out: "عِزَّت",
},
{
in: {
p: "عزت",
f: "i'zzat",
},
out: "عِْزَّت",
},
{
in: {
p: "عذر",
f: "Uzar",
},
out: "عُذَر",
},
{
in: {
p: "عذر",
f: "U'zar",
},
out: "عُْذَر",
},
// as a short i with an alef
{
in: {
p: "اعتصاب شکن",
f: "itisaab shakan",
},
out: "اِعتِصاب شَکَن",
},
{
in: {
p: "اعتصاب شکن",
f: "i'tisaab shakan",
},
out: "اِعْتِصاب شَکَن",
},
// as a long aa at beginning
{
in: {
p: "عادل",
f: "aadíl",
},
out: "عادِل",
},
{
in: {
p: "عید",
f: "eed",
},
out: "عِید",
},
], ],
}, },
{ {
@ -687,6 +926,25 @@ const diacriticsSections: {
}, },
], ],
}, },
{
describe: "joiner و",
tests: [
{
in: {
p: "کار و بار",
f: "kaar-U-baar",
},
out: "کار و بار",
},
{
in: {
p: "کاروبار",
f: "kaar-U-baar",
},
out: "کاروبار",
},
],
},
{ {
describe: "special behaviour with د", describe: "special behaviour with د",
tests: [ tests: [
@ -716,13 +974,13 @@ const diacriticsSections: {
{ {
describe: "ha ending with ح", describe: "ha ending with ح",
tests: [ tests: [
// { {
// in: { in: {
// p: "ذبح", p: "ذبح",
// f: "zabha", f: "zabha",
// }, },
// out: "ذَبْحَ", out: "ذَبْحَ",
// }, },
{ {
in: { in: {
p: "ذبح کول", p: "ذبح کول",
@ -764,10 +1022,42 @@ const diacriticsSections: {
out: "مَعَنیٰ", out: "مَعَنیٰ",
}, },
], ],
} },
{
describe: "require fathatan on words ending in اً ",
tests: [
{
in: {
p: "دقیقا",
f: "daqeeqan",
},
out: null,
},
{
in: {
p: "دقیقاً",
f: "daqeeqan",
},
out: "دَقِیقاً",
},
],
},
{
describe: "Ua ؤ",
tests: [
{
in: {
p: "مودب",
f: "mUaddab",
},
out: "مؤدَّب",
},
],
},
]; ];
diacriticsSections.forEach((section) => { diacriticsSections.forEach((section) => {
// if (!section.describe.includes("require fathatan")) return;
describe(section.describe, () => { describe(section.describe, () => {
section.tests.forEach((t) => { section.tests.forEach((t) => {
if (t.out) { if (t.out) {
@ -785,34 +1075,34 @@ diacriticsSections.forEach((section) => {
// ERRORS // ERRORS
const brokenDiacritics = [ // const brokenDiacritics = [
{ // {
p: "تشناب", // p: "تشناب",
f: "peshnaab", // f: "peshnaab",
}, // },
{ // {
p: "وسېدل", // p: "وسېدل",
f: "osedul", // f: "osedul",
}, // },
]; // ];
test("ending with left over Pashto script will throw an error", () => { // test("ending with left over Pashto script will throw an error", () => {
expect(() => { // expect(() => {
addDiacritics({ p: "کور ته", f: "kor" }); // addDiacritics({ p: "کور ته", f: "kor" });
}).toThrow(`phonetics error - phonetics shorter than pashto script`); // }).toThrow(`phonetics error - phonetics shorter than pashto script`);
}); // });
test("ending with left over phonetics will throw an error", () => { // test("ending with left over phonetics will throw an error", () => {
expect(() => { // expect(() => {
addDiacritics({ p: "کار", f: "kaar kawul" }); // addDiacritics({ p: "کار", f: "kaar kawul" });
}).toThrow(); // }).toThrow();
}); // });
test("adding diacritics errors when phonetecs and pashto do not line up", () => { // test("adding diacritics errors when phonetecs and pashto do not line up", () => {
brokenDiacritics.forEach((t) => { // brokenDiacritics.forEach((t) => {
expect(() => { // expect(() => {
addDiacritics(t); // addDiacritics(t);
}).toThrow(); // }).toThrow();
}); // });
}); // });

View File

@ -21,15 +21,15 @@ import {
wasla, wasla,
daggerAlif, daggerAlif,
fathahan, fathahan,
lastNonWhitespace,
addP, addP,
last,
advanceP, advanceP,
reverseP, reverseP,
overwriteP, overwriteP,
advanceForHamza, advanceForHamza,
advanceForHamzaMid, advanceForHamzaMid,
DiacriticsAccumulator, DiacriticsAccumulator,
stateInfo,
PhonemeStatus,
} from "./diacritics-helpers"; } from "./diacritics-helpers";
import { firstPhonetics } from "./p-text-helpers"; import { firstPhonetics } from "./p-text-helpers";
@ -51,27 +51,6 @@ import { pipe } from "rambda";
}; };
} }
/**
 * The ways a phoneme can line up with the Pashto script, as returned by
 * `getPhonemeState` inside `stateInfo`.
 *
 * The numeric values are written out explicitly; they are the same values the
 * compiler would assign automatically (0 upwards, in declaration order).
 */
enum PhonemeStatus {
  // word-initial matches
  LeadingLongVowel = 0,
  LeadingConsonantOrShortVowel = 1,
  // consonant and vowel matches inside a word
  DoubleConsonantTashdeed = 2,
  EndingWithHeyHim = 3,
  DirectMatch = 4,
  DirectMatchAfterSukun = 5,
  EndingWithHeyHimFromSukun = 6,
  ShortVowel = 7,
  // special spellings and particles
  PersianSilentWWithAa = 8,
  ArabicWasla = 9,
  Izafe = 10,
  EndOfDuParticle = 11,
  HaEndingWithHeem = 12,
  AlefDaggarEnding = 13,
  // the current letter is ع
  LongAinVowelMissingComma = 14,
  ShortAinVowelMissingComma = 15,
  // alef carrying a hamza (أ)
  AlefWithHamza = 16,
  AlefWithHamzaWithGlottalStop = 17,
}
function processPhoneme( function processPhoneme(
acc: DiacriticsAccumulator, acc: DiacriticsAccumulator,
phoneme: Phoneme, phoneme: Phoneme,
@ -96,6 +75,7 @@ function processPhoneme(
phonemeInfo, phonemeInfo,
diacritic, diacritic,
phs, phs,
prevPLetter,
} = stateInfo({ state, i, phoneme, phonemes }); } = stateInfo({ state, i, phoneme, phonemes });
// console.log("phoneme", phoneme); // console.log("phoneme", phoneme);
@ -154,10 +134,9 @@ function processPhoneme(
reverseP, reverseP,
addP(zwarakey), addP(zwarakey),
)(state) )(state)
: (phs === PhonemeStatus.HaEndingWithHeem) ? : (phs === PhonemeStatus.ShortAEndingAfterHeem) ?
pipe( pipe(
reverseP, prevPLetter === " " ? reverseP : addP(""),
// prevPLetter === " " ? reverseP ,
addP(zwar), addP(zwar),
)(state) )(state)
: (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ? : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
@ -181,114 +160,44 @@ function processPhoneme(
addP(diacritic), addP(diacritic),
advanceP, advanceP,
)(state) )(state)
: (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.AlefWithHamza) ? : (phs === PhonemeStatus.AlefWithHamza) ?
pipe( pipe(
advanceP, advanceP,
)(state) )(state)
: (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ? : (phs === PhonemeStatus.ShortVowel) ?
state
:
// phs === PhonemeState.ShortVowel
pipe( pipe(
advanceForHamzaMid, advanceForHamzaMid,
addP(phonemeInfo.diacritic), addP(phonemeInfo.diacritic),
// TODO THIS? // TODO THIS?
advanceForHamza, advanceForHamza,
)(state); )(state)
: (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.NOnFathatan) ?
pipe(
advanceP,
)(state)
: state;
// (phs === PhonemeStatus.AlefWithHamzaWithGlottalStop) ?
// state
// : (phs === PhonemeStatus.AinBeginningAfterShortVowel) ?
// state
//: (phs === PhonemeStatus.WoEndingO) ?
// state
// :
//
} }
/**
 * Works out how the current phoneme lines up with the Pashto script.
 *
 * The last character of `state.pOut` is treated as the previous letter and the
 * first two characters of `state.pIn` as the current and next letters. The
 * rules in `getPhonemeState` are tried strictly in order and the first one
 * that applies wins, so their order must not be changed.
 *
 * @param state - the diacritics accumulator (`pOut` and `pIn` are read)
 * @param i - index of `phoneme` within `phonemes`
 * @param phonemes - all of the phonemes being processed
 * @param phoneme - the phoneme at index `i`
 * @returns the matched status (`phs`), the `phonemeTable` entry for the
 *   phoneme and the diacritic selected for it (may be undefined)
 * @throws Error "phonetics error - needs alef prefix" when a word starts with
 *   a long vowel other than "aa" and the script has neither an alef nor a
 *   matching letter in the expected place
 * @throws Error "phonetics error - no status found for phoneme: ..." when no
 *   rule applies
 */
function stateInfo({ state, i, phonemes, phoneme }: {
  state: DiacriticsAccumulator,
  i: number,
  phonemes: Phoneme[],
  phoneme: Phoneme,
}) {
  const prevPLetter = last(state.pOut);
  const currentPLetter = state.pIn[0];
  const nextPLetter = state.pIn[1];
  const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
  // a missing next letter or a space means the current letter ends the word
  const isEndOfWord = !nextPLetter || nextPLetter === " ";
  const phonemeInfo = phonemeTable[phoneme];
  const nextPhoneme = phonemes[i+1];
  const previousPhoneme = i > 0 && phonemes[i-1];
  // the previous phoneme is only looked up when we are inside a word
  const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
  const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
  // same consonant twice with no matching letter left for the second one
  const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
  // two consonants in a row where the current one does have its own letter
  const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
  const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
  // at the end of a word a long vowel only gets a diacritic when useEndingDiacritic is set
  const diacritic = useAinBlendDiacritics
    ? phonemeInfo.ainBlendDiacritic
    : isEndOfWord
      ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined)
      : phonemeInfo.diacritic;

  function getPhonemeState(): PhonemeStatus {
    if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
      if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
        throw new Error("phonetics error - needs alef prefix");
      }
      return PhonemeStatus.LeadingLongVowel;
    }
    if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
      return PhonemeStatus.LeadingConsonantOrShortVowel;
    }
    if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
      return PhonemeStatus.EndOfDuParticle
    }
    if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
      return PhonemeStatus.PersianSilentWWithAa;
    }
    if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
      return PhonemeStatus.ArabicWasla;
    }
    if (phoneme === "-i-" && isBeginningOfWord) {
      return PhonemeStatus.Izafe;
    }
    if (phoneme === "a" && currentPLetter === "أ") {
      return PhonemeStatus.AlefWithHamza;
    }
    if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
      return PhonemeStatus.AlefWithHamzaWithGlottalStop;
    }
    // ع in the script, a short vowel phoneme, and no ' here or next
    if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'" && phonemeInfo.diacritic && !phonemeInfo.longVowel) {
      return PhonemeStatus.ShortAinVowelMissingComma;
    }
    if (useAinBlendDiacritics) {
      return PhonemeStatus.LongAinVowelMissingComma;
    }
    if (needsTashdeed) {
      return PhonemeStatus.DoubleConsonantTashdeed;
    }
    if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
      return PhonemeStatus.AlefDaggarEnding;
    }
    // the letter before the space must actually be ح; without the comparison
    // any character in that position made this condition truthy
    if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2] === "ح")) && phoneme === "a") {
      return PhonemeStatus.HaEndingWithHeem;
    }
    if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
      return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
    }
    // the last alternative lets an "m" phoneme match a ن that is followed by ب
    if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
      return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
    }
    if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
      return PhonemeStatus.ShortVowel;
    }
    // nothing applied: the phonetics and the script do not line up
    throw new Error("phonetics error - no status found for phoneme: " + phoneme);
  }

  const phs = getPhonemeState();
  return {
    phs, phonemeInfo, diacritic,
  };
}