From fb71efd51d8fce87b91139958e15e13a14effb98 Mon Sep 17 00:00:00 2001 From: Bill D Date: Mon, 24 May 2021 22:06:03 +0430 Subject: [PATCH] more work failing on phonetics --- src/lib/diacritics.test.ts | 101 ++++++++++++++++++++++++++++++------- src/lib/diacritics.ts | 44 +++++++++++++--- 2 files changed, 121 insertions(+), 24 deletions(-) diff --git a/src/lib/diacritics.test.ts b/src/lib/diacritics.test.ts index 4d03af6..d2a12bf 100644 --- a/src/lib/diacritics.test.ts +++ b/src/lib/diacritics.test.ts @@ -103,6 +103,13 @@ const diacriticsSections: { }, out: "تَشْناب", }, + { + in: { + p: "پسته", + f: "pasta", + }, + out: "پَسْتَه", + }, // working with وs { in: { @@ -498,6 +505,14 @@ const diacriticsSections: { }, out: "عِزَّت", }, + // middle ع + { + in: { + p: "معنا", + f: "ma'anaa", + }, + out: "مَعَنا", + }, // ending with ayn { in: { @@ -596,6 +611,58 @@ const diacriticsSections: { }, ], }, + { + describe: "ha ending with ح", + tests: [ + { + in: { + p: "ذبح", + f: "zabha", + }, + out: "ذَبْحَ", + }, + { + in: { + p: "ذبح کول", + f: "zabha kawul", + }, + out: "ذَبْحَ کَو" + zwarakey + "ل", + }, + ], + }, + { + describe: "require dagger alif on words ending with یٰ", + tests: [ + { + in: { + p: "یحیی", + f: "yahyaa", + }, + out: null, + }, + { + in: { + p: "یحییٰ", + f: "yahyaa", + }, + out: "یَحْییٰ", + }, + { + in: { + p: "یحییٰ چېرته", + f: "yahyaa cherta", + }, + out: "یَحْییٰ چېرْتَه", + }, + { + in: { + p: "معنیٰ", + f: "ma'anaa", + }, + out: "مَعَنیٰ", + }, + ], + } ]; diacriticsSections.forEach((section) => { @@ -627,23 +694,23 @@ const brokenDiacritics = [ }, ]; -// test("ending with left over Pashto script will throw an error", () => { -// expect(() => { -// addDiacritics({ p: "کور ته", f: "kor" }); -// }).toThrow(`phonetics error - phonetics shorter than pashto script`); -// }); +test("ending with left over Pashto script will throw an error", () => { + expect(() => { + addDiacritics({ p: "کور ته", f: "kor" }); + }).toThrow(`phonetics error - phonetics shorter than pashto script`); +}); -// test("ending with left over phonetics will throw an error", () => { -// expect(() => { -// addDiacritics({ p: "کار", f: "kaar kawul" }); -// }).toThrow(); -// }); +test("ending with left over phonetics will throw an error", () => { + expect(() => { + addDiacritics({ p: "کار", f: "kaar kawul" }); + }).toThrow(); +}); -// test("adding diacritics errors when phonetecs and pashto do not line up", () => { -// brokenDiacritics.forEach((t) => { -// expect(() => { -// addDiacritics(t); -// }).toThrow(); -// }); -// }); +test("adding diacritics errors when phonetecs and pashto do not line up", () => { + brokenDiacritics.forEach((t) => { + expect(() => { + addDiacritics(t); + }).toThrow(); + }); +}); diff --git a/src/lib/diacritics.ts b/src/lib/diacritics.ts index a67f33b..9577d80 100644 --- a/src/lib/diacritics.ts +++ b/src/lib/diacritics.ts @@ -58,11 +58,15 @@ enum PhonemeStatus { DoubleConsonantTashdeed, EndingWithHeyHim, DirectMatch, + DirectMatchAfterSukun, + EndingWithHeyHimFromSukun, ShortVowel, PersianSilentWWithAa, ArabicWasla, Izafe, EndOfDuParticle, + HaEndingWithHeem, + AlefDaggarEnding, } function processPhoneme( @@ -87,8 +91,9 @@ function processPhoneme( const { phonemeInfo, - sukunOrDiacritic, + diacritic, phs, + prevPLetter, } = stateInfo({ state, i, phoneme, phonemes }); return (phs === PhonemeStatus.LeadingLongVowel) ? @@ -100,7 +105,7 @@ function processPhoneme( : (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ? pipe( advanceP, - addP(sukunOrDiacritic), + addP(diacritic), advanceForAin, )(state) : (phs === PhonemeStatus.DoubleConsonantTashdeed) ? @@ -114,7 +119,12 @@ function processPhoneme( )(state) : (phs === PhonemeStatus.DirectMatch) ? pipe( - addP(sukunOrDiacritic), + addP(diacritic), + advanceP, + )(state) + : (phs === PhonemeStatus.DirectMatchAfterSukun) ? + pipe( + addP(sukun), advanceP, )(state) : (phs === PhonemeStatus.PersianSilentWWithAa) ? @@ -139,6 +149,21 @@ function processPhoneme( reverseP, addP(zwarakey), )(state) + : (phs === PhonemeStatus.HaEndingWithHeem) ? + pipe( + prevPLetter === " " ? reverseP : (s: any) => s, + addP(zwar), + )(state) + : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ? + pipe( + addP(sukun), + advanceP, + )(state) + : (phs === PhonemeStatus.AlefDaggarEnding) ? + pipe( + advanceP, + advanceP, + )(state) : // phs === PhonemeState.ShortVowel pipe( @@ -168,7 +193,6 @@ function stateInfo({ state, i, phonemes, phoneme }: { const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter); const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter)); const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic; - const sukunOrDiacritic = (needsSukun ? sukun : diacritic); function getPhonemeState(): PhonemeStatus { if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { @@ -200,11 +224,17 @@ function stateInfo({ state, i, phonemes, phoneme }: { if (needsTashdeed) { return PhonemeStatus.DoubleConsonantTashdeed; } + if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) { + return PhonemeStatus.AlefDaggarEnding; + } + if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") { + return PhonemeStatus.HaEndingWithHeem; + } if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) { - return PhonemeStatus.EndingWithHeyHim; + return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim; } if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) { - return PhonemeStatus.DirectMatch; + return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch; } if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { return PhonemeStatus.ShortVowel; @@ -216,6 +246,6 @@ function stateInfo({ state, i, phonemes, phoneme }: { const phs = getPhonemeState(); return { - phs, phonemeInfo, sukunOrDiacritic, + phs, phonemeInfo, diacritic, prevPLetter, }; };