more work failing on phonetics

This commit is contained in:
Bill D 2021-05-24 22:06:03 +04:30
parent 5d41d953a0
commit fb71efd51d
2 changed files with 121 additions and 24 deletions

View File

@ -103,6 +103,13 @@ const diacriticsSections: {
}, },
out: "تَشْناب", out: "تَشْناب",
}, },
{
in: {
p: "پسته",
f: "pasta",
},
out: "پَسْتَه",
},
// working with وs // working with وs
{ {
in: { in: {
@ -498,6 +505,14 @@ const diacriticsSections: {
}, },
out: "عِزَّت", out: "عِزَّت",
}, },
// middle ع
{
in: {
p: "معنا",
f: "ma'anaa",
},
out: "مَعَنا",
},
// ending with ayn // ending with ayn
{ {
in: { in: {
@ -596,6 +611,58 @@ const diacriticsSections: {
}, },
], ],
}, },
{
describe: "ha ending with ح",
tests: [
{
in: {
p: "ذبح",
f: "zabha",
},
out: "ذَبْحَ",
},
{
in: {
p: "ذبح کول",
f: "zabha kawul",
},
out: "ذَبْحَ کَو" + zwarakey + "ل",
},
],
},
{
describe: "require dagger alif on words ending with یٰ",
tests: [
{
in: {
p: "یحیی",
f: "yahyaa",
},
out: null,
},
{
in: {
p: "یحییٰ",
f: "yahyaa",
},
out: "یَحْییٰ",
},
{
in: {
p: "یحییٰ چېرته",
f: "yahyaa cherta",
},
out: "یَحْییٰ چېرْتَه",
},
{
in: {
p: "معنیٰ",
f: "ma'anaa",
},
out: "مَعَنیٰ",
},
],
}
]; ];
diacriticsSections.forEach((section) => { diacriticsSections.forEach((section) => {
@ -627,23 +694,23 @@ const brokenDiacritics = [
}, },
]; ];
// test("ending with left over Pashto script will throw an error", () => { test("ending with left over Pashto script will throw an error", () => {
// expect(() => { expect(() => {
// addDiacritics({ p: "کور ته", f: "kor" }); addDiacritics({ p: "کور ته", f: "kor" });
// }).toThrow(`phonetics error - phonetics shorter than pashto script`); }).toThrow(`phonetics error - phonetics shorter than pashto script`);
// }); });
// test("ending with left over phonetics will throw an error", () => { test("ending with left over phonetics will throw an error", () => {
// expect(() => { expect(() => {
// addDiacritics({ p: "کار", f: "kaar kawul" }); addDiacritics({ p: "کار", f: "kaar kawul" });
// }).toThrow(); }).toThrow();
// }); });
// test("adding diacritics errors when phonetecs and pashto do not line up", () => { test("adding diacritics errors when phonetecs and pashto do not line up", () => {
// brokenDiacritics.forEach((t) => { brokenDiacritics.forEach((t) => {
// expect(() => { expect(() => {
// addDiacritics(t); addDiacritics(t);
// }).toThrow(); }).toThrow();
// }); });
// }); });

View File

@ -58,11 +58,15 @@ enum PhonemeStatus {
DoubleConsonantTashdeed, DoubleConsonantTashdeed,
EndingWithHeyHim, EndingWithHeyHim,
DirectMatch, DirectMatch,
DirectMatchAfterSukun,
EndingWithHeyHimFromSukun,
ShortVowel, ShortVowel,
PersianSilentWWithAa, PersianSilentWWithAa,
ArabicWasla, ArabicWasla,
Izafe, Izafe,
EndOfDuParticle, EndOfDuParticle,
HaEndingWithHeem,
AlefDaggarEnding,
} }
function processPhoneme( function processPhoneme(
@ -87,8 +91,9 @@ function processPhoneme(
const { const {
phonemeInfo, phonemeInfo,
sukunOrDiacritic, diacritic,
phs, phs,
prevPLetter,
} = stateInfo({ state, i, phoneme, phonemes }); } = stateInfo({ state, i, phoneme, phonemes });
return (phs === PhonemeStatus.LeadingLongVowel) ? return (phs === PhonemeStatus.LeadingLongVowel) ?
@ -100,7 +105,7 @@ function processPhoneme(
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ? : (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
pipe( pipe(
advanceP, advanceP,
addP(sukunOrDiacritic), addP(diacritic),
advanceForAin, advanceForAin,
)(state) )(state)
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ? : (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
@ -114,7 +119,12 @@ function processPhoneme(
)(state) )(state)
: (phs === PhonemeStatus.DirectMatch) ? : (phs === PhonemeStatus.DirectMatch) ?
pipe( pipe(
addP(sukunOrDiacritic), addP(diacritic),
advanceP,
)(state)
: (phs === PhonemeStatus.DirectMatchAfterSukun) ?
pipe(
addP(sukun),
advanceP, advanceP,
)(state) )(state)
: (phs === PhonemeStatus.PersianSilentWWithAa) ? : (phs === PhonemeStatus.PersianSilentWWithAa) ?
@ -139,6 +149,21 @@ function processPhoneme(
reverseP, reverseP,
addP(zwarakey), addP(zwarakey),
)(state) )(state)
: (phs === PhonemeStatus.HaEndingWithHeem) ?
pipe(
prevPLetter === " " ? reverseP : (s: any) => s,
addP(zwar),
)(state)
: (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
pipe(
addP(sukun),
advanceP,
)(state)
: (phs === PhonemeStatus.AlefDaggarEnding) ?
pipe(
advanceP,
advanceP,
)(state)
: :
// phs === PhonemeStatus.ShortVowel // phs === PhonemeStatus.ShortVowel
pipe( pipe(
@ -168,7 +193,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter); const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter)); const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic; const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
const sukunOrDiacritic = (needsSukun ? sukun : diacritic);
function getPhonemeState(): PhonemeStatus { function getPhonemeState(): PhonemeStatus {
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
@ -200,11 +224,17 @@ function stateInfo({ state, i, phonemes, phoneme }: {
if (needsTashdeed) { if (needsTashdeed) {
return PhonemeStatus.DoubleConsonantTashdeed; return PhonemeStatus.DoubleConsonantTashdeed;
} }
if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
return PhonemeStatus.AlefDaggarEnding;
}
if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") {
return PhonemeStatus.HaEndingWithHeem;
}
if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) { if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
return PhonemeStatus.EndingWithHeyHim; return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
} }
if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) { if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
return PhonemeStatus.DirectMatch; return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
} }
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
return PhonemeStatus.ShortVowel; return PhonemeStatus.ShortVowel;
@ -216,6 +246,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
const phs = getPhonemeState(); const phs = getPhonemeState();
return { return {
phs, phonemeInfo, sukunOrDiacritic, phs, phonemeInfo, diacritic, prevPLetter,
}; };
}; };