more work failing on phonetics
This commit is contained in:
parent
5d41d953a0
commit
fb71efd51d
|
@ -103,6 +103,13 @@ const diacriticsSections: {
|
||||||
},
|
},
|
||||||
out: "تَشْناب",
|
out: "تَشْناب",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "پسته",
|
||||||
|
f: "pasta",
|
||||||
|
},
|
||||||
|
out: "پَسْتَه",
|
||||||
|
},
|
||||||
// working with وs
|
// working with وs
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -498,6 +505,14 @@ const diacriticsSections: {
|
||||||
},
|
},
|
||||||
out: "عِزَّت",
|
out: "عِزَّت",
|
||||||
},
|
},
|
||||||
|
// middle ع
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "معنا",
|
||||||
|
f: "ma'anaa",
|
||||||
|
},
|
||||||
|
out: "مَعَنا",
|
||||||
|
},
|
||||||
// ending with ayn
|
// ending with ayn
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -596,6 +611,58 @@ const diacriticsSections: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
describe: "ha ending with ح",
|
||||||
|
tests: [
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "ذبح",
|
||||||
|
f: "zabha",
|
||||||
|
},
|
||||||
|
out: "ذَبْحَ",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "ذبح کول",
|
||||||
|
f: "zabha kawul",
|
||||||
|
},
|
||||||
|
out: "ذَبْحَ کَو" + zwarakey + "ل",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
describe: "require dagger alif on words ending with یٰ",
|
||||||
|
tests: [
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "یحیی",
|
||||||
|
f: "yahyaa",
|
||||||
|
},
|
||||||
|
out: null,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "یحییٰ",
|
||||||
|
f: "yahyaa",
|
||||||
|
},
|
||||||
|
out: "یَحْییٰ",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "یحییٰ چېرته",
|
||||||
|
f: "yahyaa cherta",
|
||||||
|
},
|
||||||
|
out: "یَحْییٰ چېرْتَه",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "معنیٰ",
|
||||||
|
f: "ma'anaa",
|
||||||
|
},
|
||||||
|
out: "مَعَنیٰ",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
diacriticsSections.forEach((section) => {
|
diacriticsSections.forEach((section) => {
|
||||||
|
@ -627,23 +694,23 @@ const brokenDiacritics = [
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
// test("ending with left over Pashto script will throw an error", () => {
|
test("ending with left over Pashto script will throw an error", () => {
|
||||||
// expect(() => {
|
expect(() => {
|
||||||
// addDiacritics({ p: "کور ته", f: "kor" });
|
addDiacritics({ p: "کور ته", f: "kor" });
|
||||||
// }).toThrow(`phonetics error - phonetics shorter than pashto script`);
|
}).toThrow(`phonetics error - phonetics shorter than pashto script`);
|
||||||
// });
|
});
|
||||||
|
|
||||||
// test("ending with left over phonetics will throw an error", () => {
|
test("ending with left over phonetics will throw an error", () => {
|
||||||
// expect(() => {
|
expect(() => {
|
||||||
// addDiacritics({ p: "کار", f: "kaar kawul" });
|
addDiacritics({ p: "کار", f: "kaar kawul" });
|
||||||
// }).toThrow();
|
}).toThrow();
|
||||||
// });
|
});
|
||||||
|
|
||||||
// test("adding diacritics errors when phonetecs and pashto do not line up", () => {
|
test("adding diacritics errors when phonetecs and pashto do not line up", () => {
|
||||||
// brokenDiacritics.forEach((t) => {
|
brokenDiacritics.forEach((t) => {
|
||||||
// expect(() => {
|
expect(() => {
|
||||||
// addDiacritics(t);
|
addDiacritics(t);
|
||||||
// }).toThrow();
|
}).toThrow();
|
||||||
// });
|
});
|
||||||
// });
|
});
|
||||||
|
|
||||||
|
|
|
@ -58,11 +58,15 @@ enum PhonemeStatus {
|
||||||
DoubleConsonantTashdeed,
|
DoubleConsonantTashdeed,
|
||||||
EndingWithHeyHim,
|
EndingWithHeyHim,
|
||||||
DirectMatch,
|
DirectMatch,
|
||||||
|
DirectMatchAfterSukun,
|
||||||
|
EndingWithHeyHimFromSukun,
|
||||||
ShortVowel,
|
ShortVowel,
|
||||||
PersianSilentWWithAa,
|
PersianSilentWWithAa,
|
||||||
ArabicWasla,
|
ArabicWasla,
|
||||||
Izafe,
|
Izafe,
|
||||||
EndOfDuParticle,
|
EndOfDuParticle,
|
||||||
|
HaEndingWithHeem,
|
||||||
|
AlefDaggarEnding,
|
||||||
}
|
}
|
||||||
|
|
||||||
function processPhoneme(
|
function processPhoneme(
|
||||||
|
@ -87,8 +91,9 @@ function processPhoneme(
|
||||||
|
|
||||||
const {
|
const {
|
||||||
phonemeInfo,
|
phonemeInfo,
|
||||||
sukunOrDiacritic,
|
diacritic,
|
||||||
phs,
|
phs,
|
||||||
|
prevPLetter,
|
||||||
} = stateInfo({ state, i, phoneme, phonemes });
|
} = stateInfo({ state, i, phoneme, phonemes });
|
||||||
|
|
||||||
return (phs === PhonemeStatus.LeadingLongVowel) ?
|
return (phs === PhonemeStatus.LeadingLongVowel) ?
|
||||||
|
@ -100,7 +105,7 @@ function processPhoneme(
|
||||||
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
|
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
|
||||||
pipe(
|
pipe(
|
||||||
advanceP,
|
advanceP,
|
||||||
addP(sukunOrDiacritic),
|
addP(diacritic),
|
||||||
advanceForAin,
|
advanceForAin,
|
||||||
)(state)
|
)(state)
|
||||||
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
|
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
|
||||||
|
@ -114,7 +119,12 @@ function processPhoneme(
|
||||||
)(state)
|
)(state)
|
||||||
: (phs === PhonemeStatus.DirectMatch) ?
|
: (phs === PhonemeStatus.DirectMatch) ?
|
||||||
pipe(
|
pipe(
|
||||||
addP(sukunOrDiacritic),
|
addP(diacritic),
|
||||||
|
advanceP,
|
||||||
|
)(state)
|
||||||
|
: (phs === PhonemeStatus.DirectMatchAfterSukun) ?
|
||||||
|
pipe(
|
||||||
|
addP(sukun),
|
||||||
advanceP,
|
advanceP,
|
||||||
)(state)
|
)(state)
|
||||||
: (phs === PhonemeStatus.PersianSilentWWithAa) ?
|
: (phs === PhonemeStatus.PersianSilentWWithAa) ?
|
||||||
|
@ -139,6 +149,21 @@ function processPhoneme(
|
||||||
reverseP,
|
reverseP,
|
||||||
addP(zwarakey),
|
addP(zwarakey),
|
||||||
)(state)
|
)(state)
|
||||||
|
: (phs === PhonemeStatus.HaEndingWithHeem) ?
|
||||||
|
pipe(
|
||||||
|
prevPLetter === " " ? reverseP : (s: any) => s,
|
||||||
|
addP(zwar),
|
||||||
|
)(state)
|
||||||
|
: (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
|
||||||
|
pipe(
|
||||||
|
addP(sukun),
|
||||||
|
advanceP,
|
||||||
|
)(state)
|
||||||
|
: (phs === PhonemeStatus.AlefDaggarEnding) ?
|
||||||
|
pipe(
|
||||||
|
advanceP,
|
||||||
|
advanceP,
|
||||||
|
)(state)
|
||||||
:
|
:
|
||||||
// phs === PhonemeStatus.ShortVowel
|
// phs === PhonemeStatus.ShortVowel
|
||||||
pipe(
|
pipe(
|
||||||
|
@ -168,7 +193,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
|
const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
|
||||||
const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
|
const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
|
||||||
const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
|
const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
|
||||||
const sukunOrDiacritic = (needsSukun ? sukun : diacritic);
|
|
||||||
|
|
||||||
function getPhonemeState(): PhonemeStatus {
|
function getPhonemeState(): PhonemeStatus {
|
||||||
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
|
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
|
||||||
|
@ -200,11 +224,17 @@ function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
if (needsTashdeed) {
|
if (needsTashdeed) {
|
||||||
return PhonemeStatus.DoubleConsonantTashdeed;
|
return PhonemeStatus.DoubleConsonantTashdeed;
|
||||||
}
|
}
|
||||||
|
if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
|
||||||
|
return PhonemeStatus.AlefDaggarEnding;
|
||||||
|
}
|
||||||
|
if (((isEndOfWord && prevPLetter === "ح") || (prevPLetter === " " && state.pOut[state.pOut.length - 2])) && phoneme === "a") {
|
||||||
|
return PhonemeStatus.HaEndingWithHeem;
|
||||||
|
}
|
||||||
if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
|
if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
|
||||||
return PhonemeStatus.EndingWithHeyHim;
|
return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
|
||||||
}
|
}
|
||||||
if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
|
if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
|
||||||
return PhonemeStatus.DirectMatch;
|
return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
|
||||||
}
|
}
|
||||||
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
|
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
|
||||||
return PhonemeStatus.ShortVowel;
|
return PhonemeStatus.ShortVowel;
|
||||||
|
@ -216,6 +246,6 @@ function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
const phs = getPhonemeState();
|
const phs = getPhonemeState();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
phs, phonemeInfo, sukunOrDiacritic,
|
phs, phonemeInfo, diacritic, prevPLetter,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue