This commit is contained in:
Bill D 2021-06-03 20:42:07 +04:30
parent b1aac313f9
commit a62ab986ba
4 changed files with 73 additions and 5 deletions

View File

@ -274,7 +274,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
const willIgnore = ["?", " ", "`", ".", "…", ","]; const willIgnore = ["?", " ", "`", ".", "…", ","];
const result: Phoneme[] = []; const result: Phoneme[] = [];
const f = removeAccents(fIn); const f = removeAccents(fIn).replace(/ă/g, "a");
let index = 0; let index = 0;
while (index < f.length) { while (index < f.length) {
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " "); const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
@ -336,6 +336,7 @@ export enum PhonemeStatus {
ShortAForAlefBeforeFathatan, ShortAForAlefBeforeFathatan,
NOnFathatan, NOnFathatan,
HamzaOnWow, HamzaOnWow,
ArabicDefiniteArticleUl,
} }
export function stateInfo({ state, i, phonemes, phoneme }: { export function stateInfo({ state, i, phonemes, phoneme }: {
@ -348,11 +349,11 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
const prevPLetter = last(state.pOut); const prevPLetter = last(state.pOut);
const currentPLetter = state.pIn[0]; const currentPLetter = state.pIn[0];
const nextPLetter = state.pIn[1]; const nextPLetter = state.pIn[1];
const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
const isEndOfWord = isOutOfWord(nextPLetter);
const phonemeInfo = phonemeTable[phoneme];
const nextPhoneme = phonemes[i+1]; const nextPhoneme = phonemes[i+1];
const previousPhoneme = i > 0 && phonemes[i-1]; const previousPhoneme = i > 0 && phonemes[i-1];
const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل");
const isEndOfWord = isOutOfWord(nextPLetter);
const phonemeInfo = phonemeTable[phoneme];
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]]; const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1]; // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined; // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
@ -391,6 +392,9 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") { if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
return PhonemeStatus.EndOfDuParticle return PhonemeStatus.EndOfDuParticle
} }
if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") {
return PhonemeStatus.ArabicDefiniteArticleUl;
}
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") { if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
return PhonemeStatus.HamzaOnWow; return PhonemeStatus.HamzaOnWow;
} }

View File

@ -586,6 +586,13 @@ const diacriticsSections: {
}, },
out: null, out: null,
}, },
{
in: {
p: "سختسری",
f: "sakht sărey",
},
out: "سَخْتْسَری",
},
], ],
}, },
{ {
@ -805,6 +812,20 @@ const diacriticsSections: {
}, },
out: "تَوَقُّع", out: "تَوَقُّع",
}, },
{
in: {
p: "راجع کېدل",
f: "raaji kedul",
},
out: "راجِع کېد" + zwarakey + "ل",
},
{
in: {
p: "ربیع",
f: "rabee'",
},
out: "رَبِیع",
},
], ],
}, },
{ {
@ -1054,6 +1075,44 @@ const diacriticsSections: {
}, },
], ],
}, },
{
describe: "With Arabic definate article -Ul- ال",
tests: [
{
in: {
p: "حق الاجاره",
f: "haq-Ul-ijaara",
},
out: "حَق اُلاِجارَه",
},
{
in: {
p: "دار العلوم",
f: "daar-Ul-Ulóom",
},
out: "دار اُلعُلُوم",
},
],
},
// {
// describe: "double consonants on end of words",
// tests: [
// {
// in: {
// p: "حق",
// f: "haqq",
// },
// out: "حَقّ",
// },
// {
// in: {
// p: "حق پر",
// f: "haqq par",
// },
// out: "حَقّ پَر",
// },
// ],
// },
]; ];
diacriticsSections.forEach((section) => { diacriticsSections.forEach((section) => {

View File

@ -195,6 +195,12 @@ function processPhoneme(
addP(hamzaAbove), addP(hamzaAbove),
addP(diacritic), addP(diacritic),
)(state) )(state)
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
pipe(
advanceP,
addP(pesh),
advanceP,
)(state)
: state; : state;

View File

@ -970,7 +970,6 @@ const toTest: Array<{
}, },
out: "حَقّ پَر", out: "حَقّ پَر",
}, },
// TODO: Allow ' in there
{ {
in: { in: {
p: "راجع کېدل", p: "راجع کېدل",