This commit is contained in:
Bill D 2021-06-03 20:42:07 +04:30
parent b1aac313f9
commit a62ab986ba
4 changed files with 73 additions and 5 deletions

View File

@ -274,7 +274,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
const willIgnore = ["?", " ", "`", ".", "…", ","];
const result: Phoneme[] = [];
const f = removeAccents(fIn);
const f = removeAccents(fIn).replace(/ă/g, "a");
let index = 0;
while (index < f.length) {
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
@ -336,6 +336,7 @@ export enum PhonemeStatus {
ShortAForAlefBeforeFathatan,
NOnFathatan,
HamzaOnWow,
ArabicDefiniteArticleUl,
}
export function stateInfo({ state, i, phonemes, phoneme }: {
@ -348,11 +349,11 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
const prevPLetter = last(state.pOut);
const currentPLetter = state.pIn[0];
const nextPLetter = state.pIn[1];
const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
const isEndOfWord = isOutOfWord(nextPLetter);
const phonemeInfo = phonemeTable[phoneme];
const nextPhoneme = phonemes[i+1];
const previousPhoneme = i > 0 && phonemes[i-1];
const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل");
const isEndOfWord = isOutOfWord(nextPLetter);
const phonemeInfo = phonemeTable[phoneme];
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
@ -391,6 +392,9 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
return PhonemeStatus.EndOfDuParticle
}
if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") {
return PhonemeStatus.ArabicDefiniteArticleUl;
}
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
return PhonemeStatus.HamzaOnWow;
}

View File

@ -586,6 +586,13 @@ const diacriticsSections: {
},
out: null,
},
{
in: {
p: "سختسری",
f: "sakht sărey",
},
out: "سَخْتْسَری",
},
],
},
{
@ -805,6 +812,20 @@ const diacriticsSections: {
},
out: "تَوَقُّع",
},
{
in: {
p: "راجع کېدل",
f: "raaji kedul",
},
out: "راجِع کېد" + zwarakey + "ل",
},
{
in: {
p: "ربیع",
f: "rabee'",
},
out: "رَبِیع",
},
],
},
{
@ -1054,6 +1075,44 @@ const diacriticsSections: {
},
],
},
// Test section for phonetic input containing the "-Ul-" phoneme, which
// corresponds to the letters ال in the script (p) input. In both expected
// outputs the ا of ال carries a diacritic mark and the following ل carries none.
// NOTE(review): "definate" in the describe label below is a misspelling of
// "definite"; it is left untouched here because the label is a runtime string.
{
describe: "With Arabic definate article -Ul- ال",
tests: [
{
in: {
p: "حق الاجاره",
f: "haq-Ul-ijaara",
},
out: "حَق اُلاِجارَه",
},
{
in: {
p: "دار العلوم",
f: "daar-Ul-Ulóom",
},
out: "دار اُلعُلُوم",
},
],
},
// {
// describe: "double consonants on end of words",
// tests: [
// {
// in: {
// p: "حق",
// f: "haqq",
// },
// out: "حَقّ",
// },
// {
// in: {
// p: "حق پر",
// f: "haqq par",
// },
// out: "حَقّ پَر",
// },
// ],
// },
];
diacriticsSections.forEach((section) => {

View File

@ -195,6 +195,12 @@ function processPhoneme(
addP(hamzaAbove),
addP(diacritic),
)(state)
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
pipe(
advanceP,
addP(pesh),
advanceP,
)(state)
: state;

View File

@ -970,7 +970,6 @@ const toTest: Array<{
},
out: "حَقّ پَر",
},
// TODO: Allow ' in there
{
in: {
p: "راجع کېدل",