more
This commit is contained in:
parent
b1aac313f9
commit
a62ab986ba
|
@ -274,7 +274,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
|||
const willIgnore = ["?", " ", "`", ".", "…", ","];
|
||||
|
||||
const result: Phoneme[] = [];
|
||||
const f = removeAccents(fIn);
|
||||
const f = removeAccents(fIn).replace(/ă/g, "a");
|
||||
let index = 0;
|
||||
while (index < f.length) {
|
||||
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
|
||||
|
@ -336,6 +336,7 @@ export enum PhonemeStatus {
|
|||
ShortAForAlefBeforeFathatan,
|
||||
NOnFathatan,
|
||||
HamzaOnWow,
|
||||
ArabicDefiniteArticleUl,
|
||||
}
|
||||
|
||||
export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||
|
@ -348,11 +349,11 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
|||
const prevPLetter = last(state.pOut);
|
||||
const currentPLetter = state.pIn[0];
|
||||
const nextPLetter = state.pIn[1];
|
||||
const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
|
||||
const isEndOfWord = isOutOfWord(nextPLetter);
|
||||
const phonemeInfo = phonemeTable[phoneme];
|
||||
const nextPhoneme = phonemes[i+1];
|
||||
const previousPhoneme = i > 0 && phonemes[i-1];
|
||||
const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل");
|
||||
const isEndOfWord = isOutOfWord(nextPLetter);
|
||||
const phonemeInfo = phonemeTable[phoneme];
|
||||
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
|
||||
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
|
||||
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
|
||||
|
@ -391,6 +392,9 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
|||
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
|
||||
return PhonemeStatus.EndOfDuParticle
|
||||
}
|
||||
if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") {
|
||||
return PhonemeStatus.ArabicDefiniteArticleUl;
|
||||
}
|
||||
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
|
||||
return PhonemeStatus.HamzaOnWow;
|
||||
}
|
||||
|
|
|
@ -586,6 +586,13 @@ const diacriticsSections: {
|
|||
},
|
||||
out: null,
|
||||
},
|
||||
{
|
||||
in: {
|
||||
p: "سختسری",
|
||||
f: "sakht sărey",
|
||||
},
|
||||
out: "سَخْتْسَری",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -805,6 +812,20 @@ const diacriticsSections: {
|
|||
},
|
||||
out: "تَوَقُّع",
|
||||
},
|
||||
{
|
||||
in: {
|
||||
p: "راجع کېدل",
|
||||
f: "raaji kedul",
|
||||
},
|
||||
out: "راجِع کېد" + zwarakey + "ل",
|
||||
},
|
||||
{
|
||||
in: {
|
||||
p: "ربیع",
|
||||
f: "rabee'",
|
||||
},
|
||||
out: "رَبِیع",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -1054,6 +1075,44 @@ const diacriticsSections: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
describe: "With Arabic definite article -Ul- ال",
|
||||
tests: [
|
||||
{
|
||||
in: {
|
||||
p: "حق الاجاره",
|
||||
f: "haq-Ul-ijaara",
|
||||
},
|
||||
out: "حَق اُلاِجارَه",
|
||||
},
|
||||
{
|
||||
in: {
|
||||
p: "دار العلوم",
|
||||
f: "daar-Ul-Ulóom",
|
||||
},
|
||||
out: "دار اُلعُلُوم",
|
||||
},
|
||||
],
|
||||
},
|
||||
// {
|
||||
// describe: "double consonants on end of words",
|
||||
// tests: [
|
||||
// {
|
||||
// in: {
|
||||
// p: "حق",
|
||||
// f: "haqq",
|
||||
// },
|
||||
// out: "حَقّ",
|
||||
// },
|
||||
// {
|
||||
// in: {
|
||||
// p: "حق پر",
|
||||
// f: "haqq par",
|
||||
// },
|
||||
// out: "حَقّ پَر",
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
];
|
||||
|
||||
diacriticsSections.forEach((section) => {
|
||||
|
|
|
@ -195,6 +195,12 @@ function processPhoneme(
|
|||
addP(hamzaAbove),
|
||||
addP(diacritic),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(pesh),
|
||||
advanceP,
|
||||
)(state)
|
||||
: state;
|
||||
|
||||
|
||||
|
|
|
@ -970,7 +970,6 @@ const toTest: Array<{
|
|||
},
|
||||
out: "حَقّ پَر",
|
||||
},
|
||||
// TODO: Allow ' in there
|
||||
{
|
||||
in: {
|
||||
p: "راجع کېدل",
|
||||
|
|
Loading…
Reference in New Issue