more
This commit is contained in:
parent
b1aac313f9
commit
a62ab986ba
|
@ -274,7 +274,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
const willIgnore = ["?", " ", "`", ".", "…", ","];
|
const willIgnore = ["?", " ", "`", ".", "…", ","];
|
||||||
|
|
||||||
const result: Phoneme[] = [];
|
const result: Phoneme[] = [];
|
||||||
const f = removeAccents(fIn);
|
const f = removeAccents(fIn).replace(/ă/g, "a");
|
||||||
let index = 0;
|
let index = 0;
|
||||||
while (index < f.length) {
|
while (index < f.length) {
|
||||||
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
|
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
|
||||||
|
@ -336,6 +336,7 @@ export enum PhonemeStatus {
|
||||||
ShortAForAlefBeforeFathatan,
|
ShortAForAlefBeforeFathatan,
|
||||||
NOnFathatan,
|
NOnFathatan,
|
||||||
HamzaOnWow,
|
HamzaOnWow,
|
||||||
|
ArabicDefiniteArticleUl,
|
||||||
}
|
}
|
||||||
|
|
||||||
export function stateInfo({ state, i, phonemes, phoneme }: {
|
export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
|
@ -348,11 +349,11 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
const prevPLetter = last(state.pOut);
|
const prevPLetter = last(state.pOut);
|
||||||
const currentPLetter = state.pIn[0];
|
const currentPLetter = state.pIn[0];
|
||||||
const nextPLetter = state.pIn[1];
|
const nextPLetter = state.pIn[1];
|
||||||
const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
|
|
||||||
const isEndOfWord = isOutOfWord(nextPLetter);
|
|
||||||
const phonemeInfo = phonemeTable[phoneme];
|
|
||||||
const nextPhoneme = phonemes[i+1];
|
const nextPhoneme = phonemes[i+1];
|
||||||
const previousPhoneme = i > 0 && phonemes[i-1];
|
const previousPhoneme = i > 0 && phonemes[i-1];
|
||||||
|
const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل");
|
||||||
|
const isEndOfWord = isOutOfWord(nextPLetter);
|
||||||
|
const phonemeInfo = phonemeTable[phoneme];
|
||||||
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
|
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
|
||||||
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
|
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
|
||||||
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
|
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
|
||||||
|
@ -391,6 +392,9 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
|
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
|
||||||
return PhonemeStatus.EndOfDuParticle
|
return PhonemeStatus.EndOfDuParticle
|
||||||
}
|
}
|
||||||
|
if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") {
|
||||||
|
return PhonemeStatus.ArabicDefiniteArticleUl;
|
||||||
|
}
|
||||||
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
|
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
|
||||||
return PhonemeStatus.HamzaOnWow;
|
return PhonemeStatus.HamzaOnWow;
|
||||||
}
|
}
|
||||||
|
|
|
@ -586,6 +586,13 @@ const diacriticsSections: {
|
||||||
},
|
},
|
||||||
out: null,
|
out: null,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "سختسری",
|
||||||
|
f: "sakht sărey",
|
||||||
|
},
|
||||||
|
out: "سَخْتْسَری",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -805,6 +812,20 @@ const diacriticsSections: {
|
||||||
},
|
},
|
||||||
out: "تَوَقُّع",
|
out: "تَوَقُّع",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "راجع کېدل",
|
||||||
|
f: "raaji kedul",
|
||||||
|
},
|
||||||
|
out: "راجِع کېد" + zwarakey + "ل",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "ربیع",
|
||||||
|
f: "rabee'",
|
||||||
|
},
|
||||||
|
out: "رَبِیع",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1054,6 +1075,44 @@ const diacriticsSections: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
describe: "With Arabic definate article -Ul- ال",
|
||||||
|
tests: [
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "حق الاجاره",
|
||||||
|
f: "haq-Ul-ijaara",
|
||||||
|
},
|
||||||
|
out: "حَق اُلاِجارَه",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "دار العلوم",
|
||||||
|
f: "daar-Ul-Ulóom",
|
||||||
|
},
|
||||||
|
out: "دار اُلعُلُوم",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// {
|
||||||
|
// describe: "double consonants on end of words",
|
||||||
|
// tests: [
|
||||||
|
// {
|
||||||
|
// in: {
|
||||||
|
// p: "حق",
|
||||||
|
// f: "haqq",
|
||||||
|
// },
|
||||||
|
// out: "حَقّ",
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// in: {
|
||||||
|
// p: "حق پر",
|
||||||
|
// f: "haqq par",
|
||||||
|
// },
|
||||||
|
// out: "حَقّ پَر",
|
||||||
|
// },
|
||||||
|
// ],
|
||||||
|
// },
|
||||||
];
|
];
|
||||||
|
|
||||||
diacriticsSections.forEach((section) => {
|
diacriticsSections.forEach((section) => {
|
||||||
|
|
|
@ -195,6 +195,12 @@ function processPhoneme(
|
||||||
addP(hamzaAbove),
|
addP(hamzaAbove),
|
||||||
addP(diacritic),
|
addP(diacritic),
|
||||||
)(state)
|
)(state)
|
||||||
|
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
|
||||||
|
pipe(
|
||||||
|
advanceP,
|
||||||
|
addP(pesh),
|
||||||
|
advanceP,
|
||||||
|
)(state)
|
||||||
: state;
|
: state;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -970,7 +970,6 @@ const toTest: Array<{
|
||||||
},
|
},
|
||||||
out: "حَقّ پَر",
|
out: "حَقّ پَر",
|
||||||
},
|
},
|
||||||
// TODO: Allow ' in there
|
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "راجع کېدل",
|
p: "راجع کېدل",
|
||||||
|
|
Loading…
Reference in New Issue