This commit is contained in:
Bill D 2021-05-09 20:08:35 +03:00
parent 2dea82c32b
commit c1ee7d3289
1 changed files with 73 additions and 54 deletions

View File

@ -329,6 +329,15 @@ export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsSt
}; };
} }
/**
 * The category assigned to a phoneme, relative to the current position in the
 * script text, while diacritics are being added. Each phoneme gets exactly one
 * category, and that category selects how the phoneme is processed.
 *
 * The numeric values are written out explicitly; they are the same values the
 * compiler would auto-assign in declaration order.
 */
enum PhonemeStatus {
    /** A long vowel (not an ending-only one) at the beginning of a word. */
    LeadingLongVowel = 0,
    /** At the beginning of a word, a phoneme whose matches or beginning matches include the current letter. */
    LeadingConsonantOrShortVowel = 1,
    /** The phoneme was flagged as needing a tashdeed. */
    DoubleConsonantTashdeed = 2,
    /** At the end of a word: "u" written with ه, or "h" written with ه or ح. */
    EndingWithHeyHim = 3,
    /** The current letter directly matches the phoneme (including ending matches and "m" written as ن before ب). */
    DirectMatch = 4,
    /** A phoneme that has a diacritic and is not a long vowel. */
    ShortVowel = 5,
}
function processPhoneme( function processPhoneme(
acc: DiacriticsAccumulator, acc: DiacriticsAccumulator,
phoneme: Phoneme, phoneme: Phoneme,
@ -346,64 +355,46 @@ function processPhoneme(
const { const {
phonemeInfo, phonemeInfo,
isBeginningOfWord,
currentPLetter,
needsTashdeed,
sukunOrDiacritic, sukunOrDiacritic,
nextPLetter, phonemeState,
isEndOfWord,
} = stateInfo({ state, i, phoneme, phonemes }); } = stateInfo({ state, i, phoneme, phonemes });
// if it's not an exception (TODO) const p = phonemeState
// it must be one of the following 5 possibilities
// 1. beginning a word with a long vowel return (p === PhonemeStatus.LeadingLongVowel) ?
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { pipe(
if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
throw Error("phonetics error - needs alef prefix");
}
return pipe(
advanceP, advanceP,
addP(phonemeInfo.diacritic), addP(phonemeInfo.diacritic),
advanceP, advanceP,
)(state); )(state)
// 2. beginning a word with something else : (p === PhonemeStatus.LeadingConsonantOrShortVowel) ?
} else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { pipe(
return pipe(
advanceP, advanceP,
addP(sukunOrDiacritic), addP(sukunOrDiacritic),
advanceForAin, advanceForAin,
)(state); )(state)
// 3. double consonant to be marked with tashdeed : (p === PhonemeStatus.DoubleConsonantTashdeed) ?
} else if (needsTashdeed) { pipe(
return pipe(
addP(tashdeed) addP(tashdeed)
)(state); )(state)
// 4. special ه ending : (p === PhonemeStatus.EndingWithHeyHim) ?
} else if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) { pipe(
return pipe(
advanceP, advanceP,
addP(phoneme === "u" ? hamzaAbove : sukun), addP(phoneme === "u" ? hamzaAbove : sukun),
)(state); )(state)
// 5. direct match of phoneme / P letter : (p === PhonemeStatus.DirectMatch) ?
} else if (phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب")) { pipe(
return pipe(
addP(sukunOrDiacritic), addP(sukunOrDiacritic),
advanceP, advanceP,
)(state); )(state)
// 6. just a diacritic for short vowel : // p === PhonemeStatus.ShortVowel
} else if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { pipe(
return pipe(
advanceForHamzaMid, advanceForHamzaMid,
addP(phonemeInfo.diacritic), addP(phonemeInfo.diacritic),
advanceForAinOrHamza, advanceForAinOrHamza,
)(state); )(state);
}
// anything that gets to this point is a failure/error
throw new Error("phonetics error");
} }
function stateInfo({ state, i, phonemes, phoneme }: { function stateInfo({ state, i, phonemes, phoneme }: {
state: DiacriticsAccumulator, state: DiacriticsAccumulator,
i: number, i: number,
@ -425,8 +416,36 @@ function stateInfo({ state, i, phonemes, phoneme }: {
const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter)); const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic; const diacritic = isEndOfWord ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
const sukunOrDiacritic = (needsSukun ? sukun : diacritic); const sukunOrDiacritic = (needsSukun ? sukun : diacritic);
/**
 * Works out which PhonemeStatus applies to the current phoneme, using the
 * values already computed in the enclosing scope (word position, current and
 * next letter, phoneme info, tashdeed flag).
 *
 * The checks run in a fixed priority order and the first one that holds wins,
 * so the order of the guard clauses below is significant.
 *
 * @returns the status describing how this phoneme should be handled
 * @throws Error when a word-initial long vowel other than "aa" has neither an
 *   alef as the current letter nor the next letter among its matches
 * @throws Error when none of the known statuses applies to the phoneme
 */
function getPhonemeState(): PhonemeStatus {
    const currentIsDirectMatch = phonemeInfo.matches?.includes(currentPLetter);

    // 1. a word that opens with a long vowel (ending-only vowels excluded)
    const opensWithLongVowel = isBeginningOfWord && phonemeInfo.longVowel && !phonemeInfo.endingOnly;
    if (opensWithLongVowel) {
        // anything but "aa" must either sit on an alef or be followed by one of its own letters
        const alefAccountedFor = phoneme === "aa"
            || currentPLetter === "ا"
            || phonemeInfo.matches?.includes(nextPLetter);
        if (!alefAccountedFor) {
            throw Error("phonetics error - needs alef prefix");
        }
        return PhonemeStatus.LeadingLongVowel;
    }

    // 2. a word that opens with anything else that lines up with the current letter
    const opensWithMatch = isBeginningOfWord
        && (phonemeInfo.beginningMatches?.includes(currentPLetter) || currentIsDirectMatch);
    if (opensWithMatch) {
        return PhonemeStatus.LeadingConsonantOrShortVowel;
    }

    // 3. the phoneme was flagged as needing a tashdeed
    if (needsTashdeed) {
        return PhonemeStatus.DoubleConsonantTashdeed;
    }

    // 4. word-final "u" on ه, or word-final "h" on ه / ح
    const uOnHe = phoneme === "u" && currentPLetter === "ه";
    const hOnHeLetter = phoneme === "h" && ["ه", "ح"].includes(currentPLetter);
    if (isEndOfWord && (uOnHe || hOnHeLetter)) {
        return PhonemeStatus.EndingWithHeyHim;
    }

    // 5. the letter matches the phoneme directly, via an ending-only match,
    //    or via the special case of "m" written as ن before ب
    const endingMatch = isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter);
    const mWrittenAsNoon = phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب";
    if (currentIsDirectMatch || endingMatch || mWrittenAsNoon) {
        return PhonemeStatus.DirectMatch;
    }

    // 6. nothing to line up with: a short vowel that only contributes a diacritic
    const diacriticOnly = phonemeInfo.diacritic && !phonemeInfo.longVowel;
    if (diacriticOnly) {
        return PhonemeStatus.ShortVowel;
    }

    // nothing applied, so the phonetics and the script text could not be reconciled
    throw new Error("phonetics error - no status found for phoneme: " + phoneme);
}
const phonemeState = getPhonemeState();
return { return {
phonemeInfo, isBeginningOfWord, currentPLetter, needsTashdeed, sukunOrDiacritic, nextPLetter, isEndOfWord, phonemeState, phonemeInfo, sukunOrDiacritic,
}; };
}; };