more stuff, almost working for izafe
This commit is contained in:
parent
c1ee7d3289
commit
9f9edde731
|
@ -545,6 +545,30 @@ const diacriticsTest: Array<{
|
||||||
},
|
},
|
||||||
out: "فائِدَه",
|
out: "فائِدَه",
|
||||||
},
|
},
|
||||||
|
// خوا being khaa (silent و) in the middle of a word
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "استخوان",
|
||||||
|
f: "UstUkháan",
|
||||||
|
},
|
||||||
|
out: "اُسْتُخ(و)ان",
|
||||||
|
},
|
||||||
|
// Arabic wasla
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "بالکل",
|
||||||
|
f: "bilkUl",
|
||||||
|
},
|
||||||
|
out: "بِٱلْکُل",
|
||||||
|
},
|
||||||
|
// izafe
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "ایصال ثواب",
|
||||||
|
f: "eesaal-i-sawaab",
|
||||||
|
},
|
||||||
|
out: "اِیصالِ ثَواب",
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
phonemeSplits.forEach((s) => {
|
phonemeSplits.forEach((s) => {
|
||||||
|
|
|
@ -256,6 +256,22 @@ const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds diacritics to a given PsString.
|
||||||
|
* Errors if the phonetics and script don't line up.
*
* The phonetics are split into phonemes, which are then folded over the
* script with processPhoneme, starting from an empty output (pOut) and the
* whole script as the remaining input (pIn).
*
* @param ps - the PsString to process, destructured into script `p` and phonetics `f`
* @param ignoreCommas - when omitted, `f` is passed through firstPhonetics
*   before being split (presumably to keep only the first comma-separated
*   variant - TODO confirm); when `true`, `f` is split exactly as given
* @returns a PsString whose `p` is the accumulated output and whose `f` is
*   the original phonetics, unchanged
* @throws Error when script characters are still unconsumed after every
*   phoneme has been processed
|
||||||
|
*/
|
||||||
|
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
|
||||||
|
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? firstPhonetics(f) : f);
|
||||||
|
// walk the phonemes, moving script from pIn to pOut and adding diacritics along the way
const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p });
|
||||||
|
// leftover script means the phonemes ran out before the script did
if (pIn !== "") {
|
||||||
|
throw new Error("phonetics error - phonetics shorter than pashto script");
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
p: pOut,
|
||||||
|
f,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* splits a phonetics string into an array of Phonemes
|
* splits a phonetics string into an array of Phonemes
|
||||||
*
|
*
|
||||||
|
@ -311,24 +327,6 @@ export function splitFIntoPhonemes(fIn: string): Phoneme[] {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds diacritics to a given PsString.
|
|
||||||
* Errors if the phonetics and script don't line up.
|
|
||||||
*
|
|
||||||
* @param ps a PsString without diacritics
|
|
||||||
*/
|
|
||||||
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
|
|
||||||
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? firstPhonetics(f) : f);
|
|
||||||
const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p });
|
|
||||||
if (pIn !== "") {
|
|
||||||
throw new Error("phonetics error - phonetics shorter than pashto script");
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
p: pOut,
|
|
||||||
f,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
enum PhonemeStatus {
|
enum PhonemeStatus {
|
||||||
LeadingLongVowel,
|
LeadingLongVowel,
|
||||||
LeadingConsonantOrShortVowel,
|
LeadingConsonantOrShortVowel,
|
||||||
|
@ -336,6 +334,9 @@ enum PhonemeStatus {
|
||||||
EndingWithHeyHim,
|
EndingWithHeyHim,
|
||||||
DirectMatch,
|
DirectMatch,
|
||||||
ShortVowel,
|
ShortVowel,
|
||||||
|
PersianSilentWWithAa,
|
||||||
|
ArabicWasla,
|
||||||
|
Izafe,
|
||||||
}
|
}
|
||||||
|
|
||||||
function processPhoneme(
|
function processPhoneme(
|
||||||
|
@ -356,38 +357,57 @@ function processPhoneme(
|
||||||
const {
|
const {
|
||||||
phonemeInfo,
|
phonemeInfo,
|
||||||
sukunOrDiacritic,
|
sukunOrDiacritic,
|
||||||
phonemeState,
|
phs,
|
||||||
} = stateInfo({ state, i, phoneme, phonemes });
|
} = stateInfo({ state, i, phoneme, phonemes });
|
||||||
|
|
||||||
const p = phonemeState
|
return (phs === PhonemeStatus.LeadingLongVowel) ?
|
||||||
|
|
||||||
return (p === PhonemeStatus.LeadingLongVowel) ?
|
|
||||||
pipe(
|
pipe(
|
||||||
advanceP,
|
advanceP,
|
||||||
addP(phonemeInfo.diacritic),
|
addP(phonemeInfo.diacritic),
|
||||||
advanceP,
|
advanceP,
|
||||||
)(state)
|
)(state)
|
||||||
: (p === PhonemeStatus.LeadingConsonantOrShortVowel) ?
|
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
|
||||||
pipe(
|
pipe(
|
||||||
advanceP,
|
advanceP,
|
||||||
addP(sukunOrDiacritic),
|
addP(sukunOrDiacritic),
|
||||||
advanceForAin,
|
advanceForAin,
|
||||||
)(state)
|
)(state)
|
||||||
: (p === PhonemeStatus.DoubleConsonantTashdeed) ?
|
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
|
||||||
pipe(
|
pipe(
|
||||||
addP(tashdeed)
|
addP(tashdeed)
|
||||||
)(state)
|
)(state)
|
||||||
: (p === PhonemeStatus.EndingWithHeyHim) ?
|
: (phs === PhonemeStatus.EndingWithHeyHim) ?
|
||||||
pipe(
|
pipe(
|
||||||
advanceP,
|
advanceP,
|
||||||
addP(phoneme === "u" ? hamzaAbove : sukun),
|
addP(phoneme === "u" ? hamzaAbove : sukun),
|
||||||
)(state)
|
)(state)
|
||||||
: (p === PhonemeStatus.DirectMatch) ?
|
: (phs === PhonemeStatus.DirectMatch) ?
|
||||||
pipe(
|
pipe(
|
||||||
addP(sukunOrDiacritic),
|
addP(sukunOrDiacritic),
|
||||||
advanceP,
|
advanceP,
|
||||||
)(state)
|
)(state)
|
||||||
: // p === PhonemeState.ShortVowel
|
: (phs === PhonemeStatus.PersianSilentWWithAa) ?
|
||||||
|
pipe(
|
||||||
|
addP("("),
|
||||||
|
advanceP,
|
||||||
|
addP(")"),
|
||||||
|
advanceP,
|
||||||
|
)(state)
|
||||||
|
: (phs === PhonemeStatus.ArabicWasla) ?
|
||||||
|
pipe(
|
||||||
|
addP(zer),
|
||||||
|
overwriteP(wasla),
|
||||||
|
)(state)
|
||||||
|
: (phs === PhonemeStatus.Izafe) ?
|
||||||
|
(console.log(pipe(
|
||||||
|
reverseP,
|
||||||
|
addP(zer),
|
||||||
|
)(state), phoneme), pipe(
|
||||||
|
reverseP,
|
||||||
|
addP(zer),
|
||||||
|
)(state))
|
||||||
|
:
|
||||||
|
// phs === PhonemeStatus.ShortVowel
|
||||||
pipe(
|
pipe(
|
||||||
advanceForHamzaMid,
|
advanceForHamzaMid,
|
||||||
addP(phonemeInfo.diacritic),
|
addP(phonemeInfo.diacritic),
|
||||||
|
@ -427,6 +447,15 @@ function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
|
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
|
||||||
return PhonemeStatus.LeadingConsonantOrShortVowel;
|
return PhonemeStatus.LeadingConsonantOrShortVowel;
|
||||||
}
|
}
|
||||||
|
if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
|
||||||
|
return PhonemeStatus.PersianSilentWWithAa;
|
||||||
|
}
|
||||||
|
if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
|
||||||
|
return PhonemeStatus.ArabicWasla;
|
||||||
|
}
|
||||||
|
if (phoneme === "-i-" && isBeginningOfWord) {
|
||||||
|
return PhonemeStatus.Izafe;
|
||||||
|
}
|
||||||
if (needsTashdeed) {
|
if (needsTashdeed) {
|
||||||
return PhonemeStatus.DoubleConsonantTashdeed;
|
return PhonemeStatus.DoubleConsonantTashdeed;
|
||||||
}
|
}
|
||||||
|
@ -442,10 +471,10 @@ function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
throw new Error("phonetics error - no status found for phoneme: " + phoneme);
|
throw new Error("phonetics error - no status found for phoneme: " + phoneme);
|
||||||
}
|
}
|
||||||
|
|
||||||
const phonemeState = getPhonemeState();
|
const phs = getPhonemeState();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
phonemeState, phonemeInfo, sukunOrDiacritic,
|
phs, phonemeInfo, sukunOrDiacritic,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -460,9 +489,16 @@ function last(s: string) {
|
||||||
|
|
||||||
function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
|
function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
|
||||||
return {
|
return {
|
||||||
pOut: state.pOut + state.pIn.slice(0, n),
|
|
||||||
pIn: state.pIn.slice(n),
|
pIn: state.pIn.slice(n),
|
||||||
}
|
pOut: state.pOut + state.pIn.slice(0, n),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Steps the accumulator back by one character: the inverse of a single-step
 * advanceP.
 *
 * The last character of the output is removed and put back at the FRONT of
 * the remaining input, so it is the next character to be consumed again.
 * (Appending it to the end of the input instead would leave a stray trailing
 * character that is never consumed.)
 *
 * @param state - the current accumulator
 * @returns a new accumulator with one character moved from the end of `pOut`
 *   to the start of `pIn`; when `pOut` is empty the contents are unchanged
 */
function reverseP(state: DiacriticsAccumulator): DiacriticsAccumulator {
    return {
        // prepend, so the character keeps its original position in the script
        pIn: state.pOut.slice(-1) + state.pIn,
        pOut: state.pOut.slice(0, -1),
    };
}
|
||||||
|
|
||||||
const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
||||||
|
@ -470,7 +506,14 @@ const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): Diac
|
||||||
...state,
|
...state,
|
||||||
pOut: toAdd ? (state.pOut + toAdd) : state.pOut,
|
pOut: toAdd ? (state.pOut + toAdd) : state.pOut,
|
||||||
};
|
};
|
||||||
}
|
};
|
||||||
|
|
||||||
|
/**
 * Builds a state transformer that replaces the next script character:
 * the first character of the remaining input (`pIn`) is dropped and
 * `toWrite` is appended to the output (`pOut`) in its place.
 *
 * @param toWrite - the text to append to the output instead of the dropped character
 * @returns a function from one accumulator to the next
 */
const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
||||||
|
return {
|
||||||
|
// consume one character of input without copying it to the output
pIn: state.pIn.slice(1),
|
||||||
|
pOut: state.pOut + toWrite,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
|
function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
|
||||||
return {
|
return {
|
||||||
|
|
Loading…
Reference in New Issue