This commit is contained in:
Bill D 2021-05-07 11:52:25 +03:00
parent 98c5eb7452
commit 3aaee3b6f2
2 changed files with 43 additions and 26 deletions

View File

@ -250,6 +250,14 @@ const diacriticsTest: Array<{
}, },
out: "اِسْلام", out: "اِسْلام",
}, },
// starting long vowels with ا
{
in: {
p: "ایسار",
f: "eesaar",
},
out: "اِیسار",
},
// double consonant // double consonant
{ {
in: { in: {
@ -288,6 +296,10 @@ const brokenDiacritics = [
p: "تشناب", p: "تشناب",
f: "peshnaab", f: "peshnaab",
}, },
{
p: "وسېدل",
f: "osedul",
},
]; ];
const badPhonetics: Array<{ const badPhonetics: Array<{

View File

@ -38,7 +38,7 @@ type PhonemeInfo = {
diacritic?: string, diacritic?: string,
endingOnly?: true, endingOnly?: true,
takesSukunOnEnding?: true, takesSukunOnEnding?: true,
addAlefOnBeginning?: true, longVowel?: true,
canStartWithAynBefore?: true, canStartWithAynBefore?: true,
} }
@ -188,39 +188,42 @@ const phonemeTable: Record<Phoneme, PhonemeInfo> = {
matches: ["ا"], matches: ["ا"],
beginningMatches: ["آ", "ا"], beginningMatches: ["آ", "ا"],
endingMatches: ["ا", "یٰ"], endingMatches: ["ا", "یٰ"],
longVowel: true,
}, },
"ee": { "ee": {
matches: ["ی"], matches: ["ی"],
addAlefOnBeginning: true, longVowel: true,
endingMatches: ["ي"], endingMatches: ["ي"],
diacritic: zer, diacritic: zer,
canStartWithAynBefore: true canStartWithAynBefore: true
}, },
"e": { "e": {
matches: ["ې"], matches: ["ې"],
addAlefOnBeginning: true, longVowel: true,
}, },
"o": { "o": {
matches: ["و"], matches: ["و"],
addAlefOnBeginning: true, longVowel: true,
}, },
"oo": { "oo": {
matches: ["و"], matches: ["و"],
addAlefOnBeginning: true, longVowel: true,
// alsoCanBePrefix: true, // alsoCanBePrefix: true,
diacritic: pesh, diacritic: pesh,
}, },
"ey": { "ey": {
matches: ["ی"], matches: ["ی"],
addAlefOnBeginning: true, longVowel: true,
endingMatches: ["ی"], endingMatches: ["ی"],
}, },
"uy": { "uy": {
matches: ["ۍ"], matches: ["ۍ"],
longVowel: true,
endingOnly: true, endingOnly: true,
}, },
"eyy": { "eyy": {
matches: ["ئ"], matches: ["ئ"],
longVowel: true,
endingOnly: true, endingOnly: true,
}, },
// Short Vowels // Short Vowels
@ -336,7 +339,7 @@ function processPhoneme(
const prevPLetter = last(state.pOut); const prevPLetter = last(state.pOut);
const currentPLetter = state.pIn[0]; const currentPLetter = state.pIn[0];
// const nextPLetter = state.pIn[1]; const nextPLetter = state.pIn[1];
const isBeginningOfWord = state.pOut === "" || prevPLetter === " "; const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
const phonemeInfo = phonemeTable[phoneme]; const phonemeInfo = phonemeTable[phoneme];
const previousPhoneme = i > 0 && phonemes[i-1]; const previousPhoneme = i > 0 && phonemes[i-1];
@ -346,34 +349,29 @@ function processPhoneme(
const needsSukun = doubleConsonant && (previousPhoneme !== phoneme); const needsSukun = doubleConsonant && (previousPhoneme !== phoneme);
if (needsTashdeed) { if (needsTashdeed) {
return { return addP(state, tashdeed);
pOut: state.pOut + tashdeed,
pIn: state.pIn,
};
} }
// TODO: Beginning of word with long vowels and alef etc. if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
throw Error("phonetics error - needs alef prefix");
}
const ns = advanceP(state); const ns = advanceP(state);
return { const ns2 = phonemeInfo.diacritic ? addP(ns, phonemeInfo.diacritic) : ns;
...ns, return advanceP(ns2);
pOut: ns.pOut + (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""), } else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
}; const ns = advanceP(state);
return addP(ns, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""));
} else if (phonemeInfo.matches?.includes(currentPLetter)) { } else if (phonemeInfo.matches?.includes(currentPLetter)) {
return advanceP({ const ns = addP(state, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""));
...state, return advanceP(ns);
pOut: state.pOut
+ (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""),
});
} }
if (phonemeInfo.diacritic) { if (phonemeInfo.diacritic) {
return { return addP(state, phonemeInfo.diacritic);
...state,
pOut: state.pOut + phonemeInfo.diacritic,
};
} }
// console.log(state);
throw new Error("phonetics error"); throw new Error("phonetics error");
} }
@ -391,4 +389,11 @@ function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumu
pOut: state.pOut + state.pIn.slice(0, n), pOut: state.pOut + state.pIn.slice(0, n),
pIn: state.pIn.slice(n), pIn: state.pIn.slice(n),
} }
}
/**
 * Appends text to the accumulated output without consuming any input.
 *
 * The incoming accumulator is not mutated; a shallow copy is returned whose
 * `pOut` is the previous `pOut` followed by `toAdd`. Every other field of
 * the accumulator is carried over unchanged.
 *
 * @param state - the accumulator to extend
 * @param toAdd - the text to append to `state.pOut`
 * @returns a new accumulator with the extended `pOut`
 */
function addP(state: DiacriticsAccumulator, toAdd: string): DiacriticsAccumulator {
    const pOut = state.pOut + toAdd;
    return { ...state, pOut };
}