2021-05-06 20:28:03 +00:00
|
|
|
|
/**
|
|
|
|
|
* Copyright (c) 2021 lingdocs.com
|
|
|
|
|
*
|
|
|
|
|
* This source code is licensed under the MIT license found in the
|
|
|
|
|
* LICENSE file in the root directory of this source tree.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
import * as T from "../types";
|
|
|
|
|
import { removeAccents } from "./accent-helpers";
|
|
|
|
|
import { firstPhonetics } from "./p-text-helpers";
|
2021-05-07 11:48:33 +00:00
|
|
|
|
import { pipe } from "rambda";
|
2021-05-06 20:28:03 +00:00
|
|
|
|
|
|
|
|
|
// Marks and special letters that get written into the Pashto script output.

// Short-vowel diacritics (referenced from phonemeTable)
const zwar = "َ";
const zwarakey = "ٙ";
const zer = "ِ";
const pesh = "ُ";

// Marks applied by the phoneme processor: vowel-less consonant, hamza over a
// final hey, and doubled consonant
const sukun = "ْ";
const hamzaAbove = "ٔ";
const tashdeed = "ّ";

// Letter that replaces an alef in the Arabic wasla case
const wasla = "ٱ";

// NOTE(review): the next two are not referenced anywhere in the visible code
const daggerAlif = "ٰ";
const fathahan = "ً";
|
|
|
|
|
|
|
|
|
|
/** Consonant symbols of the phonetic (latin-letter) transcription */
type Consonant =
    | "b" | "p" | "t" | "T" | "s" | "j" | "ch" | "kh" | "ts" | "dz"
    | "d" | "D" | "r" | "R" | "z" | "jz" | "G" | "sh" | "x" | "gh"
    | "f" | "q" | "k" | "g" | "l" | "m" | "n" | "N" | "h" | "w" | "y";

/** The apostrophe symbol, kept as its own category */
type Ain = "'";

/** Hyphen-wrapped symbols that sit between two words */
type JoiningVowel =
    | "-i-"
    | "-U-"
    | "-Ul-";

/** Vowels flagged as `longVowel` in the phoneme table */
type LongVowel =
    | "aa"
    | "ee"
    | "e"
    | "oo"
    | "o"
    | "ey"
    | "uy"
    | "eyy";

/** Vowels that are represented by a diacritic in the phoneme table */
type ShortVowel =
    | "a"
    | "i"
    | "u"
    | "U";

/** Any single unit that a phonetics string can be split into */
type Phoneme =
    | Consonant
    | Ain
    | LongVowel
    | ShortVowel
    | JoiningVowel;
|
|
|
|
|
|
2021-05-07 07:54:09 +00:00
|
|
|
|
/**
 * State carried from one phoneme to the next while diacritics are added
 */
type DiacriticsAccumulator = {
    /** the part of the Pashto script that has not been consumed yet */
    pIn: string;
    /** the Pashto script produced so far, including added marks */
    pOut: string;
};
|
|
|
|
|
|
2021-05-06 20:28:03 +00:00
|
|
|
|
/**
 * Describes how one phoneme relates to the Pashto script
 */
type PhonemeInfo = {
    // --- script letters the phoneme may be written with ---
    /** letters accepted at any position in a word */
    matches?: string[];
    /** letters additionally accepted at the beginning of a word */
    beginningMatches?: string[];
    /** letters additionally accepted at the end of a word */
    endingMatches?: string[];

    // --- classification ---
    /** set for consonants; consulted when looking for consonant pairs */
    consonant?: true;
    /** set for long vowels */
    longVowel?: true;
    /** set for phonemes that are not treated as a leading long vowel */
    endingOnly?: true;

    // --- diacritic handling ---
    /** mark to add to the script for this phoneme */
    diacritic?: string;
    /** keeps the diacritic of a long vowel at the end of a word */
    useEndingDiacritic?: true;

    // NOTE(review): the two flags below are set in the phoneme table but are
    // not read anywhere in the visible code
    takesSukunOnEnding?: true;
    canStartWithAynBefore?: true;
};
|
|
|
|
|
|
|
|
|
|
/**
 * For every phoneme: the script letters it can be written with, its
 * classification flags and the diacritic (if any) that represents it.
 */
const phonemeTable: Record<Phoneme, PhonemeInfo> = {
    // Consonants
    "b": { consonant: true, matches: ["ب"] },
    "p": { consonant: true, matches: ["پ"] },
    "t": { consonant: true, matches: ["ت", "ط"] },
    "T": { consonant: true, matches: ["ټ"] },
    "s": { consonant: true, matches: ["س", "ص", "ث"] },
    "j": { consonant: true, matches: ["ج"] },
    "ch": { consonant: true, matches: ["چ"] },
    "kh": { consonant: true, matches: ["خ"] },
    "ts": { consonant: true, matches: ["څ"] },
    "dz": { consonant: true, matches: ["ځ"] },
    "d": { consonant: true, matches: ["د"] },
    "D": { consonant: true, matches: ["ډ"] },
    "r": { consonant: true, matches: ["ر"] },
    "R": { consonant: true, matches: ["ړ"] },
    "z": { consonant: true, matches: ["ز", "ذ", "ظ", "ض"] },
    "jz": { consonant: true, matches: ["ژ"] },
    "G": { consonant: true, matches: ["ږ"] },
    "sh": { consonant: true, matches: ["ش"] },
    "x": { consonant: true, matches: ["ښ"] },
    "gh": { consonant: true, matches: ["غ"] },
    "f": { consonant: true, matches: ["ف"] },
    "q": { consonant: true, matches: ["ق"] },
    "k": { consonant: true, matches: ["ک"] },
    "g": { consonant: true, matches: ["ګ"] },
    "l": { consonant: true, matches: ["ل"] },
    "m": { consonant: true, matches: ["م"] },
    "n": { consonant: true, matches: ["ن"] },
    "N": { consonant: true, matches: ["ڼ"] },
    "h": { consonant: true, matches: ["ه", "ح"], takesSukunOnEnding: true },
    "w": { consonant: true, matches: ["و"] },
    "y": { consonant: true, matches: ["ی"] },

    // Ain
    "'": { consonant: true, matches: ["ع", "ئ"] },

    // Joining Vowels
    "-i-": {},
    "-U-": { matches: [" و ", "و"] },
    "-Ul-": { matches: ["ال"] },

    // Long Vowels
    "aa": {
        longVowel: true,
        matches: ["ا"],
        beginningMatches: ["آ", "ا"],
        endingMatches: ["ا", "یٰ"],
    },
    "ee": {
        longVowel: true,
        matches: ["ی"],
        endingMatches: ["ي"],
        diacritic: zer,
        canStartWithAynBefore: true,
    },
    "e": { longVowel: true, matches: ["ې"] },
    "o": { longVowel: true, matches: ["و"] },
    "oo": {
        longVowel: true,
        matches: ["و"],
        // alsoCanBePrefix: true,
        diacritic: pesh,
        useEndingDiacritic: true,
    },
    "ey": { longVowel: true, matches: ["ی"], endingMatches: ["ی"] },
    "uy": { longVowel: true, matches: ["ۍ"], endingOnly: true },
    "eyy": { longVowel: true, matches: ["ئ"], endingOnly: true },

    // Short Vowels
    "a": {
        diacritic: zwar,
        beginningMatches: ["ا", "ع"],
        endingMatches: ["ه"],
        // canComeAfterHeyEnding: true,
        // canBeFirstPartOfFathahanEnding: true,
    },
    "u": {
        diacritic: zwarakey,
        endingMatches: ["ه"],
    },
    "i": {
        diacritic: zer,
        beginningMatches: ["ا", "ع"],
        endingMatches: ["ه"],
        // takesDiacriticBeforeGurdaHeyEnding: true,
        // canBeWasla: true,
    },
    "U": {
        diacritic: pesh,
        beginningMatches: ["ا", "ع"],
        endingMatches: ["ه"],
        // takesDiacriticBeforeGurdaHeyEnding: true,
    },
}
|
|
|
|
|
|
2021-05-13 09:54:46 +00:00
|
|
|
|
/**
 * Produces a copy of a PsString whose Pashto script carries diacritics
 * worked out from its phonetics.
 *
 * The first argument supplies the script (`p`) and the phonetics (`f`).
 *
 * @param ignoreCommas when `true` the phonetics are used exactly as given;
 * otherwise they are first passed through `firstPhonetics`
 * @returns the script with diacritics as `p`, and the untouched `f`
 * @throws if the phonetics and script don't line up
 */
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
    const phoneticsToSplit = ignoreCommas ? f : firstPhonetics(f);
    const startingState: DiacriticsAccumulator = { pIn: p, pOut: "" };
    const finished = splitFIntoPhonemes(phoneticsToSplit).reduce(processPhoneme, startingState);
    // every letter of the script has to be consumed by some phoneme
    if (finished.pIn !== "") {
        throw new Error("phonetics error - phonetics shorter than pashto script");
    }
    return { p: finished.pOut, f };
}
|
|
|
|
|
|
2021-05-06 20:48:53 +00:00
|
|
|
|
/**
 * splits a phonetics string into an array of Phonemes
 *
 * Accents are removed first. The longest possible chunk is tried at each
 * position (four letters, then three, then two, then one). Spaces,
 * punctuation, backticks and apostrophes are skipped.
 *
 * will error if there is an illegal phonetics character
 *
 * @param fIn a phonetics string
 * @returns an array of phonemes
 * @throws Error when a character is neither a known phoneme nor ignorable
 */
export function splitFIntoPhonemes(fIn: string): Phoneme[] {
    const singleLetterPhonemes: Phoneme[] = ["a", "i", "u", "o", "e", "U", "b", "p", "t", "T", "s", "j", "d", "D", "r", "R", "z", "G", "x", "f", "q", "k", "g", "l", "m", "n", "N", "h", "w", "y"];

    const quadrigraphs: Phoneme[] = ["-Ul-"];
    const trigraphs: Phoneme[] = ["eyy", "-i-", "-U-"];
    const digraphs: Phoneme[] = ["aa", "ee", "ey", "oo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
    // only counted as one phoneme when they close a word
    const endingDigraphs: Phoneme[] = ["uy"];
    const willIgnore = ["?", " ", "`", ".", "…", ",", "'"];
    // A word is closed by a space or by ignored punctuation. Checking only
    // for a space would split a word-final "uy" into "u" + "y" whenever the
    // word is followed directly by punctuation (e.g. "...uy," or "...uy?").
    const wordClosers = [" ", "?", ".", "…", ","];

    const result: Phoneme[] = [];
    const f = removeAccents(fIn);
    let index = 0;
    while (index < f.length) {
        const isLastTwoLetters = (index === f.length - 2 || wordClosers.includes(f[index + 2]));
        const threeLetterChunk = f.slice(index, index + 3) as Phoneme;
        const fourLetterChunk = f.slice(index, index + 4) as Phoneme;
        if (quadrigraphs.includes(fourLetterChunk)) {
            result.push(fourLetterChunk);
            index += 4;
            continue;
        }
        if (trigraphs.includes(threeLetterChunk)) {
            result.push(threeLetterChunk);
            index += 3;
            continue;
        }
        const twoLetterChunk = f.slice(index, index + 2) as Phoneme;
        if (
            digraphs.includes(twoLetterChunk) ||
            (isLastTwoLetters && endingDigraphs.includes(twoLetterChunk))
        ) {
            result.push(twoLetterChunk);
            index += 2;
            continue;
        }
        const singleLetter = f.slice(index, index + 1) as Phoneme;
        if (!willIgnore.includes(singleLetter)) {
            if (!singleLetterPhonemes.includes(singleLetter)) {
                throw new Error(`illegal phonetic character: ${singleLetter}`);
            }
            result.push(singleLetter);
        }
        index++;
    }
    return result;
}
|
|
|
|
|
|
2021-05-09 17:08:35 +00:00
|
|
|
|
/**
 * The situations a phoneme can be in relative to the script; each one is
 * handled by its own sequence of steps in processPhoneme.
 */
enum PhonemeStatus {
    /** a long vowel (other than an ending-only one) that starts a word */
    LeadingLongVowel = 0,
    /** a word-initial phoneme whose letter matches the current script letter */
    LeadingConsonantOrShortVowel = 1,
    /** a repeated consonant written once in the script */
    DoubleConsonantTashdeed = 2,
    /** word-final "u" written with ه, or word-final "h" written with ه / ح */
    EndingWithHeyHim = 3,
    /** the current script letter is one of the phoneme's letters */
    DirectMatch = 4,
    /** a phoneme with a diacritic that is not a long vowel */
    ShortVowel = 5,
    /** "aa" written as و followed by ا inside a word */
    PersianSilentWWithAa = 6,
    /** "i" written as ا followed by ل inside a word */
    ArabicWasla = 7,
    /** the joining vowel "-i-" at a word boundary */
    Izafe = 8,
}
|
|
|
|
|
|
2021-05-07 07:54:09 +00:00
|
|
|
|
/**
 * Reducer step: consumes the script letter(s) belonging to one phoneme and
 * appends them, plus any diacritic, to the output.
 *
 * @param acc the state left by the previous phoneme
 * @param phoneme the phoneme to process
 * @param i index of the phoneme in `phonemes`
 * @param phonemes all phonemes of the text
 * @returns the state after this phoneme
 */
function processPhoneme(
    acc: DiacriticsAccumulator,
    phoneme: Phoneme,
    i: number,
    phonemes: Phoneme[],
) {
    // Prep state: step over a space waiting at the front of the input
    const state = acc.pIn[0] === " " ? advanceP(acc) : acc;
    // WARNING: Do not use acc after this point!

    const { phonemeInfo, sukunOrDiacritic, phs } = stateInfo({ state, i, phoneme, phonemes });

    switch (phs) {
        case PhonemeStatus.LeadingLongVowel:
            // take the first letter, mark it, then take the second letter
            return pipe(
                advanceP,
                addP(phonemeInfo.diacritic),
                advanceP,
            )(state);
        case PhonemeStatus.LeadingConsonantOrShortVowel:
            return pipe(
                advanceP,
                addP(sukunOrDiacritic),
                advanceForAin,
            )(state);
        case PhonemeStatus.DoubleConsonantTashdeed:
            // no letter is consumed, the mark lands on the previous letter
            return pipe(
                addP(tashdeed)
            )(state);
        case PhonemeStatus.EndingWithHeyHim:
            return pipe(
                advanceP,
                addP(phoneme === "u" ? hamzaAbove : sukun),
            )(state);
        case PhonemeStatus.DirectMatch:
            return pipe(
                addP(sukunOrDiacritic),
                advanceP,
            )(state);
        case PhonemeStatus.PersianSilentWWithAa:
            // wrap the first of the two letters in parentheses
            return pipe(
                addP("("),
                advanceP,
                addP(")"),
                advanceP,
            )(state);
        case PhonemeStatus.ArabicWasla:
            return pipe(
                addP(zer),
                overwriteP(wasla),
            )(state);
        case PhonemeStatus.Izafe:
            // put the last output character back before adding the mark
            return pipe(
                reverseP,
                addP(zer),
            )(state);
        default:
            // phs === PhonemeStatus.ShortVowel
            return pipe(
                advanceForHamzaMid,
                addP(phonemeInfo.diacritic),
                advanceForAinOrHamza,
            )(state);
    }
}
|
2021-05-08 18:31:59 +00:00
|
|
|
|
|
|
|
|
|
/**
 * Works out, for one phoneme and the current position in the script, which
 * PhonemeStatus applies and which mark (sukun or the phoneme's diacritic,
 * if any) goes with it.
 *
 * @returns the status, the table entry of the phoneme and the mark to add
 * @throws when no status fits, or a leading long vowel lacks its alef
 */
function stateInfo({ state, i, phonemes, phoneme }: {
    state: DiacriticsAccumulator,
    i: number,
    phonemes: Phoneme[],
    phoneme: Phoneme,
}) {
    // script letters around the current position
    const currentPLetter = state.pIn[0];
    const nextPLetter = state.pIn[1];
    const prevPLetter = last(state.pOut);
    const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
    const isEndOfWord = !nextPLetter || nextPLetter === " ";

    // table entries for this phoneme and the one before it (same word only)
    const phonemeInfo = phonemeTable[phoneme];
    const previousPhoneme = i > 0 && phonemes[i-1];
    const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
    const currentLetterMatches = phonemeInfo.matches?.includes(currentPLetter);

    // two consonants in a row: the same consonant written once takes a
    // tashdeed, anything else takes a sukun
    const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
    const samePhonemeAsBefore = previousPhoneme === phoneme;
    const needsTashdeed = !isBeginningOfWord && doubleConsonant && samePhonemeAsBefore && !currentLetterMatches;
    const needsSukun = doubleConsonant && (!samePhonemeAsBefore || currentLetterMatches);

    // a long vowel closing a word loses its diacritic unless flagged otherwise
    const dropsDiacritic = isEndOfWord && phonemeInfo.longVowel && !phonemeInfo.useEndingDiacritic;
    const diacritic = dropsDiacritic ? undefined : phonemeInfo.diacritic;
    const sukunOrDiacritic = (needsSukun ? sukun : diacritic);

    function getPhonemeState(): PhonemeStatus {
        if (isBeginningOfWord) {
            if (phonemeInfo.longVowel && !phonemeInfo.endingOnly) {
                const hasAlefPrefix = currentPLetter === "ا" || phonemeInfo.matches?.includes(nextPLetter);
                if (phoneme !== "aa" && !hasAlefPrefix) {
                    throw new Error("phonetics error - needs alef prefix");
                }
                return PhonemeStatus.LeadingLongVowel;
            }
            if (phonemeInfo.beginningMatches?.includes(currentPLetter) || currentLetterMatches) {
                return PhonemeStatus.LeadingConsonantOrShortVowel;
            }
            if (phoneme === "-i-") {
                return PhonemeStatus.Izafe;
            }
        } else {
            if (phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
                return PhonemeStatus.PersianSilentWWithAa;
            }
            if (phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
                return PhonemeStatus.ArabicWasla;
            }
        }
        if (needsTashdeed) {
            return PhonemeStatus.DoubleConsonantTashdeed;
        }
        const endsInHeyForU = phoneme === "u" && currentPLetter === "ه";
        const endsInHeyForH = phoneme === "h" && ["ه", "ح"].includes(currentPLetter);
        if (isEndOfWord && (endsInHeyForU || endsInHeyForH)) {
            return PhonemeStatus.EndingWithHeyHim;
        }
        const endingLetterMatches = isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter);
        // "m" written as ن in front of ب
        const mWrittenAsNoon = phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب";
        if (currentLetterMatches || endingLetterMatches || mWrittenAsNoon) {
            return PhonemeStatus.DirectMatch;
        }
        if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
            return PhonemeStatus.ShortVowel;
        }
        throw new Error("phonetics error - no status found for phoneme: " + phoneme);
    }

    const phs = getPhonemeState();

    return {
        phs, phonemeInfo, sukunOrDiacritic,
    };
}
|
|
|
|
|
|
2021-05-06 20:28:03 +00:00
|
|
|
|
/**
 * gives back the final character of a string
 * (at runtime this is `undefined` for an empty string)
 *
 * @param s the string to look at
 */
function last(s: string) {
    const finalIndex = s.length - 1;
    return s[finalIndex];
}
|
|
|
|
|
|
|
|
|
|
/**
 * Moves the first `n` characters (one by default) from the front of the
 * input script to the end of the output script.
 */
function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
    const consumed = state.pIn.slice(0, n);
    const remaining = state.pIn.slice(n);
    return {
        pIn: remaining,
        pOut: state.pOut + consumed,
    };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Undoes one step: takes the last character off the output script and puts
 * it back at the front of the input script.
 */
function reverseP(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const movedBack = state.pOut.slice(-1);
    const keptOutput = state.pOut.slice(0, -1);
    return {
        pIn: movedBack + state.pIn,
        pOut: keptOutput,
    };
}
|
|
|
|
|
|
2021-05-07 11:48:33 +00:00
|
|
|
|
/**
 * Builds a step that appends `toAdd` to the output script. The input script
 * is left alone; an empty or missing `toAdd` appends nothing.
 */
const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
    if (!toAdd) {
        return { ...state };
    }
    return { ...state, pOut: state.pOut + toAdd };
};
|
|
|
|
|
|
|
|
|
|
/**
 * Builds a step that discards the first character of the input script and
 * appends `toWrite` to the output script in its place.
 */
const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
    const withoutFirstLetter = state.pIn.slice(1);
    const withReplacement = state.pOut + toWrite;
    return { pIn: withoutFirstLetter, pOut: withReplacement };
};
|
2021-05-07 11:48:33 +00:00
|
|
|
|
|
2021-05-08 18:31:59 +00:00
|
|
|
|
/**
 * Reads the first two characters of the input script without consuming
 * them. At runtime either one is `undefined` when the input is too short.
 */
function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
    const remaining = state.pIn;
    const current = remaining[0];
    const next = remaining[1];
    return { current, next };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Consumes the next input letter when it is ع; otherwise returns the state
 * unchanged.
 */
function advanceForAin(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const { current } = getCurrentNext(state);
    if (current !== "ع") {
        return state;
    }
    return advanceP(state);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Consumes the next input letter when it is ئ and is followed by some
 * character other than another ئ; otherwise returns the state unchanged.
 */
function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const { current, next } = getCurrentNext(state);
    const hamzaBeforeOtherLetter = current === "ئ" && next && next !== "ئ";
    return hamzaBeforeOtherLetter ? advanceP(state) : state;
}
|
|
|
|
|
/**
 * Consumes the next input letter when it is a ه that closes a word (followed
 * by nothing or by a space) or when it is ع; otherwise returns the state
 * unchanged.
 */
function advanceForAinOrHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
    const { current, next } = getCurrentNext(state);
    const wordFinalHey = current === "ه" && (!next || next === " ");
    const isAin = current === "ع";
    return (wordFinalHey || isAin) ? advanceP(state) : state;
}
|