2021-05-06 20:28:03 +00:00
|
|
|
/**
|
|
|
|
* Copyright (c) 2021 lingdocs.com
|
|
|
|
*
|
|
|
|
* This source code is licensed under the MIT license found in the
|
|
|
|
* LICENSE file in the root directory of this source tree.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
import * as T from "../types";
|
2021-05-16 14:13:42 +00:00
|
|
|
import {
|
|
|
|
splitFIntoPhonemes,
|
|
|
|
Phoneme,
|
|
|
|
zwar,
|
|
|
|
zwarakey,
|
|
|
|
zer,
|
|
|
|
pesh,
|
|
|
|
sukun,
|
|
|
|
hamzaAbove,
|
|
|
|
tashdeed,
|
|
|
|
wasla,
|
|
|
|
addP,
|
|
|
|
advanceP,
|
|
|
|
reverseP,
|
|
|
|
overwriteP,
|
2021-05-27 06:36:30 +00:00
|
|
|
advanceForHamza,
|
2021-05-16 14:13:42 +00:00
|
|
|
advanceForHamzaMid,
|
|
|
|
DiacriticsAccumulator,
|
2021-06-03 13:52:14 +00:00
|
|
|
stateInfo,
|
|
|
|
PhonemeStatus,
|
2021-05-16 14:13:42 +00:00
|
|
|
} from "./diacritics-helpers";
|
|
|
|
|
2022-04-08 10:04:16 +00:00
|
|
|
import { removeFVarients } from "./accent-and-ps-utils";
|
2021-05-07 11:48:33 +00:00
|
|
|
import { pipe } from "rambda";
|
2021-05-06 20:28:03 +00:00
|
|
|
|
2021-05-13 09:54:46 +00:00
|
|
|
/**
 * Builds a PsString whose Pashto script (`p`) has diacritics added, driven by
 * the phonetics (`f`).
 *
 * The phonetics are split into phonemes and folded over the trimmed script
 * with `processPhoneme`, which moves text from the unprocessed input (`pIn`)
 * to the decorated output (`pOut`).
 *
 * @param ps - the PsString to decorate; `f` is returned untouched
 * @param ignoreCommas - when passed, `f` is split as-is; otherwise it is first
 *   run through `removeFVarients`
 * @returns a PsString with the accumulated output as `p` and the original `f`
 * @throws Error if script is still left unprocessed after the last phoneme
 *   (i.e. the phonetics and the script don't line up)
 */
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
    const phonemes: Phoneme[] = splitFIntoPhonemes(ignoreCommas ? f : removeFVarients(f));
    const finished = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() });
    // anything still left in pIn means the phonemes ran out before the script did
    if (finished.pIn !== "") {
        throw new Error("phonetics error - phonetics shorter than pashto script");
    }
    return { p: finished.pOut, f };
}
|
|
|
|
|
2021-05-07 07:54:09 +00:00
|
|
|
/**
 * Reducer step used by `addDiacritics`: handles one phoneme and returns the
 * next accumulator.
 *
 * First, if the unprocessed script starts with " ... " or with a single
 * space, the accumulator is passed through `advanceP` (with 5, or with its
 * default) before anything else. `stateInfo` then classifies the situation
 * as a `PhonemeStatus`, and the pipeline of helpers registered for that
 * status is applied to the accumulator.
 *
 * @param acc - accumulator carried through the reduce
 * @param phoneme - the phoneme currently being handled
 * @param i - index of `phoneme` within `phonemes`
 * @param phonemes - the full phoneme list being reduced
 * @returns the accumulator after this phoneme; the (possibly advanced) state
 *   unchanged when the status matches none of the cases below
 */
function processPhoneme(
    acc: DiacriticsAccumulator,
    phoneme: Phoneme,
    i: number,
    phonemes: Phoneme[],
): DiacriticsAccumulator {
    // move past a leading " ... " or a leading single space before classifying
    let state = acc;
    if (acc.pIn.slice(0, 5) === " ... ") {
        state = advanceP(acc, 5);
    } else if (acc.pIn.charAt(0) === " ") {
        state = advanceP(acc);
    }

    const {
        phonemeInfo,
        diacritic,
        phs: status,
        prevPLetter: previousLetter,
    } = stateInfo({ state, i, phoneme, phonemes });

    // Cases are kept in the same order as the original conditional chain so
    // that the first matching status wins exactly as before.
    switch (status) {
        case PhonemeStatus.LeadingLongVowel:
            return pipe(
                advanceP,
                addP(phonemeInfo.diacritic),
                advanceP,
            )(state);
        case PhonemeStatus.LeadingConsonantOrShortVowel:
            return pipe(
                advanceP,
                addP(diacritic),
            )(state);
        case PhonemeStatus.DoubleConsonantTashdeed:
            return pipe(
                // addP("") is used as the "do nothing" step when the previous letter is not a space
                previousLetter === " " ? reverseP : addP(""),
                addP(tashdeed)
            )(state);
        case PhonemeStatus.EndingWithHeyHim:
            return pipe(
                advanceP,
                addP(phoneme === "u" ? hamzaAbove : sukun),
            )(state);
        case PhonemeStatus.DirectMatch:
            return pipe(
                addP(diacritic),
                advanceP,
            )(state);
        case PhonemeStatus.DirectMatchAfterSukun:
            return pipe(
                addP(sukun),
                advanceP,
            )(state);
        case PhonemeStatus.PersianSilentWWithAa:
            return pipe(
                addP("("),
                advanceP,
                addP(")"),
                advanceP,
            )(state);
        case PhonemeStatus.ArabicWasla:
            return pipe(
                addP(zer),
                overwriteP(wasla),
            )(state);
        case PhonemeStatus.Izafe:
            return pipe(
                reverseP,
                addP(zer),
            )(state);
        case PhonemeStatus.EndOfDuParticle:
            return pipe(
                reverseP,
                addP(zwarakey),
            )(state);
        case PhonemeStatus.ShortAEndingAfterHeem:
            return pipe(
                // addP("") is used as the "do nothing" step when the previous letter is not a space
                previousLetter === " " ? reverseP : addP(""),
                addP(zwar),
            )(state);
        case PhonemeStatus.EndingWithHeyHimFromSukun:
            return pipe(
                addP(sukun),
                advanceP,
            )(state);
        case PhonemeStatus.AlefDaggarEnding:
            return pipe(
                advanceP,
                advanceP,
            )(state);
        case PhonemeStatus.LongAinVowelMissingComma:
            return pipe(
                addP(diacritic),
                advanceP,
                addP(diacritic)
            )(state);
        case PhonemeStatus.ShortAinVowelMissingComma:
            return pipe(
                addP(diacritic),
                advanceP,
            )(state);
        // these two adjacent statuses share the same pipeline
        case PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart:
        case PhonemeStatus.AinWithLongAAtBeginning:
            return pipe(
                advanceP,
                advanceP,
            )(state);
        case PhonemeStatus.AlefWithHamza:
            return pipe(
                advanceP,
            )(state);
        case PhonemeStatus.ShortVowel:
            return pipe(
                advanceForHamzaMid,
                addP(phonemeInfo.diacritic),
                // TODO (carried over from the original source): "THIS?" - the
                // original author questioned whether this trailing step belongs here
                advanceForHamza,
            )(state);
        // these two adjacent statuses share the same pipeline
        case PhonemeStatus.ShortAForAlefBeforeFathatan:
        case PhonemeStatus.NOnFathatan:
            return pipe(
                advanceP,
            )(state);
        case PhonemeStatus.HamzaOnWow:
            return pipe(
                advanceP,
                addP(hamzaAbove),
                addP(diacritic),
            )(state);
        case PhonemeStatus.ArabicDefiniteArticleUl:
            return pipe(
                advanceP,
                addP(pesh),
                advanceP,
            )(state);
        case PhonemeStatus.OoPrefix:
            return pipe(
                advanceP,
                addP(pesh),
            )(state);
        case PhonemeStatus.GlottalStopBeforeOo:
            return pipe(
                advanceP,
                addP(hamzaAbove),
            )(state);
        case PhonemeStatus.OoAfterGlottalStopOo:
            return pipe(
                advanceP,
            )(state);
        case PhonemeStatus.SilentAinAfterAlef:
            return pipe(
                advanceP,
                advanceP,
            )(state);
        default:
            // no pipeline registered for this status: hand the state back as-is
            return state;
    }
}
|