pashto-inflector/src/lib/diacritics.ts

211 lines
5.8 KiB
TypeScript
Raw Normal View History

/**
* Copyright (c) 2021 lingdocs.com
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
import * as T from "../types";
import {
splitFIntoPhonemes,
Phoneme,
zwar,
zwarakey,
zer,
pesh,
sukun,
hamzaAbove,
tashdeed,
wasla,
addP,
advanceP,
reverseP,
overwriteP,
2021-05-27 06:36:30 +00:00
advanceForHamza,
advanceForHamzaMid,
DiacriticsAccumulator,
2021-06-03 13:52:14 +00:00
stateInfo,
PhonemeStatus,
} from "./diacritics-helpers";
import { removeFVarients } from "./accent-and-ps-utils";
import { pipe } from "rambda";
2021-05-13 09:54:46 +00:00
/**
 * Adds diacritics to the Pashto script of a given PsString, using its
 * phonetics as the guide.
 *
 * The script (`p`) is trimmed, the phonetics (`f`) are split into phonemes,
 * and the phonemes are folded over the script one at a time with
 * `processPhoneme`, which moves text from the unprocessed input (`pIn`) to
 * the diacritized output (`pOut`).
 *
 * @param ps - the text to process: `p` is the script, `f` the phonetics
 * @param ignoreCommas - when omitted, `f` is passed through `removeFVarients`
 *   before being split into phonemes; when `true`, `f` is split as-is
 * @returns a new PsString whose `p` is the accumulated output and whose `f`
 *   is the original, unmodified phonetics
 * @throws Error when script is still left over after every phoneme has been
 *   processed, i.e. the phonetics and script don't line up
 *   (NOTE(review): the helpers may also throw on other mismatches — confirm)
 */
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
  const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? removeFVarients(f) : f);
  const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() });
  // any script remaining in pIn means the phonemes ran out before the letters did
  if (pIn !== "") {
    throw new Error("phonetics error - phonetics shorter than pashto script");
  }
  return {
    p: pOut,
    f,
  };
}
2021-05-07 07:54:09 +00:00
/**
 * Reducer step used by `addDiacritics`: consumes one phoneme and returns the
 * updated accumulator.
 *
 * A leading " ... " or a single leading space in the remaining script is
 * skipped first. `stateInfo` then classifies the phoneme in its context as a
 * `PhonemeStatus`, and that status selects which sequence of accumulator
 * helpers is applied.
 *
 * NOTE(review): the helpers live in ./diacritics-helpers. Judging from their
 * use here, `advanceP` presumably moves script from `pIn` to `pOut`, `addP`
 * appends a diacritic to `pOut`, `reverseP` steps back, and `overwriteP`
 * replaces a letter — confirm against the helper definitions.
 *
 * @param acc - accumulator holding the unprocessed script (`pIn`) and the
 *   output built so far (`pOut`)
 * @param phoneme - the phoneme being processed
 * @param i - index of `phoneme` within `phonemes`
 * @param phonemes - the full phoneme list, handed to `stateInfo` for context
 * @returns the accumulator after this phoneme; the (space-skipped) state is
 *   returned unchanged when the status matches none of the handled cases
 */
function processPhoneme(
  acc: DiacriticsAccumulator,
  phoneme: Phoneme,
  i: number,
  phonemes: Phoneme[],
): DiacriticsAccumulator {
  // skip over an ellipsis separator (5 chars) or a single space before matching
  const state = acc.pIn.slice(0, 5) === " ... "
    ? advanceP(acc, 5)
    : acc.pIn[0] === " "
      ? advanceP(acc)
      : acc;

  const {
    phonemeInfo,
    diacritic,
    phs,
    prevPLetter,
  } = stateInfo({ state, i, phoneme, phonemes });

  switch (phs) {
    case PhonemeStatus.LeadingLongVowel:
      return pipe(
        advanceP,
        addP(phonemeInfo.diacritic),
        advanceP,
      )(state);

    case PhonemeStatus.LeadingConsonantOrShortVowel:
      return pipe(
        advanceP,
        addP(diacritic),
      )(state);

    case PhonemeStatus.DoubleConsonantTashdeed:
      return pipe(
        // step back only when the previous letter was a space; otherwise a no-op add
        prevPLetter === " " ? reverseP : addP(""),
        addP(tashdeed),
      )(state);

    case PhonemeStatus.EndingWithHeyHim:
      return pipe(
        advanceP,
        addP(phoneme === "u" ? hamzaAbove : sukun),
      )(state);

    case PhonemeStatus.DirectMatch:
    case PhonemeStatus.ShortAinVowelMissingComma:
      return pipe(
        addP(diacritic),
        advanceP,
      )(state);

    case PhonemeStatus.DirectMatchAfterSukun:
    case PhonemeStatus.EndingWithHeyHimFromSukun:
      return pipe(
        addP(sukun),
        advanceP,
      )(state);

    case PhonemeStatus.PersianSilentWWithAa:
      // the letter advanced between the two adds ends up wrapped in parentheses
      return pipe(
        addP("("),
        advanceP,
        addP(")"),
        advanceP,
      )(state);

    case PhonemeStatus.ArabicWasla:
      return pipe(
        addP(zer),
        overwriteP(wasla),
      )(state);

    case PhonemeStatus.Izafe:
      return pipe(
        reverseP,
        addP(zer),
      )(state);

    case PhonemeStatus.EndOfDuParticle:
      return pipe(
        reverseP,
        addP(zwarakey),
      )(state);

    case PhonemeStatus.ShortAEndingAfterHeem:
      return pipe(
        // step back only when the previous letter was a space; otherwise a no-op add
        prevPLetter === " " ? reverseP : addP(""),
        addP(zwar),
      )(state);

    // statuses that advance twice without adding any diacritic
    case PhonemeStatus.AlefDaggarEnding:
    case PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart:
    case PhonemeStatus.AinWithLongAAtBeginning:
    case PhonemeStatus.SilentAinAfterAlef:
      return pipe(
        advanceP,
        advanceP,
      )(state);

    case PhonemeStatus.LongAinVowelMissingComma:
      return pipe(
        addP(diacritic),
        advanceP,
        addP(diacritic),
      )(state);

    // statuses that advance once without adding any diacritic
    case PhonemeStatus.AlefWithHamza:
    case PhonemeStatus.ShortAForAlefBeforeFathatan:
    case PhonemeStatus.NOnFathatan:
    case PhonemeStatus.OoAfterGlottalStopOo:
      return pipe(
        advanceP,
      )(state);

    case PhonemeStatus.ShortVowel:
      return pipe(
        advanceForHamzaMid,
        addP(phonemeInfo.diacritic),
        // TODO THIS?
        advanceForHamza,
      )(state);

    case PhonemeStatus.HamzaOnWow:
      return pipe(
        advanceP,
        addP(hamzaAbove),
        addP(diacritic),
      )(state);

    case PhonemeStatus.ArabicDefiniteArticleUl:
      return pipe(
        advanceP,
        addP(pesh),
        advanceP,
      )(state);

    case PhonemeStatus.OoPrefix:
      return pipe(
        advanceP,
        addP(pesh),
      )(state);

    case PhonemeStatus.GlottalStopBeforeOo:
      return pipe(
        advanceP,
        addP(hamzaAbove),
      )(state);

    default:
      // unhandled status: leave the accumulator as it is after space skipping
      return state;
  }
}