2021-05-06 20:28:03 +00:00
|
|
|
|
/**
|
|
|
|
|
* Copyright (c) 2021 lingdocs.com
|
|
|
|
|
*
|
|
|
|
|
* This source code is licensed under the MIT license found in the
|
|
|
|
|
* LICENSE file in the root directory of this source tree.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
import {
|
|
|
|
|
addDiacritics,
|
|
|
|
|
splitFIntoPhonemes,
|
|
|
|
|
} from "./diacritics";
|
|
|
|
|
import * as T from "../types";
|
|
|
|
|
|
2021-05-06 20:48:53 +00:00
|
|
|
|
const zwar = "َ";
|
|
|
|
|
const zwarakey = "ٙ";
|
|
|
|
|
const zer = "ِ";
|
|
|
|
|
const pesh = "ُ";
|
|
|
|
|
const sukun = "ْ";
|
|
|
|
|
const hamzaAbove = "ٔ";
|
|
|
|
|
const tashdeed = "ّ";
|
|
|
|
|
const wasla = "ٱ";
|
|
|
|
|
const daggerAlif = "ٰ";
|
|
|
|
|
const fathahan = "ً";
|
|
|
|
|
|
2021-05-06 20:28:03 +00:00
|
|
|
|
const phonemeSplits: Array<{
|
|
|
|
|
in: string,
|
|
|
|
|
out: string[],
|
|
|
|
|
}> = [
|
|
|
|
|
{
|
|
|
|
|
in: "kor",
|
|
|
|
|
out: ["k", "o", "r"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "raaghey",
|
|
|
|
|
out: ["r", "aa", "gh", "ey"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "hatsa",
|
|
|
|
|
out: ["h", "a", "ts", "a"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "ba",
|
|
|
|
|
out: ["b", "a"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "peydáa",
|
|
|
|
|
out: ["p", "ey", "d", "aa"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "be kaar",
|
|
|
|
|
out: ["b", "e", "k", "aa", "r"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "raadzeyy",
|
|
|
|
|
out: ["r", "aa", "dz", "eyy"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "badanuy ??",
|
|
|
|
|
out: ["b", "a", "d", "a", "n", "uy"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "tur ... pore",
|
|
|
|
|
out: ["t", "u", "r", "p", "o", "r", "e"],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "daar-Ul-iqaama",
|
|
|
|
|
out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
|
|
|
|
|
},
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
const diacriticsTest: Array<{
|
|
|
|
|
in: T.PsString,
|
|
|
|
|
out: string,
|
|
|
|
|
}> = [
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کور",
|
|
|
|
|
f: "kor",
|
|
|
|
|
},
|
|
|
|
|
out: "کور",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کور",
|
|
|
|
|
f: "koor",
|
|
|
|
|
},
|
|
|
|
|
out: "کُور",
|
|
|
|
|
},
|
2021-05-07 07:54:09 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کور کور",
|
|
|
|
|
f: "kor koor",
|
|
|
|
|
},
|
|
|
|
|
out: "کور کُور",
|
|
|
|
|
},
|
2021-05-06 20:28:03 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تب",
|
|
|
|
|
f: "tib",
|
|
|
|
|
},
|
|
|
|
|
out: "تِب",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تب",
|
|
|
|
|
f: "tab",
|
|
|
|
|
},
|
|
|
|
|
out: "تَب",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تب",
|
|
|
|
|
f: "tUb",
|
|
|
|
|
},
|
|
|
|
|
out: "تُب",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تب",
|
|
|
|
|
f: "tub",
|
|
|
|
|
},
|
|
|
|
|
out: "تٙب",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تب",
|
|
|
|
|
f: "tb",
|
|
|
|
|
},
|
|
|
|
|
out: "تْب",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تلب",
|
|
|
|
|
f: "tilab",
|
|
|
|
|
},
|
|
|
|
|
out: "تِلَب",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تشناب",
|
|
|
|
|
f: "tashnaab",
|
|
|
|
|
},
|
|
|
|
|
out: "تَشْناب",
|
|
|
|
|
},
|
2021-05-06 20:48:53 +00:00
|
|
|
|
// working with وs
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کول",
|
|
|
|
|
f: "kwal",
|
|
|
|
|
},
|
|
|
|
|
out: "کْوَل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "تول",
|
|
|
|
|
f: "tool",
|
|
|
|
|
},
|
|
|
|
|
out: "تُول",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "مقبول",
|
|
|
|
|
f: "maqbool",
|
|
|
|
|
},
|
|
|
|
|
out: "مَقْبُول",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کول",
|
|
|
|
|
f: "kawul",
|
|
|
|
|
},
|
|
|
|
|
out: "کَو" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کول",
|
|
|
|
|
f: "kiwul",
|
|
|
|
|
},
|
|
|
|
|
out: "کِو" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کول",
|
|
|
|
|
f: "kUwul",
|
|
|
|
|
},
|
|
|
|
|
out: "کُو" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کول",
|
|
|
|
|
f: "kuwul",
|
|
|
|
|
},
|
|
|
|
|
out: "ک" + zwarakey + "و" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کول",
|
|
|
|
|
f: "kawal",
|
|
|
|
|
},
|
|
|
|
|
out: "کَوَل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کول",
|
|
|
|
|
f: "kUwal",
|
|
|
|
|
},
|
|
|
|
|
out: "کُوَل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "پشتګرد",
|
|
|
|
|
f: "pishtgird",
|
|
|
|
|
},
|
|
|
|
|
out: "پِشْتْګِرْد",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "سپین",
|
|
|
|
|
f: "speen",
|
|
|
|
|
},
|
|
|
|
|
out: "سْپِین",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "سپین",
|
|
|
|
|
f: "speyn",
|
|
|
|
|
},
|
|
|
|
|
out: "سْپین",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "پېش",
|
|
|
|
|
f: "pesh",
|
|
|
|
|
},
|
|
|
|
|
out: "پېش",
|
|
|
|
|
},
|
2021-05-07 07:54:09 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "لیک",
|
|
|
|
|
f: "leek",
|
|
|
|
|
},
|
|
|
|
|
out: "لِیک",
|
|
|
|
|
},
|
2021-05-08 18:31:59 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "رغېدل",
|
|
|
|
|
f: "raghedul",
|
|
|
|
|
},
|
|
|
|
|
out: "رَغېد" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کارول",
|
|
|
|
|
f: "kaarawul",
|
|
|
|
|
},
|
|
|
|
|
out: "کارَو" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "پېښېدل",
|
|
|
|
|
f: "pexedul",
|
|
|
|
|
},
|
|
|
|
|
out: "پېښېد" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "مین",
|
|
|
|
|
f: "mayín",
|
|
|
|
|
},
|
|
|
|
|
out: "مَیِن",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "سړی",
|
|
|
|
|
f: "saRey",
|
|
|
|
|
},
|
|
|
|
|
out: "سَړی",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "سړي",
|
|
|
|
|
f: "saRee",
|
|
|
|
|
},
|
|
|
|
|
out: "سَړي",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "زه",
|
|
|
|
|
f: "zu",
|
|
|
|
|
},
|
|
|
|
|
out: "زهٔ",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "زه",
|
|
|
|
|
f: "za",
|
|
|
|
|
},
|
|
|
|
|
out: "زَه",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "پېشنهاد",
|
|
|
|
|
f: "peshniháad",
|
|
|
|
|
},
|
|
|
|
|
out: "پېشْنِهاد",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "ایستل",
|
|
|
|
|
f: "eestul",
|
|
|
|
|
},
|
|
|
|
|
out: "اِیسْت" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "ایستل",
|
|
|
|
|
f: "eystul",
|
|
|
|
|
},
|
|
|
|
|
out: "ایسْت" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اېسېدل",
|
|
|
|
|
f: "esedul",
|
|
|
|
|
},
|
|
|
|
|
out: "اېسېد" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اوسېدل",
|
|
|
|
|
f: "osedul",
|
|
|
|
|
},
|
|
|
|
|
out: "اوسېد" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اواز",
|
|
|
|
|
f: "awaaz",
|
|
|
|
|
},
|
|
|
|
|
out: "اَواز",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اسلام",
|
|
|
|
|
f: "islaam",
|
|
|
|
|
},
|
|
|
|
|
out: "اِسْلام",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "واردول",
|
|
|
|
|
f: "waaridawul",
|
|
|
|
|
},
|
|
|
|
|
out: "وارِدَو" + zwarakey + "ل",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "غاړه",
|
|
|
|
|
f: "ghaaRa",
|
|
|
|
|
},
|
|
|
|
|
out: "غاړَه",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اوتر",
|
|
|
|
|
f: "awtár",
|
|
|
|
|
},
|
|
|
|
|
out: "اَوْتَر",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اختیار",
|
|
|
|
|
f: "ikhtiyáar",
|
|
|
|
|
},
|
|
|
|
|
out: "اِخْتِیار",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "فریاد",
|
|
|
|
|
f: "faryáad",
|
|
|
|
|
},
|
|
|
|
|
out: "فَرْیاد",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "کارغه",
|
|
|
|
|
f: "kaarghu",
|
|
|
|
|
},
|
|
|
|
|
out: "کارْغهٔ",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "بې کار",
|
|
|
|
|
f: "be kaar",
|
|
|
|
|
},
|
|
|
|
|
out: "بې کار",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "بې کار",
|
|
|
|
|
f: "bekaar",
|
|
|
|
|
},
|
|
|
|
|
out: "بې کار",
|
|
|
|
|
},
|
|
|
|
|
// TODO: nb mb thing
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "انبار",
|
|
|
|
|
f: "ambáar",
|
|
|
|
|
},
|
|
|
|
|
out: "اَنْبار",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "ارغون",
|
|
|
|
|
f: "arghóon",
|
|
|
|
|
},
|
|
|
|
|
out: "اَرْغُون",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "ارمټه",
|
|
|
|
|
f: "armaTa",
|
|
|
|
|
},
|
|
|
|
|
out: "اَرْمَټَه",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اروا پوه",
|
|
|
|
|
f: "arwaa poh",
|
|
|
|
|
},
|
|
|
|
|
out: "اَرْوا پوهْ",
|
|
|
|
|
},
|
2021-05-07 07:54:09 +00:00
|
|
|
|
// starting alefs
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اسلام",
|
|
|
|
|
f: "islaam",
|
|
|
|
|
},
|
|
|
|
|
out: "اِسْلام",
|
|
|
|
|
},
|
2021-05-07 08:52:25 +00:00
|
|
|
|
// starting long vowels with ا
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "ایسار",
|
|
|
|
|
f: "eesaar",
|
|
|
|
|
},
|
|
|
|
|
out: "اِیسار",
|
|
|
|
|
},
|
2021-05-07 11:48:33 +00:00
|
|
|
|
// double consonant / tashdeed
|
2021-05-06 20:48:53 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "بتن",
|
|
|
|
|
f: "battan",
|
|
|
|
|
},
|
|
|
|
|
out: "ب" + zwar + "ت" + tashdeed + zwar + "ن",
|
|
|
|
|
},
|
2021-05-07 11:48:33 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "بتطن",
|
|
|
|
|
f: "battan",
|
|
|
|
|
},
|
|
|
|
|
out: "ب" + zwar + "ت" + sukun + "ط" + zwar + "ن",
|
|
|
|
|
},
|
|
|
|
|
// vowel endings working
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "بته",
|
|
|
|
|
f: "bata",
|
|
|
|
|
},
|
|
|
|
|
out: "بَتَه",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "بته",
|
|
|
|
|
f: "bati",
|
|
|
|
|
},
|
|
|
|
|
out: "بَتِه",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "پرمختیا",
|
|
|
|
|
f: "parmakhtyaa",
|
|
|
|
|
},
|
|
|
|
|
out: "پَرْمَخْتْیا",
|
|
|
|
|
},
|
2021-05-08 18:31:59 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "پته",
|
|
|
|
|
f: "patta",
|
|
|
|
|
},
|
|
|
|
|
out: "پَتَّه",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "پته تور",
|
|
|
|
|
f: "patta toor",
|
|
|
|
|
},
|
|
|
|
|
out: "پَتَّه تُور",
|
|
|
|
|
},
|
2021-05-07 11:48:33 +00:00
|
|
|
|
// get ayn stuff working
|
2021-05-08 18:31:59 +00:00
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "اعتصاب شکن",
|
|
|
|
|
f: "itisaab shakan",
|
|
|
|
|
},
|
|
|
|
|
out: "اِعتِصاب شَکَن",
|
|
|
|
|
},
|
2021-05-07 07:54:09 +00:00
|
|
|
|
// avoid false double consonant
|
2021-05-06 20:28:03 +00:00
|
|
|
|
{
|
2021-05-07 07:54:09 +00:00
|
|
|
|
in: {
|
|
|
|
|
p: "ازل لیک",
|
|
|
|
|
f: "azalléek",
|
|
|
|
|
},
|
|
|
|
|
out: "اَزَل لِیک",
|
2021-05-06 20:28:03 +00:00
|
|
|
|
},
|
2021-05-08 18:31:59 +00:00
|
|
|
|
// starting with ع
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "عزت",
|
|
|
|
|
f: "izzat",
|
|
|
|
|
},
|
|
|
|
|
out: "عِزَّت",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "عزت",
|
|
|
|
|
f: "i'zzat",
|
|
|
|
|
},
|
|
|
|
|
out: "عِزَّت",
|
|
|
|
|
},
|
|
|
|
|
// ئ in the middle
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "برائت",
|
|
|
|
|
f: "baraa'at",
|
|
|
|
|
},
|
|
|
|
|
out: "بَرائَت",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "فائده",
|
|
|
|
|
f: "faaida",
|
|
|
|
|
},
|
|
|
|
|
out: "فائِدَه",
|
|
|
|
|
},
|
2021-05-13 09:54:46 +00:00
|
|
|
|
// واخ being khaa in the middle of a word
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "استخوان",
|
|
|
|
|
f: "UstUkháan",
|
|
|
|
|
},
|
|
|
|
|
out: "اُسْتُخ(و)ان",
|
|
|
|
|
},
|
|
|
|
|
// Arabic wasla
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "بالکل",
|
|
|
|
|
f: "bilkUl",
|
|
|
|
|
},
|
|
|
|
|
out: "بِٱلْکُل",
|
|
|
|
|
},
|
|
|
|
|
// izafe
|
|
|
|
|
{
|
|
|
|
|
in: {
|
|
|
|
|
p: "ایصال ثواب",
|
|
|
|
|
f: "eesaal-i-sawaab",
|
|
|
|
|
},
|
|
|
|
|
out: "اِیصالِ ثَواب",
|
|
|
|
|
},
|
2021-05-06 20:28:03 +00:00
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
phonemeSplits.forEach((s) => {
|
|
|
|
|
test(`${s.in} should split properly`, () => {
|
|
|
|
|
const result = splitFIntoPhonemes(s.in);
|
|
|
|
|
expect(result).toEqual(s.out);
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
2021-05-08 18:31:59 +00:00
|
|
|
|
|
|
|
|
|
diacriticsTest.forEach((t) => {
|
|
|
|
|
test(`diacritics should work for ${t.in.p} - ${t.in.f}`, () => {
|
2021-05-07 07:54:09 +00:00
|
|
|
|
expect(addDiacritics(t.in)).toEqual({ p: t.out, f: t.in.f });
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// ERRORS
|
|
|
|
|
|
|
|
|
|
const brokenDiacritics = [
|
|
|
|
|
{
|
|
|
|
|
p: "تشناب",
|
|
|
|
|
f: "peshnaab",
|
|
|
|
|
},
|
2021-05-07 08:52:25 +00:00
|
|
|
|
{
|
|
|
|
|
p: "وسېدل",
|
|
|
|
|
f: "osedul",
|
|
|
|
|
},
|
2021-05-07 07:54:09 +00:00
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
const badPhonetics: Array<{
|
|
|
|
|
in: string,
|
|
|
|
|
problem: string,
|
|
|
|
|
}> = [
|
|
|
|
|
{
|
|
|
|
|
in: "acar",
|
|
|
|
|
problem: "c",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
in: "a7am",
|
|
|
|
|
problem: "7",
|
|
|
|
|
},
|
|
|
|
|
];
|
|
|
|
|
|
2021-05-06 20:28:03 +00:00
|
|
|
|
test("bad phonetic characters should throw an error", () => {
|
|
|
|
|
badPhonetics.forEach((s) => {
|
|
|
|
|
expect(() => {
|
|
|
|
|
splitFIntoPhonemes(s.in);
|
|
|
|
|
}).toThrow(`illegal phonetic character: ${s.problem}`);
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
2021-05-06 20:48:53 +00:00
|
|
|
|
test("ending with left over Pashto script will throw an error", () => {
|
|
|
|
|
expect(() => {
|
|
|
|
|
addDiacritics({ p: "کور ته", f: "kor" });
|
|
|
|
|
}).toThrow(`phonetics error - phonetics shorter than pashto script`);
|
|
|
|
|
});
|
|
|
|
|
|
2021-05-07 07:54:09 +00:00
|
|
|
|
test("ending with left over phonetics will throw an error", () => {
|
|
|
|
|
expect(() => {
|
|
|
|
|
addDiacritics({ p: "کار", f: "kaar kawul" });
|
|
|
|
|
}).toThrow();
|
2021-05-06 20:28:03 +00:00
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
test("adding diacritics errors when phonetecs and pashto do not line up", () => {
|
|
|
|
|
brokenDiacritics.forEach((t) => {
|
|
|
|
|
expect(() => {
|
|
|
|
|
addDiacritics(t);
|
|
|
|
|
}).toThrow();
|
|
|
|
|
});
|
|
|
|
|
});
|