/**
 * Copyright (c) 2021 lingdocs.com
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *
 */

import {
  phoneticsToDiacritics,
  splitFIntoPhonemes,
} from "./phonetics-to-diacritics";

// Diacritic mark concatenated into the expected outputs below wherever the
// phonetics contain a short "u".
const zwarakey = "ٙ";

/** A phonetic string and the phoneme list `splitFIntoPhonemes` should produce for it. */
type PhonemeSplitCase = {
  in: string;
  out: string[];
};

/**
 * Script text `p` plus phonetics `f`, and the diacritic-marked text that
 * `phoneticsToDiacritics` should return (`undefined` when the pair is rejected).
 */
type DiacriticsCase = {
  in: { p: string; f: string };
  out: string | undefined;
};

const phonemeSplits: PhonemeSplitCase[] = [
  { in: "kor", out: ["k", "o", "r"] },
  { in: "raaghey", out: ["r", "aa", "gh", "ey"] },
  { in: "hatsa", out: ["h", "a", "ts", "a"] },
  { in: "ba", out: ["b", "a"] },
  { in: "peydáa", out: ["p", "ey", "d", "áa"] },
  { in: "be kaar", out: ["b", "e", "k", "aa", "r"] },
  { in: "raadzeyy", out: ["r", "aa", "dz", "eyy"] },
  { in: "badanuy ??", out: ["b", "a", "d", "a", "n", "uy"] },
  { in: "tur ... pore", out: ["t", "u", "r", "p", "o", "r", "e"] },
  { in: "daar-Ul-iqaama", out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"] },
];

// One test per phoneme-splitting fixture.
phonemeSplits.forEach((s) => {
  test(`${s.in} should split properly`, () => {
    const result = splitFIntoPhonemes(s.in);
    expect(result).toEqual(s.out);
  });
});

const toTest: DiacriticsCase[] = [
  { in: { p: "کور", f: "kor" }, out: "کور" },
  { in: { p: "کور", f: "koor" }, out: "کُور" },
  { in: { p: "تب", f: "tib" }, out: "تِب" },
  { in: { p: "تب", f: "tab" }, out: "تَب" },
  { in: { p: "تب", f: "tUb" }, out: "تُب" },
  { in: { p: "تب", f: "tub" }, out: "تٙب" },
  { in: { p: "تب", f: "tb" }, out: "تْب" },
  { in: { p: "تلب", f: "tilab" }, out: "تِلَب" },
  { in: { p: "تشناب", f: "tashnaab" }, out: "تَشْناب" },
  // broken phonetics will return undefined
  { in: { p: "تشناب", f: "peshnaab" }, out: undefined },
  // working with وs
  { in: { p: "کول", f: "kwal" }, out: "کْوَل" },
  { in: { p: "تول", f: "tool" }, out: "تُول" },
  { in: { p: "مقبول", f: "maqbool" }, out: "مَقْبُول" },
  { in: { p: "کول", f: "kawul" }, out: "کَو" + zwarakey + "ل" },
  { in: { p: "کول", f: "kiwul" }, out: "کِو" + zwarakey + "ل" },
  { in: { p: "کول", f: "kUwul" }, out: "کُو" + zwarakey + "ل" },
  { in: { p: "کول", f: "kuwul" }, out: "ک" + zwarakey + "و" + zwarakey + "ل" },
  { in: { p: "کول", f: "kawal" }, out: "کَوَل" },
  { in: { p: "کول", f: "kUwal" }, out: "کُوَل" },
  { in: { p: "پشتګرد", f: "pishtgird" }, out: "پِشْتْګِرْد" },
  { in: { p: "سپین", f: "speen" }, out: "سْپِین" },
  { in: { p: "سپین", f: "speyn" }, out: "سْپین" },
  { in: { p: "پېش", f: "pesh" }, out: "پېش" },
  { in: { p: "پېش", f: "peysh" }, out: undefined },
  { in: { p: "رغېدل", f: "raghedul" }, out: "رَغېد" + zwarakey + "ل" },
  { in: { p: "کارول", f: "kaarawul" }, out: "کارَو" + zwarakey + "ل" },
  { in: { p: "پېښېدل", f: "pexedul" }, out: "پېښېد" + zwarakey + "ل" },
  { in: { p: "مین", f: "mayín" }, out: "مَیِن" },
  { in: { p: "سړی", f: "saRey" }, out: "سَړی" },
  { in: { p: "سړي", f: "saRee" }, out: "سَړي" },
  { in: { p: "زه", f: "zu" }, out: "زهٔ" },
  { in: { p: "زه", f: "za" }, out: "زه" },
  { in: { p: "پېشنهاد", f: "peshniháad" }, out: "پېشْنِهاد" },
  { in: { p: "ایستل", f: "eestul" }, out: "اِیسْت" + zwarakey + "ل" },
  { in: { p: "ایستل", f: "eystul" }, out: "ایسْت" + zwarakey + "ل" },
  { in: { p: "اېسېدل", f: "esedul" }, out: "اېسېد" + zwarakey + "ل" },
  { in: { p: "اوسېدل", f: "osedul" }, out: "اوسېد" + zwarakey + "ل" },
  { in: { p: "اواز", f: "awaaz" }, out: "اَواز" },
  { in: { p: "اسلام", f: "islaam" }, out: "اِسْلام" },
  { in: { p: "واردول", f: "waaridawul" }, out: "وارِدَو" + zwarakey + "ل" },
  { in: { p: "غاړه", f: "ghaaRa" }, out: "غاړه" },
  { in: { p: "اوتر", f: "awtár" }, out: "اَوْتَر" },
  { in: { p: "اختیار", f: "ikhtiyáar" }, out: "اِخْتِیار" },
  { in: { p: "فریاد", f: "faryáad" }, out: "فَرْیاد" },
  { in: { p: "کارغه", f: "kaarghu" }, out: "کارْغهٔ" },
  { in: { p: "بې کار", f: "be kaar" }, out: "بې کار" },
  { in: { p: "بې کار", f: "bekaar" }, out: "بې کار" },
  { in: { p: "انبار", f: "ambáar" }, out: "اَنْبار" },
  { in: { p: "ارغون", f: "arghóon" }, out: "اَرْغُون" },
  { in: { p: "ارمټه", f: "armaTa" }, out: "اَرْمَټه" },
  { in: { p: "اروا پوه", f: "arwaa poh" }, out: "اَرْوا پوهْ" },
  { in: { p: "اسحاق", f: "ishaaq" }, out: undefined },
  { in: { p: "اسحاق", f: "is`haaq" }, out: "اِسْحاق" },
  { in: { p: "سعات", f: "saat" }, out: "سعات" },
  { in: { p: "سعات", f: "sa'aat" }, out: "سَعات" },
  { in: { p: "استعمال", f: "ist'imaal" }, out: "اِسْتعِمال" },
  { in: { p: "استعمال", f: "istimaal" }, out: "اِسْتعِمال" },
  { in: { p: "اروایي", f: "arwaayee" }, out: "اَرْوایي" },
  { in: { p: "اریځ", f: "Uryadz" }, out: "اُرْیَځ" },
  { in: { p: "ازغن تار", f: "azghun taar" }, out: "اَزْغ" + zwarakey + "ن" + " تار" },
  { in: { p: "اره څکول", f: "ara tskawul" }, out: "اَره څْکَو" + zwarakey + "ل" },
  { in: { p: "اږیل", f: "aGuyúl" }, out: "اَږ" + zwarakey + "ی" + zwarakey + "ل" },
  { in: { p: "استازندوی", f: "astaazandoy" }, out: "اَسْتازَنْدوی" },
  // واخ being khaa in the middle of a word
  { in: { p: "استخوان", f: "UstUkháan" }, out: "اُسْتُخ(و)ان" },
  { in: { p: "اسطلاع", f: "istilaa" }, out: "اِسْطِلاع" },
  { in: { p: "اسهال", f: "is`háal" }, out: "اِسْهال" },
  { in: { p: "اسهامي", f: "as`haamee" }, out: "اَسْهامي" },
  // avoid false double consonant
  { in: { p: "ازل لیک", f: "azalléek" }, out: "اَزَل لِیک" },
  // bad ending test
  { in: { p: "ماضی", f: "maazee" }, out: undefined },
  // bad beginning test
  { in: { p: "وسېدل", f: "osedul" }, out: undefined },
  { in: { p: "يست", f: "eest" }, out: undefined },
  { in: { p: "ست", f: "ist" }, out: undefined },
  // NOTE(review): script and phonetics look swapped in this case (Latin text
  // in `p`, Pashto text in `f`) — presumably deliberate invalid input; confirm.
  { in: { p: "haca", f: "هځه" }, out: undefined },
  // tashdeed
  { in: { p: "پته", f: "patta" }, out: "پَتّه" },
  { in: { p: "اعتصاب شکن", f: "itisaabshikan" }, out: "اِعتِصاب شِکَن" },
  // Arabic wasla
  { in: { p: "بالکل", f: "bilkUl" }, out: "بِٱلْکُل" },
  // izafe
  { in: { p: "ایصال ثواب", f: "eesaal-i-sawaab" }, out: "اِیصالِ ثَواب" },
  { in: { p: "با استعداد", f: "baa isti'dáad" }, out: "با اِسْتِعداد" },
  // starting with ع
  { in: { p: "عزت", f: "izzat" }, out: "عِزَّت" },
  { in: { p: "عزت", f: "i'zzat" }, out: "عِزَّت" },
  // ئ in the middle
  { in: { p: "برائت", f: "baraa'at" }, out: "بَرائَت" },
  { in: { p: "فائده", f: "faaida" }, out: "فائِده" },
  // starting with long aa
  { in: { p: "آدم", f: "aadam" }, out: "آدَم" },
  { in: { p: "یدام", f: "aadam" }, out: undefined },
  { in: { p: "منع", f: "mán'a" }, out: "مَنعَ" },
  { in: { p: "منع", f: "mana" }, out: "مَنعَ" },
  { in: { p: "منابع", f: "mUnaabí" }, out: "مُنابعِ" },
  // TODO: Is this correct??
  { in: { p: "اسان", f: "aasaan" }, out: "اسان" },
  // ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی
  { in: { p: "پتېیل", f: "pateyúl" }, out: undefined },
  { in: { p: "پتېیل", f: "pate`yúl" }, out: "پَتېی" + zwarakey + "ل" },
  { in: { p: "درېیم", f: "dre`yum" }, out: "دْرېی" + zwarakey + "م" },
  { in: { p: "تابع دار", f: "taabidaar" }, out: "تابعِ دار" },
  // handle circumpositions
  { in: { p: "تر ... پورې", f: "tur ... pore" }, out: "ت" + zwarakey + "ر ... پورې" },
  // joiner و
  { in: { p: "کار و بار", f: "kaar-U-baar" }, out: "کار و بار" },
  { in: { p: "کاروبار", f: "kaar-U-baar" }, out: "کاروبار" },
  { in: { p: "توقع", f: "tawaqqÚ" }, out: "تَوَقّعُ" },
  // special behaviour with د
  { in: { p: "د", f: "du" }, out: "د" + zwarakey },
  { in: { p: "د لاس", f: "du laas" }, out: "د" + zwarakey + " لاس" },
  { in: { p: "د ... په شان", f: "du ... pu shaan" }, out: "د" + zwarakey + " ... پهٔ شان" },
  { in: { p: "ذبح", f: "zabha" }, out: "ذَبْحَ" },
  { in: { p: "ذبح کول", f: "zabha kawul" }, out: "ذَبْحَ کَو" + zwarakey + "ل" },
  // require dagger alif on words ending with یٰ
  { in: { p: "یحیی", f: "yahyaa" }, out: undefined },
  { in: { p: "یحییٰ", f: "yahyaa" }, out: "یَحْییٰ" },
  { in: { p: "معنیٰ", f: "ma'anaa" }, out: "مَعَنیٰ" },
  // require fathatan on words ending in اً
  { in: { p: "دقیقا", f: "daqeeqan" }, out: undefined },
  { in: { p: "دقیقاً", f: "daqeeqan" }, out: "دَقِیقاً" },
  // words starting in عا
  { in: { p: "عام", f: "aam" }, out: "عام" },
  { in: { p: "عام", f: "'aam" }, out: "عام" },
  { in: { p: "قتل عام", f: "qatl-i-aam" }, out: "قَتْلِ عام" },
  { in: { p: "طمع لرل", f: "tama larul" }, out: "طَمعَ لَر" + zwarakey + "ل" },
  // Ua ؤ
  { in: { p: "مودب", f: "mUaddab" }, out: "مؤدَّب" },
  { in: { p: "لکۍ وال", f: "lakuy waal" }, out: "لَکۍ وال" },
  // shouldn't skip the ئ at the end
  { in: { p: "شئ", f: "sheyy" }, out: "شئ" },
  // exception for و - wo
  { in: { p: "و", f: "wo" }, out: "و" },
  { in: { p: "سړی و", f: "saRey wo" }, out: "سَړی و" },
  { in: { p: "عید", f: "eed" }, out: "عِید" },
  // i ending can also be i
  { in: { p: "سه", f: "si" }, out: "سِه" },
  { in: { p: "سه شنبه", f: "sishamba" }, out: "سِه شَنْبه" },
  { in: { p: "توجه", f: "tawajÚ" }, out: "تَوَجُه" },
  { in: { p: "توجه کول", f: "tawajU kawul" }, out: "تَوَجُه کَو" + zwarakey + "ل" },
  // With Arabic definite article -Ul- ال
  { in: { p: "حق الاجاره", f: "haq-Ul-ijaara" }, out: "حَق اُلاِجاره" },
  { in: { p: "دار العلوم", f: "daar-Ul-Ulóom" }, out: "دار اُلعُلُوم" },
  // double consonants on end of words
  { in: { p: "حق", f: "haqq" }, out: "حَقّ" },
  { in: { p: "حق پر", f: "haqq par" }, out: "حَقّ پَر" },
  { in: { p: "راجع کېدل", f: "raaji kedul" }, out: "راجعِ کېد" + zwarakey + "ل" },
  { in: { p: "ربیع", f: "rabee'" }, out: "رَبِیع" },
  { in: { p: "سختسری", f: "sakht sărey" }, out: "سَخْتْسَری" },
  { in: { p: "معنیٰ", f: "ma'naa" }, out: "مَعنیٰ" },
  // issue with یٰ ending and then continuing to the next word
  { in: { p: "معنیٰ دار", f: "ma'naa daar" }, out: "مَعنیٰ دار" },
  { in: { p: "اله", f: "ilah" }, out: "اِلَهْ" },
  // issue with words ending in عه going to the next word
  { in: { p: "قطعه بازي", f: "qit'a baazee" }, out: "قِطعه بازي" },
  // أ in the middle of the word
  { in: { p: "متأسف", f: "mUta'assif" }, out: "مُتأسِّف" },
  // words ending in ع a' on to the next word
  { in: { p: "مربع", f: "mUraba'" }, out: "مُرَبَع" },
  { in: { p: "مربع جذر", f: "mUraba' jazúr" }, out: "مُرَبَع جَذ" + zwarakey + "ر" },
  // TODO: Is this best??
  { in: { p: "مسوول", f: "mas'ool" }, out: "مَسوُول" },
  // allow for beginnings prefixed with ور در را
  { in: { p: "وراوږد", f: "wăr-ooGad" }, out: "وَراُوږَد" },
  { in: { p: "دراوږد", f: "dăr-ooGad" }, out: "دَراُوږَد" },
  { in: { p: "رااوږد", f: "raa-ooGad" }, out: "رااُوږَد" },
  // allow for spaces at beginning of phonetics etc.
  { in: { p: " سپین کړه", f: " speen kRu" }, out: "سْپِین کْړهٔ" },
  { in: { p: "اوب", f: "ob" }, out: "اوب" },
  // allow oo at start with و prefix
  { in: { p: "وباسي", f: "oobaasee" }, out: "وباسي" },
  { in: { p: "وځم", f: "oodzum" }, out: "وځ" + zwarakey + "م" },
  { in: { p: "وځم", f: "wUdzum" }, out: "وُځ" + zwarakey + "م" },
];

// TODO: قطع کول - qat'a kawul - failing
// TODO: فی الحال
// TODO: الله words

// One test per script/phonetics fixture; `toBe` also covers the cases whose
// expected value is `undefined`.
toTest.forEach((t) => {
  test(`${t.in.p} given phonetics ${t.in.f} should translate to ${t.out}`, () => {
    const output = phoneticsToDiacritics(t.in.p, t.in.f);
    expect(output).toBe(t.out);
  });
});

// The same pair is accepted in the table above when no third argument is given.
test("should forbid oo prefixes when the option is passed", () => {
  const output = phoneticsToDiacritics("وځم", "oodzum", true);
  expect(output).toBeUndefined();
});