phonetics conversion done

This commit is contained in:
adueck 2023-07-27 12:28:50 +04:00
parent fc97db0dd3
commit c0cd34c3d6
11 changed files with 4890 additions and 4018 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,131 +1,133 @@
import { import {
splitFIntoPhonemes, splitFIntoPhonemes,
last, last,
addP, addP,
lastNonWhitespace, lastNonWhitespace,
advanceP, reverseP,
reverseP,
overwriteP,
advanceForHamza,
advanceForHamzaMid,
} from "./diacritics-helpers"; } from "./diacritics-helpers";
const phonemeSplits: Array<{ const phonemeSplits: Array<{
in: string, in: string;
out: string[], out: string[];
}> = [ }> = [
{ {
in: "kor", in: "kor",
out: ["k", "o", "r"], out: ["k", "o", "r"],
}, },
{ {
in: "raaghey", in: "raaghay",
out: ["r", "aa", "gh", "ey"], out: ["r", "aa", "gh", "ay"],
}, },
{ {
in: "ist'imaal", in: "ist'imaal",
out: ["i", "s", "t", "'", "i", "m", "aa", "l"], out: ["i", "s", "t", "'", "i", "m", "aa", "l"],
}, },
{ {
in: "hatsa", in: "hatsa",
out: ["h", "a", "ts", "a"], out: ["h", "a", "ts", "a"],
}, },
{ {
in: "ba", in: "ba",
out: ["b", "a"], out: ["b", "a"],
}, },
{ {
in: "peydáa", in: "paydáa",
out: ["p", "ey", "d", "aa"], out: ["p", "ay", "d", "aa"],
}, },
{ {
in: "be kaar", in: "be kaar",
out: ["b", "e", "k", "aa", "r"], out: ["b", "e", "k", "aa", "r"],
}, },
{ {
in: "raadzeyy", in: "raadzey",
out: ["r", "aa", "dz", "eyy"], out: ["r", "aa", "dz", "ey"],
}, },
{ {
in: "badanuy ??", in: "badanuy ??",
out: ["b", "a", "d", "a", "n", "uy"], out: ["b", "a", "d", "a", "n", "uy"],
}, },
{ {
in: "tur ... pore", in: "tur ... pore",
out: ["t", "u", "r", "p", "o", "r", "e"], out: ["t", "u", "r", "p", "o", "r", "e"],
}, },
{ {
in: "daar-Ul-iqaama", in: "daar-Ul-iqaama",
out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"], out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
}, },
]; ];
phonemeSplits.forEach((s) => { phonemeSplits.forEach((s) => {
test(`${s.in} should split properly`, () => { test(`${s.in} should split properly`, () => {
const result = splitFIntoPhonemes(s.in); const result = splitFIntoPhonemes(s.in);
expect(result).toEqual(s.out); expect(result).toEqual(s.out);
}); });
}); });
const badPhonetics: Array<{ const badPhonetics: Array<{
in: string, in: string;
problem: string, problem: string;
}> = [ }> = [
{ {
in: "acar", in: "acar",
problem: "c", problem: "c",
}, },
{ {
in: "a7am", in: "a7am",
problem: "7", problem: "7",
}, },
]; ];
test("bad phonetic characters should throw an error", () => { test("bad phonetic characters should throw an error", () => {
badPhonetics.forEach((s) => { badPhonetics.forEach((s) => {
expect(() => { expect(() => {
splitFIntoPhonemes(s.in); splitFIntoPhonemes(s.in);
}).toThrow(`illegal phonetic character: ${s.problem}`); }).toThrow(`illegal phonetic character: ${s.problem}`);
}); });
}); });
test("last should work", () => { test("last should work", () => {
expect(last("this")).toBe("s"); expect(last("this")).toBe("s");
}); });
test("addP should work", () => { test("addP should work", () => {
expect(addP("ت")({ pIn: "", pOut: "کر" })).toEqual({ expect(addP("ت")({ pIn: "", pOut: "کر" })).toEqual({
pIn: "", pIn: "",
pOut: "کرت", pOut: "کرت",
}); });
}); });
test("lastNonWhiteSpace should work", () => { test("lastNonWhiteSpace should work", () => {
expect(lastNonWhitespace("تورن")).toBe("ن"); expect(lastNonWhitespace("تورن")).toBe("ن");
expect(lastNonWhitespace("وست .. ")).toBe("ت"); expect(lastNonWhitespace("وست .. ")).toBe("ت");
expect(lastNonWhitespace("د ... ")).toBe("د"); expect(lastNonWhitespace("د ... ")).toBe("د");
}); });
test("reverseP should work", () => { test("reverseP should work", () => {
expect(reverseP({ expect(
pIn: "کور", reverseP({
pOut: "تور ", pIn: "کور",
})).toEqual({ pOut: "تور ",
pIn: " کور", })
pOut: "تور", ).toEqual({
}); pIn: " کور",
expect(reverseP({ pOut: "تور",
pIn: "کور", });
pOut: "تور ... ", expect(
})).toEqual({ reverseP({
pIn: " ... کور", pIn: "کور",
pOut: "تور", pOut: "تور ... ",
}); })
expect(reverseP({ ).toEqual({
pIn: "کور", pIn: " ... کور",
pOut: "تور . ", pOut: "تور",
})).toEqual({ });
pIn: " . کور", expect(
pOut: "تور", reverseP({
}); pIn: "کور",
}) pOut: "تور . ",
})
).toEqual({
pIn: " . کور",
pOut: "تور",
});
});

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -8,25 +8,25 @@
import * as T from "../../types"; import * as T from "../../types";
import { import {
splitFIntoPhonemes, splitFIntoPhonemes,
Phoneme, Phoneme,
zwar, zwar,
zwarakey, zwarakay,
zer, zer,
pesh, pesh,
sukun, sukun,
hamzaAbove, hamzaAbove,
tashdeed, tashdeed,
wasla, wasla,
addP, addP,
advanceP, advanceP,
reverseP, reverseP,
overwriteP, overwriteP,
advanceForHamza, advanceForHamza,
advanceForHamzaMid, advanceForHamzaMid,
DiacriticsAccumulator, DiacriticsAccumulator,
stateInfo, stateInfo,
PhonemeStatus, PhonemeStatus,
} from "./diacritics-helpers"; } from "./diacritics-helpers";
import { removeFVarients } from "./accent-and-ps-utils"; import { removeFVarients } from "./accent-and-ps-utils";
@ -35,176 +35,107 @@ import { pipe } from "rambda";
/** /**
* Adds diacritics to a given PsString. * Adds diacritics to a given PsString.
* Errors if the phonetics and script don't line up. * Errors if the phonetics and script don't line up.
*
* IN PROGRESS - This will hopefully get done and replace the messy, unmaintainable phonetics-to-diacritics.ts currently in use
*/ */
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString { export function addDiacritics(
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? removeFVarients(f) : f); { p, f }: T.PsString,
const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() }); ignoreCommas?: true
if (pIn !== "") { ): T.PsString {
throw new Error("phonetics error - phonetics shorter than pashto script"); const phonemes: Phoneme[] = splitFIntoPhonemes(
} !ignoreCommas ? removeFVarients(f) : f
return { );
p: pOut, const { pIn, pOut } = phonemes.reduce(processPhoneme, {
f, pOut: "",
}; pIn: p.trim(),
});
if (pIn !== "") {
throw new Error("phonetics error - phonetics shorter than pashto script");
}
return {
p: pOut,
f,
};
} }
function processPhoneme( function processPhoneme(
acc: DiacriticsAccumulator, acc: DiacriticsAccumulator,
phoneme: Phoneme, phoneme: Phoneme,
i: number, i: number,
phonemes: Phoneme[], phonemes: Phoneme[]
): DiacriticsAccumulator { ): DiacriticsAccumulator {
const state = acc.pIn.slice(0, 5) === " ... " const state =
? advanceP(acc, 5) acc.pIn.slice(0, 5) === " ... "
: acc.pIn[0] === " " ? advanceP(acc, 5)
? advanceP(acc) : acc.pIn[0] === " "
: acc; ? advanceP(acc)
: acc;
const { const { phonemeInfo, diacritic, phs, prevPLetter } = stateInfo({
phonemeInfo, state,
diacritic, i,
phs, phoneme,
prevPLetter, phonemes,
} = stateInfo({ state, i, phoneme, phonemes }); });
return (phs === PhonemeStatus.LeadingLongVowel) ? return phs === PhonemeStatus.LeadingLongVowel
pipe( ? pipe(advanceP, addP(phonemeInfo.diacritic), advanceP)(state)
advanceP, : phs === PhonemeStatus.LeadingConsonantOrShortVowel
addP(phonemeInfo.diacritic), ? pipe(advanceP, addP(diacritic))(state)
advanceP, : phs === PhonemeStatus.DoubleConsonantTashdeed
)(state) ? pipe(prevPLetter === " " ? reverseP : addP(""), addP(tashdeed))(state)
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ? : phs === PhonemeStatus.EndingWithHayHim
pipe( ? pipe(advanceP, addP(phoneme === "u" ? hamzaAbove : sukun))(state)
advanceP, : phs === PhonemeStatus.DirectMatch
addP(diacritic), ? pipe(addP(diacritic), advanceP)(state)
)(state) : phs === PhonemeStatus.DirectMatchAfterSukun
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ? ? pipe(addP(sukun), advanceP)(state)
pipe( : phs === PhonemeStatus.PersianSilentWWithAa
prevPLetter === " " ? reverseP : addP(""), ? pipe(addP("("), advanceP, addP(")"), advanceP)(state)
addP(tashdeed) : phs === PhonemeStatus.ArabicWasla
)(state) ? pipe(addP(zer), overwriteP(wasla))(state)
: (phs === PhonemeStatus.EndingWithHeyHim) ? : phs === PhonemeStatus.Izafe
pipe( ? pipe(reverseP, addP(zer))(state)
advanceP, : phs === PhonemeStatus.EndOfDuParticle
addP(phoneme === "u" ? hamzaAbove : sukun), ? pipe(reverseP, addP(zwarakay))(state)
)(state) : phs === PhonemeStatus.ShortAEndingAfterHeem
: (phs === PhonemeStatus.DirectMatch) ? ? pipe(prevPLetter === " " ? reverseP : addP(""), addP(zwar))(state)
pipe( : phs === PhonemeStatus.EndingWithHayHimFromSukun
addP(diacritic), ? pipe(addP(sukun), advanceP)(state)
advanceP, : phs === PhonemeStatus.AlefDaggarEnding
)(state) ? pipe(advanceP, advanceP)(state)
: (phs === PhonemeStatus.DirectMatchAfterSukun) ? : phs === PhonemeStatus.LongAinVowelMissingComma
pipe( ? pipe(addP(diacritic), advanceP, addP(diacritic))(state)
addP(sukun), : phs === PhonemeStatus.ShortAinVowelMissingComma
advanceP, ? pipe(addP(diacritic), advanceP)(state)
)(state) : phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart
: (phs === PhonemeStatus.PersianSilentWWithAa) ? ? pipe(advanceP, advanceP)(state)
pipe( : phs === PhonemeStatus.AinWithLongAAtBeginning
addP("("), ? pipe(advanceP, advanceP)(state)
advanceP, : phs === PhonemeStatus.AlefWithHamza
addP(")"), ? pipe(advanceP)(state)
advanceP, : phs === PhonemeStatus.ShortVowel
)(state) ? pipe(
: (phs === PhonemeStatus.ArabicWasla) ? advanceForHamzaMid,
pipe( addP(phonemeInfo.diacritic),
addP(zer), // TODO THIS?
overwriteP(wasla), advanceForHamza
)(state) )(state)
: (phs === PhonemeStatus.Izafe) ? : phs === PhonemeStatus.ShortAForAlefBeforeFathatan
pipe( ? pipe(advanceP)(state)
reverseP, : phs === PhonemeStatus.NOnFathatan
addP(zer), ? pipe(advanceP)(state)
)(state) : phs === PhonemeStatus.HamzaOnWow
: (phs === PhonemeStatus.EndOfDuParticle) ? ? pipe(advanceP, addP(hamzaAbove), addP(diacritic))(state)
pipe( : phs === PhonemeStatus.ArabicDefiniteArticleUl
reverseP, ? pipe(advanceP, addP(pesh), advanceP)(state)
addP(zwarakey), : phs === PhonemeStatus.OoPrefix
)(state) ? pipe(advanceP, addP(pesh))(state)
: (phs === PhonemeStatus.ShortAEndingAfterHeem) ? : phs === PhonemeStatus.GlottalStopBeforeOo
pipe( ? pipe(advanceP, addP(hamzaAbove))(state)
prevPLetter === " " ? reverseP : addP(""), : phs === PhonemeStatus.OoAfterGlottalStopOo
addP(zwar), ? pipe(advanceP)(state)
)(state) : phs === PhonemeStatus.SilentAinAfterAlef
: (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ? ? pipe(advanceP, advanceP)(state)
pipe( : state;
addP(sukun),
advanceP,
)(state)
: (phs === PhonemeStatus.AlefDaggarEnding) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.LongAinVowelMissingComma) ?
pipe(
addP(diacritic),
advanceP,
addP(diacritic)
)(state)
: (phs === PhonemeStatus.ShortAinVowelMissingComma) ?
pipe(
addP(diacritic),
advanceP,
)(state)
: (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.AlefWithHamza) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.ShortVowel) ?
pipe(
advanceForHamzaMid,
addP(phonemeInfo.diacritic),
// TODO THIS?
advanceForHamza,
)(state)
: (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.NOnFathatan) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.HamzaOnWow) ?
pipe(
advanceP,
addP(hamzaAbove),
addP(diacritic),
)(state)
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
pipe(
advanceP,
addP(pesh),
advanceP,
)(state)
: (phs === PhonemeStatus.OoPrefix) ?
pipe(
advanceP,
addP(pesh),
)(state)
: (phs === PhonemeStatus.GlottalStopBeforeOo) ?
pipe(
advanceP,
addP(hamzaAbove),
)(state)
: (phs === PhonemeStatus.OoAfterGlottalStopOo) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.SilentAinAfterAlef) ?
pipe(
advanceP,
advanceP,
)(state)
: state;
} }

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@
*/ */
const zwar = "َ"; const zwar = "َ";
const zwarakey = "ٙ"; const zwarakay = "ٙ";
const zer = "ِ"; const zer = "ِ";
const pesh = "ُ"; const pesh = "ُ";
const sukun = "ْ"; const sukun = "ْ";
@ -19,8 +19,25 @@ const fathahan = "ً";
// TODO: THESE OTHER TRIGRAPHS?? // TODO: THESE OTHER TRIGRAPHS??
const quadrigraphs = ["-Ul-"]; const quadrigraphs = ["-Ul-"];
const trigraphs = ["eyy", "éyy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"]; const trigraphs = ["ey", "éy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
const digraphs = ["ắ", "aa", "áa", "ee", "ée", "ey", "éy", "oo", "óo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"]; const digraphs = [
"ắ",
"aa",
"áa",
"ee",
"ée",
"ay",
"áy",
"oo",
"óo",
"kh",
"gh",
"ts",
"dz",
"jz",
"ch",
"sh",
];
const endingDigraphs = ["uy", "úy"]; const endingDigraphs = ["uy", "úy"];
const willIgnore = ["?", " ", "`", ".", "…"]; const willIgnore = ["?", " ", "`", ".", "…"];
@ -28,7 +45,7 @@ export function splitFIntoPhonemes(f: string): string[] {
const result: string[] = []; const result: string[] = [];
let index = 0; let index = 0;
while (index < f.length) { while (index < f.length) {
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " "); const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
const threeLetterChunk = f.slice(index, index + 3); const threeLetterChunk = f.slice(index, index + 3);
const fourLetterChunk = f.slice(index, index + 4); const fourLetterChunk = f.slice(index, index + 4);
if (quadrigraphs.includes(fourLetterChunk)) { if (quadrigraphs.includes(fourLetterChunk)) {
@ -89,43 +106,145 @@ const phonemeTable = [
{ phoneme: "m", possibilities: ["م"], consonant: true }, { phoneme: "m", possibilities: ["م"], consonant: true },
{ phoneme: "n", possibilities: ["ن"], consonant: true }, { phoneme: "n", possibilities: ["ن"], consonant: true },
{ phoneme: "N", possibilities: ["ڼ"], consonant: true }, { phoneme: "N", possibilities: ["ڼ"], consonant: true },
{ phoneme: "h", possibilities: ["ه", "ح"], consonant: true, takesSukunOnEnding: true }, {
phoneme: "h",
possibilities: ["ه", "ح"],
consonant: true,
takesSukunOnEnding: true,
},
{ phoneme: "w", possibilities: ["و"], consonant: true }, { phoneme: "w", possibilities: ["و"], consonant: true },
{ phoneme: "y", possibilities: ["ی"], consonant: true }, { phoneme: "y", possibilities: ["ی"], consonant: true },
{ phoneme: "'", possibilities: ["ع", "ئ"], consonant: true }, { phoneme: "'", possibilities: ["ع", "ئ"], consonant: true },
{ phoneme: "-i-", isIzafe: true }, { phoneme: "-i-", isIzafe: true },
{ phoneme: "-U-", possibilities: [" و ", "و"]}, { phoneme: "-U-", possibilities: [" و ", "و"] },
{ phoneme: "-Ul-", possibilities: ["ال"]}, { phoneme: "-Ul-", possibilities: ["ال"] },
// vowels // vowels
{ phoneme: "aa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true }, {
{ phoneme: "áa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true }, phoneme: "aa",
{ phoneme: "ee", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true }, possibilities: ["ا"],
{ phoneme: "ée", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true }, beginning: ["آ", "ا"],
endingPossibilities: ["ا", "یٰ"],
isLongA: true,
canStartWithAynBefore: true,
},
{
phoneme: "áa",
possibilities: ["ا"],
beginning: ["آ", "ا"],
endingPossibilities: ["ا", "یٰ"],
isLongA: true,
canStartWithAynBefore: true,
},
{
phoneme: "ee",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ي"],
diacritic: zer,
canStartWithAynBefore: true,
},
{
phoneme: "ée",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ي"],
diacritic: zer,
canStartWithAynBefore: true,
},
{ phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true }, { phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true },
{ phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true }, { phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true },
{ phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true }, { phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true },
{ phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true }, { phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true },
{ phoneme: "oo", possibilities: ["و"], addAlefOnBeginning: true, alsoCanBePrefix: true, diacritic: pesh }, {
{ phoneme: "óo", possibilities: ["و"], addAlefOnBeginning: true, diacritic: pesh }, phoneme: "oo",
{ phoneme: "ey", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]}, possibilities: ["و"],
{ phoneme: "éy", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]}, addAlefOnBeginning: true,
alsoCanBePrefix: true,
diacritic: pesh,
},
{
phoneme: "óo",
possibilities: ["و"],
addAlefOnBeginning: true,
diacritic: pesh,
},
{
phoneme: "ay",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ی"],
},
{
phoneme: "áy",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ی"],
},
{ phoneme: "uy", possibilities: ["ۍ"], endingOnly: true }, { phoneme: "uy", possibilities: ["ۍ"], endingOnly: true },
{ phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS { phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS
{ phoneme: "eyy", possibilities: ["ئ"], endingOnly: true }, { phoneme: "ey", possibilities: ["ئ"], endingOnly: true },
{ phoneme: "éyy", possibilities: ["ئ"], endingOnly: true }, { phoneme: "éy", possibilities: ["ئ"], endingOnly: true },
{ phoneme: "a", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true }, {
{ phoneme: "á", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true }, phoneme: "a",
diacritic: zwar,
endingPossibilities: ["ه"],
canComeAfterHayEnding: true,
canBeFirstPartOfFathahanEnding: true,
},
{
phoneme: "á",
diacritic: zwar,
endingPossibilities: ["ه"],
canComeAfterHayEnding: true,
canBeFirstPartOfFathahanEnding: true,
},
{ phoneme: "ă", diacritic: zwar }, { phoneme: "ă", diacritic: zwar },
{ phoneme: "ắ", diacritic: zwar }, { phoneme: "ắ", diacritic: zwar },
{ phoneme: "u", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true }, {
{ phoneme: "ú", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true }, phoneme: "u",
{ phoneme: "i", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] }, diacritic: zwarakay,
{ phoneme: "í", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] }, endingPossibilities: ["ه"],
{ phoneme: "U", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] }, hamzaOnEnd: true,
{ phoneme: "Ú", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] }, },
{
phoneme: "ú",
diacritic: zwarakay,
endingPossibilities: ["ه"],
hamzaOnEnd: true,
},
{
phoneme: "i",
diacritic: zer,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
canBeWasla: true,
beginning: ["ا", "ع"],
},
{
phoneme: "í",
diacritic: zer,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
canBeWasla: true,
beginning: ["ا", "ع"],
},
{
phoneme: "U",
diacritic: pesh,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
beginning: ["ا", "ع"],
},
{
phoneme: "Ú",
diacritic: pesh,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
beginning: ["ا", "ع"],
},
]; ];
function isSpace(s: string): boolean { function isSpace(s: string): boolean {
@ -142,7 +261,11 @@ interface IDiacriticsErrorMessage {
i: number; i: number;
} }
function possibilityMatches(p: string, pIndex: number, possibilities: string[] | undefined): boolean { function possibilityMatches(
p: string,
pIndex: number,
possibilities: string[] | undefined
): boolean {
/* istanbul ignore next */ /* istanbul ignore next */
if (!possibilities) { if (!possibilities) {
return false; return false;
@ -155,10 +278,15 @@ function possibilityMatches(p: string, pIndex: number, possibilities: string[] |
return false; return false;
} }
function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean { function isPrefixedByDirectionalPronoun(
i: number,
phonemes: string[]
): boolean {
const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join(""); const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join("");
const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join(""); const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join("");
if (["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)) { if (
["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)
) {
return true; return true;
} }
if (potentialPronounThreeCharSlice === "raa-") { if (potentialPronounThreeCharSlice === "raa-") {
@ -167,7 +295,11 @@ function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean
return false; return false;
} }
export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes: boolean = false): string | undefined { export function phoneticsToDiacritics(
ps: string,
ph: string,
forbidOoPrefixes: boolean = false
): string | undefined {
const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]); const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]);
const p = ps.trim(); const p = ps.trim();
let result = ""; let result = "";
@ -179,58 +311,72 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
if (phoneme === "-") { if (phoneme === "-") {
return; return;
} }
const phonemeInfo = phonemeTable.find((element) => element.phoneme === phoneme); const phonemeInfo = phonemeTable.find(
(element) => element.phoneme === phoneme
);
if (!phonemeInfo) { if (!phonemeInfo) {
errored.push({ error: "phoneme info not found", phoneme, i }); errored.push({ error: "phoneme info not found", phoneme, i });
return; return;
} }
const isDoubleConsonant = ( const isDoubleConsonant =
phonemeInfo.consonant && phonemeInfo.consonant &&
phoneme === phonemes[i - 1] && phoneme === phonemes[i - 1] &&
// TODO: is this thourough enough to allow double consonants on the ending of the previous word? // TODO: is this thourough enough to allow double consonants on the ending of the previous word?
!(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek !(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek
) ? true : false; ? true
const isBeginning = !isDoubleConsonant && ((i === 0) || isSpace(p[pIndex - 1]) || (phonemes[i - 1] === "-Ul-") || isPrefixedByDirectionalPronoun(i, phonemes)); : false;
const upcomingAEndingAfterHey = (p[pIndex] === "ح" && isSpace(p[pIndex + 1]) && ["a", "á"].includes(phonemes[i + 1])); const isBeginning =
!isDoubleConsonant &&
(i === 0 ||
isSpace(p[pIndex - 1]) ||
phonemes[i - 1] === "-Ul-" ||
isPrefixedByDirectionalPronoun(i, phonemes));
const upcomingAEndingAfterHay =
p[pIndex] === "ح" &&
isSpace(p[pIndex + 1]) &&
["a", "á"].includes(phonemes[i + 1]);
// TODO: break this into a seperate function -- why can it sometimes be set to undefined? // TODO: break this into a seperate function -- why can it sometimes be set to undefined?
const isEnding = (i === phonemes.length - 1) || (( const isEnding =
(phonemeInfo.possibilities && isSpace(p[pIndex + 1])) || i === phonemes.length - 1 ||
(!phonemeInfo.possibilities && isSpace(p[pIndex])) || (((phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
( (!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
(!phonemeInfo.possibilities && isSpace(p[pIndex + 1])) && (!phonemeInfo.possibilities &&
(possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) || (p[pIndex] === "ع" && phonemes[i + 1] !== "'")) isSpace(p[pIndex + 1]) &&
) (possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) ||
) && !upcomingAEndingAfterHey (p[pIndex] === "ع" && phonemes[i + 1] !== "'")))) &&
&& // makes sure the next letter isn't a double consonant like haqq <- !upcomingAEndingAfterHay && // makes sure the next letter isn't a double consonant like haqq <-
!( !(
phonemeInfo.consonant && phoneme === phonemes[i + 1] // && (phonemeInfo.consonant && phoneme === phonemes[i + 1]) // &&
// !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
) )) || // can be the trailing double consanant on the end of a word
) || // can be the trailing double consanant on the end of a word (phonemeInfo.consonant &&
( phoneme === phonemes[i - 1] &&
phonemeInfo.consonant && phoneme === phonemes[i - 1] && !(
!(isEndSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) isEndSpace(p[pIndex - 1]) &&
) || // can be یٰ ending phonemeInfo.possibilities.includes(p[pIndex])
( )) || // can be یٰ ending
isEndSpace(p[pIndex + 2]) && (p.slice(pIndex, pIndex + 2) === "یٰ") (isEndSpace(p[pIndex + 2]) && p.slice(pIndex, pIndex + 2) === "یٰ");
);
const isUofDu = phoneme === "u" && ( const isUofDu =
p.slice(pIndex - 2, pIndex) === "د " || // د as previous word phoneme === "u" &&
(p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing (p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
p.slice(pIndex - 6, pIndex) === "د ... " // ... د is as the previous word (p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
); p.slice(pIndex - 6, pIndex) === "د ... "); // ... د is as the previous word
// TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance // TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance
const isEndingAynVowel = isEnding && phonemeInfo.diacritic && [p[pIndex], p[pIndex - 1]].includes("ع") && p[pIndex] !== "ه"; const isEndingAynVowel =
isEnding &&
phonemeInfo.diacritic &&
[p[pIndex], p[pIndex - 1]].includes("ع") &&
p[pIndex] !== "ه";
const isMiddle = !isBeginning && !isEnding; const isMiddle = !isBeginning && !isEnding;
const isSilentWaw = ( const isSilentWaw =
p[pIndex] === "و" && p[pIndex] === "و" &&
p[pIndex - 1] === "خ" && p[pIndex - 1] === "خ" &&
p[pIndex + 1] === "ا" && p[pIndex + 1] === "ا" &&
["áa", "aa"].includes(phoneme) ["áa", "aa"].includes(phoneme);
); const isAnAEndingAfterHay =
const isAnAEndingAfterHey = isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHeyEnding; isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHayEnding;
if (isDoubleConsonant) { if (isDoubleConsonant) {
pIndex--; pIndex--;
if (isSpace(p[pIndex])) { if (isSpace(p[pIndex])) {
@ -247,14 +393,22 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++; pIndex++;
} }
// special check for Arabic wasla // special check for Arabic wasla
if (p.slice(0, 3) === "بال" && phonemes[i - 1] === "b" && phonemeInfo.canBeWasla && phonemes[i + 1] === "l") { if (
p.slice(0, 3) === "بال" &&
phonemes[i - 1] === "b" &&
phonemeInfo.canBeWasla &&
phonemes[i + 1] === "l"
) {
result += phonemeInfo.diacritic + wasla; result += phonemeInfo.diacritic + wasla;
pIndex++; pIndex++;
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
// special check for fathahan ending // special check for fathahan ending
if (phonemeInfo.canBeFirstPartOfFathahanEnding && p.slice(pIndex, pIndex + 2) === "اً") { if (
phonemeInfo.canBeFirstPartOfFathahanEnding &&
p.slice(pIndex, pIndex + 2) === "اً"
) {
result += "ا"; result += "ا";
pIndex++; pIndex++;
return; return;
@ -265,7 +419,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// special check for words starting with عا or عی // special check for words starting with عا or عی
if (isBeginning && phonemeInfo.canStartWithAynBefore && p[pIndex] === "ع" && phonemeInfo.possibilities.includes(p[pIndex + 1])) { if (
isBeginning &&
phonemeInfo.canStartWithAynBefore &&
p[pIndex] === "ع" &&
phonemeInfo.possibilities.includes(p[pIndex + 1])
) {
result += "ع"; result += "ع";
result += phonemeInfo.diacritic ? phonemeInfo.diacritic : ""; result += phonemeInfo.diacritic ? phonemeInfo.diacritic : "";
result += p[pIndex + 1]; result += p[pIndex + 1];
@ -273,23 +432,45 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// special check for ؤ Ua // special check for ؤ Ua
if (phoneme === "U" && phonemes[i + 1] === "a" && phonemes[i + 2] !== "a" && p[pIndex] === "و") { if (
phoneme === "U" &&
phonemes[i + 1] === "a" &&
phonemes[i + 2] !== "a" &&
p[pIndex] === "و"
) {
result += "ؤ"; result += "ؤ";
pIndex++; pIndex++;
return; return;
} }
if (phoneme === "a" && phonemes[i - 1] === "U" && phonemes[i + 1] !== "a" && result.slice(-2) === "ؤ") { if (
phoneme === "a" &&
phonemes[i - 1] === "U" &&
phonemes[i + 1] !== "a" &&
result.slice(-2) === "ؤ"
) {
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
// special check for و wo // special check for و wo
if (isBeginning && phoneme === "w" && phonemes[i + 1] === "o" && p[pIndex] === "و" && isEndSpace(p[pIndex + 1])) { if (
isBeginning &&
phoneme === "w" &&
phonemes[i + 1] === "o" &&
p[pIndex] === "و" &&
isEndSpace(p[pIndex + 1])
) {
result += "و"; result += "و";
pIndex++; pIndex++;
return; return;
} }
// TODO: isEndSpace here is redundant?? // TODO: isEndSpace here is redundant??
if (isEnding && phoneme === "o" && phonemes[i - 1] === "w" && p[pIndex - 1] === "و" && isEndSpace(p[pIndex])) { if (
isEnding &&
phoneme === "o" &&
phonemes[i - 1] === "w" &&
p[pIndex - 1] === "و" &&
isEndSpace(p[pIndex])
) {
pIndex++; pIndex++;
return; return;
} }
@ -300,38 +481,67 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// special check for for أ in the middle of the word // special check for for أ in the middle of the word
if (!isBeginning && p[pIndex] === "أ" && phoneme === "a" && phonemes[i + 1] === "'" && phonemes[i + 2] === "a") { if (
!isBeginning &&
p[pIndex] === "أ" &&
phoneme === "a" &&
phonemes[i + 1] === "'" &&
phonemes[i + 2] === "a"
) {
result += "أ"; result += "أ";
pIndex++; pIndex++;
return; return;
} }
if (p[pIndex - 1] === "أ" && phonemes[i - 1] === "a" && phoneme === "'" && phonemes[i + 1] === "a") { if (
p[pIndex - 1] === "أ" &&
phonemes[i - 1] === "a" &&
phoneme === "'" &&
phonemes[i + 1] === "a"
) {
return; return;
} }
if (p[pIndex - 1] === "أ" && phonemes[i - 2] === "a" && phonemes[i - 1] === "'" && phoneme === "a") { if (
p[pIndex - 1] === "أ" &&
phonemes[i - 2] === "a" &&
phonemes[i - 1] === "'" &&
phoneme === "a"
) {
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
// special check for وو 'oo // special check for وو 'oo
if (!isBeginning && p[pIndex] === "و" && p[pIndex + 1] === "و" && phoneme === "'" && phonemes[i + 1] === "oo") { if (
!isBeginning &&
p[pIndex] === "و" &&
p[pIndex + 1] === "و" &&
phoneme === "'" &&
phonemes[i + 1] === "oo"
) {
result += "وُو"; result += "وُو";
pIndex += 2; pIndex += 2;
return; return;
} }
if (p[pIndex - 2] === "و" && p[pIndex - 1] === "و" && phonemes[i - 1] === "'" && phoneme === "oo") { if (
p[pIndex - 2] === "و" &&
p[pIndex - 1] === "و" &&
phonemes[i - 1] === "'" &&
phoneme === "oo"
) {
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
const prevLetterWasBeginningAyn = ( const prevLetterWasBeginningAyn =
p[pIndex - 1] === "ع" && p[pIndex - 1] === "ع" &&
// isEndSpace(p[pIndex]) && // This breaks it // isEndSpace(p[pIndex]) && // This breaks it
phoneme === "'" phoneme === "'";
);
// check if the phoneme lines up in the Pashto word // check if the phoneme lines up in the Pashto word
if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) { if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) {
// TODO: Maybe a little bad because it doesn't loop through possibilities // TODO: Maybe a little bad because it doesn't loop through possibilities
if ((!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) && p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]) { if (
(!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) &&
p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]
) {
errored.push({ error: "didn't start with an aleph", phoneme, i }); errored.push({ error: "didn't start with an aleph", phoneme, i });
return; return;
} }
@ -348,18 +558,18 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++; pIndex++;
return; return;
} else if ( } else if (
(isEnding && phonemeInfo.endingPossibilities) && isEnding &&
phonemeInfo.endingPossibilities &&
!isUofDu && !isUofDu &&
( !possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
!possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) && !isEndingAynVowel && // allowing short vowels on the end of words ending with ع
!isEndingAynVowel && // allowing short vowels on the end of words ending with ع !isAnAEndingAfterHay
!isAnAEndingAfterHey
)
) { ) {
errored.push({ error: "bad ending", phoneme, i }); errored.push({ error: "bad ending", phoneme, i });
return; return;
} else if ( } else if (
(isEnding && !phonemeInfo.endingPossibilities) && isEnding &&
!phonemeInfo.endingPossibilities &&
phonemeInfo.possibilities && phonemeInfo.possibilities &&
!phonemeInfo.possibilities.includes(p[pIndex]) !phonemeInfo.possibilities.includes(p[pIndex])
) { ) {
@ -367,14 +577,17 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
errored.push({ error: "bad ending 2", phoneme, i }); errored.push({ error: "bad ending 2", phoneme, i });
return; return;
} else if ( } else if (
(phonemeInfo.possibilities && !isEnding) && phonemeInfo.possibilities &&
( !isEnding &&
!(phonemeInfo.possibilities.includes(p[pIndex])) && !phonemeInfo.possibilities.includes(p[pIndex]) &&
!(p[pIndex] === "ن" && (p[pIndex + 1] === "ب" && phoneme === "m")) && // && // exception case with نب === mb !(p[pIndex] === "ن" && p[pIndex + 1] === "ب" && phoneme === "m") && // && // exception case with نب === mb
!prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat !prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
)
) { ) {
errored.push({ error: "improper coressponding letter in middle of word", phoneme, i }); errored.push({
error: "improper coressponding letter in middle of word",
phoneme,
i,
});
return; return;
} }
// console.log(phoneme, pIndex, p[pIndex], isEnding); // console.log(phoneme, pIndex, p[pIndex], isEnding);
@ -382,7 +595,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
// OK, it lines up with the Pashto word, we're good // OK, it lines up with the Pashto word, we're good
// Now continue building the result string // Now continue building the result string
// deal with starting with short vowels and alef // deal with starting with short vowels and alef
if (!isUofDu && isBeginning && !phonemeInfo.possibilities && !phonemeInfo.isIzafe) { if (
!isUofDu &&
isBeginning &&
!phonemeInfo.possibilities &&
!phonemeInfo.isIzafe
) {
// TODO: WHY IS THIS HERE // TODO: WHY IS THIS HERE
if (!["ا", "ع"].includes(p[pIndex])) { if (!["ا", "ع"].includes(p[pIndex])) {
errored.push({ error: "bad beginning 2", phoneme, i }); errored.push({ error: "bad beginning 2", phoneme, i });
@ -392,22 +610,30 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++; pIndex++;
} }
// if the phoneme carries a diacritic insert it (before the letter if it's coming) // if the phoneme carries a diacritic insert it (before the letter if it's coming)
const isOoPrefix = (phonemeInfo.alsoCanBePrefix && isBeginning && (p[pIndex - 1] !== "ا")); const isOoPrefix =
phonemeInfo.alsoCanBePrefix && isBeginning && p[pIndex - 1] !== "ا";
if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) { if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) {
// using this hack to remove the space and put it after the zwarakey we're going to add after د // using this hack to remove the space and put it after the zwarakay we're going to add after د
if (isUofDu && result.slice(-5) === " ... ") { if (isUofDu && result.slice(-5) === " ... ") {
result = result.slice(0, -5) + zwarakey + " ... "; result = result.slice(0, -5) + zwarakay + " ... ";
} else if (isUofDu && result.slice(-1) === " ") { } else if (isUofDu && result.slice(-1) === " ") {
result = result.slice(0, -1) + zwarakey + " "; result = result.slice(0, -1) + zwarakay + " ";
} else { } else {
result += phonemeInfo.diacritic; result += phonemeInfo.diacritic;
} }
} }
// TODO: The middle stuff might be unnecessary/unhelpful // TODO: The middle stuff might be unnecessary/unhelpful
const isACommaWithoutAyn = (phoneme === "'" && (p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ"))); const isACommaWithoutAyn =
phoneme === "'" && p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ");
// if the previous phoneme was a consonant insert a sukun // if the previous phoneme was a consonant insert a sukun
// console.log("Will I go into the adding thing?"); // console.log("Will I go into the adding thing?");
if (!isBeginning && previousPhonemeWasAConsonant && phonemeInfo.consonant && phonemes[i - 1] !== "'" && p[pIndex] !== "ع") { if (
!isBeginning &&
previousPhonemeWasAConsonant &&
phonemeInfo.consonant &&
phonemes[i - 1] !== "'" &&
p[pIndex] !== "ع"
) {
result += isDoubleConsonant ? tashdeed : sukun; result += isDoubleConsonant ? tashdeed : sukun;
} }
if (isEnding && isDoubleConsonant) { if (isEnding && isDoubleConsonant) {
@ -417,30 +643,38 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
} }
} }
// if there's a pashto letter for the phoneme, insert it // if there's a pashto letter for the phoneme, insert it
if (!isEndingAynVowel && !isACommaWithoutAyn && (phonemeInfo.possibilities || isEnding)) { if (
!isEndingAynVowel &&
!isACommaWithoutAyn &&
(phonemeInfo.possibilities || isEnding)
) {
// need the isSpace check to prevent weird behaviour with izafe // need the isSpace check to prevent weird behaviour with izafe
if (!isUofDu) { if (!isUofDu) {
if (isAnAEndingAfterHey) { if (isAnAEndingAfterHay) {
result += zwar; result += zwar;
if (p[pIndex] === " ") { if (p[pIndex] === " ") {
result += " "; result += " ";
} }
} else { } else {
result += (isDoubleConsonant || isSpace(p[pIndex])) ? "" : p[pIndex]; result += isDoubleConsonant || isSpace(p[pIndex]) ? "" : p[pIndex];
} }
} }
pIndex++; pIndex++;
} }
if (isEnding) { if (isEnding) {
if (isUofDu) { if (isUofDu) {
result += zwarakey; result += zwarakay;
} else if (phonemeInfo.hamzaOnEnd) { } else if (phonemeInfo.hamzaOnEnd) {
result += hamzaAbove; result += hamzaAbove;
} else if (phonemeInfo.takesSukunOnEnding) { } else if (phonemeInfo.takesSukunOnEnding) {
result += sukun; result += sukun;
} else if (p[pIndex] === daggerAlif) { } else if (p[pIndex] === daggerAlif) {
result += daggerAlif; result += daggerAlif;
} else if (isEndSpace(p[pIndex]) && p[pIndex - 1] === "ه" && phonemeInfo.takesDiacriticBeforeGurdaHeyEnding) { } else if (
isEndSpace(p[pIndex]) &&
p[pIndex - 1] === "ه" &&
phonemeInfo.takesDiacriticBeforeGurdaHayEnding
) {
result = result.slice(0, -1) + phonemeInfo.diacritic + "ه"; result = result.slice(0, -1) + phonemeInfo.diacritic + "ه";
} }
} }
@ -456,13 +690,20 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
} }
return; return;
} }
previousPhonemeWasAConsonant = (!isEnding && phonemeInfo.consonant) ? true : false; previousPhonemeWasAConsonant =
!isEnding && phonemeInfo.consonant ? true : false;
// ignore the ع or ئ if there's not a ' in the phonetics // ignore the ع or ئ if there's not a ' in the phonetics
const nextPhonemeInfo = phonemeTable.find((element) => phonemes[i + 1] === element.phoneme); const nextPhonemeInfo = phonemeTable.find(
(element) => phonemes[i + 1] === element.phoneme
);
if ( if (
["ع", "ئ"].includes(p[pIndex]) && ["ع", "ئ"].includes(p[pIndex]) &&
![phonemes[i + 1], phonemes[i + 2]].includes("'") && ![phonemes[i + 1], phonemes[i + 2]].includes("'") &&
!(nextPhonemeInfo && nextPhonemeInfo.diacritic && isEndSpace(p[pIndex + 1])) && // don't skip the ع on the end if there's another short letter coming after it !(
nextPhonemeInfo &&
nextPhonemeInfo.diacritic &&
isEndSpace(p[pIndex + 1])
) && // don't skip the ع on the end if there's another short letter coming after it
!(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end !(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end
!phonemeInfo.isIzafe !phonemeInfo.isIzafe
) { ) {
@ -476,7 +717,11 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// if we've arrived at a space in the Pashto, move along before the next iteration // if we've arrived at a space in the Pashto, move along before the next iteration
if (isSpace(p[pIndex]) && phonemes[i + 1] !== "-i-" && !upcomingAEndingAfterHey) { if (
isSpace(p[pIndex]) &&
phonemes[i + 1] !== "-i-" &&
!upcomingAEndingAfterHay
) {
result += " "; result += " ";
pIndex++; pIndex++;
} }

View File

@ -1,139 +1,139 @@
import * as T from "../../types"; import * as T from "../../types";
export const sandwiches: T.Sandwich[] = [ export const sandwiches: T.Sandwich[] = [
{ {
type: "sandwich", type: "sandwich",
before: { p: "له", f: "la" }, before: { p: "له", f: "la" },
after: { p: "نه", f: "na" }, after: { p: "نه", f: "na" },
e: "from", e: "from",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "له", f: "la" }, before: { p: "له", f: "la" },
after: { p: "څخه", f: "tsuxa" }, after: { p: "څخه", f: "tsuxa" },
e: "from", e: "from",
}, },
// TODO: Implement mayonaise // TODO: Implement mayonaise
// { // {
// type: "sandwich", // type: "sandwich",
// before: { p: "له", f: "la" }, // before: { p: "له", f: "la" },
// after: "mayonaise", // after: "mayonaise",
// e: "from", // e: "from",
// }, // },
{ {
type: "sandwich", type: "sandwich",
before: { p: "له", f: "la" }, before: { p: "له", f: "la" },
after: { p: "سره", f: "sara" }, after: { p: "سره", f: "sara" },
e: "with", e: "with",
}, },
{ {
type: "sandwich", type: "sandwich",
before: undefined, before: undefined,
after: { p: "ته", f: "ta" }, after: { p: "ته", f: "ta" },
e: "to", e: "to",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "لپاره", f: "lapaara" }, after: { p: "لپاره", f: "lapaara" },
e: "for", e: "for",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "دمخې", f: "dumúkhe" }, after: { p: "دمخې", f: "dumúkhe" },
e: "before/in front of", e: "before/in front of",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په څانګ", f: "pu tsaang" }, after: { p: "په څانګ", f: "pu tsaang" },
e: "beside", e: "beside",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "پر", f: "pur" }, before: { p: "پر", f: "pur" },
after: { p: "باندې", f: "baande" }, after: { p: "باندې", f: "baande" },
e: "on", e: "on",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "په", f: "pu" }, before: { p: "په", f: "pu" },
after: { p: "کې", f: "ke" }, after: { p: "کې", f: "ke" },
e: "in", e: "in",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "دننه", f: "dununa" }, after: { p: "دننه", f: "dununa" },
e: "inside", e: "inside",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "دباندې", f: "dubaande" }, after: { p: "دباندې", f: "dubaande" },
e: "outside", e: "outside",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "مخې ته", f: "mukhe ta" }, after: { p: "مخې ته", f: "mukhe ta" },
e: "in front of", e: "in front of",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "شا ته", f: "shaa ta" }, after: { p: "شا ته", f: "shaa ta" },
e: "behind", e: "behind",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "لاندې", f: "laande" }, after: { p: "لاندې", f: "laande" },
e: "under", e: "under",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په شان", f: "pu shaan" }, after: { p: "په شان", f: "pu shaan" },
e: "like", e: "like",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "غوندې", f: "ghwunde" }, after: { p: "غوندې", f: "ghwunde" },
e: "like", e: "like",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په حیث", f: "pu heys" }, after: { p: "په حیث", f: "pu hays" },
e: "as", e: "as",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په لور", f: "pu lor" }, after: { p: "په لور", f: "pu lor" },
e: "towards", e: "towards",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په اړه", f: "pu aRa" }, after: { p: "په اړه", f: "pu aRa" },
e: "about", e: "about",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په باره کې", f: "pu baara ke" }, after: { p: "په باره کې", f: "pu baara ke" },
e: "about", e: "about",
}, },
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په اړوند", f: "pu aRwand" }, after: { p: "په اړوند", f: "pu aRwand" },
e: "concerning", e: "concerning",
}, },
]; ];
export default sandwiches; export default sandwiches;

View File

@ -105,14 +105,14 @@ export const replacerInfo: IReplacerInfoItem[] = [
ipa: "ɪ́", ipa: "ɪ́",
}, },
{ {
char: "ey", char: "ay",
alalc: "ay", alalc: "ay",
ipa: "ai", ipa: "ay",
}, },
{ {
char: "éy", char: "áy",
alalc: "áy", alalc: "áy",
ipa: "ái", ipa: "áj",
}, },
{ {
char: "ee", char: "ee",
@ -140,9 +140,9 @@ export const replacerInfo: IReplacerInfoItem[] = [
ipa: "u:j", ipa: "u:j",
}, },
{ {
char: "eyy", char: "ey",
alalc: "y", alalc: "ey",
ipa: "ɛ̝j", ipa: "ej",
}, },
{ {
char: "e", char: "e",
@ -351,4 +351,5 @@ export const replacerInfo: IReplacerInfoItem[] = [
]; ];
// Global regex that tokenizes a phonetic string into the units listed in its alternation.
// NOTE(review): regex alternation is ordered - at each position the FIRST alternative
// that matches wins. In the second (post-change) pattern below, `a|á` appear before
// `ay|áy`, and `e{1,2}|ée|é` appear before `ey|éy`, so the input "ay" is consumed as
// "a" then "y" (and "ey" as "e" then "y"); the two-letter alternatives can never match.
// In the first (pre-change) pattern `ey|éy` preceded `e{1,2}` and therefore did match.
// Both patterns also list `u|ú` before `uy|úy` and `g` before `gh`, which shadows those
// longer alternatives in the same way.
// Presumably matches are looked up by `char` in replacerInfo, which would make the
// `ay` / `áy` / `ey` entries unreachable - confirm against translatePhonetics and the
// expected test outputs before reordering.
// tslint:disable-next-line // tslint:disable-next-line
export const replacerRegex = /aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ey|éy|e{1,2}|ée|é|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g; export const replacerRegex =
/aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ay|áy|e{1,2}|ée|é|ey|éy|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g;

View File

@ -6,9 +6,7 @@
* *
*/ */
import { import { translatePhonetics } from "./translate-phonetics";
translatePhonetics,
} from "./translate-phonetics";
const dialects = ["southern", "standard", "peshawer"]; const dialects = ["southern", "standard", "peshawer"];
const systems = ["ipa", "alalc"]; const systems = ["ipa", "alalc"];
@ -54,11 +52,11 @@ const translations = [
}, },
}, },
{ {
original: "saRey", original: "saRay",
ipa: { ipa: {
southern: "saɻai", southern: "saɻaj",
standard: "saɻai", standard: "saɻaj",
peshawer: "saɻai", peshawer: "saɻaj",
}, },
alalc: { alalc: {
southern: "saṛay", southern: "saṛay",
@ -72,20 +70,17 @@ translations.forEach((t) => {
systems.forEach((system) => { systems.forEach((system) => {
// check each dialect with given system // check each dialect with given system
dialects.forEach((dialect) => { dialects.forEach((dialect) => {
test( test(// @ts-ignore
// @ts-ignore `${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`, () => {
`${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`, const translated = translatePhonetics(t.original, {
() => {
const translated = translatePhonetics(t.original, {
// @ts-ignore
system,
// @ts-ignore
dialect,
});
// @ts-ignore // @ts-ignore
expect(translated).toBe(t[system][dialect]); system,
}, // @ts-ignore
); dialect,
});
// @ts-ignore
expect(translated).toBe(t[system][dialect]);
});
}); });
}); });
}); });

View File

@ -8,234 +8,461 @@
import { standardizeEntry, validateEntry } from "./validate-entry"; import { standardizeEntry, validateEntry } from "./validate-entry";
import * as T from "../../types"; import * as T from "../../types";
import { standardizePhonetics } from "./standardize-pashto";
const toTest: { const toTest: {
input: any, input: any;
output: T.DictionaryEntryError | { ok: true } | { checkComplement: true }, output: T.DictionaryEntryError | { ok: true } | { checkComplement: true };
}[] = [ }[] = [
{ {
input: { ts: undefined }, input: { ts: undefined },
output: { output: {
errors: ["missing ts", "missing i", "missing p", "missing f", "missing e"], errors: [
p: "", "missing ts",
f: "", "missing i",
e: "", "missing p",
erroneousFields: ["ts", "i", "p", "f", "e"], "missing f",
ts: 0, "missing e",
}, ],
p: "",
f: "",
e: "",
erroneousFields: ["ts", "i", "p", "f", "e"],
ts: 0,
}, },
{ },
input: { ts: 123, p: "کور", e: "house" }, {
output: { input: { ts: 123, p: "کور", e: "house" },
errors: ["missing i", "missing f"], output: {
p: "کور", errors: ["missing i", "missing f"],
f: "", p: "کور",
ts: 123, f: "",
e: "house", ts: 123,
erroneousFields: ["i", "f"], e: "house",
}, erroneousFields: ["i", "f"],
}, },
{ },
input: {"i":293,"ts":1527821299,"p":"اخطار","f":"ixtáar","e":"warning, reprimand, admonishment","c":"n. m."}, {
output: { input: {
errors: ["script and phonetics do not match for p and f"], i: 293,
p: "اخطار", ts: 1527821299,
f: "ixtáar", p: "اخطار",
e: "warning, reprimand, admonishment", f: "ixtáar",
ts: 1527821299, e: "warning, reprimand, admonishment",
erroneousFields: ["p", "f"], c: "n. m.",
},
}, },
{ output: {
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbf":"puxtan"}, errors: ["script and phonetics do not match for p and f"],
output: { p: "اخطار",
errors: ["missing infbp"], f: "ixtáar",
p: "پښتون", e: "warning, reprimand, admonishment",
f: "puxtoon", ts: 1527821299,
e: "Pashtun", erroneousFields: ["p", "f"],
ts: 1527815197,
erroneousFields: ["infbp"],
},
}, },
{ },
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbp":"پښتن"}, {
output: { input: {
errors: ["missing infbf"], i: 2433,
p: "پښتون", ts: 1527815197,
f: "puxtoon", p: "پښتون",
e: "Pashtun", f: "puxtoon",
ts: 1527815197, e: "Pashtun",
erroneousFields: ["infbf"], c: "n. m. unisex / adj. irreg.",
}, infap: "پښتانه",
infaf: "puxtaanu",
infbf: "puxtan",
}, },
{ output: {
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puktaanu","infbp":"پښتن"}, errors: ["missing infbp"],
output: { p: "پښتون",
errors: ["script and phonetics do not match for infap and infaf", "missing infbf"], f: "puxtoon",
p: "پښتون", e: "Pashtun",
f: "puxtoon", ts: 1527815197,
e: "Pashtun", erroneousFields: ["infbp"],
ts: 1527815197,
erroneousFields: ["infap", "infaf", "infbf"],
},
}, },
{ },
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true}, {
output: { input: {
errors: ["missing separationAtF"], i: 2433,
p: "څملاستل", ts: 1527815197,
f: "tsumlaastúl", p: "پښتون",
e: "to lie down", f: "puxtoon",
ts: 1527819674, e: "Pashtun",
erroneousFields: ["separationAtF"], c: "n. m. unisex / adj. irreg.",
}, infap: "پښتانه",
infaf: "puxtaanu",
infbp: "پښتن",
}, },
{ output: {
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"sumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true}, errors: ["missing infbf"],
output: { p: "پښتون",
errors: ["script and phonetics do not match for p and f", "missing separationAtF"], f: "puxtoon",
p: "څملاستل", e: "Pashtun",
f: "sumlaastúl", ts: 1527815197,
e: "to lie down", erroneousFields: ["infbf"],
ts: 1527819674,
erroneousFields: ["p", "f", "separationAtF"],
},
}, },
{ },
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtF":4,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true}, {
output: { input: {
errors: ["missing separationAtP"], i: 2433,
p: "څملاستل", ts: 1527815197,
f: "tsumlaastúl", p: "پښتون",
e: "to lie down", f: "puxtoon",
ts: 1527819674, e: "Pashtun",
erroneousFields: ["separationAtP"], c: "n. m. unisex / adj. irreg.",
}, infap: "پښتانه",
infaf: "puktaanu",
infbp: "پښتن",
}, },
{ output: {
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","c":"v. stat. comp. trans."}, errors: [
output: { "script and phonetics do not match for infap and infaf",
errors: ["missing complement for compound verb"], "missing infbf",
p: "پخول", ],
f: "pakhawul", p: "پښتون",
e: "to cook, prepare, to cause to ripen, mature", f: "puxtoon",
ts: 1571859113828, e: "Pashtun",
erroneousFields: ["l"], ts: 1527815197,
}, erroneousFields: ["infap", "infaf", "infbf"],
}, },
{ },
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","l":1574867531681,"c":"v. stat. comp. trans."}, {
output: { input: {
checkComplement: true, i: 5000,
}, ts: 1527819674,
p: "څملاستل",
f: "tsumlaastúl",
e: "to lie down",
l: 1596485996977,
separationAtP: 2,
c: "v. intrans. seperable",
psp: "څمل",
psf: "tsaml",
noOo: true,
}, },
{ output: {
input: {"i":2231,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."}, errors: ["missing separationAtF"],
output: { ok: true }, p: "څملاستل",
f: "tsumlaastúl",
e: "to lie down",
ts: 1527819674,
erroneousFields: ["separationAtF"],
}, },
{ },
input: {"i":0,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."}, {
output: { ok: true }, input: {
i: 5000,
ts: 1527819674,
p: "څملاستل",
f: "sumlaastúl",
e: "to lie down",
l: 1596485996977,
separationAtP: 2,
c: "v. intrans. seperable",
psp: "څمل",
psf: "tsaml",
noOo: true,
}, },
{ output: {
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj."}, errors: [
output: { "script and phonetics do not match for p and f",
errors: ["script and phonetics do not match for p and f"], "missing separationAtF",
p: "آبدار", ],
f: "aawdáar", p: "څملاستل",
e: "watery, damp, humid, juicy", f: "sumlaastúl",
ts: 1575058859661, e: "to lie down",
erroneousFields: ["p", "f"], ts: 1527819674,
}, erroneousFields: ["p", "f", "separationAtF"],
}, },
{ },
input: {"ts":1591033069786,"i":7717,"p":"ستړی کول","f":"stuRey kawul","g":"stuReykedul","e":"to get tired, fatigued","c":"v. stat. comp. intrans.","l":1527815306,"ec":"get","ep":"tired"}, {
output: { input: {
errors: ["wrong ending for intrans. stat. comp"], i: 5000,
p: "ستړی کول", ts: 1527819674,
f: "stuRey kawul", p: "څملاستل",
e: "to get tired, fatigued", f: "tsumlaastúl",
ts: 1591033069786, e: "to lie down",
erroneousFields: ["p", "f"], l: 1596485996977,
}, separationAtF: 4,
c: "v. intrans. seperable",
psp: "څمل",
psf: "tsaml",
noOo: true,
}, },
{ output: {
input: {"ts":1591033078746,"i":7716,"p":"ستړی کېدل","f":"stuRey kedul","g":"stuReykawul","e":"to make tired, wear out","c":"v. stat. comp. trans.","l":1527815306,"ec":"make","ep":"tired"}, errors: ["missing separationAtP"],
output: { p: "څملاستل",
errors: ["wrong ending for trans. stat. comp"], f: "tsumlaastúl",
p: "ستړی کېدل", e: "to lie down",
f: "stuRey kedul", ts: 1527819674,
e: "to make tired, wear out", erroneousFields: ["separationAtP"],
ts: 1591033078746,
erroneousFields: ["p", "f"],
},
}, },
{ },
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true}, {
output: { ok: true }, input: {
i: 2222,
ts: 1571859113828,
p: "پخول",
f: "pakhawul",
e: "to cook, prepare, to cause to ripen, mature",
c: "v. stat. comp. trans.",
}, },
{ output: {
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true}, errors: ["missing complement for compound verb"],
output: { ok: true }, p: "پخول",
f: "pakhawul",
e: "to cook, prepare, to cause to ripen, mature",
ts: 1571859113828,
erroneousFields: ["l"],
}, },
{ },
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, {
output: { input: {
errors: ["spacing discrepency between p and f"], i: 2222,
p: "بې چاره", ts: 1571859113828,
f: "bechaara", p: "پخول",
e: "poor thing, pitiful", f: "pakhawul",
ts: 1527812488, e: "to cook, prepare, to cause to ripen, mature",
erroneousFields: ["p", "f"], l: 1574867531681,
}, c: "v. stat. comp. trans.",
}, },
{ output: {
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, checkComplement: true,
output: {
errors: ["spacing discrepency between p and f"],
p: "بېچاره",
f: "be chaara",
e: "poor thing, pitiful",
ts: 1527812488,
erroneousFields: ["p", "f"],
},
}, },
{ },
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, {
output: { ok: true } input: {
i: 2231,
ts: 1527812013,
p: "پراخ",
f: "praakh, paráakh",
e: "wide, broad, spacious, vast",
c: "adj.",
}, },
{ output: { ok: true },
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"}, },
output: { {
errors: ["spacing discrepency between app and apf"], input: {
p: "مکتب", i: 0,
f: "maktab", ts: 1527812013,
e: "school", p: "پراخ",
ts: 1527814265, f: "praakh, paráakh",
erroneousFields: ["app", "apf"], e: "wide, broad, spacious, vast",
}, c: "adj.",
}, },
{ output: { ok: true },
input: {"ts":1527815870,"i":183,"p":"اثر","f":"asar","g":"asar","e":"influence, impression, tracks, affect","r":4,"c":"n. m.","app":"اثرات, آثار","apf":"asráat"}, },
output: { {
errors: ["difference in variation length between app and apf", "script and phonetics do not match for app and apf"], input: {
p: "اثر", i: 12,
f: "asar", ts: 1575058859661,
e: "influence, impression, tracks, affect", p: "آبدار",
ts: 1527815870, f: "aawdáar",
erroneousFields: ["app", "apf"], e: "watery, damp, humid, juicy",
}, c: "adj.",
}, },
output: {
errors: ["script and phonetics do not match for p and f"],
p: "آبدار",
f: "aawdáar",
e: "watery, damp, humid, juicy",
ts: 1575058859661,
erroneousFields: ["p", "f"],
},
},
{
input: {
ts: 1591033069786,
i: 7717,
p: "ستړی کول",
f: "stuRay kawul",
g: "stuRaykedul",
e: "to get tired, fatigued",
c: "v. stat. comp. intrans.",
l: 1527815306,
ec: "get",
ep: "tired",
},
output: {
errors: ["wrong ending for intrans. stat. comp"],
p: "ستړی کول",
f: "stuRay kawul",
e: "to get tired, fatigued",
ts: 1591033069786,
erroneousFields: ["p", "f"],
},
},
{
input: {
ts: 1591033078746,
i: 7716,
p: "ستړی کېدل",
f: "stuRay kedul",
g: "stuRaykawul",
e: "to make tired, wear out",
c: "v. stat. comp. trans.",
l: 1527815306,
ec: "make",
ep: "tired",
},
output: {
errors: ["wrong ending for trans. stat. comp"],
p: "ستړی کېدل",
f: "stuRay kedul",
e: "to make tired, wear out",
ts: 1591033078746,
erroneousFields: ["p", "f"],
},
},
{
input: {
i: 12,
ts: 1575058859661,
p: "آبدار",
f: "aawdáar",
e: "watery, damp, humid, juicy",
c: "adj.",
diacExcept: true,
},
output: { ok: true },
},
{
input: {
i: 12,
ts: 1575058859661,
p: "آبدار",
f: "aawdáar",
e: "watery, damp, humid, juicy",
c: "adj.",
diacExcept: true,
},
output: { ok: true },
},
{
input: {
ts: 1527812488,
i: 1934,
p: "بې چاره",
f: "bechaara",
g: "bechaara",
e: "poor thing, pitiful",
r: 3,
c: "adj.",
},
output: {
errors: ["spacing discrepency between p and f"],
p: "بې چاره",
f: "bechaara",
e: "poor thing, pitiful",
ts: 1527812488,
erroneousFields: ["p", "f"],
},
},
{
input: {
ts: 1527812488,
i: 1934,
p: "بېچاره",
f: "be chaara",
g: "bechaara",
e: "poor thing, pitiful",
r: 3,
c: "adj.",
},
output: {
errors: ["spacing discrepency between p and f"],
p: "بېچاره",
f: "be chaara",
e: "poor thing, pitiful",
ts: 1527812488,
erroneousFields: ["p", "f"],
},
},
{
input: {
ts: 1527812488,
i: 1934,
p: "بې چاره",
f: "be chaara",
g: "bechaara",
e: "poor thing, pitiful",
r: 3,
c: "adj.",
},
output: { ok: true },
},
{
input: {
ts: 1527814265,
i: 12969,
p: "مکتب",
f: "maktab",
g: "maktab",
e: "school",
r: 4,
c: "n. m.",
app: "مکاتب",
apf: "ma kaatib",
},
output: {
errors: ["spacing discrepency between app and apf"],
p: "مکتب",
f: "maktab",
e: "school",
ts: 1527814265,
erroneousFields: ["app", "apf"],
},
},
{
input: {
ts: 1527815870,
i: 183,
p: "اثر",
f: "asar",
g: "asar",
e: "influence, impression, tracks, affect",
r: 4,
c: "n. m.",
app: "اثرات, آثار",
apf: "asráat",
},
output: {
errors: [
"difference in variation length between app and apf",
"script and phonetics do not match for app and apf",
],
p: "اثر",
f: "asar",
e: "influence, impression, tracks, affect",
ts: 1527815870,
erroneousFields: ["app", "apf"],
},
},
]; ];
// Table-driven check: runs validateEntry on every fixture in `toTest` and requires the
// result to deep-equal that fixture's `output`. All fixtures share one test case, so the
// first failing fixture fails the whole test.
test("validateEntry should work", () => { test("validateEntry should work", () => {
toTest.forEach((t) => { toTest.forEach((t) => {
// inputs are typed `any` in the fixture table, hence the cast to T.DictionaryEntry
expect(validateEntry(t.input as T.DictionaryEntry)).toEqual(t.output); expect(validateEntry(t.input as T.DictionaryEntry)).toEqual(t.output);
}); });
}); });
// Checks standardizeEntry on a single entry: the expected `f` field gains an apostrophe
// ("ijtimaaee, ijtimaayee" -> "ijtimaa'ee, ijtimaayee") while every other field,
// including `g`, is expected to come back unchanged.
test("standardizeEntry", () => { test("standardizeEntry", () => {
expect(standardizeEntry({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaaee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."})) expect(
.toEqual({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa'ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."}); standardizeEntry({
i: 195,
ts: 1527822036,
p: "اجتماعي",
f: "ijtimaaee, ijtimaayee",
g: "ijtimaaee,ijtimaayee",
e: "public, social, societal",
c: "adj.",
})
).toEqual({
i: 195,
ts: 1527822036,
p: "اجتماعي",
f: "ijtimaa'ee, ijtimaayee",
g: "ijtimaaee,ijtimaayee",
e: "public, social, societal",
c: "adj.",
});
}); });