phonetics conversion done
This commit is contained in:
parent
fc97db0dd3
commit
c0cd34c3d6
File diff suppressed because it is too large
Load Diff
|
@ -1,131 +1,133 @@
|
|||
import {
|
||||
splitFIntoPhonemes,
|
||||
last,
|
||||
addP,
|
||||
lastNonWhitespace,
|
||||
advanceP,
|
||||
reverseP,
|
||||
overwriteP,
|
||||
advanceForHamza,
|
||||
advanceForHamzaMid,
|
||||
splitFIntoPhonemes,
|
||||
last,
|
||||
addP,
|
||||
lastNonWhitespace,
|
||||
reverseP,
|
||||
} from "./diacritics-helpers";
|
||||
|
||||
const phonemeSplits: Array<{
|
||||
in: string,
|
||||
out: string[],
|
||||
in: string;
|
||||
out: string[];
|
||||
}> = [
|
||||
{
|
||||
in: "kor",
|
||||
out: ["k", "o", "r"],
|
||||
},
|
||||
{
|
||||
in: "raaghey",
|
||||
out: ["r", "aa", "gh", "ey"],
|
||||
},
|
||||
{
|
||||
in: "ist'imaal",
|
||||
out: ["i", "s", "t", "'", "i", "m", "aa", "l"],
|
||||
},
|
||||
{
|
||||
in: "hatsa",
|
||||
out: ["h", "a", "ts", "a"],
|
||||
},
|
||||
{
|
||||
in: "ba",
|
||||
out: ["b", "a"],
|
||||
},
|
||||
{
|
||||
in: "peydáa",
|
||||
out: ["p", "ey", "d", "aa"],
|
||||
},
|
||||
{
|
||||
in: "be kaar",
|
||||
out: ["b", "e", "k", "aa", "r"],
|
||||
},
|
||||
{
|
||||
in: "raadzeyy",
|
||||
out: ["r", "aa", "dz", "eyy"],
|
||||
},
|
||||
{
|
||||
in: "badanuy ??",
|
||||
out: ["b", "a", "d", "a", "n", "uy"],
|
||||
},
|
||||
{
|
||||
in: "tur ... pore",
|
||||
out: ["t", "u", "r", "p", "o", "r", "e"],
|
||||
},
|
||||
{
|
||||
in: "daar-Ul-iqaama",
|
||||
out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
|
||||
},
|
||||
{
|
||||
in: "kor",
|
||||
out: ["k", "o", "r"],
|
||||
},
|
||||
{
|
||||
in: "raaghay",
|
||||
out: ["r", "aa", "gh", "ay"],
|
||||
},
|
||||
{
|
||||
in: "ist'imaal",
|
||||
out: ["i", "s", "t", "'", "i", "m", "aa", "l"],
|
||||
},
|
||||
{
|
||||
in: "hatsa",
|
||||
out: ["h", "a", "ts", "a"],
|
||||
},
|
||||
{
|
||||
in: "ba",
|
||||
out: ["b", "a"],
|
||||
},
|
||||
{
|
||||
in: "paydáa",
|
||||
out: ["p", "ay", "d", "aa"],
|
||||
},
|
||||
{
|
||||
in: "be kaar",
|
||||
out: ["b", "e", "k", "aa", "r"],
|
||||
},
|
||||
{
|
||||
in: "raadzey",
|
||||
out: ["r", "aa", "dz", "ey"],
|
||||
},
|
||||
{
|
||||
in: "badanuy ??",
|
||||
out: ["b", "a", "d", "a", "n", "uy"],
|
||||
},
|
||||
{
|
||||
in: "tur ... pore",
|
||||
out: ["t", "u", "r", "p", "o", "r", "e"],
|
||||
},
|
||||
{
|
||||
in: "daar-Ul-iqaama",
|
||||
out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
|
||||
},
|
||||
];
|
||||
|
||||
phonemeSplits.forEach((s) => {
|
||||
test(`${s.in} should split properly`, () => {
|
||||
const result = splitFIntoPhonemes(s.in);
|
||||
expect(result).toEqual(s.out);
|
||||
});
|
||||
test(`${s.in} should split properly`, () => {
|
||||
const result = splitFIntoPhonemes(s.in);
|
||||
expect(result).toEqual(s.out);
|
||||
});
|
||||
});
|
||||
|
||||
const badPhonetics: Array<{
|
||||
in: string,
|
||||
problem: string,
|
||||
in: string;
|
||||
problem: string;
|
||||
}> = [
|
||||
{
|
||||
in: "acar",
|
||||
problem: "c",
|
||||
},
|
||||
{
|
||||
in: "a7am",
|
||||
problem: "7",
|
||||
},
|
||||
{
|
||||
in: "acar",
|
||||
problem: "c",
|
||||
},
|
||||
{
|
||||
in: "a7am",
|
||||
problem: "7",
|
||||
},
|
||||
];
|
||||
|
||||
test("bad phonetic characters should throw an error", () => {
|
||||
badPhonetics.forEach((s) => {
|
||||
expect(() => {
|
||||
splitFIntoPhonemes(s.in);
|
||||
}).toThrow(`illegal phonetic character: ${s.problem}`);
|
||||
});
|
||||
badPhonetics.forEach((s) => {
|
||||
expect(() => {
|
||||
splitFIntoPhonemes(s.in);
|
||||
}).toThrow(`illegal phonetic character: ${s.problem}`);
|
||||
});
|
||||
});
|
||||
|
||||
test("last should work", () => {
|
||||
expect(last("this")).toBe("s");
|
||||
expect(last("this")).toBe("s");
|
||||
});
|
||||
|
||||
test("addP should work", () => {
|
||||
expect(addP("ت")({ pIn: "", pOut: "کر" })).toEqual({
|
||||
pIn: "",
|
||||
pOut: "کرت",
|
||||
});
|
||||
expect(addP("ت")({ pIn: "", pOut: "کر" })).toEqual({
|
||||
pIn: "",
|
||||
pOut: "کرت",
|
||||
});
|
||||
});
|
||||
|
||||
test("lastNonWhiteSpace should work", () => {
|
||||
expect(lastNonWhitespace("تورن")).toBe("ن");
|
||||
expect(lastNonWhitespace("وست .. ")).toBe("ت");
|
||||
expect(lastNonWhitespace("د ... ")).toBe("د");
|
||||
expect(lastNonWhitespace("تورن")).toBe("ن");
|
||||
expect(lastNonWhitespace("وست .. ")).toBe("ت");
|
||||
expect(lastNonWhitespace("د ... ")).toBe("د");
|
||||
});
|
||||
|
||||
test("reverseP should work", () => {
|
||||
expect(reverseP({
|
||||
pIn: "کور",
|
||||
pOut: "تور ",
|
||||
})).toEqual({
|
||||
pIn: " کور",
|
||||
pOut: "تور",
|
||||
});
|
||||
expect(reverseP({
|
||||
pIn: "کور",
|
||||
pOut: "تور ... ",
|
||||
})).toEqual({
|
||||
pIn: " ... کور",
|
||||
pOut: "تور",
|
||||
});
|
||||
expect(reverseP({
|
||||
pIn: "کور",
|
||||
pOut: "تور . ",
|
||||
})).toEqual({
|
||||
pIn: " . کور",
|
||||
pOut: "تور",
|
||||
});
|
||||
})
|
||||
expect(
|
||||
reverseP({
|
||||
pIn: "کور",
|
||||
pOut: "تور ",
|
||||
})
|
||||
).toEqual({
|
||||
pIn: " کور",
|
||||
pOut: "تور",
|
||||
});
|
||||
expect(
|
||||
reverseP({
|
||||
pIn: "کور",
|
||||
pOut: "تور ... ",
|
||||
})
|
||||
).toEqual({
|
||||
pIn: " ... کور",
|
||||
pOut: "تور",
|
||||
});
|
||||
expect(
|
||||
reverseP({
|
||||
pIn: "کور",
|
||||
pOut: "تور . ",
|
||||
})
|
||||
).toEqual({
|
||||
pIn: " . کور",
|
||||
pOut: "تور",
|
||||
});
|
||||
});
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -8,25 +8,25 @@
|
|||
|
||||
import * as T from "../../types";
|
||||
import {
|
||||
splitFIntoPhonemes,
|
||||
Phoneme,
|
||||
zwar,
|
||||
zwarakey,
|
||||
zer,
|
||||
pesh,
|
||||
sukun,
|
||||
hamzaAbove,
|
||||
tashdeed,
|
||||
wasla,
|
||||
addP,
|
||||
advanceP,
|
||||
reverseP,
|
||||
overwriteP,
|
||||
advanceForHamza,
|
||||
advanceForHamzaMid,
|
||||
DiacriticsAccumulator,
|
||||
stateInfo,
|
||||
PhonemeStatus,
|
||||
splitFIntoPhonemes,
|
||||
Phoneme,
|
||||
zwar,
|
||||
zwarakay,
|
||||
zer,
|
||||
pesh,
|
||||
sukun,
|
||||
hamzaAbove,
|
||||
tashdeed,
|
||||
wasla,
|
||||
addP,
|
||||
advanceP,
|
||||
reverseP,
|
||||
overwriteP,
|
||||
advanceForHamza,
|
||||
advanceForHamzaMid,
|
||||
DiacriticsAccumulator,
|
||||
stateInfo,
|
||||
PhonemeStatus,
|
||||
} from "./diacritics-helpers";
|
||||
|
||||
import { removeFVarients } from "./accent-and-ps-utils";
|
||||
|
@ -35,176 +35,107 @@ import { pipe } from "rambda";
|
|||
/**
|
||||
* Adds diacritics to a given PsString.
|
||||
* Errors if the phonetics and script don't line up.
|
||||
*
|
||||
* IN PROGRESS - This will hopefully get done and replace the messy, unmaintainable phonetics-to-diacritics.ts currently in use
|
||||
*/
|
||||
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
|
||||
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? removeFVarients(f) : f);
|
||||
const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() });
|
||||
if (pIn !== "") {
|
||||
throw new Error("phonetics error - phonetics shorter than pashto script");
|
||||
}
|
||||
return {
|
||||
p: pOut,
|
||||
f,
|
||||
};
|
||||
export function addDiacritics(
|
||||
{ p, f }: T.PsString,
|
||||
ignoreCommas?: true
|
||||
): T.PsString {
|
||||
const phonemes: Phoneme[] = splitFIntoPhonemes(
|
||||
!ignoreCommas ? removeFVarients(f) : f
|
||||
);
|
||||
const { pIn, pOut } = phonemes.reduce(processPhoneme, {
|
||||
pOut: "",
|
||||
pIn: p.trim(),
|
||||
});
|
||||
if (pIn !== "") {
|
||||
throw new Error("phonetics error - phonetics shorter than pashto script");
|
||||
}
|
||||
return {
|
||||
p: pOut,
|
||||
f,
|
||||
};
|
||||
}
|
||||
|
||||
function processPhoneme(
|
||||
acc: DiacriticsAccumulator,
|
||||
phoneme: Phoneme,
|
||||
i: number,
|
||||
phonemes: Phoneme[],
|
||||
acc: DiacriticsAccumulator,
|
||||
phoneme: Phoneme,
|
||||
i: number,
|
||||
phonemes: Phoneme[]
|
||||
): DiacriticsAccumulator {
|
||||
const state = acc.pIn.slice(0, 5) === " ... "
|
||||
? advanceP(acc, 5)
|
||||
: acc.pIn[0] === " "
|
||||
? advanceP(acc)
|
||||
: acc;
|
||||
const state =
|
||||
acc.pIn.slice(0, 5) === " ... "
|
||||
? advanceP(acc, 5)
|
||||
: acc.pIn[0] === " "
|
||||
? advanceP(acc)
|
||||
: acc;
|
||||
|
||||
const {
|
||||
phonemeInfo,
|
||||
diacritic,
|
||||
phs,
|
||||
prevPLetter,
|
||||
} = stateInfo({ state, i, phoneme, phonemes });
|
||||
const { phonemeInfo, diacritic, phs, prevPLetter } = stateInfo({
|
||||
state,
|
||||
i,
|
||||
phoneme,
|
||||
phonemes,
|
||||
});
|
||||
|
||||
return (phs === PhonemeStatus.LeadingLongVowel) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(phonemeInfo.diacritic),
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(diacritic),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
|
||||
pipe(
|
||||
prevPLetter === " " ? reverseP : addP(""),
|
||||
addP(tashdeed)
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.EndingWithHeyHim) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(phoneme === "u" ? hamzaAbove : sukun),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.DirectMatch) ?
|
||||
pipe(
|
||||
addP(diacritic),
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.DirectMatchAfterSukun) ?
|
||||
pipe(
|
||||
addP(sukun),
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.PersianSilentWWithAa) ?
|
||||
pipe(
|
||||
addP("("),
|
||||
advanceP,
|
||||
addP(")"),
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ArabicWasla) ?
|
||||
pipe(
|
||||
addP(zer),
|
||||
overwriteP(wasla),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.Izafe) ?
|
||||
pipe(
|
||||
reverseP,
|
||||
addP(zer),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.EndOfDuParticle) ?
|
||||
pipe(
|
||||
reverseP,
|
||||
addP(zwarakey),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ShortAEndingAfterHeem) ?
|
||||
pipe(
|
||||
prevPLetter === " " ? reverseP : addP(""),
|
||||
addP(zwar),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
|
||||
pipe(
|
||||
addP(sukun),
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.AlefDaggarEnding) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.LongAinVowelMissingComma) ?
|
||||
pipe(
|
||||
addP(diacritic),
|
||||
advanceP,
|
||||
addP(diacritic)
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ShortAinVowelMissingComma) ?
|
||||
pipe(
|
||||
addP(diacritic),
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.AlefWithHamza) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ShortVowel) ?
|
||||
pipe(
|
||||
advanceForHamzaMid,
|
||||
addP(phonemeInfo.diacritic),
|
||||
// TODO THIS?
|
||||
advanceForHamza,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.NOnFathatan) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.HamzaOnWow) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(hamzaAbove),
|
||||
addP(diacritic),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(pesh),
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.OoPrefix) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(pesh),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.GlottalStopBeforeOo) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
addP(hamzaAbove),
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.OoAfterGlottalStopOo) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
)(state)
|
||||
: (phs === PhonemeStatus.SilentAinAfterAlef) ?
|
||||
pipe(
|
||||
advanceP,
|
||||
advanceP,
|
||||
)(state)
|
||||
: state;
|
||||
return phs === PhonemeStatus.LeadingLongVowel
|
||||
? pipe(advanceP, addP(phonemeInfo.diacritic), advanceP)(state)
|
||||
: phs === PhonemeStatus.LeadingConsonantOrShortVowel
|
||||
? pipe(advanceP, addP(diacritic))(state)
|
||||
: phs === PhonemeStatus.DoubleConsonantTashdeed
|
||||
? pipe(prevPLetter === " " ? reverseP : addP(""), addP(tashdeed))(state)
|
||||
: phs === PhonemeStatus.EndingWithHayHim
|
||||
? pipe(advanceP, addP(phoneme === "u" ? hamzaAbove : sukun))(state)
|
||||
: phs === PhonemeStatus.DirectMatch
|
||||
? pipe(addP(diacritic), advanceP)(state)
|
||||
: phs === PhonemeStatus.DirectMatchAfterSukun
|
||||
? pipe(addP(sukun), advanceP)(state)
|
||||
: phs === PhonemeStatus.PersianSilentWWithAa
|
||||
? pipe(addP("("), advanceP, addP(")"), advanceP)(state)
|
||||
: phs === PhonemeStatus.ArabicWasla
|
||||
? pipe(addP(zer), overwriteP(wasla))(state)
|
||||
: phs === PhonemeStatus.Izafe
|
||||
? pipe(reverseP, addP(zer))(state)
|
||||
: phs === PhonemeStatus.EndOfDuParticle
|
||||
? pipe(reverseP, addP(zwarakay))(state)
|
||||
: phs === PhonemeStatus.ShortAEndingAfterHeem
|
||||
? pipe(prevPLetter === " " ? reverseP : addP(""), addP(zwar))(state)
|
||||
: phs === PhonemeStatus.EndingWithHayHimFromSukun
|
||||
? pipe(addP(sukun), advanceP)(state)
|
||||
: phs === PhonemeStatus.AlefDaggarEnding
|
||||
? pipe(advanceP, advanceP)(state)
|
||||
: phs === PhonemeStatus.LongAinVowelMissingComma
|
||||
? pipe(addP(diacritic), advanceP, addP(diacritic))(state)
|
||||
: phs === PhonemeStatus.ShortAinVowelMissingComma
|
||||
? pipe(addP(diacritic), advanceP)(state)
|
||||
: phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart
|
||||
? pipe(advanceP, advanceP)(state)
|
||||
: phs === PhonemeStatus.AinWithLongAAtBeginning
|
||||
? pipe(advanceP, advanceP)(state)
|
||||
: phs === PhonemeStatus.AlefWithHamza
|
||||
? pipe(advanceP)(state)
|
||||
: phs === PhonemeStatus.ShortVowel
|
||||
? pipe(
|
||||
advanceForHamzaMid,
|
||||
addP(phonemeInfo.diacritic),
|
||||
// TODO THIS?
|
||||
advanceForHamza
|
||||
)(state)
|
||||
: phs === PhonemeStatus.ShortAForAlefBeforeFathatan
|
||||
? pipe(advanceP)(state)
|
||||
: phs === PhonemeStatus.NOnFathatan
|
||||
? pipe(advanceP)(state)
|
||||
: phs === PhonemeStatus.HamzaOnWow
|
||||
? pipe(advanceP, addP(hamzaAbove), addP(diacritic))(state)
|
||||
: phs === PhonemeStatus.ArabicDefiniteArticleUl
|
||||
? pipe(advanceP, addP(pesh), advanceP)(state)
|
||||
: phs === PhonemeStatus.OoPrefix
|
||||
? pipe(advanceP, addP(pesh))(state)
|
||||
: phs === PhonemeStatus.GlottalStopBeforeOo
|
||||
? pipe(advanceP, addP(hamzaAbove))(state)
|
||||
: phs === PhonemeStatus.OoAfterGlottalStopOo
|
||||
? pipe(advanceP)(state)
|
||||
: phs === PhonemeStatus.SilentAinAfterAlef
|
||||
? pipe(advanceP, advanceP)(state)
|
||||
: state;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -7,7 +7,7 @@
|
|||
*/
|
||||
|
||||
const zwar = "َ";
|
||||
const zwarakey = "ٙ";
|
||||
const zwarakay = "ٙ";
|
||||
const zer = "ِ";
|
||||
const pesh = "ُ";
|
||||
const sukun = "ْ";
|
||||
|
@ -19,8 +19,25 @@ const fathahan = "ً";
|
|||
|
||||
// TODO: THESE OTHER TRIGRAPHS??
|
||||
const quadrigraphs = ["-Ul-"];
|
||||
const trigraphs = ["eyy", "éyy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
|
||||
const digraphs = ["ắ", "aa", "áa", "ee", "ée", "ey", "éy", "oo", "óo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
|
||||
const trigraphs = ["ey", "éy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
|
||||
const digraphs = [
|
||||
"ắ",
|
||||
"aa",
|
||||
"áa",
|
||||
"ee",
|
||||
"ée",
|
||||
"ay",
|
||||
"áy",
|
||||
"oo",
|
||||
"óo",
|
||||
"kh",
|
||||
"gh",
|
||||
"ts",
|
||||
"dz",
|
||||
"jz",
|
||||
"ch",
|
||||
"sh",
|
||||
];
|
||||
const endingDigraphs = ["uy", "úy"];
|
||||
const willIgnore = ["?", " ", "`", ".", "…"];
|
||||
|
||||
|
@ -28,7 +45,7 @@ export function splitFIntoPhonemes(f: string): string[] {
|
|||
const result: string[] = [];
|
||||
let index = 0;
|
||||
while (index < f.length) {
|
||||
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
|
||||
const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
|
||||
const threeLetterChunk = f.slice(index, index + 3);
|
||||
const fourLetterChunk = f.slice(index, index + 4);
|
||||
if (quadrigraphs.includes(fourLetterChunk)) {
|
||||
|
@ -89,43 +106,145 @@ const phonemeTable = [
|
|||
{ phoneme: "m", possibilities: ["م"], consonant: true },
|
||||
{ phoneme: "n", possibilities: ["ن"], consonant: true },
|
||||
{ phoneme: "N", possibilities: ["ڼ"], consonant: true },
|
||||
{ phoneme: "h", possibilities: ["ه", "ح"], consonant: true, takesSukunOnEnding: true },
|
||||
{
|
||||
phoneme: "h",
|
||||
possibilities: ["ه", "ح"],
|
||||
consonant: true,
|
||||
takesSukunOnEnding: true,
|
||||
},
|
||||
{ phoneme: "w", possibilities: ["و"], consonant: true },
|
||||
{ phoneme: "y", possibilities: ["ی"], consonant: true },
|
||||
|
||||
{ phoneme: "'", possibilities: ["ع", "ئ"], consonant: true },
|
||||
{ phoneme: "-i-", isIzafe: true },
|
||||
{ phoneme: "-U-", possibilities: [" و ", "و"]},
|
||||
{ phoneme: "-Ul-", possibilities: ["ال"]},
|
||||
{ phoneme: "-U-", possibilities: [" و ", "و"] },
|
||||
{ phoneme: "-Ul-", possibilities: ["ال"] },
|
||||
|
||||
// vowels
|
||||
{ phoneme: "aa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true },
|
||||
{ phoneme: "áa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true },
|
||||
{ phoneme: "ee", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true },
|
||||
{ phoneme: "ée", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true },
|
||||
{
|
||||
phoneme: "aa",
|
||||
possibilities: ["ا"],
|
||||
beginning: ["آ", "ا"],
|
||||
endingPossibilities: ["ا", "یٰ"],
|
||||
isLongA: true,
|
||||
canStartWithAynBefore: true,
|
||||
},
|
||||
{
|
||||
phoneme: "áa",
|
||||
possibilities: ["ا"],
|
||||
beginning: ["آ", "ا"],
|
||||
endingPossibilities: ["ا", "یٰ"],
|
||||
isLongA: true,
|
||||
canStartWithAynBefore: true,
|
||||
},
|
||||
{
|
||||
phoneme: "ee",
|
||||
possibilities: ["ی"],
|
||||
addAlefOnBeginning: true,
|
||||
endingPossibilities: ["ي"],
|
||||
diacritic: zer,
|
||||
canStartWithAynBefore: true,
|
||||
},
|
||||
{
|
||||
phoneme: "ée",
|
||||
possibilities: ["ی"],
|
||||
addAlefOnBeginning: true,
|
||||
endingPossibilities: ["ي"],
|
||||
diacritic: zer,
|
||||
canStartWithAynBefore: true,
|
||||
},
|
||||
{ phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true },
|
||||
{ phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true },
|
||||
{ phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true },
|
||||
{ phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true },
|
||||
{ phoneme: "oo", possibilities: ["و"], addAlefOnBeginning: true, alsoCanBePrefix: true, diacritic: pesh },
|
||||
{ phoneme: "óo", possibilities: ["و"], addAlefOnBeginning: true, diacritic: pesh },
|
||||
{ phoneme: "ey", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]},
|
||||
{ phoneme: "éy", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]},
|
||||
{
|
||||
phoneme: "oo",
|
||||
possibilities: ["و"],
|
||||
addAlefOnBeginning: true,
|
||||
alsoCanBePrefix: true,
|
||||
diacritic: pesh,
|
||||
},
|
||||
{
|
||||
phoneme: "óo",
|
||||
possibilities: ["و"],
|
||||
addAlefOnBeginning: true,
|
||||
diacritic: pesh,
|
||||
},
|
||||
{
|
||||
phoneme: "ay",
|
||||
possibilities: ["ی"],
|
||||
addAlefOnBeginning: true,
|
||||
endingPossibilities: ["ی"],
|
||||
},
|
||||
{
|
||||
phoneme: "áy",
|
||||
possibilities: ["ی"],
|
||||
addAlefOnBeginning: true,
|
||||
endingPossibilities: ["ی"],
|
||||
},
|
||||
{ phoneme: "uy", possibilities: ["ۍ"], endingOnly: true },
|
||||
{ phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS
|
||||
{ phoneme: "eyy", possibilities: ["ئ"], endingOnly: true },
|
||||
{ phoneme: "éyy", possibilities: ["ئ"], endingOnly: true },
|
||||
{ phoneme: "ey", possibilities: ["ئ"], endingOnly: true },
|
||||
{ phoneme: "éy", possibilities: ["ئ"], endingOnly: true },
|
||||
|
||||
{ phoneme: "a", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true },
|
||||
{ phoneme: "á", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true },
|
||||
{
|
||||
phoneme: "a",
|
||||
diacritic: zwar,
|
||||
endingPossibilities: ["ه"],
|
||||
canComeAfterHayEnding: true,
|
||||
canBeFirstPartOfFathahanEnding: true,
|
||||
},
|
||||
{
|
||||
phoneme: "á",
|
||||
diacritic: zwar,
|
||||
endingPossibilities: ["ه"],
|
||||
canComeAfterHayEnding: true,
|
||||
canBeFirstPartOfFathahanEnding: true,
|
||||
},
|
||||
{ phoneme: "ă", diacritic: zwar },
|
||||
{ phoneme: "ắ", diacritic: zwar },
|
||||
{ phoneme: "u", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true },
|
||||
{ phoneme: "ú", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true },
|
||||
{ phoneme: "i", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] },
|
||||
{ phoneme: "í", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] },
|
||||
{ phoneme: "U", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] },
|
||||
{ phoneme: "Ú", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] },
|
||||
{
|
||||
phoneme: "u",
|
||||
diacritic: zwarakay,
|
||||
endingPossibilities: ["ه"],
|
||||
hamzaOnEnd: true,
|
||||
},
|
||||
{
|
||||
phoneme: "ú",
|
||||
diacritic: zwarakay,
|
||||
endingPossibilities: ["ه"],
|
||||
hamzaOnEnd: true,
|
||||
},
|
||||
{
|
||||
phoneme: "i",
|
||||
diacritic: zer,
|
||||
endingPossibilities: ["ه"],
|
||||
takesDiacriticBeforeGurdaHayEnding: true,
|
||||
canBeWasla: true,
|
||||
beginning: ["ا", "ع"],
|
||||
},
|
||||
{
|
||||
phoneme: "í",
|
||||
diacritic: zer,
|
||||
endingPossibilities: ["ه"],
|
||||
takesDiacriticBeforeGurdaHayEnding: true,
|
||||
canBeWasla: true,
|
||||
beginning: ["ا", "ع"],
|
||||
},
|
||||
{
|
||||
phoneme: "U",
|
||||
diacritic: pesh,
|
||||
endingPossibilities: ["ه"],
|
||||
takesDiacriticBeforeGurdaHayEnding: true,
|
||||
beginning: ["ا", "ع"],
|
||||
},
|
||||
{
|
||||
phoneme: "Ú",
|
||||
diacritic: pesh,
|
||||
endingPossibilities: ["ه"],
|
||||
takesDiacriticBeforeGurdaHayEnding: true,
|
||||
beginning: ["ا", "ع"],
|
||||
},
|
||||
];
|
||||
|
||||
function isSpace(s: string): boolean {
|
||||
|
@ -142,7 +261,11 @@ interface IDiacriticsErrorMessage {
|
|||
i: number;
|
||||
}
|
||||
|
||||
function possibilityMatches(p: string, pIndex: number, possibilities: string[] | undefined): boolean {
|
||||
function possibilityMatches(
|
||||
p: string,
|
||||
pIndex: number,
|
||||
possibilities: string[] | undefined
|
||||
): boolean {
|
||||
/* istanbul ignore next */
|
||||
if (!possibilities) {
|
||||
return false;
|
||||
|
@ -155,10 +278,15 @@ function possibilityMatches(p: string, pIndex: number, possibilities: string[] |
|
|||
return false;
|
||||
}
|
||||
|
||||
function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean {
|
||||
function isPrefixedByDirectionalPronoun(
|
||||
i: number,
|
||||
phonemes: string[]
|
||||
): boolean {
|
||||
const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join("");
|
||||
const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join("");
|
||||
if (["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)) {
|
||||
if (
|
||||
["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
if (potentialPronounThreeCharSlice === "raa-") {
|
||||
|
@ -167,7 +295,11 @@ function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean
|
|||
return false;
|
||||
}
|
||||
|
||||
export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes: boolean = false): string | undefined {
|
||||
export function phoneticsToDiacritics(
|
||||
ps: string,
|
||||
ph: string,
|
||||
forbidOoPrefixes: boolean = false
|
||||
): string | undefined {
|
||||
const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]);
|
||||
const p = ps.trim();
|
||||
let result = "";
|
||||
|
@ -179,58 +311,72 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
if (phoneme === "-") {
|
||||
return;
|
||||
}
|
||||
const phonemeInfo = phonemeTable.find((element) => element.phoneme === phoneme);
|
||||
const phonemeInfo = phonemeTable.find(
|
||||
(element) => element.phoneme === phoneme
|
||||
);
|
||||
if (!phonemeInfo) {
|
||||
errored.push({ error: "phoneme info not found", phoneme, i });
|
||||
return;
|
||||
}
|
||||
const isDoubleConsonant = (
|
||||
const isDoubleConsonant =
|
||||
phonemeInfo.consonant &&
|
||||
phoneme === phonemes[i - 1] &&
|
||||
// TODO: is this thourough enough to allow double consonants on the ending of the previous word?
|
||||
!(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek
|
||||
) ? true : false;
|
||||
const isBeginning = !isDoubleConsonant && ((i === 0) || isSpace(p[pIndex - 1]) || (phonemes[i - 1] === "-Ul-") || isPrefixedByDirectionalPronoun(i, phonemes));
|
||||
const upcomingAEndingAfterHey = (p[pIndex] === "ح" && isSpace(p[pIndex + 1]) && ["a", "á"].includes(phonemes[i + 1]));
|
||||
|
||||
// TODO: break this into a seperate function -- why can it sometimes be set to undefined?
|
||||
const isEnding = (i === phonemes.length - 1) || ((
|
||||
(phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
|
||||
(!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
|
||||
(
|
||||
(!phonemeInfo.possibilities && isSpace(p[pIndex + 1])) &&
|
||||
(possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) || (p[pIndex] === "ع" && phonemes[i + 1] !== "'"))
|
||||
)
|
||||
) && !upcomingAEndingAfterHey
|
||||
&& // makes sure the next letter isn't a double consonant like haqq <-
|
||||
!(
|
||||
phonemeInfo.consonant && phoneme === phonemes[i + 1] // &&
|
||||
// !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
|
||||
)
|
||||
) || // can be the trailing double consanant on the end of a word
|
||||
(
|
||||
phonemeInfo.consonant && phoneme === phonemes[i - 1] &&
|
||||
!(isEndSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
|
||||
) || // can be یٰ ending
|
||||
(
|
||||
isEndSpace(p[pIndex + 2]) && (p.slice(pIndex, pIndex + 2) === "یٰ")
|
||||
);
|
||||
? true
|
||||
: false;
|
||||
const isBeginning =
|
||||
!isDoubleConsonant &&
|
||||
(i === 0 ||
|
||||
isSpace(p[pIndex - 1]) ||
|
||||
phonemes[i - 1] === "-Ul-" ||
|
||||
isPrefixedByDirectionalPronoun(i, phonemes));
|
||||
const upcomingAEndingAfterHay =
|
||||
p[pIndex] === "ح" &&
|
||||
isSpace(p[pIndex + 1]) &&
|
||||
["a", "á"].includes(phonemes[i + 1]);
|
||||
|
||||
const isUofDu = phoneme === "u" && (
|
||||
p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
|
||||
(p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
|
||||
p.slice(pIndex - 6, pIndex) === "د ... " // ... د is as the previous word
|
||||
);
|
||||
// TODO: break this into a seperate function -- why can it sometimes be set to undefined?
|
||||
const isEnding =
|
||||
i === phonemes.length - 1 ||
|
||||
(((phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
|
||||
(!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
|
||||
(!phonemeInfo.possibilities &&
|
||||
isSpace(p[pIndex + 1]) &&
|
||||
(possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) ||
|
||||
(p[pIndex] === "ع" && phonemes[i + 1] !== "'")))) &&
|
||||
!upcomingAEndingAfterHay && // makes sure the next letter isn't a double consonant like haqq <-
|
||||
!(
|
||||
(phonemeInfo.consonant && phoneme === phonemes[i + 1]) // &&
|
||||
// !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
|
||||
)) || // can be the trailing double consanant on the end of a word
|
||||
(phonemeInfo.consonant &&
|
||||
phoneme === phonemes[i - 1] &&
|
||||
!(
|
||||
isEndSpace(p[pIndex - 1]) &&
|
||||
phonemeInfo.possibilities.includes(p[pIndex])
|
||||
)) || // can be یٰ ending
|
||||
(isEndSpace(p[pIndex + 2]) && p.slice(pIndex, pIndex + 2) === "یٰ");
|
||||
|
||||
const isUofDu =
|
||||
phoneme === "u" &&
|
||||
(p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
|
||||
(p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
|
||||
p.slice(pIndex - 6, pIndex) === "د ... "); // ... د is as the previous word
|
||||
// TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance
|
||||
const isEndingAynVowel = isEnding && phonemeInfo.diacritic && [p[pIndex], p[pIndex - 1]].includes("ع") && p[pIndex] !== "ه";
|
||||
const isEndingAynVowel =
|
||||
isEnding &&
|
||||
phonemeInfo.diacritic &&
|
||||
[p[pIndex], p[pIndex - 1]].includes("ع") &&
|
||||
p[pIndex] !== "ه";
|
||||
const isMiddle = !isBeginning && !isEnding;
|
||||
const isSilentWaw = (
|
||||
const isSilentWaw =
|
||||
p[pIndex] === "و" &&
|
||||
p[pIndex - 1] === "خ" &&
|
||||
p[pIndex + 1] === "ا" &&
|
||||
["áa", "aa"].includes(phoneme)
|
||||
);
|
||||
const isAnAEndingAfterHey = isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHeyEnding;
|
||||
["áa", "aa"].includes(phoneme);
|
||||
const isAnAEndingAfterHay =
|
||||
isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHayEnding;
|
||||
if (isDoubleConsonant) {
|
||||
pIndex--;
|
||||
if (isSpace(p[pIndex])) {
|
||||
|
@ -247,14 +393,22 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
pIndex++;
|
||||
}
|
||||
// special check for Arabic wasla
|
||||
if (p.slice(0, 3) === "بال" && phonemes[i - 1] === "b" && phonemeInfo.canBeWasla && phonemes[i + 1] === "l") {
|
||||
if (
|
||||
p.slice(0, 3) === "بال" &&
|
||||
phonemes[i - 1] === "b" &&
|
||||
phonemeInfo.canBeWasla &&
|
||||
phonemes[i + 1] === "l"
|
||||
) {
|
||||
result += phonemeInfo.diacritic + wasla;
|
||||
pIndex++;
|
||||
previousPhonemeWasAConsonant = false;
|
||||
return;
|
||||
}
|
||||
// special check for fathahan ending
|
||||
if (phonemeInfo.canBeFirstPartOfFathahanEnding && p.slice(pIndex, pIndex + 2) === "اً") {
|
||||
if (
|
||||
phonemeInfo.canBeFirstPartOfFathahanEnding &&
|
||||
p.slice(pIndex, pIndex + 2) === "اً"
|
||||
) {
|
||||
result += "ا";
|
||||
pIndex++;
|
||||
return;
|
||||
|
@ -265,7 +419,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
return;
|
||||
}
|
||||
// special check for words starting with عا or عی
|
||||
if (isBeginning && phonemeInfo.canStartWithAynBefore && p[pIndex] === "ع" && phonemeInfo.possibilities.includes(p[pIndex + 1])) {
|
||||
if (
|
||||
isBeginning &&
|
||||
phonemeInfo.canStartWithAynBefore &&
|
||||
p[pIndex] === "ع" &&
|
||||
phonemeInfo.possibilities.includes(p[pIndex + 1])
|
||||
) {
|
||||
result += "ع";
|
||||
result += phonemeInfo.diacritic ? phonemeInfo.diacritic : "";
|
||||
result += p[pIndex + 1];
|
||||
|
@ -273,23 +432,45 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
return;
|
||||
}
|
||||
// special check for ؤ Ua
|
||||
if (phoneme === "U" && phonemes[i + 1] === "a" && phonemes[i + 2] !== "a" && p[pIndex] === "و") {
|
||||
if (
|
||||
phoneme === "U" &&
|
||||
phonemes[i + 1] === "a" &&
|
||||
phonemes[i + 2] !== "a" &&
|
||||
p[pIndex] === "و"
|
||||
) {
|
||||
result += "ؤ";
|
||||
pIndex++;
|
||||
return;
|
||||
}
|
||||
if (phoneme === "a" && phonemes[i - 1] === "U" && phonemes[i + 1] !== "a" && result.slice(-2) === "ؤ") {
|
||||
if (
|
||||
phoneme === "a" &&
|
||||
phonemes[i - 1] === "U" &&
|
||||
phonemes[i + 1] !== "a" &&
|
||||
result.slice(-2) === "ؤ"
|
||||
) {
|
||||
previousPhonemeWasAConsonant = false;
|
||||
return;
|
||||
}
|
||||
// special check for و wo
|
||||
if (isBeginning && phoneme === "w" && phonemes[i + 1] === "o" && p[pIndex] === "و" && isEndSpace(p[pIndex + 1])) {
|
||||
if (
|
||||
isBeginning &&
|
||||
phoneme === "w" &&
|
||||
phonemes[i + 1] === "o" &&
|
||||
p[pIndex] === "و" &&
|
||||
isEndSpace(p[pIndex + 1])
|
||||
) {
|
||||
result += "و";
|
||||
pIndex++;
|
||||
return;
|
||||
}
|
||||
// TODO: isEndSpace here is redundant??
|
||||
if (isEnding && phoneme === "o" && phonemes[i - 1] === "w" && p[pIndex - 1] === "و" && isEndSpace(p[pIndex])) {
|
||||
if (
|
||||
isEnding &&
|
||||
phoneme === "o" &&
|
||||
phonemes[i - 1] === "w" &&
|
||||
p[pIndex - 1] === "و" &&
|
||||
isEndSpace(p[pIndex])
|
||||
) {
|
||||
pIndex++;
|
||||
return;
|
||||
}
|
||||
|
@ -300,38 +481,67 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
return;
|
||||
}
|
||||
// special check for أ in the middle of the word
|
||||
if (!isBeginning && p[pIndex] === "أ" && phoneme === "a" && phonemes[i + 1] === "'" && phonemes[i + 2] === "a") {
|
||||
if (
|
||||
!isBeginning &&
|
||||
p[pIndex] === "أ" &&
|
||||
phoneme === "a" &&
|
||||
phonemes[i + 1] === "'" &&
|
||||
phonemes[i + 2] === "a"
|
||||
) {
|
||||
result += "أ";
|
||||
pIndex++;
|
||||
return;
|
||||
}
|
||||
if (p[pIndex - 1] === "أ" && phonemes[i - 1] === "a" && phoneme === "'" && phonemes[i + 1] === "a") {
|
||||
if (
|
||||
p[pIndex - 1] === "أ" &&
|
||||
phonemes[i - 1] === "a" &&
|
||||
phoneme === "'" &&
|
||||
phonemes[i + 1] === "a"
|
||||
) {
|
||||
return;
|
||||
}
|
||||
if (p[pIndex - 1] === "أ" && phonemes[i - 2] === "a" && phonemes[i - 1] === "'" && phoneme === "a") {
|
||||
if (
|
||||
p[pIndex - 1] === "أ" &&
|
||||
phonemes[i - 2] === "a" &&
|
||||
phonemes[i - 1] === "'" &&
|
||||
phoneme === "a"
|
||||
) {
|
||||
previousPhonemeWasAConsonant = false;
|
||||
return;
|
||||
}
|
||||
// special check for وو 'oo
|
||||
if (!isBeginning && p[pIndex] === "و" && p[pIndex + 1] === "و" && phoneme === "'" && phonemes[i + 1] === "oo") {
|
||||
if (
|
||||
!isBeginning &&
|
||||
p[pIndex] === "و" &&
|
||||
p[pIndex + 1] === "و" &&
|
||||
phoneme === "'" &&
|
||||
phonemes[i + 1] === "oo"
|
||||
) {
|
||||
result += "وُو";
|
||||
pIndex += 2;
|
||||
return;
|
||||
}
|
||||
if (p[pIndex - 2] === "و" && p[pIndex - 1] === "و" && phonemes[i - 1] === "'" && phoneme === "oo") {
|
||||
if (
|
||||
p[pIndex - 2] === "و" &&
|
||||
p[pIndex - 1] === "و" &&
|
||||
phonemes[i - 1] === "'" &&
|
||||
phoneme === "oo"
|
||||
) {
|
||||
previousPhonemeWasAConsonant = false;
|
||||
return;
|
||||
}
|
||||
|
||||
const prevLetterWasBeginningAyn = (
|
||||
const prevLetterWasBeginningAyn =
|
||||
p[pIndex - 1] === "ع" &&
|
||||
// isEndSpace(p[pIndex]) && // This breaks it
|
||||
phoneme === "'"
|
||||
);
|
||||
phoneme === "'";
|
||||
// check if the phoneme lines up in the Pashto word
|
||||
if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) {
|
||||
// TODO: Maybe a little bad because it doesn't loop through possibilities
|
||||
if ((!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) && p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]) {
|
||||
if (
|
||||
(!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) &&
|
||||
p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]
|
||||
) {
|
||||
errored.push({ error: "didn't start with an aleph", phoneme, i });
|
||||
return;
|
||||
}
|
||||
|
@ -348,18 +558,18 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
pIndex++;
|
||||
return;
|
||||
} else if (
|
||||
(isEnding && phonemeInfo.endingPossibilities) &&
|
||||
isEnding &&
|
||||
phonemeInfo.endingPossibilities &&
|
||||
!isUofDu &&
|
||||
(
|
||||
!possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
|
||||
!isEndingAynVowel && // allowing short vowels on the end of words ending with ع
|
||||
!isAnAEndingAfterHey
|
||||
)
|
||||
!possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
|
||||
!isEndingAynVowel && // allowing short vowels on the end of words ending with ع
|
||||
!isAnAEndingAfterHay
|
||||
) {
|
||||
errored.push({ error: "bad ending", phoneme, i });
|
||||
return;
|
||||
} else if (
|
||||
(isEnding && !phonemeInfo.endingPossibilities) &&
|
||||
isEnding &&
|
||||
!phonemeInfo.endingPossibilities &&
|
||||
phonemeInfo.possibilities &&
|
||||
!phonemeInfo.possibilities.includes(p[pIndex])
|
||||
) {
|
||||
|
@ -367,14 +577,17 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
errored.push({ error: "bad ending 2", phoneme, i });
|
||||
return;
|
||||
} else if (
|
||||
(phonemeInfo.possibilities && !isEnding) &&
|
||||
(
|
||||
!(phonemeInfo.possibilities.includes(p[pIndex])) &&
|
||||
!(p[pIndex] === "ن" && (p[pIndex + 1] === "ب" && phoneme === "m")) && // && // exception case with نب === mb
|
||||
!prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
|
||||
)
|
||||
phonemeInfo.possibilities &&
|
||||
!isEnding &&
|
||||
!phonemeInfo.possibilities.includes(p[pIndex]) &&
|
||||
!(p[pIndex] === "ن" && p[pIndex + 1] === "ب" && phoneme === "m") && // && // exception case with نب === mb
|
||||
!prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
|
||||
) {
|
||||
errored.push({ error: "improper coressponding letter in middle of word", phoneme, i });
|
||||
errored.push({
|
||||
error: "improper coressponding letter in middle of word",
|
||||
phoneme,
|
||||
i,
|
||||
});
|
||||
return;
|
||||
}
|
||||
// console.log(phoneme, pIndex, p[pIndex], isEnding);
|
||||
|
@ -382,7 +595,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
// OK, it lines up with the Pashto word, we're good
|
||||
// Now continue building the result string
|
||||
// deal with starting with short vowels and alef
|
||||
if (!isUofDu && isBeginning && !phonemeInfo.possibilities && !phonemeInfo.isIzafe) {
|
||||
if (
|
||||
!isUofDu &&
|
||||
isBeginning &&
|
||||
!phonemeInfo.possibilities &&
|
||||
!phonemeInfo.isIzafe
|
||||
) {
|
||||
// TODO: WHY IS THIS HERE
|
||||
if (!["ا", "ع"].includes(p[pIndex])) {
|
||||
errored.push({ error: "bad beginning 2", phoneme, i });
|
||||
|
@ -392,22 +610,30 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
pIndex++;
|
||||
}
|
||||
// if the phoneme carries a diacritic insert it (before the letter if it's coming)
|
||||
const isOoPrefix = (phonemeInfo.alsoCanBePrefix && isBeginning && (p[pIndex - 1] !== "ا"));
|
||||
const isOoPrefix =
|
||||
phonemeInfo.alsoCanBePrefix && isBeginning && p[pIndex - 1] !== "ا";
|
||||
if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) {
|
||||
// using this hack to remove the space and put it after the zwarakey we're going to add after د
|
||||
if (isUofDu && result.slice(-5) === " ... ") {
|
||||
result = result.slice(0, -5) + zwarakey + " ... ";
|
||||
// using this hack to remove the space and put it after the zwarakay we're going to add after د
|
||||
if (isUofDu && result.slice(-5) === " ... ") {
|
||||
result = result.slice(0, -5) + zwarakay + " ... ";
|
||||
} else if (isUofDu && result.slice(-1) === " ") {
|
||||
result = result.slice(0, -1) + zwarakey + " ";
|
||||
result = result.slice(0, -1) + zwarakay + " ";
|
||||
} else {
|
||||
result += phonemeInfo.diacritic;
|
||||
}
|
||||
}
|
||||
// TODO: The middle stuff might be unnecessary/unhelpful
|
||||
const isACommaWithoutAyn = (phoneme === "'" && (p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ")));
|
||||
const isACommaWithoutAyn =
|
||||
phoneme === "'" && p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ");
|
||||
// if the previous phoneme was a consonant insert a sukun
|
||||
// console.log("Will I go into the adding thing?");
|
||||
if (!isBeginning && previousPhonemeWasAConsonant && phonemeInfo.consonant && phonemes[i - 1] !== "'" && p[pIndex] !== "ع") {
|
||||
if (
|
||||
!isBeginning &&
|
||||
previousPhonemeWasAConsonant &&
|
||||
phonemeInfo.consonant &&
|
||||
phonemes[i - 1] !== "'" &&
|
||||
p[pIndex] !== "ع"
|
||||
) {
|
||||
result += isDoubleConsonant ? tashdeed : sukun;
|
||||
}
|
||||
if (isEnding && isDoubleConsonant) {
|
||||
|
@ -417,30 +643,38 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
}
|
||||
}
|
||||
// if there's a pashto letter for the phoneme, insert it
|
||||
if (!isEndingAynVowel && !isACommaWithoutAyn && (phonemeInfo.possibilities || isEnding)) {
|
||||
if (
|
||||
!isEndingAynVowel &&
|
||||
!isACommaWithoutAyn &&
|
||||
(phonemeInfo.possibilities || isEnding)
|
||||
) {
|
||||
// need the isSpace check to prevent weird behaviour with izafe
|
||||
if (!isUofDu) {
|
||||
if (isAnAEndingAfterHey) {
|
||||
if (isAnAEndingAfterHay) {
|
||||
result += zwar;
|
||||
if (p[pIndex] === " ") {
|
||||
result += " ";
|
||||
}
|
||||
} else {
|
||||
result += (isDoubleConsonant || isSpace(p[pIndex])) ? "" : p[pIndex];
|
||||
result += isDoubleConsonant || isSpace(p[pIndex]) ? "" : p[pIndex];
|
||||
}
|
||||
}
|
||||
pIndex++;
|
||||
}
|
||||
if (isEnding) {
|
||||
if (isUofDu) {
|
||||
result += zwarakey;
|
||||
result += zwarakay;
|
||||
} else if (phonemeInfo.hamzaOnEnd) {
|
||||
result += hamzaAbove;
|
||||
} else if (phonemeInfo.takesSukunOnEnding) {
|
||||
result += sukun;
|
||||
} else if (p[pIndex] === daggerAlif) {
|
||||
result += daggerAlif;
|
||||
} else if (isEndSpace(p[pIndex]) && p[pIndex - 1] === "ه" && phonemeInfo.takesDiacriticBeforeGurdaHeyEnding) {
|
||||
} else if (
|
||||
isEndSpace(p[pIndex]) &&
|
||||
p[pIndex - 1] === "ه" &&
|
||||
phonemeInfo.takesDiacriticBeforeGurdaHayEnding
|
||||
) {
|
||||
result = result.slice(0, -1) + phonemeInfo.diacritic + "ه";
|
||||
}
|
||||
}
|
||||
|
@ -456,13 +690,20 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
}
|
||||
return;
|
||||
}
|
||||
previousPhonemeWasAConsonant = (!isEnding && phonemeInfo.consonant) ? true : false;
|
||||
previousPhonemeWasAConsonant =
|
||||
!isEnding && phonemeInfo.consonant ? true : false;
|
||||
// ignore the ع or ئ if there's not a ' in the phonetics
|
||||
const nextPhonemeInfo = phonemeTable.find((element) => phonemes[i + 1] === element.phoneme);
|
||||
const nextPhonemeInfo = phonemeTable.find(
|
||||
(element) => phonemes[i + 1] === element.phoneme
|
||||
);
|
||||
if (
|
||||
["ع", "ئ"].includes(p[pIndex]) &&
|
||||
![phonemes[i + 1], phonemes[i + 2]].includes("'") &&
|
||||
!(nextPhonemeInfo && nextPhonemeInfo.diacritic && isEndSpace(p[pIndex + 1])) && // don't skip the ع on the end if there's another short letter coming after it
|
||||
!(
|
||||
nextPhonemeInfo &&
|
||||
nextPhonemeInfo.diacritic &&
|
||||
isEndSpace(p[pIndex + 1])
|
||||
) && // don't skip the ع on the end if there's another short letter coming after it
|
||||
!(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end
|
||||
!phonemeInfo.isIzafe
|
||||
) {
|
||||
|
@ -476,7 +717,11 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
|||
return;
|
||||
}
|
||||
// if we've arrived at a space in the Pashto, move along before the next iteration
|
||||
if (isSpace(p[pIndex]) && phonemes[i + 1] !== "-i-" && !upcomingAEndingAfterHey) {
|
||||
if (
|
||||
isSpace(p[pIndex]) &&
|
||||
phonemes[i + 1] !== "-i-" &&
|
||||
!upcomingAEndingAfterHay
|
||||
) {
|
||||
result += " ";
|
||||
pIndex++;
|
||||
}
|
||||
|
|
|
@ -1,139 +1,139 @@
|
|||
import * as T from "../../types";
|
||||
|
||||
export const sandwiches: T.Sandwich[] = [
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "له", f: "la" },
|
||||
after: { p: "نه", f: "na" },
|
||||
e: "from",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "له", f: "la" },
|
||||
after: { p: "څخه", f: "tsuxa" },
|
||||
e: "from",
|
||||
},
|
||||
// TODO: Implement mayonaise
|
||||
// {
|
||||
// type: "sandwich",
|
||||
// before: { p: "له", f: "la" },
|
||||
// after: "mayonaise",
|
||||
// e: "from",
|
||||
// },
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "له", f: "la" },
|
||||
after: { p: "سره", f: "sara" },
|
||||
e: "with",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: undefined,
|
||||
after: { p: "ته", f: "ta" },
|
||||
e: "to",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "لپاره", f: "lapaara" },
|
||||
e: "for",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "دمخې", f: "dumúkhe" },
|
||||
e: "before/in front of",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په څانګ", f: "pu tsaang" },
|
||||
e: "beside",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "پر", f: "pur" },
|
||||
after: { p: "باندې", f: "baande" },
|
||||
e: "on",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "په", f: "pu" },
|
||||
after: { p: "کې", f: "ke" },
|
||||
e: "in",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "دننه", f: "dununa" },
|
||||
e: "inside",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "دباندې", f: "dubaande" },
|
||||
e: "outside",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "مخې ته", f: "mukhe ta" },
|
||||
e: "in front of",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "شا ته", f: "shaa ta" },
|
||||
e: "behind",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "لاندې", f: "laande" },
|
||||
e: "under",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په شان", f: "pu shaan" },
|
||||
e: "like",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "غوندې", f: "ghwunde" },
|
||||
e: "like",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په حیث", f: "pu heys" },
|
||||
e: "as",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په لور", f: "pu lor" },
|
||||
e: "towards",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په اړه", f: "pu aRa" },
|
||||
e: "about",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په باره کې", f: "pu baara ke" },
|
||||
e: "about",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په اړوند", f: "pu aRwand" },
|
||||
e: "concerning",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "له", f: "la" },
|
||||
after: { p: "نه", f: "na" },
|
||||
e: "from",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "له", f: "la" },
|
||||
after: { p: "څخه", f: "tsuxa" },
|
||||
e: "from",
|
||||
},
|
||||
// TODO: Implement mayonaise
|
||||
// {
|
||||
// type: "sandwich",
|
||||
// before: { p: "له", f: "la" },
|
||||
// after: "mayonaise",
|
||||
// e: "from",
|
||||
// },
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "له", f: "la" },
|
||||
after: { p: "سره", f: "sara" },
|
||||
e: "with",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: undefined,
|
||||
after: { p: "ته", f: "ta" },
|
||||
e: "to",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "لپاره", f: "lapaara" },
|
||||
e: "for",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "دمخې", f: "dumúkhe" },
|
||||
e: "before/in front of",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په څانګ", f: "pu tsaang" },
|
||||
e: "beside",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "پر", f: "pur" },
|
||||
after: { p: "باندې", f: "baande" },
|
||||
e: "on",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "په", f: "pu" },
|
||||
after: { p: "کې", f: "ke" },
|
||||
e: "in",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "دننه", f: "dununa" },
|
||||
e: "inside",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "دباندې", f: "dubaande" },
|
||||
e: "outside",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "مخې ته", f: "mukhe ta" },
|
||||
e: "in front of",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "شا ته", f: "shaa ta" },
|
||||
e: "behind",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "لاندې", f: "laande" },
|
||||
e: "under",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په شان", f: "pu shaan" },
|
||||
e: "like",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "غوندې", f: "ghwunde" },
|
||||
e: "like",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په حیث", f: "pu hays" },
|
||||
e: "as",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په لور", f: "pu lor" },
|
||||
e: "towards",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په اړه", f: "pu aRa" },
|
||||
e: "about",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په باره کې", f: "pu baara ke" },
|
||||
e: "about",
|
||||
},
|
||||
{
|
||||
type: "sandwich",
|
||||
before: { p: "د", f: "du" },
|
||||
after: { p: "په اړوند", f: "pu aRwand" },
|
||||
e: "concerning",
|
||||
},
|
||||
];
|
||||
|
||||
export default sandwiches;
|
||||
export default sandwiches;
|
||||
|
|
|
@ -105,14 +105,14 @@ export const replacerInfo: IReplacerInfoItem[] = [
|
|||
ipa: "ɪ́",
|
||||
},
|
||||
{
|
||||
char: "ey",
|
||||
char: "ay",
|
||||
alalc: "ay",
|
||||
ipa: "ai",
|
||||
ipa: "ay",
|
||||
},
|
||||
{
|
||||
char: "éy",
|
||||
char: "áy",
|
||||
alalc: "áy",
|
||||
ipa: "ái",
|
||||
ipa: "áj",
|
||||
},
|
||||
{
|
||||
char: "ee",
|
||||
|
@ -140,9 +140,9 @@ export const replacerInfo: IReplacerInfoItem[] = [
|
|||
ipa: "u:j",
|
||||
},
|
||||
{
|
||||
char: "eyy",
|
||||
alalc: "ạy",
|
||||
ipa: "ɛ̝j",
|
||||
char: "ey",
|
||||
alalc: "ey",
|
||||
ipa: "ej",
|
||||
},
|
||||
{
|
||||
char: "e",
|
||||
|
@ -351,4 +351,5 @@ export const replacerInfo: IReplacerInfoItem[] = [
|
|||
];
|
||||
|
||||
// tslint:disable-next-line
|
||||
export const replacerRegex = /aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ey|éy|e{1,2}|ée|é|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g;
|
||||
export const replacerRegex =
|
||||
/aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ay|áy|e{1,2}|ée|é|ey|éy|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g;
|
||||
|
|
|
@ -6,9 +6,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
import {
|
||||
translatePhonetics,
|
||||
} from "./translate-phonetics";
|
||||
import { translatePhonetics } from "./translate-phonetics";
|
||||
|
||||
const dialects = ["southern", "standard", "peshawer"];
|
||||
const systems = ["ipa", "alalc"];
|
||||
|
@ -54,11 +52,11 @@ const translations = [
|
|||
},
|
||||
},
|
||||
{
|
||||
original: "saRey",
|
||||
original: "saRay",
|
||||
ipa: {
|
||||
southern: "saɻai",
|
||||
standard: "saɻai",
|
||||
peshawer: "saɻai",
|
||||
southern: "saɻaj",
|
||||
standard: "saɻaj",
|
||||
peshawer: "saɻaj",
|
||||
},
|
||||
alalc: {
|
||||
southern: "saṛay",
|
||||
|
@ -72,20 +70,17 @@ translations.forEach((t) => {
|
|||
systems.forEach((system) => {
|
||||
// check each dialect with given system
|
||||
dialects.forEach((dialect) => {
|
||||
test(
|
||||
// @ts-ignore
|
||||
`${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`,
|
||||
() => {
|
||||
const translated = translatePhonetics(t.original, {
|
||||
// @ts-ignore
|
||||
system,
|
||||
// @ts-ignore
|
||||
dialect,
|
||||
});
|
||||
test(// @ts-ignore
|
||||
`${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`, () => {
|
||||
const translated = translatePhonetics(t.original, {
|
||||
// @ts-ignore
|
||||
expect(translated).toBe(t[system][dialect]);
|
||||
},
|
||||
);
|
||||
system,
|
||||
// @ts-ignore
|
||||
dialect,
|
||||
});
|
||||
// @ts-ignore
|
||||
expect(translated).toBe(t[system][dialect]);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -8,234 +8,461 @@
|
|||
|
||||
import { standardizeEntry, validateEntry } from "./validate-entry";
|
||||
import * as T from "../../types";
|
||||
import { standardizePhonetics } from "./standardize-pashto";
|
||||
|
||||
const toTest: {
|
||||
input: any,
|
||||
output: T.DictionaryEntryError | { ok: true } | { checkComplement: true },
|
||||
input: any;
|
||||
output: T.DictionaryEntryError | { ok: true } | { checkComplement: true };
|
||||
}[] = [
|
||||
{
|
||||
input: { ts: undefined },
|
||||
output: {
|
||||
errors: ["missing ts", "missing i", "missing p", "missing f", "missing e"],
|
||||
p: "",
|
||||
f: "",
|
||||
e: "",
|
||||
erroneousFields: ["ts", "i", "p", "f", "e"],
|
||||
ts: 0,
|
||||
},
|
||||
{
|
||||
input: { ts: undefined },
|
||||
output: {
|
||||
errors: [
|
||||
"missing ts",
|
||||
"missing i",
|
||||
"missing p",
|
||||
"missing f",
|
||||
"missing e",
|
||||
],
|
||||
p: "",
|
||||
f: "",
|
||||
e: "",
|
||||
erroneousFields: ["ts", "i", "p", "f", "e"],
|
||||
ts: 0,
|
||||
},
|
||||
{
|
||||
input: { ts: 123, p: "کور", e: "house" },
|
||||
output: {
|
||||
errors: ["missing i", "missing f"],
|
||||
p: "کور",
|
||||
f: "",
|
||||
ts: 123,
|
||||
e: "house",
|
||||
erroneousFields: ["i", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: { ts: 123, p: "کور", e: "house" },
|
||||
output: {
|
||||
errors: ["missing i", "missing f"],
|
||||
p: "کور",
|
||||
f: "",
|
||||
ts: 123,
|
||||
e: "house",
|
||||
erroneousFields: ["i", "f"],
|
||||
},
|
||||
{
|
||||
input: {"i":293,"ts":1527821299,"p":"اخطار","f":"ixtáar","e":"warning, reprimand, admonishment","c":"n. m."},
|
||||
output: {
|
||||
errors: ["script and phonetics do not match for p and f"],
|
||||
p: "اخطار",
|
||||
f: "ixtáar",
|
||||
e: "warning, reprimand, admonishment",
|
||||
ts: 1527821299,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 293,
|
||||
ts: 1527821299,
|
||||
p: "اخطار",
|
||||
f: "ixtáar",
|
||||
e: "warning, reprimand, admonishment",
|
||||
c: "n. m.",
|
||||
},
|
||||
{
|
||||
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbf":"puxtan"},
|
||||
output: {
|
||||
errors: ["missing infbp"],
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
ts: 1527815197,
|
||||
erroneousFields: ["infbp"],
|
||||
},
|
||||
output: {
|
||||
errors: ["script and phonetics do not match for p and f"],
|
||||
p: "اخطار",
|
||||
f: "ixtáar",
|
||||
e: "warning, reprimand, admonishment",
|
||||
ts: 1527821299,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
{
|
||||
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbp":"پښتن"},
|
||||
output: {
|
||||
errors: ["missing infbf"],
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
ts: 1527815197,
|
||||
erroneousFields: ["infbf"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 2433,
|
||||
ts: 1527815197,
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
c: "n. m. unisex / adj. irreg.",
|
||||
infap: "پښتانه",
|
||||
infaf: "puxtaanu",
|
||||
infbf: "puxtan",
|
||||
},
|
||||
{
|
||||
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puktaanu","infbp":"پښتن"},
|
||||
output: {
|
||||
errors: ["script and phonetics do not match for infap and infaf", "missing infbf"],
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
ts: 1527815197,
|
||||
erroneousFields: ["infap", "infaf", "infbf"],
|
||||
},
|
||||
output: {
|
||||
errors: ["missing infbp"],
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
ts: 1527815197,
|
||||
erroneousFields: ["infbp"],
|
||||
},
|
||||
{
|
||||
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
|
||||
output: {
|
||||
errors: ["missing separationAtF"],
|
||||
p: "څملاستل",
|
||||
f: "tsumlaastúl",
|
||||
e: "to lie down",
|
||||
ts: 1527819674,
|
||||
erroneousFields: ["separationAtF"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 2433,
|
||||
ts: 1527815197,
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
c: "n. m. unisex / adj. irreg.",
|
||||
infap: "پښتانه",
|
||||
infaf: "puxtaanu",
|
||||
infbp: "پښتن",
|
||||
},
|
||||
{
|
||||
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"sumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
|
||||
output: {
|
||||
errors: ["script and phonetics do not match for p and f", "missing separationAtF"],
|
||||
p: "څملاستل",
|
||||
f: "sumlaastúl",
|
||||
e: "to lie down",
|
||||
ts: 1527819674,
|
||||
erroneousFields: ["p", "f", "separationAtF"],
|
||||
},
|
||||
output: {
|
||||
errors: ["missing infbf"],
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
ts: 1527815197,
|
||||
erroneousFields: ["infbf"],
|
||||
},
|
||||
{
|
||||
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtF":4,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
|
||||
output: {
|
||||
errors: ["missing separationAtP"],
|
||||
p: "څملاستل",
|
||||
f: "tsumlaastúl",
|
||||
e: "to lie down",
|
||||
ts: 1527819674,
|
||||
erroneousFields: ["separationAtP"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 2433,
|
||||
ts: 1527815197,
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
c: "n. m. unisex / adj. irreg.",
|
||||
infap: "پښتانه",
|
||||
infaf: "puktaanu",
|
||||
infbp: "پښتن",
|
||||
},
|
||||
{
|
||||
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","c":"v. stat. comp. trans."},
|
||||
output: {
|
||||
errors: ["missing complement for compound verb"],
|
||||
p: "پخول",
|
||||
f: "pakhawul",
|
||||
e: "to cook, prepare, to cause to ripen, mature",
|
||||
ts: 1571859113828,
|
||||
erroneousFields: ["l"],
|
||||
},
|
||||
output: {
|
||||
errors: [
|
||||
"script and phonetics do not match for infap and infaf",
|
||||
"missing infbf",
|
||||
],
|
||||
p: "پښتون",
|
||||
f: "puxtoon",
|
||||
e: "Pashtun",
|
||||
ts: 1527815197,
|
||||
erroneousFields: ["infap", "infaf", "infbf"],
|
||||
},
|
||||
{
|
||||
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","l":1574867531681,"c":"v. stat. comp. trans."},
|
||||
output: {
|
||||
checkComplement: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 5000,
|
||||
ts: 1527819674,
|
||||
p: "څملاستل",
|
||||
f: "tsumlaastúl",
|
||||
e: "to lie down",
|
||||
l: 1596485996977,
|
||||
separationAtP: 2,
|
||||
c: "v. intrans. seperable",
|
||||
psp: "څمل",
|
||||
psf: "tsaml",
|
||||
noOo: true,
|
||||
},
|
||||
{
|
||||
input: {"i":2231,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."},
|
||||
output: { ok: true },
|
||||
output: {
|
||||
errors: ["missing separationAtF"],
|
||||
p: "څملاستل",
|
||||
f: "tsumlaastúl",
|
||||
e: "to lie down",
|
||||
ts: 1527819674,
|
||||
erroneousFields: ["separationAtF"],
|
||||
},
|
||||
{
|
||||
input: {"i":0,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 5000,
|
||||
ts: 1527819674,
|
||||
p: "څملاستل",
|
||||
f: "sumlaastúl",
|
||||
e: "to lie down",
|
||||
l: 1596485996977,
|
||||
separationAtP: 2,
|
||||
c: "v. intrans. seperable",
|
||||
psp: "څمل",
|
||||
psf: "tsaml",
|
||||
noOo: true,
|
||||
},
|
||||
{
|
||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj."},
|
||||
output: {
|
||||
errors: ["script and phonetics do not match for p and f"],
|
||||
p: "آبدار",
|
||||
f: "aawdáar",
|
||||
e: "watery, damp, humid, juicy",
|
||||
ts: 1575058859661,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
output: {
|
||||
errors: [
|
||||
"script and phonetics do not match for p and f",
|
||||
"missing separationAtF",
|
||||
],
|
||||
p: "څملاستل",
|
||||
f: "sumlaastúl",
|
||||
e: "to lie down",
|
||||
ts: 1527819674,
|
||||
erroneousFields: ["p", "f", "separationAtF"],
|
||||
},
|
||||
{
|
||||
input: {"ts":1591033069786,"i":7717,"p":"ستړی کول","f":"stuRey kawul","g":"stuReykedul","e":"to get tired, fatigued","c":"v. stat. comp. intrans.","l":1527815306,"ec":"get","ep":"tired"},
|
||||
output: {
|
||||
errors: ["wrong ending for intrans. stat. comp"],
|
||||
p: "ستړی کول",
|
||||
f: "stuRey kawul",
|
||||
e: "to get tired, fatigued",
|
||||
ts: 1591033069786,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 5000,
|
||||
ts: 1527819674,
|
||||
p: "څملاستل",
|
||||
f: "tsumlaastúl",
|
||||
e: "to lie down",
|
||||
l: 1596485996977,
|
||||
separationAtF: 4,
|
||||
c: "v. intrans. seperable",
|
||||
psp: "څمل",
|
||||
psf: "tsaml",
|
||||
noOo: true,
|
||||
},
|
||||
{
|
||||
input: {"ts":1591033078746,"i":7716,"p":"ستړی کېدل","f":"stuRey kedul","g":"stuReykawul","e":"to make tired, wear out","c":"v. stat. comp. trans.","l":1527815306,"ec":"make","ep":"tired"},
|
||||
output: {
|
||||
errors: ["wrong ending for trans. stat. comp"],
|
||||
p: "ستړی کېدل",
|
||||
f: "stuRey kedul",
|
||||
e: "to make tired, wear out",
|
||||
ts: 1591033078746,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
output: {
|
||||
errors: ["missing separationAtP"],
|
||||
p: "څملاستل",
|
||||
f: "tsumlaastúl",
|
||||
e: "to lie down",
|
||||
ts: 1527819674,
|
||||
erroneousFields: ["separationAtP"],
|
||||
},
|
||||
{
|
||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 2222,
|
||||
ts: 1571859113828,
|
||||
p: "پخول",
|
||||
f: "pakhawul",
|
||||
e: "to cook, prepare, to cause to ripen, mature",
|
||||
c: "v. stat. comp. trans.",
|
||||
},
|
||||
{
|
||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
||||
output: { ok: true },
|
||||
output: {
|
||||
errors: ["missing complement for compound verb"],
|
||||
p: "پخول",
|
||||
f: "pakhawul",
|
||||
e: "to cook, prepare, to cause to ripen, mature",
|
||||
ts: 1571859113828,
|
||||
erroneousFields: ["l"],
|
||||
},
|
||||
{
|
||||
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||
output: {
|
||||
errors: ["spacing discrepency between p and f"],
|
||||
p: "بې چاره",
|
||||
f: "bechaara",
|
||||
e: "poor thing, pitiful",
|
||||
ts: 1527812488,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 2222,
|
||||
ts: 1571859113828,
|
||||
p: "پخول",
|
||||
f: "pakhawul",
|
||||
e: "to cook, prepare, to cause to ripen, mature",
|
||||
l: 1574867531681,
|
||||
c: "v. stat. comp. trans.",
|
||||
},
|
||||
{
|
||||
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||
output: {
|
||||
errors: ["spacing discrepency between p and f"],
|
||||
p: "بېچاره",
|
||||
f: "be chaara",
|
||||
e: "poor thing, pitiful",
|
||||
ts: 1527812488,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
output: {
|
||||
checkComplement: true,
|
||||
},
|
||||
{
|
||||
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||
output: { ok: true }
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 2231,
|
||||
ts: 1527812013,
|
||||
p: "پراخ",
|
||||
f: "praakh, paráakh",
|
||||
e: "wide, broad, spacious, vast",
|
||||
c: "adj.",
|
||||
},
|
||||
{
|
||||
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"},
|
||||
output: {
|
||||
errors: ["spacing discrepency between app and apf"],
|
||||
p: "مکتب",
|
||||
f: "maktab",
|
||||
e: "school",
|
||||
ts: 1527814265,
|
||||
erroneousFields: ["app", "apf"],
|
||||
},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 0,
|
||||
ts: 1527812013,
|
||||
p: "پراخ",
|
||||
f: "praakh, paráakh",
|
||||
e: "wide, broad, spacious, vast",
|
||||
c: "adj.",
|
||||
},
|
||||
{
|
||||
input: {"ts":1527815870,"i":183,"p":"اثر","f":"asar","g":"asar","e":"influence, impression, tracks, affect","r":4,"c":"n. m.","app":"اثرات, آثار","apf":"asráat"},
|
||||
output: {
|
||||
errors: ["difference in variation length between app and apf", "script and phonetics do not match for app and apf"],
|
||||
p: "اثر",
|
||||
f: "asar",
|
||||
e: "influence, impression, tracks, affect",
|
||||
ts: 1527815870,
|
||||
erroneousFields: ["app", "apf"],
|
||||
},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 12,
|
||||
ts: 1575058859661,
|
||||
p: "آبدار",
|
||||
f: "aawdáar",
|
||||
e: "watery, damp, humid, juicy",
|
||||
c: "adj.",
|
||||
},
|
||||
output: {
|
||||
errors: ["script and phonetics do not match for p and f"],
|
||||
p: "آبدار",
|
||||
f: "aawdáar",
|
||||
e: "watery, damp, humid, juicy",
|
||||
ts: 1575058859661,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
ts: 1591033069786,
|
||||
i: 7717,
|
||||
p: "ستړی کول",
|
||||
f: "stuRay kawul",
|
||||
g: "stuRaykedul",
|
||||
e: "to get tired, fatigued",
|
||||
c: "v. stat. comp. intrans.",
|
||||
l: 1527815306,
|
||||
ec: "get",
|
||||
ep: "tired",
|
||||
},
|
||||
output: {
|
||||
errors: ["wrong ending for intrans. stat. comp"],
|
||||
p: "ستړی کول",
|
||||
f: "stuRay kawul",
|
||||
e: "to get tired, fatigued",
|
||||
ts: 1591033069786,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
ts: 1591033078746,
|
||||
i: 7716,
|
||||
p: "ستړی کېدل",
|
||||
f: "stuRay kedul",
|
||||
g: "stuRaykawul",
|
||||
e: "to make tired, wear out",
|
||||
c: "v. stat. comp. trans.",
|
||||
l: 1527815306,
|
||||
ec: "make",
|
||||
ep: "tired",
|
||||
},
|
||||
output: {
|
||||
errors: ["wrong ending for trans. stat. comp"],
|
||||
p: "ستړی کېدل",
|
||||
f: "stuRay kedul",
|
||||
e: "to make tired, wear out",
|
||||
ts: 1591033078746,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 12,
|
||||
ts: 1575058859661,
|
||||
p: "آبدار",
|
||||
f: "aawdáar",
|
||||
e: "watery, damp, humid, juicy",
|
||||
c: "adj.",
|
||||
diacExcept: true,
|
||||
},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {
|
||||
i: 12,
|
||||
ts: 1575058859661,
|
||||
p: "آبدار",
|
||||
f: "aawdáar",
|
||||
e: "watery, damp, humid, juicy",
|
||||
c: "adj.",
|
||||
diacExcept: true,
|
||||
},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {
|
||||
ts: 1527812488,
|
||||
i: 1934,
|
||||
p: "بې چاره",
|
||||
f: "bechaara",
|
||||
g: "bechaara",
|
||||
e: "poor thing, pitiful",
|
||||
r: 3,
|
||||
c: "adj.",
|
||||
},
|
||||
output: {
|
||||
errors: ["spacing discrepency between p and f"],
|
||||
p: "بې چاره",
|
||||
f: "bechaara",
|
||||
e: "poor thing, pitiful",
|
||||
ts: 1527812488,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
ts: 1527812488,
|
||||
i: 1934,
|
||||
p: "بېچاره",
|
||||
f: "be chaara",
|
||||
g: "bechaara",
|
||||
e: "poor thing, pitiful",
|
||||
r: 3,
|
||||
c: "adj.",
|
||||
},
|
||||
output: {
|
||||
errors: ["spacing discrepency between p and f"],
|
||||
p: "بېچاره",
|
||||
f: "be chaara",
|
||||
e: "poor thing, pitiful",
|
||||
ts: 1527812488,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
ts: 1527812488,
|
||||
i: 1934,
|
||||
p: "بې چاره",
|
||||
f: "be chaara",
|
||||
g: "bechaara",
|
||||
e: "poor thing, pitiful",
|
||||
r: 3,
|
||||
c: "adj.",
|
||||
},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {
|
||||
ts: 1527814265,
|
||||
i: 12969,
|
||||
p: "مکتب",
|
||||
f: "maktab",
|
||||
g: "maktab",
|
||||
e: "school",
|
||||
r: 4,
|
||||
c: "n. m.",
|
||||
app: "مکاتب",
|
||||
apf: "ma kaatib",
|
||||
},
|
||||
output: {
|
||||
errors: ["spacing discrepency between app and apf"],
|
||||
p: "مکتب",
|
||||
f: "maktab",
|
||||
e: "school",
|
||||
ts: 1527814265,
|
||||
erroneousFields: ["app", "apf"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {
|
||||
ts: 1527815870,
|
||||
i: 183,
|
||||
p: "اثر",
|
||||
f: "asar",
|
||||
g: "asar",
|
||||
e: "influence, impression, tracks, affect",
|
||||
r: 4,
|
||||
c: "n. m.",
|
||||
app: "اثرات, آثار",
|
||||
apf: "asráat",
|
||||
},
|
||||
output: {
|
||||
errors: [
|
||||
"difference in variation length between app and apf",
|
||||
"script and phonetics do not match for app and apf",
|
||||
],
|
||||
p: "اثر",
|
||||
f: "asar",
|
||||
e: "influence, impression, tracks, affect",
|
||||
ts: 1527815870,
|
||||
erroneousFields: ["app", "apf"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
test("validateEntry should work", () => {
|
||||
toTest.forEach((t) => {
|
||||
expect(validateEntry(t.input as T.DictionaryEntry)).toEqual(t.output);
|
||||
});
|
||||
toTest.forEach((t) => {
|
||||
expect(validateEntry(t.input as T.DictionaryEntry)).toEqual(t.output);
|
||||
});
|
||||
});
|
||||
|
||||
test("standardizeEntry", () => {
|
||||
expect(standardizeEntry({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa‘ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."}))
|
||||
.toEqual({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa'ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."});
|
||||
expect(
|
||||
standardizeEntry({
|
||||
i: 195,
|
||||
ts: 1527822036,
|
||||
p: "اجتماعي",
|
||||
f: "ijtimaa‘ee, ijtimaayee",
|
||||
g: "ijtimaaee,ijtimaayee",
|
||||
e: "public, social, societal",
|
||||
c: "adj.",
|
||||
})
|
||||
).toEqual({
|
||||
i: 195,
|
||||
ts: 1527822036,
|
||||
p: "اجتماعي",
|
||||
f: "ijtimaa'ee, ijtimaayee",
|
||||
g: "ijtimaaee,ijtimaayee",
|
||||
e: "public, social, societal",
|
||||
c: "adj.",
|
||||
});
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue