working with special du behaviour

This commit is contained in:
Bill D 2021-05-16 18:00:05 +03:00
parent 73f786890e
commit 5d41d953a0
4 changed files with 62 additions and 37 deletions

View File

@ -2,7 +2,7 @@ import {
splitFIntoPhonemes, splitFIntoPhonemes,
last, last,
addP, addP,
prev2Chars, lastNonWhitespace,
advanceP, advanceP,
reverseP, reverseP,
overwriteP, overwriteP,
@ -97,8 +97,32 @@ test("addP should work", () => {
}); });
}); });
test("prev2Chars should work", () => { test("lastNonWhiteSpace should work", () => {
expect(prev2Chars("تورن")).toBe("رن"); expect(lastNonWhitespace("تورن")).toBe("ن");
expect(prev2Chars("وست .. ")).toBe("ست"); expect(lastNonWhitespace("وست .. ")).toBe("ت");
expect(prev2Chars("دَ ... ")).toBe("دَ"); expect(lastNonWhitespace("د ... ")).toBe("د");
}); });
// Exercises reverseP with three states that share the same pIn and differ only in the
// run of spaces/dots trailing pOut. In every case the expected result has that trailing
// run removed from the end of pOut and placed at the front of pIn.
test("reverseP should work", () => {
// Case 1: pOut ends with a single space.
expect(reverseP({
pIn: "کور",
pOut: "تور ",
})).toEqual({
pIn: " کور",
pOut: "تور",
});
// Case 2: pOut ends with " ... " (space, three dots, space); the whole run is expected to move.
expect(reverseP({
pIn: "کور",
pOut: "تور ... ",
})).toEqual({
pIn: " ... کور",
pOut: "تور",
});
// Case 3: pOut ends with " . " (space, one dot, space).
expect(reverseP({
pIn: "کور",
pOut: "تور . ",
})).toEqual({
pIn: " . کور",
pOut: "تور",
});
})

View File

@ -353,19 +353,16 @@ export const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): D
}; };
/** /**
* returns the last two character in a string that was not a space or a dote * returns the last letter before any whitespace (" " / ".")
* *
* @param s * @param s
* @returns * @returns
*/ */
export function prev2Chars(s: string): string { export function lastNonWhitespace(s: string): string {
// console.log("looking at pOut", s);
const reversed = [...s].reverse(); const reversed = [...s].reverse();
// console.log(reversed.join("-"));
const lastIndex = reversed.findIndex((c) => ![" ", "."].includes(c)); const lastIndex = reversed.findIndex((c) => ![" ", "."].includes(c));
const last2 = reversed[lastIndex + 1] + reversed[lastIndex]; const penultimateChar = reversed[lastIndex];
// console.log("last2", last2); return penultimateChar;
return last2;
} }
export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} { export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {

View File

@ -587,13 +587,13 @@ const diacriticsSections: {
}, },
out: "د" + zwarakey + " لاس", out: "د" + zwarakey + " لاس",
}, },
// { {
// in: { in: {
// p: "د ... په شان", p: "د ... په شان",
// f: "du ... pu shaan", f: "du ... pu shaan",
// }, },
// out: "د" + zwarakey + "... پهٔ شان", out: "د" + zwarakey + " ... پهٔ شان",
// }, },
], ],
}, },
]; ];
@ -601,16 +601,14 @@ const diacriticsSections: {
diacriticsSections.forEach((section) => { diacriticsSections.forEach((section) => {
describe(section.describe, () => { describe(section.describe, () => {
section.tests.forEach((t) => { section.tests.forEach((t) => {
if (section.describe === "special behaviour with د") { if (t.out) {
if (t.out) { test(`diacritics should work for ${t.in.p} - ${t.in.f}`, () => {
test(`diacritics should work for ${t.in.p} - ${t.in.f}`, () => { expect(addDiacritics(t.in)).toEqual({ p: t.out, f: t.in.f });
expect(addDiacritics(t.in)).toEqual({ p: t.out, f: t.in.f }); });
}); } else {
} else { expect(() => {
expect(() => { expect(addDiacritics(t.in)).toThrowError();
expect(addDiacritics(t.in)).toThrowError(); });
});
}
} }
}); });
}); });

View File

@ -21,7 +21,7 @@ import {
wasla, wasla,
daggerAlif, daggerAlif,
fathahan, fathahan,
prev2Chars, lastNonWhitespace,
addP, addP,
last, last,
advanceP, advanceP,
@ -75,7 +75,10 @@ function processPhoneme(
// console.log("space coming up", acc.pIn[0] === " "); // console.log("space coming up", acc.pIn[0] === " ");
// console.log("state", acc); // console.log("state", acc);
// Prep state // Prep state
const state = acc.pIn[0] === " " // TODO: CLEANER function jump to next char
const state = acc.pIn.slice(0, 5) === " ... "
? advanceP(acc, 5)
: acc.pIn[0] === " "
? advanceP(acc) ? advanceP(acc)
: acc; : acc;
// console.log("AFTER SPACE PREP", phoneme); // console.log("AFTER SPACE PREP", phoneme);
@ -132,10 +135,10 @@ function processPhoneme(
addP(zer), addP(zer),
)(state) )(state)
: (phs === PhonemeStatus.EndOfDuParticle) ? : (phs === PhonemeStatus.EndOfDuParticle) ?
(console.log("here"), pipe( pipe(
reverseP, reverseP,
addP(zwarakey), addP(zwarakey),
)(state)) )(state)
: :
// phs === PhonemeState.ShortVowel // phs === PhonemeState.ShortVowel
pipe( pipe(
@ -177,9 +180,12 @@ function stateInfo({ state, i, phonemes, phoneme }: {
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
return PhonemeStatus.LeadingConsonantOrShortVowel; return PhonemeStatus.LeadingConsonantOrShortVowel;
} }
console.log(phoneme, phonemes, prev2Chars(state.pOut)) // console.log("------");
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && prev2Chars(state.pOut) === ("د" + zwarakey)) { // console.log("phoneme", phoneme);
// console.log("du here", phoneme, phonemes); // console.log("state", state);
// console.log("prevPLetter is space", prevPLetter === " ");
// console.log("------");
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
return PhonemeStatus.EndOfDuParticle return PhonemeStatus.EndOfDuParticle
} }
if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") { if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {