working with special du behaviour
This commit is contained in:
parent
73f786890e
commit
5d41d953a0
|
@ -2,7 +2,7 @@ import {
|
||||||
splitFIntoPhonemes,
|
splitFIntoPhonemes,
|
||||||
last,
|
last,
|
||||||
addP,
|
addP,
|
||||||
prev2Chars,
|
lastNonWhitespace,
|
||||||
advanceP,
|
advanceP,
|
||||||
reverseP,
|
reverseP,
|
||||||
overwriteP,
|
overwriteP,
|
||||||
|
@ -97,8 +97,32 @@ test("addP should work", () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
test("prev2Chars should work", () => {
|
test("lastNonWhiteSpace should work", () => {
|
||||||
expect(prev2Chars("تورن")).toBe("رن");
|
expect(lastNonWhitespace("تورن")).toBe("ن");
|
||||||
expect(prev2Chars("وست .. ")).toBe("ست");
|
expect(lastNonWhitespace("وست .. ")).toBe("ت");
|
||||||
expect(prev2Chars("دَ ... ")).toBe("دَ");
|
expect(lastNonWhitespace("د ... ")).toBe("د");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("reverseP should work", () => {
|
||||||
|
expect(reverseP({
|
||||||
|
pIn: "کور",
|
||||||
|
pOut: "تور ",
|
||||||
|
})).toEqual({
|
||||||
|
pIn: " کور",
|
||||||
|
pOut: "تور",
|
||||||
|
});
|
||||||
|
expect(reverseP({
|
||||||
|
pIn: "کور",
|
||||||
|
pOut: "تور ... ",
|
||||||
|
})).toEqual({
|
||||||
|
pIn: " ... کور",
|
||||||
|
pOut: "تور",
|
||||||
|
});
|
||||||
|
expect(reverseP({
|
||||||
|
pIn: "کور",
|
||||||
|
pOut: "تور . ",
|
||||||
|
})).toEqual({
|
||||||
|
pIn: " . کور",
|
||||||
|
pOut: "تور",
|
||||||
|
});
|
||||||
|
})
|
|
@ -353,19 +353,16 @@ export const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): D
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns the last two characters in a string that were not a space or a dot
|
* returns the last character in a string that is not a space or a dot (" " / ".")
|
||||||
*
|
*
|
||||||
* @param s
|
* @param s
|
||||||
* @returns
|
* @returns
|
||||||
*/
|
*/
|
||||||
export function prev2Chars(s: string): string {
|
export function lastNonWhitespace(s: string): string {
|
||||||
// console.log("looking at pOut", s);
|
|
||||||
const reversed = [...s].reverse();
|
const reversed = [...s].reverse();
|
||||||
// console.log(reversed.join("-"));
|
|
||||||
const lastIndex = reversed.findIndex((c) => ![" ", "."].includes(c));
|
const lastIndex = reversed.findIndex((c) => ![" ", "."].includes(c));
|
||||||
const last2 = reversed[lastIndex + 1] + reversed[lastIndex];
|
const penultimateChar = reversed[lastIndex];
|
||||||
// console.log("last2", last2);
|
return penultimateChar;
|
||||||
return last2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
|
export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
|
||||||
|
|
|
@ -587,13 +587,13 @@ const diacriticsSections: {
|
||||||
},
|
},
|
||||||
out: "د" + zwarakey + " لاس",
|
out: "د" + zwarakey + " لاس",
|
||||||
},
|
},
|
||||||
// {
|
{
|
||||||
// in: {
|
in: {
|
||||||
// p: "د ... په شان",
|
p: "د ... په شان",
|
||||||
// f: "du ... pu shaan",
|
f: "du ... pu shaan",
|
||||||
// },
|
},
|
||||||
// out: "د" + zwarakey + "... پهٔ شان",
|
out: "د" + zwarakey + " ... پهٔ شان",
|
||||||
// },
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
@ -601,7 +601,6 @@ const diacriticsSections: {
|
||||||
diacriticsSections.forEach((section) => {
|
diacriticsSections.forEach((section) => {
|
||||||
describe(section.describe, () => {
|
describe(section.describe, () => {
|
||||||
section.tests.forEach((t) => {
|
section.tests.forEach((t) => {
|
||||||
if (section.describe === "special behaviour with د") {
|
|
||||||
if (t.out) {
|
if (t.out) {
|
||||||
test(`diacritics should work for ${t.in.p} - ${t.in.f}`, () => {
|
test(`diacritics should work for ${t.in.p} - ${t.in.f}`, () => {
|
||||||
expect(addDiacritics(t.in)).toEqual({ p: t.out, f: t.in.f });
|
expect(addDiacritics(t.in)).toEqual({ p: t.out, f: t.in.f });
|
||||||
|
@ -611,7 +610,6 @@ diacriticsSections.forEach((section) => {
|
||||||
expect(addDiacritics(t.in)).toThrowError();
|
expect(addDiacritics(t.in)).toThrowError();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -21,7 +21,7 @@ import {
|
||||||
wasla,
|
wasla,
|
||||||
daggerAlif,
|
daggerAlif,
|
||||||
fathahan,
|
fathahan,
|
||||||
prev2Chars,
|
lastNonWhitespace,
|
||||||
addP,
|
addP,
|
||||||
last,
|
last,
|
||||||
advanceP,
|
advanceP,
|
||||||
|
@ -75,7 +75,10 @@ function processPhoneme(
|
||||||
// console.log("space coming up", acc.pIn[0] === " ");
|
// console.log("space coming up", acc.pIn[0] === " ");
|
||||||
// console.log("state", acc);
|
// console.log("state", acc);
|
||||||
// Prep state
|
// Prep state
|
||||||
const state = acc.pIn[0] === " "
|
// TODO: use a cleaner function to jump to the next char
|
||||||
|
const state = acc.pIn.slice(0, 5) === " ... "
|
||||||
|
? advanceP(acc, 5)
|
||||||
|
: acc.pIn[0] === " "
|
||||||
? advanceP(acc)
|
? advanceP(acc)
|
||||||
: acc;
|
: acc;
|
||||||
// console.log("AFTER SPACE PREP", phoneme);
|
// console.log("AFTER SPACE PREP", phoneme);
|
||||||
|
@ -132,10 +135,10 @@ function processPhoneme(
|
||||||
addP(zer),
|
addP(zer),
|
||||||
)(state)
|
)(state)
|
||||||
: (phs === PhonemeStatus.EndOfDuParticle) ?
|
: (phs === PhonemeStatus.EndOfDuParticle) ?
|
||||||
(console.log("here"), pipe(
|
pipe(
|
||||||
reverseP,
|
reverseP,
|
||||||
addP(zwarakey),
|
addP(zwarakey),
|
||||||
)(state))
|
)(state)
|
||||||
:
|
:
|
||||||
// phs === PhonemeStatus.ShortVowel
|
// phs === PhonemeStatus.ShortVowel
|
||||||
pipe(
|
pipe(
|
||||||
|
@ -177,9 +180,12 @@ function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
|
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
|
||||||
return PhonemeStatus.LeadingConsonantOrShortVowel;
|
return PhonemeStatus.LeadingConsonantOrShortVowel;
|
||||||
}
|
}
|
||||||
console.log(phoneme, phonemes, prev2Chars(state.pOut))
|
// console.log("------");
|
||||||
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && prev2Chars(state.pOut) === ("د" + zwarakey)) {
|
// console.log("phoneme", phoneme);
|
||||||
// console.log("du here", phoneme, phonemes);
|
// console.log("state", state);
|
||||||
|
// console.log("prevPLetter is space", prevPLetter === " ");
|
||||||
|
// console.log("------");
|
||||||
|
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
|
||||||
return PhonemeStatus.EndOfDuParticle
|
return PhonemeStatus.EndOfDuParticle
|
||||||
}
|
}
|
||||||
if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
|
if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
|
||||||
|
|
Loading…
Reference in New Issue