This commit is contained in:
adueck 2023-07-23 17:17:38 +04:00
parent 0c2948721b
commit 0846fee749
1 changed files with 115 additions and 103 deletions

View File

@ -16,19 +16,25 @@ import { makePsString, removeFVarients } from "./accent-and-ps-utils";
 * @param s
 */
export function accentOnFront(s: T.PsString): T.PsString;
export function accentOnFront(
  s: T.LengthOptions<T.PsString>
): T.LengthOptions<T.PsString>;
export function accentOnFront(
  s: T.SingleOrLengthOpts<T.PsString>
): T.SingleOrLengthOpts<T.PsString>;
export function accentOnFront(
  s: T.SingleOrLengthOpts<T.PsString>
): T.SingleOrLengthOpts<T.PsString> {
  // Single PsString: run the phonetics through removeAccents, then let
  // accentLetter accent the first vowel it matches. Other fields are kept.
  if (!("long" in s)) {
    const unaccented = removeAccents(s.f);
    return { ...s, f: accentLetter(unaccented) };
  }
  // Length options: handle each variant separately. Only `short` and `long`
  // are carried over into the result.
  const short = accentOnFront(s.short);
  const long = accentOnFront(s.long);
  return { short, long };
}
/**
@ -37,33 +43,35 @@ export function accentOnFront(s: T.SingleOrLengthOpts<T.PsString>): T.SingleOrLe
 * @param s - the Pashto string (with Pashto and Phonetics) to ensure the accent on
 */
export function accentPastParticiple(s: T.PsString): T.PsString {
  // start from the phonetics run through removeAccents, split into syllables
  // (spaces are preserved as their own items)
  const syls = splitUpSyllables(removeAccents(s.f));
  const total = syls.length;
  // in words like wáayuley and azmóyuley the accent falls on the third-last
  // syllable: the second-last is "ul" and the third-last ends in "y"
  const onThirdLast =
    total >= 3 &&
    syls[total - 2] === "ul" &&
    syls[total - 3].slice(-1) === "y";
  // otherwise the accent goes on the second-last syllable
  const stepsFromEnd = onThirdLast ? 2 : 1;
  return makePsString(s.p, accentFSylsOnNFromEnd(syls, stepsFromEnd));
}
/**
 * Splits a phonetic string into syllable chunks.
 *
 * Each chunk is one vowel group together with the non-vowel letters around
 * it; every space in the input is returned as its own " " item. Text that
 * contains no vowel group (and no space) produces no items.
 *
 * @param f - the phonetic string to split
 * @returns the chunks in order, or an empty array when nothing matches
 */
export function splitUpSyllables(f: string): string[] {
  const syllableOrSpace =
    / |([^a|á|e|é|i|í|o|ó|u|ú| ]*(aa|áa|a|á|ey|éy|ee|ée|e|é|oo|óo|o|ó|i|í|u|ú)[^a|á|e|é|i|í|o|ó|u|ú| ]*)/gi;
  const found = f.match(syllableOrSpace);
  // String.prototype.match gives null (not an empty array) when nothing matches
  return found === null ? ([] as string[]) : found;
}
/**
 * Counts the items that splitUpSyllables produces for a phonetic string,
 * after passing it through removeAccents and removeFVarients.
 *
 * @param f - a phonetic string, or a PsString whose `f` field is used
 * @returns the number of items returned by splitUpSyllables
 */
export function countSyllables(f: T.PsString | string): number {
  const phonetics = typeof f === "string" ? f : f.f;
  const plain = removeFVarients(removeAccents(phonetics));
  return splitUpSyllables(plain).length;
}
/**
@ -72,87 +80,91 @@ export function countSyllables(f: T.PsString | string): number {
 * @param syls - an array of syllables in phonetic strings without accents (including spaces as extra items)
 * @param n - the number of items from the end at which to put the accent (0 = the last item)
 */
export function accentFSylsOnNFromEnd(
  syls: string[] | string,
  n: number
): string {
  // accept a plain phonetic string by splitting it into syllables first
  if (typeof syls === "string") {
    return accentFSylsOnNFromEnd(splitUpSyllables(syls), n);
  }
  // index of the item to accent; n = 0 means the last item
  const target = syls.length - (n + 1);
  // No such item (empty input, n too large, negative or fractional n):
  // return the text unchanged instead of passing `undefined` to accentLetter,
  // which would throw a TypeError.
  if (!Number.isInteger(target) || target < 0 || target >= syls.length) {
    return syls.join("");
  }
  return [
    ...syls.slice(0, target), // before accent
    accentLetter(syls[target]), // syllable to be accented
    ...syls.slice(target + 1), // after syllable to be accented
  ].join("");
}
/**
 * Re-accents a PsString: the phonetics are run through removeAccents, split
 * into syllables, and the item `n` places from the end (0 = last) is accented.
 *
 * @param ps - the Pashto/phonetics pair to re-accent
 * @param n - how many items from the end the accent should go on
 * @returns a PsString built from the same `p` and the re-accented phonetics
 */
export function accentOnNFromEnd(ps: T.PsString, n: number): T.PsString {
  const unaccented = removeAccents(ps.f);
  const syllables = splitUpSyllables(unaccented);
  // TODO: enable this and fix the tests it breaks!!!
  // don't add accent if only one syllable
  // if (syllables.length === 1) return makePsString(ps.p, unaccented);
  const accentedF = accentFSylsOnNFromEnd(syllables, n);
  return makePsString(ps.p, accentedF);
}
/**
 * Plain vowels paired with their accented forms.
 *
 * Lookups use `find`, so the first matching entry wins: "á" maps back to "a"
 * (not "ă") because the "a" entry comes first. Keep that order.
 */
const accentReplacer = [
  { vowel: "a", accented: "á" },
  { vowel: "ă", accented: "á" },
  { vowel: "e", accented: "é" },
  { vowel: "i", accented: "í" },
  { vowel: "o", accented: "ó" },
  { vowel: "u", accented: "ú" },
  { vowel: "U", accented: "Ú" },
];

/**
 * Puts an accent on the first accentable vowel in a string.
 *
 * Only the first occurrence of a, ă, e, i, o, u or U is replaced; the rest of
 * the string is left as is. A string without any of these is returned
 * unchanged.
 *
 * @param s - a phonetic string (typically a single syllable)
 * @returns the string with its first accentable vowel accented
 */
export function accentLetter(s: string): string {
  // the pattern must list every `vowel` in accentReplacer, including "ă"
  return s.replace(/a|ă|e|i|o|u|U/, (match) => {
    const r = accentReplacer.find((x) => x.vowel === match);
    /* istanbul ignore next */
    return r?.accented || "";
  });
}
/**
 * Accents a single syllable given as a PsString.
 *
 * @param ps - the syllable in Pashto script and phonetics
 * @returns a new object holding only `p` (unchanged) and `f` passed through
 * accentLetter
 */
export function accentPsSyllable(ps: T.PsString): T.PsString {
  const { p, f } = ps;
  return { p, f: accentLetter(f) };
}

/**
 * Applies removeAccents to a list of PsStrings, or to each list inside a set
 * of length options.
 *
 * @param s - an array of PsStrings, or an object with `long`, `short` and
 * optionally `mini` arrays
 * @returns the same shape with removeAccents applied to every array; `mini`
 * is only present in the result when it is truthy in the input
 */
export function removeAccentsWLength(
  s: T.SingleOrLengthOpts<T.PsString[]>
): T.SingleOrLengthOpts<T.PsString[]> {
  // plain array: no length variants to walk through
  if (!("long" in s)) {
    return removeAccents(s);
  }
  const long = removeAccentsWLength(s.long) as T.PsString[];
  const short = removeAccentsWLength(s.short) as T.PsString[];
  if (!s.mini) {
    return { long, short };
  }
  return {
    long,
    short,
    mini: removeAccentsWLength(s.mini) as T.PsString[],
  };
}
/**
 * Removes every accent from phonetic text.
 *
 * Accepts a phonetic string, a PsString (only its `f` field is changed; all
 * other fields are copied), or an array of PsStrings (each element is
 * processed in turn).
 *
 * @param s - the text to strip accents from
 * @returns a value of the same shape with each accented vowel replaced by its
 * plain form as listed in accentReplacer
 */
export function removeAccents(s: T.PsString): T.PsString;
export function removeAccents(s: string): string;
export function removeAccents(s: T.PsString[]): T.PsString[];
export function removeAccents(
  s: T.PsString | string | T.PsString[]
): T.PsString | string | T.PsString[] {
  if (Array.isArray(s)) {
    return s.map((t) => removeAccents(t));
  }
  if (typeof s !== "string") {
    return {
      ...s,
      f: removeAccents(s.f),
    };
  }
  // The `g` flag matters: without it String.prototype.replace stops after the
  // first match, leaving any later accents in place.
  return s.replace(/á|é|í|ó|ú|Ú/g, (match) => {
    const r = accentReplacer.find((x) => x.accented === match);
    /* istanbul ignore next */
    return r?.vowel || "";
  });
}
/**
@ -161,6 +173,6 @@ export function removeAccents(s: T.PsString | string | T.PsString[]): T.PsString
 * @param s a string of Pashto phonetics
 */
export function hasAccents(s: string | T.PsString): boolean {
  // for a PsString only the phonetics (`f`) are inspected
  const phonetics = typeof s === "string" ? s : s.f;
  // true as soon as any accented vowel from accentReplacer is found
  for (const { accented } of accentReplacer) {
    if (phonetics.includes(accented)) {
      return true;
    }
  }
  return false;
}