more on upcoming diacritics engine / cool functional refactor

This commit is contained in:
Bill D 2021-05-07 14:48:33 +03:00
parent 3aaee3b6f2
commit 6053d11bc0
4 changed files with 97 additions and 21 deletions

View File

@ -24,7 +24,8 @@
}, },
"dependencies": { "dependencies": {
"classnames": "^2.2.6", "classnames": "^2.2.6",
"pbf": "^3.2.1" "pbf": "^3.2.1",
"rambda": "^6.7.0"
}, },
"devDependencies": { "devDependencies": {
"@fortawesome/fontawesome-free": "^5.15.2", "@fortawesome/fontawesome-free": "^5.15.2",

View File

@ -258,7 +258,7 @@ const diacriticsTest: Array<{
}, },
out: "اِیسار", out: "اِیسار",
}, },
// double consonant // double consonant / tashdeed
{ {
in: { in: {
p: "بتن", p: "بتن",
@ -266,6 +266,50 @@ const diacriticsTest: Array<{
}, },
out: "ب" + zwar + "ت" + tashdeed + zwar + "ن", out: "ب" + zwar + "ت" + tashdeed + zwar + "ن",
}, },
{
in: {
p: "بتطن",
f: "battan",
},
out: "ب" + zwar + "ت" + sukun + "ط" + zwar + "ن",
},
// vowel endings working
{
in: {
p: "بته",
f: "bata",
},
out: "بَتَه",
},
{
in: {
p: "بته",
f: "bati",
},
out: "بَتِه",
},
{
in: {
p: "پرمختیا",
f: "parmakhtyaa",
},
out: "پَرْمَخْتْیا",
},
// {
// in: {
// p: "پته",
// f: "patta",
// },
// out: "پَتّه",
// },
// get ayn stuff working
// {
// in: {
// p: "اعتصاب شکن",
// f: "itisaabshikan",
// },
// out: "اِعتِصاب شِکَن",
// },
// avoid false double consonant // avoid false double consonant
{ {
in: { in: {

View File

@ -9,6 +9,7 @@
import * as T from "../types"; import * as T from "../types";
import { removeAccents } from "./accent-helpers"; import { removeAccents } from "./accent-helpers";
import { firstPhonetics } from "./p-text-helpers"; import { firstPhonetics } from "./p-text-helpers";
import { pipe } from "rambda";
const zwar = "َ"; const zwar = "َ";
const zwarakey = "ٙ"; const zwarakey = "ٙ";
@ -341,36 +342,54 @@ function processPhoneme(
const currentPLetter = state.pIn[0]; const currentPLetter = state.pIn[0];
const nextPLetter = state.pIn[1]; const nextPLetter = state.pIn[1];
const isBeginningOfWord = state.pOut === "" || prevPLetter === " "; const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
// const isEndOfWord = !nextPLetter || nextPLetter === " ";
const phonemeInfo = phonemeTable[phoneme]; const phonemeInfo = phonemeTable[phoneme];
const previousPhoneme = i > 0 && phonemes[i-1]; const previousPhoneme = i > 0 && phonemes[i-1];
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]]; const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant); const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
const needsTashdeed = doubleConsonant && (previousPhoneme === phoneme); const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
const needsSukun = doubleConsonant && (previousPhoneme !== phoneme); const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
const sukunOrDiacritic = (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : "");
if (needsTashdeed) { // if it's not an exception (TODO)
return addP(state, tashdeed); // it must be one of the following 5 possibilities
}
// 1. beginning a word with a long vowel
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) { if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
throw Error("phonetics error - needs alef prefix"); throw Error("phonetics error - needs alef prefix");
} }
const ns = advanceP(state); return pipe(
const ns2 = phonemeInfo.diacritic ? addP(ns, phonemeInfo.diacritic) : ns; advanceP,
return advanceP(ns2); addP(phonemeInfo.diacritic),
advanceP,
)(state);
// 2. beginning a word with something else
} else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { } else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
const ns = advanceP(state); return pipe(
return addP(ns, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : "")); advanceP,
addP(sukunOrDiacritic),
)(state);
// 3. double consonant to be marked with tashdeed
} else if (needsTashdeed) {
return addP(tashdeed)(state);
// 4. direct match of phoneme / P letter
} else if (phonemeInfo.matches?.includes(currentPLetter)) { } else if (phonemeInfo.matches?.includes(currentPLetter)) {
const ns = addP(state, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : "")); return pipe(
return advanceP(ns); addP(sukunOrDiacritic),
} advanceP,
)(state);
if (phonemeInfo.diacritic) { // 5. just a diacritic for short vowel
return addP(state, phonemeInfo.diacritic); } else if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
return pipe(
addP(phonemeInfo.diacritic),
advanceIfReachedEndingHamza,
)(state);
} }
// anything that gets to this point is a failure/error
// console.log(state); // console.log(state);
throw new Error("phonetics error"); throw new Error("phonetics error");
} }
@ -391,9 +410,16 @@ function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumu
} }
} }
function addP(state: DiacriticsAccumulator, toAdd: string): DiacriticsAccumulator { const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
return { return {
...state, ...state,
pOut: state.pOut + toAdd, pOut: toAdd ? (state.pOut + toAdd) : state.pOut,
}; };
} }
/**
 * Steps past a "ه" that closes a word in the remaining input.
 *
 * Looks at the next two entries of `state.pIn`. If the first is "ه" and the
 * second is either missing or a space, the state is handed to `advanceP`
 * (called with its default step); in every other case the state is returned
 * exactly as received.
 *
 * @param state - the accumulator whose `pIn` is inspected
 * @returns the result of `advanceP(state)` when the condition holds, otherwise `state`
 */
function advanceIfReachedEndingHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
  const upcoming = state.pIn[0];
  if (upcoming !== "ه") {
    return state;
  }
  const afterUpcoming = state.pIn[1];
  // nothing left, or a space, means the "ه" is the last letter of its word
  const closesWord = !afterUpcoming || afterUpcoming === " ";
  return closesWord ? advanceP(state) : state;
}

View File

@ -9112,6 +9112,11 @@ raf@^3.4.1:
dependencies: dependencies:
performance-now "^2.1.0" performance-now "^2.1.0"
rambda@^6.7.0:
version "6.7.0"
resolved "https://registry.yarnpkg.com/rambda/-/rambda-6.7.0.tgz#50322efdd23a108b61eb6ac4e0868d10dd95b4aa"
integrity sha512-qg2atEwhAS4ipYoNfggkIP7qBUbY2OqdW17n25VqZIz5YC1MIwSpIToQ7XacvqSCZz16efM8Y8QKLx+Js1Sybg==
randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0: randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0:
version "2.1.0" version "2.1.0"
resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a" resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"