more on upcoming diacritics engine / cool functional refactor
This commit is contained in:
parent
3aaee3b6f2
commit
6053d11bc0
|
@ -24,7 +24,8 @@
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"classnames": "^2.2.6",
|
"classnames": "^2.2.6",
|
||||||
"pbf": "^3.2.1"
|
"pbf": "^3.2.1",
|
||||||
|
"rambda": "^6.7.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@fortawesome/fontawesome-free": "^5.15.2",
|
"@fortawesome/fontawesome-free": "^5.15.2",
|
||||||
|
|
|
@ -258,7 +258,7 @@ const diacriticsTest: Array<{
|
||||||
},
|
},
|
||||||
out: "اِیسار",
|
out: "اِیسار",
|
||||||
},
|
},
|
||||||
// double consonant
|
// double consonant / tashdeed
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "بتن",
|
p: "بتن",
|
||||||
|
@ -266,6 +266,50 @@ const diacriticsTest: Array<{
|
||||||
},
|
},
|
||||||
out: "ب" + zwar + "ت" + tashdeed + zwar + "ن",
|
out: "ب" + zwar + "ت" + tashdeed + zwar + "ن",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "بتطن",
|
||||||
|
f: "battan",
|
||||||
|
},
|
||||||
|
out: "ب" + zwar + "ت" + sukun + "ط" + zwar + "ن",
|
||||||
|
},
|
||||||
|
// vowel endings working
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "بته",
|
||||||
|
f: "bata",
|
||||||
|
},
|
||||||
|
out: "بَتَه",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "بته",
|
||||||
|
f: "bati",
|
||||||
|
},
|
||||||
|
out: "بَتِه",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: {
|
||||||
|
p: "پرمختیا",
|
||||||
|
f: "parmakhtyaa",
|
||||||
|
},
|
||||||
|
out: "پَرْمَخْتْیا",
|
||||||
|
},
|
||||||
|
// {
|
||||||
|
// in: {
|
||||||
|
// p: "پته",
|
||||||
|
// f: "patta",
|
||||||
|
// },
|
||||||
|
// out: "پَتّه",
|
||||||
|
// },
|
||||||
|
// get ayn stuff working
|
||||||
|
// {
|
||||||
|
// in: {
|
||||||
|
// p: "اعتصاب شکن",
|
||||||
|
// f: "itisaabshikan",
|
||||||
|
// },
|
||||||
|
// out: "اِعتِصاب شِکَن",
|
||||||
|
// },
|
||||||
// avoid false double consonant
|
// avoid false double consonant
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
import * as T from "../types";
|
import * as T from "../types";
|
||||||
import { removeAccents } from "./accent-helpers";
|
import { removeAccents } from "./accent-helpers";
|
||||||
import { firstPhonetics } from "./p-text-helpers";
|
import { firstPhonetics } from "./p-text-helpers";
|
||||||
|
import { pipe } from "rambda";
|
||||||
|
|
||||||
const zwar = "َ";
|
const zwar = "َ";
|
||||||
const zwarakey = "ٙ";
|
const zwarakey = "ٙ";
|
||||||
|
@ -341,36 +342,54 @@ function processPhoneme(
|
||||||
const currentPLetter = state.pIn[0];
|
const currentPLetter = state.pIn[0];
|
||||||
const nextPLetter = state.pIn[1];
|
const nextPLetter = state.pIn[1];
|
||||||
const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
|
const isBeginningOfWord = state.pOut === "" || prevPLetter === " ";
|
||||||
|
// const isEndOfWord = !nextPLetter || nextPLetter === " ";
|
||||||
const phonemeInfo = phonemeTable[phoneme];
|
const phonemeInfo = phonemeTable[phoneme];
|
||||||
const previousPhoneme = i > 0 && phonemes[i-1];
|
const previousPhoneme = i > 0 && phonemes[i-1];
|
||||||
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
|
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
|
||||||
|
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
|
||||||
|
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
|
||||||
const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
|
const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
|
||||||
const needsTashdeed = doubleConsonant && (previousPhoneme === phoneme);
|
const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter);
|
||||||
const needsSukun = doubleConsonant && (previousPhoneme !== phoneme);
|
const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter));
|
||||||
|
const sukunOrDiacritic = (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : "");
|
||||||
|
|
||||||
if (needsTashdeed) {
|
// if it's not an exception (TODO)
|
||||||
return addP(state, tashdeed);
|
// it must be one of the following 5 possibilities
|
||||||
}
|
|
||||||
|
|
||||||
|
// 1. beginning a word with a long vowel
|
||||||
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
|
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
|
||||||
if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
|
if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
|
||||||
throw Error("phonetics error - needs alef prefix");
|
throw Error("phonetics error - needs alef prefix");
|
||||||
}
|
}
|
||||||
const ns = advanceP(state);
|
return pipe(
|
||||||
const ns2 = phonemeInfo.diacritic ? addP(ns, phonemeInfo.diacritic) : ns;
|
advanceP,
|
||||||
return advanceP(ns2);
|
addP(phonemeInfo.diacritic),
|
||||||
|
advanceP,
|
||||||
|
)(state);
|
||||||
|
// 2. beginning a word with something else
|
||||||
} else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
|
} else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
|
||||||
const ns = advanceP(state);
|
return pipe(
|
||||||
return addP(ns, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""));
|
advanceP,
|
||||||
|
addP(sukunOrDiacritic),
|
||||||
|
)(state);
|
||||||
|
// 3. double consonant to be marked with tashdeed
|
||||||
|
} else if (needsTashdeed) {
|
||||||
|
return addP(tashdeed)(state);
|
||||||
|
// 4. direct match of phoneme / P letter
|
||||||
} else if (phonemeInfo.matches?.includes(currentPLetter)) {
|
} else if (phonemeInfo.matches?.includes(currentPLetter)) {
|
||||||
const ns = addP(state, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""));
|
return pipe(
|
||||||
return advanceP(ns);
|
addP(sukunOrDiacritic),
|
||||||
}
|
advanceP,
|
||||||
|
)(state);
|
||||||
if (phonemeInfo.diacritic) {
|
// 5. just a diacritic for short vowel
|
||||||
return addP(state, phonemeInfo.diacritic);
|
} else if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
|
||||||
|
return pipe(
|
||||||
|
addP(phonemeInfo.diacritic),
|
||||||
|
advanceIfReachedEndingHamza,
|
||||||
|
)(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// anything that gets to this point is a failure/error
|
||||||
// console.log(state);
|
// console.log(state);
|
||||||
throw new Error("phonetics error");
|
throw new Error("phonetics error");
|
||||||
}
|
}
|
||||||
|
@ -391,9 +410,16 @@ function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function addP(state: DiacriticsAccumulator, toAdd: string): DiacriticsAccumulator {
|
// addP (new side of this diff row): curried so it can be used as a step inside pipe(...).
// Call as addP(toAdd)(state). Returns a copy of `state` whose `pOut` has `toAdd` appended.
// The previous version (old side, directly above) took (state, toAdd) in a single call.
const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
||||||
return {
|
return {
|
||||||
...state,
|
...state,
|
||||||
pOut: state.pOut + toAdd,
|
// A falsy `toAdd` (undefined or "") leaves pOut unchanged, so callers can pass an
// optional diacritic straight through without a separate check.
pOut: toAdd ? (state.pOut + toAdd) : state.pOut,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Conditionally advances the accumulator past a word-final "ه".
// Used as the last step of the short-vowel branch in processPhoneme, after the
// diacritic has been added.
// @param state - the current accumulator; only `state.pIn` is inspected here
// @returns advanceP(state) when the check below passes, otherwise `state` itself, untouched
// NOTE(review): the name says "Hamza" but the letter compared against is "ه" — confirm the intended naming.
function advanceIfReachedEndingHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
|
||||||
|
// True when the next input letter is "ه" and it is the last letter of the word:
// either nothing follows it in pIn or the following character is a space.
if (state.pIn[0] === "ه" && (!state.pIn[1] || state.pIn[1] === " ")) {
|
||||||
|
// advanceP is called with its default step count; presumably it consumes that one letter — confirm in advanceP.
return advanceP(state);
|
||||||
|
}
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
|
@ -9112,6 +9112,11 @@ raf@^3.4.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
performance-now "^2.1.0"
|
performance-now "^2.1.0"
|
||||||
|
|
||||||
|
rambda@^6.7.0:
|
||||||
|
version "6.7.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/rambda/-/rambda-6.7.0.tgz#50322efdd23a108b61eb6ac4e0868d10dd95b4aa"
|
||||||
|
integrity sha512-qg2atEwhAS4ipYoNfggkIP7qBUbY2OqdW17n25VqZIz5YC1MIwSpIToQ7XacvqSCZz16efM8Y8QKLx+Js1Sybg==
|
||||||
|
|
||||||
randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0:
|
randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0:
|
||||||
version "2.1.0"
|
version "2.1.0"
|
||||||
resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
|
resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
|
||||||
|
|
Loading…
Reference in New Issue