From 6053d11bc0a512708a14a3280fc4c31c42a829de Mon Sep 17 00:00:00 2001 From: Bill D Date: Fri, 7 May 2021 14:48:33 +0300 Subject: [PATCH] more on upcoming diacritics engine / cool functional refactor --- package.json | 3 +- src/lib/diacritics.test.ts | 46 ++++++++++++++++++++++++++- src/lib/diacritics.ts | 64 +++++++++++++++++++++++++++----------- yarn.lock | 5 +++ 4 files changed, 97 insertions(+), 21 deletions(-) diff --git a/package.json b/package.json index 05fb0ef..4a71598 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,8 @@ }, "dependencies": { "classnames": "^2.2.6", - "pbf": "^3.2.1" + "pbf": "^3.2.1", + "rambda": "^6.7.0" }, "devDependencies": { "@fortawesome/fontawesome-free": "^5.15.2", diff --git a/src/lib/diacritics.test.ts b/src/lib/diacritics.test.ts index ae2ff88..512d5cb 100644 --- a/src/lib/diacritics.test.ts +++ b/src/lib/diacritics.test.ts @@ -258,7 +258,7 @@ const diacriticsTest: Array<{ }, out: "اِیسار", }, - // double consonant + // double consonant / tashdeed { in: { p: "بتن", @@ -266,6 +266,50 @@ const diacriticsTest: Array<{ }, out: "ب" + zwar + "ت" + tashdeed + zwar + "ن", }, + { + in: { + p: "بتطن", + f: "battan", + }, + out: "ب" + zwar + "ت" + sukun + "ط" + zwar + "ن", + }, + // vowel endings working + { + in: { + p: "بته", + f: "bata", + }, + out: "بَتَه", + }, + { + in: { + p: "بته", + f: "bati", + }, + out: "بَتِه", + }, + { + in: { + p: "پرمختیا", + f: "parmakhtyaa", + }, + out: "پَرْمَخْتْیا", + }, + // { + // in: { + // p: "پته", + // f: "patta", + // }, + // out: "پَتّه", + // }, + // get ayn stuff working + // { + // in: { + // p: "اعتصاب شکن", + // f: "itisaabshikan", + // }, + // out: "اِعتِصاب شِکَن", + // }, // avoid false double consonant { in: { diff --git a/src/lib/diacritics.ts b/src/lib/diacritics.ts index a823dde..b80b00e 100644 --- a/src/lib/diacritics.ts +++ b/src/lib/diacritics.ts @@ -9,6 +9,7 @@ import * as T from "../types"; import { removeAccents } from "./accent-helpers"; import { firstPhonetics } from "./p-text-helpers"; +import { pipe } from "rambda"; const zwar = "َ"; const zwarakey = "ٙ"; @@ -341,36 +342,54 @@ function processPhoneme( const currentPLetter = state.pIn[0]; const nextPLetter = state.pIn[1]; const isBeginningOfWord = state.pOut === "" || prevPLetter === " "; + // const isEndOfWord = !nextPLetter || nextPLetter === " "; const phonemeInfo = phonemeTable[phoneme]; const previousPhoneme = i > 0 && phonemes[i-1]; const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]]; + // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1]; + // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined; const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant); - const needsTashdeed = doubleConsonant && (previousPhoneme === phoneme); - const needsSukun = doubleConsonant && (previousPhoneme !== phoneme); + const needsTashdeed = !isBeginningOfWord && doubleConsonant && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter); + const needsSukun = doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter)); + const sukunOrDiacritic = (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : ""); - if (needsTashdeed) { - return addP(state, tashdeed); - } + // if it's not an exception (TODO) + // it must be one of the following 5 possibilities + // 1. beginning a word with a long vowel if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) { throw Error("phonetics error - needs alef prefix"); } - const ns = advanceP(state); - const ns2 = phonemeInfo.diacritic ? addP(ns, phonemeInfo.diacritic) : ns; - return advanceP(ns2); + return pipe( + advanceP, + addP(phonemeInfo.diacritic), + advanceP, + )(state); + // 2. beginning a word with something else } else if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { - const ns = advanceP(state); - return addP(ns, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : "")); + return pipe( + advanceP, + addP(sukunOrDiacritic), + )(state); + // 3. double consonant to be marked with tashdeed + } else if (needsTashdeed) { + return addP(tashdeed)(state); + // 4. direct match of phoneme / P letter } else if (phonemeInfo.matches?.includes(currentPLetter)) { - const ns = addP(state, (needsSukun ? sukun : phonemeInfo.diacritic ? phonemeInfo.diacritic : "")); - return advanceP(ns); - } - - if (phonemeInfo.diacritic) { - return addP(state, phonemeInfo.diacritic); + return pipe( + addP(sukunOrDiacritic), + advanceP, + )(state); + // 5. just a diacritic for short vowel + } else if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { + return pipe( + addP(phonemeInfo.diacritic), + advanceIfReachedEndingHamza, + )(state); } + // anything that gets to this point is a failure/error // console.log(state); throw new Error("phonetics error"); } @@ -391,9 +410,16 @@ function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumu } } -function addP(state: DiacriticsAccumulator, toAdd: string): DiacriticsAccumulator { +const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => { return { ...state, - pOut: state.pOut + toAdd, + pOut: toAdd ? (state.pOut + toAdd) : state.pOut, }; -} \ No newline at end of file +} + +function advanceIfReachedEndingHamza(state: DiacriticsAccumulator): DiacriticsAccumulator { + if (state.pIn[0] === "ه" && (!state.pIn[1] || state.pIn[1] === " ")) { + return advanceP(state); + } + return state; +} diff --git a/yarn.lock b/yarn.lock index 88ca6e3..abbffd0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9112,6 +9112,11 @@ raf@^3.4.1: dependencies: performance-now "^2.1.0" +rambda@^6.7.0: + version "6.7.0" + resolved "https://registry.yarnpkg.com/rambda/-/rambda-6.7.0.tgz#50322efdd23a108b61eb6ac4e0868d10dd95b4aa" + integrity sha512-qg2atEwhAS4ipYoNfggkIP7qBUbY2OqdW17n25VqZIz5YC1MIwSpIToQ7XacvqSCZz16efM8Y8QKLx+Js1Sybg== + randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"