diff --git a/src/components/src/blocks/Block.tsx b/src/components/src/blocks/Block.tsx
index ab1f7f9..f58443a 100644
--- a/src/components/src/blocks/Block.tsx
+++ b/src/components/src/blocks/Block.tsx
@@ -1,134 +1,226 @@
import * as T from "../../../types";
import classNames from "classnames";
+import { getEnglishFromRendered } from "../../../lib/src/phrase-building/np-tools";
import {
- getEnglishFromRendered,
-} from "../../../lib/src/phrase-building/np-tools";
-import { getEnglishPersonInfo, getEnglishParticipleInflection, getEnglishGenNumInfo } from "../../../lib/src/misc-helpers";
+ getEnglishPersonInfo,
+ getEnglishParticipleInflection,
+ getEnglishGenNumInfo,
+} from "../../../lib/src/misc-helpers";
import { useState } from "react";
import { getLength } from "../../../lib/src/p-text-helpers";
import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal";
import { negativeParticle } from "../../../lib/src/grammar-units";
-function Block({ opts, block, king, script }: {
- opts: T.TextOptions,
- block: T.Block,
- king?: "subject" | "object" | undefined,
- script: "p" | "f";
+function Block({
+ opts,
+ block,
+ king,
+ script,
+}: {
+ opts: T.TextOptions;
+ block: T.Block;
+ king?: "subject" | "object" | undefined;
+ script: "p" | "f";
}) {
- if ("equative" in block.block) {
- return ;
- }
- if (block.block.type === "AP") {
- const english = getEnglishFromRendered(block.block);
- return {block.block}
- }
- if (block.block.type === "subjectSelection") {
- const role = king === "subject" ? "king" : king === "object" ? "servant" : undefined;
- return
- }
- if (block.block.type === "objectSelection") {
- const role = king === "object" ? "king" : king === "subject" ? "servant" : undefined;
- return ;
- }
- if (block.block.type === "predicateSelection") {
- const english = getEnglishFromRendered(block.block.selection);
- return
-
Predicate
- {block.block.selection.type === "complement"
- ?
- :
{block.block.selection}}
+ if ("equative" in block.block) {
+ return (
+
+ );
+ }
+ if (block.block.type === "AP") {
+ const english = getEnglishFromRendered(block.block);
+ return (
+
+ {block.block}
+
+ );
+ }
+ if (block.block.type === "subjectSelection") {
+ const role =
+ king === "subject" ? "king" : king === "object" ? "servant" : undefined;
+ return (
+
+ );
+ }
+ if (block.block.type === "objectSelection") {
+ const role =
+ king === "object" ? "king" : king === "subject" ? "servant" : undefined;
+ return (
+
+ );
+ }
+ if (block.block.type === "predicateSelection") {
+ const english = getEnglishFromRendered(block.block.selection);
+ return (
+
+
+ Predicate
- }
- if (block.block.type === "negative") {
- return
- }
- if (block.block.type === "PH") {
- return
;
- }
- if (block.block.type === "VB") {
- return
;
- }
- if (block.block.type === "complement") {
- return
- }
- if (block.block.type === "NComp") {
- return
- }
- return
+ {block.block.selection.type === "complement" ? (
+
+ ) : (
+
+ {block.block.selection}
+
+ )}
+
+ );
+ }
+ if (block.block.type === "negative") {
+ return (
+
+ );
+ }
+ if (block.block.type === "PH") {
+ return
;
+ }
+ if (block.block.type === "VB") {
+ return
;
+ }
+ if (block.block.type === "complement") {
+ return (
+
+ );
+ }
+ if (block.block.type === "NComp") {
+ return
;
+ }
+ return
;
}
export default Block;
-function Border({ children, extraClassName, padding }: { children: JSX.Element | JSX.Element[] | string, extraClassName?: string, padding?: string }) {
- return
- <>{children}>
+ <>{children}>
+ );
}
-function VBBlock({ opts, block, script }: {
- opts: T.TextOptions,
- script: "p" | "f",
- block: T.VBBasic | T.VBGenNum | (T.VBBasic & {
+function VBBlock({
+ opts,
+ block,
+ script,
+}: {
+ opts: T.TextOptions;
+ script: "p" | "f";
+ block:
+ | T.VBBasic
+ | T.VBGenNum
+ | (T.VBBasic & {
person: T.Person;
- }),
+ });
}) {
- const [length, setLength] = useState
("long");
- const [version, setVersion] = useState(0);
- const ps = getLength(block.ps, length);
- function changeVersion() {
- setVersion(o => (o + 1) % ps.length);
- }
- function changeLength() {
- setLength(o => (
- o === "long"
- ? "short"
- : o === "short" && "mini" in block.ps
- ? "mini"
- : "long"
- ));
- }
- const infInfo = "gender" in block
- ? getEnglishGenNumInfo(block.gender, block.number)
- : "person" in block
- ? getEnglishPersonInfo(block.person, "short")
- : "";
- return
-
- {"long" in block.ps &&
{length}
}
- {ps.length > 1 &&
v. {version + 1}
}
-
-
- <>
- {ps[version][script]}
- >
-
-
VBlock
-
{infInfo}
+ const [length, setLength] = useState
("long");
+ const [version, setVersion] = useState(0);
+ const ps = getLength(block.ps, length);
+ function changeVersion() {
+ setVersion((o) => (o + 1) % ps.length);
+ }
+ function changeLength() {
+ setLength((o) =>
+ o === "long"
+ ? "short"
+ : o === "short" && "mini" in block.ps
+ ? "mini"
+ : "long"
+ );
+ }
+ const infInfo =
+ "gender" in block
+ ? getEnglishGenNumInfo(block.gender, block.number)
+ : "person" in block
+ ? getEnglishPersonInfo(block.person, "short")
+ : "";
+ return (
+
+
+ {"long" in block.ps && (
+
+ {length}
+
+ )}
+ {ps.length > 1 && (
+
+ v. {version + 1}
+
+ )}
+
+
+ <>{ps[version][script]}>
+
+
VBlock
+
{infInfo}
+ );
}
-function WeldedBlock({ opts, welded, script }: {
- opts: T.TextOptions,
- script: "p" | "f",
- welded: T.Welded,
+function WeldedBlock({
+ opts,
+ welded,
+ script,
+}: {
+ opts: T.TextOptions;
+ script: "p" | "f";
+ welded: T.Welded;
}) {
- return
-
- {welded.left.type === "NComp"
- ?
- : welded.left.type === "VB"
- ?
- : }
-
-
+ return (
+
+
+ {welded.left.type === "NComp" ? (
+
+ ) : welded.left.type === "VB" ? (
+
+ ) : (
+
+ )}
+
+
+ );
}
// function VerbSBlock({ opts, v, script }: {
@@ -192,19 +284,22 @@ function WeldedBlock({ opts, welded, script }: {
//
// }
-function PerfHeadBlock({ opts, ps, script }: {
- opts: T.TextOptions,
- ps: T.PsString,
- script: "p" | "f",
-
+function PerfHeadBlock({
+ opts,
+ ps,
+ script,
+}: {
+ opts: T.TextOptions;
+ ps: T.PsString;
+ script: "p" | "f";
}) {
- return
-
- {ps[script]}
-
-
perf. head
-
{'\u00A0'}
-
;
+ return (
+
+
{ps[script]}
+
perf. head
+
{"\u00A0"}
+
+ );
}
// function ModalAuxBlock({ opts, aux, script }: {
@@ -222,313 +317,498 @@ function PerfHeadBlock({ opts, ps, script }: {
// ;
// }
-function NegBlock({ opts, imperative, script }: {
- opts: T.TextOptions,
- imperative: boolean,
- script: "p" | "f",
+function NegBlock({
+ opts,
+ imperative,
+ script,
+}: {
+ opts: T.TextOptions;
+ imperative: boolean;
+ script: "p" | "f";
}) {
- return
-
- {negativeParticle[imperative ? "imperative" : "nonImperative"][script]}
-
-
Neg.
-
{imperative ? "don't" : "not"}
-
;
+ return (
+
+
+ {negativeParticle[imperative ? "imperative" : "nonImperative"][script]}
+
+
Neg.
+
{imperative ? "don't" : "not"}
+
+ );
}
-function EquativeBlock({ opts, eq, script }: {
- opts: T.TextOptions,
- eq: T.EquativeRendered,
- script: "p" | "f",
+function EquativeBlock({
+ opts,
+ eq,
+ script,
+}: {
+ opts: T.TextOptions;
+ eq: T.EquativeRendered;
+ script: "p" | "f";
}) {
- const [length, setLength] = useState("long");
- function changeLength() {
- setLength(o => (
- o === "long"
- ? "short"
- : o === "short" && "mini" in eq.ps
- ? "mini"
- : "long"
- ));
- }
- return
- {"long" in eq.ps &&
{length}
}
-
- {getLength(eq.ps, length)[0][script]}
-
-
Equative
-
{getEnglishPersonInfo(eq.person, "short")}
-
;
+ const [length, setLength] = useState("long");
+ function changeLength() {
+ setLength((o) =>
+ o === "long"
+ ? "short"
+ : o === "short" && "mini" in eq.ps
+ ? "mini"
+ : "long"
+ );
+ }
+ return (
+
+ {"long" in eq.ps && (
+
+ {length}
+
+ )}
+
{getLength(eq.ps, length)[0][script]}
+
Equative
+
{getEnglishPersonInfo(eq.person, "short")}
+
+ );
}
-function SubjectBlock({ opts, np, role, script }: {
- opts: T.TextOptions,
- np: T.Rendered,
- role: "king" | "servant" | undefined,
- script: "p" | "f",
+function SubjectBlock({
+ opts,
+ np,
+ role,
+ script,
+}: {
+ opts: T.TextOptions;
+ np: T.Rendered;
+ role: "king" | "servant" | undefined;
+ script: "p" | "f";
}) {
- const english = getEnglishFromRendered(np);
- return
-
Subject{role ? roleIcon[role] : ""}
-
{np}
-
;
+ const english = getEnglishFromRendered(np);
+ return (
+
+
+ Subject
+ {role ? roleIcon[role] : ""}
+
+
+ {np}
+
+
+ );
}
-function ObjectBlock({ opts, obj, role, script }: {
- opts: T.TextOptions,
- obj: T.Rendered["selection"],
- role: "king" | "servant" | undefined,
- script: "p" | "f",
+function ObjectBlock({
+ opts,
+ obj,
+ role,
+ script,
+}: {
+ opts: T.TextOptions;
+ obj: T.Rendered["selection"];
+ role: "king" | "servant" | undefined;
+ script: "p" | "f";
}) {
- if (typeof obj !== "object") {
- return null;
- }
- const english = getEnglishFromRendered(obj);
- return
-
Object{role ? roleIcon[role] : ""}
-
{obj}
-
;
+ if (typeof obj !== "object") {
+ return null;
+ }
+ const english = getEnglishFromRendered(obj);
+ return (
+
+
+ Object
+ {role ? roleIcon[role] : ""}
+
+
+ {obj}
+
+
+ );
}
-function NCompBlock({ opts, comp, script }: {
- script: "p" | "f",
- opts: T.TextOptions,
- comp: T.Comp,
+function NCompBlock({
+ opts,
+ comp,
+ script,
+}: {
+ script: "p" | "f";
+ opts: T.TextOptions;
+ comp: T.Comp;
}) {
- return
-
- {comp.ps[script]}
-
- {comp.type === "AdjComp"
- ?
adj. {getEnglishGenNumInfo(comp.gender, comp.number)}
- :
TODO
}
-
- todo
- {/* {adj.e} */}
-
-
;
+ return (
+
+
{comp.ps[script]}
+ {comp.type === "AdjComp" && (
+
+
+ adj.{" "}
+
+ {getEnglishGenNumInfo(comp.gender, comp.number)}
+
+
+
{comp.ps.e}
+
+ )}
+
+ );
}
-function ComplementBlock({ opts, comp, script, inside }: {
- script: "p" | "f",
- opts: T.TextOptions,
- comp: T.Rendered | T.Rendered["selection"],
- inside?: boolean,
+function ComplementBlock({
+ opts,
+ comp,
+ script,
+ inside,
+}: {
+ script: "p" | "f";
+ opts: T.TextOptions;
+ comp:
+ | T.Rendered
+ | T.Rendered["selection"];
+ inside?: boolean;
}) {
- function AdjectiveBlock({ opts, adj }: {
- opts: T.TextOptions,
- adj: T.Rendered,
- }) {
- return
-
- {adj.ps[0][script]}
-
-
Adj. ({getEnglishParticipleInflection(adj.person, "short")})
-
{adj.e}
-
;
- }
+ function AdjectiveBlock({
+ opts,
+ adj,
+ }: {
+ opts: T.TextOptions;
+ adj: T.Rendered;
+ }) {
+ return (
+
+
{adj.ps[0][script]}
+
+ Adj.{" "}
+
+ ({getEnglishParticipleInflection(adj.person, "short")})
+
+
+
{adj.e}
+
+ );
+ }
- function LocAdvBlock({ opts, adv }: {
- opts: T.TextOptions,
- adv: T.Rendered,
- }) {
- return
-
- {adv.ps[0][script]}
-
-
Loc. Adv.
-
{adv.e}
-
;
- }
- return
-
Complement
- {comp.type === "adjective"
- ?
- : comp.type === "loc. adv."
- ?
- : comp.type === "noun"
- ?
- : comp.type === "unselected"
- ?
-
- ____
-
- {!inside && <>
-
-
{comp.e}
- >}
-
- :
-
-
Sandwich
-
{comp.e}
-
}
-
;
+ function LocAdvBlock({
+ opts,
+ adv,
+ }: {
+ opts: T.TextOptions;
+ adv: T.Rendered;
+ }) {
+ return (
+
+
{adv.ps[0][script]}
+
Loc. Adv.
+
{adv.e}
+
+ );
+ }
+ return (
+
+
Complement
+ {comp.type === "adjective" ? (
+
+ ) : comp.type === "loc. adv." ? (
+
+ ) : comp.type === "noun" ? (
+
+ ) : comp.type === "unselected" ? (
+
+
____
+ {!inside && (
+ <>
+
+
{comp.e}
+ >
+ )}
+
+ ) : (
+
+
+
Sandwich
+
{comp.e}
+
+ )}
+
+ );
}
-export function APBlock({ opts, children, english, script }: {
- opts: T.TextOptions,
- children: T.Rendered,
- english?: string,
- script: "p" | "f",
+export function APBlock({
+ opts,
+ children,
+ english,
+ script,
+}: {
+ opts: T.TextOptions;
+ children: T.Rendered;
+ english?: string;
+ script: "p" | "f";
}) {
- const ap = children;
- if (ap.selection.type === "adverb") {
- return
-
- {ap.selection.ps[0][script]}
-
-
AP
-
{english}
-
;
- }
- return
-
+ const ap = children;
+ if (ap.selection.type === "adverb") {
+ return (
+
+
{ap.selection.ps[0][script]}
AP
{english}
-
;
-}
-
-function Sandwich({ opts, sandwich, script }: {
- opts: T.TextOptions,
- sandwich: T.Rendered
>,
- script: "p" | "f",
-}) {
- return
-
Sandwich 🥪
-
-
-
{sandwich.inside.selection.type !== "pronoun" ? sandwich.inside.selection.possesor : undefined}
-
{sandwich.before ? sandwich.before.f : ""}
-
- {sandwich.inside}
-
-
{sandwich.after ? sandwich.after.f : ""}
-
-
-
;
-}
-
-function CompNounBlock({ opts, noun, script }: {
- opts: T.TextOptions,
- noun: T.Rendered,
- script: "p" | "f",
-}) {
- return
-
- {noun.ps[0][script]}
-
-
- Comp. Noun
-
-
{noun.e}
+
+ );
+ }
+ return (
+
+ );
}
-export function NPBlock({ opts, children, inside, english, script }: {
- opts: T.TextOptions,
- children: T.Rendered,
- inside?: boolean,
- english?: string,
- script: "p" | "f",
+function Sandwich({
+ opts,
+ sandwich,
+ script,
+}: {
+ opts: T.TextOptions;
+ sandwich: T.Rendered>;
+ script: "p" | "f";
}) {
- const np = children;
- const hasPossesor = !!(np.selection.type !== "pronoun" && np.selection.possesor && !np.selection.possesor.shrunken);
- const elements = [
- ...!inside ? [{np.selection.type !== "pronoun" ? np.selection.possesor : undefined}] : [],
- {np.selection.adjectives},
- {np.selection.ps[0][script]}
,
- ];
- const el = script === "p" ? elements.reverse() : elements;
- return
-
+ Sandwich 🥪
+
+
- {el}
-
-
- NP
- {!inside ? <>
- {` `}
-
({getEnglishPersonInfo(np.selection.person, "short")})
- > : <>>}
+
+ {sandwich.inside.selection.type !== "pronoun"
+ ? sandwich.inside.selection.possesor
+ : undefined}
+
+
+ {sandwich.before ? sandwich.before.f : ""}
+
+
+
+ {sandwich.inside}
+
+
+
+ {sandwich.after ? sandwich.after.f : ""}
+
- {!inside &&
{english}}
+
+ );
}
-function Possesors({ opts, children, script }: {
- opts: T.TextOptions,
- children: { shrunken: boolean, np: T.Rendered } | undefined,
- script: "p" | "f",
+function CompNounBlock({
+ opts,
+ noun,
+ script,
+}: {
+ opts: T.TextOptions;
+ noun: T.Rendered;
+ script: "p" | "f";
}) {
- if (!children) {
- return null;
- }
- if (children.shrunken) {
- return null;
- }
- const contraction = checkForContraction(children.np, script);
- return
+
+ {noun.ps[0][script]}
+
+
Comp. Noun
+
{noun.e}
+
+ );
+}
+
+export function NPBlock({
+ opts,
+ children,
+ inside,
+ english,
+ script,
+}: {
+ opts: T.TextOptions;
+ children: T.Rendered;
+ inside?: boolean;
+ english?: string;
+ script: "p" | "f";
+}) {
+ const np = children;
+ const hasPossesor = !!(
+ np.selection.type !== "pronoun" &&
+ np.selection.possesor &&
+ !np.selection.possesor.shrunken
+ );
+ const elements = [
+ ...(!inside
+ ? [
+
+ {np.selection.type !== "pronoun"
+ ? np.selection.possesor
+ : undefined}
+ ,
+ ]
+ : []),
+
+ {np.selection.adjectives}
+ ,
+
+ {" "}
+ {np.selection.ps[0][script]}
+
,
+ ];
+ const el = script === "p" ? elements.reverse() : elements;
+ return (
+
+
+ {el}
+
+
+ NP
+ {!inside ? (
+ <>
+ {` `}
+
+ ({getEnglishPersonInfo(np.selection.person, "short")})
+
+ >
+ ) : (
+ <>>
+ )}
+
+ {!inside &&
{english}}
+
+ );
+}
+
+function Possesors({
+ opts,
+ children,
+ script,
+}: {
+ opts: T.TextOptions;
+ children: { shrunken: boolean; np: T.Rendered } | undefined;
+ script: "p" | "f";
+}) {
+ if (!children) {
+ return null;
+ }
+ if (children.shrunken) {
+ return null;
+ }
+ const contraction = checkForContraction(children.np, script);
+ return (
+
- {children.np.selection.type !== "pronoun" &&
{children.np.selection.possesor}}
-
- {contraction &&
({contraction})
}
-
-
{script === "p" ? "د" : "du"}
-
- {children.np}
-
-
-
+ }}
+ >
+ {children.np.selection.type !== "pronoun" && (
+
+ {children.np.selection.possesor}
+
+ )}
+
+ {contraction &&
({contraction})
}
+
+
{script === "p" ? "د" : "du"}
+
+
+ {children.np}
+
+
+
+ );
}
-function Adjectives({ opts, children, script }: {
- opts: T.TextOptions,
- children: T.Rendered
[] | undefined,
- script: "p" | "f",
+/**
+ * Produces the text for a list of rendered adjectives in the requested script.
+ *
+ * Each adjective contributes `ps[0][script]`; the pieces are joined with single
+ * spaces and followed by one trailing space. When `script` is "p" the
+ * adjectives are emitted in reverse order. Returns `null` when `children` is
+ * undefined.
+ */
+function Adjectives({
+  opts,
+  children,
+  script,
+}: {
+  opts: T.TextOptions;
+  children: T.Rendered[] | undefined;
+  script: "p" | "f";
 }) {
- if (!children) {
- return null;
- }
- const c = script === "p"
- ? children.reverse()
- : children;
- return
- {c.map(a => a.ps[0][script]).join(" ")}{` `}
+  if (!children) {
+    return null;
+  }
+  // Array.prototype.reverse() reverses in place. `children` is a prop owned by
+  // the caller, so reverse a copy; otherwise the caller's array would be
+  // flipped again on every render.
+  const c = script === "p" ? [...children].reverse() : children;
+  return (
+
+      {c.map((a) => a.ps[0][script]).join(" ")}
+      {` `}
+  );
 }
function SubText({ children: e }: { children: string | undefined }) {
- return {e ? e : ""}
;
+ }}
+ >
+ {e ? e : ""}
+
+ );
}
-function checkForContraction(np: T.Rendered, script: "p" | "f"): string | undefined {
- if (np.selection.type !== "pronoun") return undefined;
- if (np.selection.person === T.Person.FirstSingMale || np.selection.person === T.Person.FirstSingFemale) {
- return script === "f" ? "zmaa" : "زما";
- }
- if (np.selection.person === T.Person.SecondSingMale || np.selection.person === T.Person.SecondSingFemale) {
- return script === "f" ? "staa" : "ستا";
- }
- if (np.selection.person === T.Person.FirstPlurMale || np.selection.person === T.Person.FirstPlurFemale) {
- return script === "f" ? "zmoonG" : "زمونږ";
- }
- if (np.selection.person === T.Person.SecondPlurMale || np.selection.person === T.Person.SecondPlurFemale) {
- return script === "f" ? "staaso" : "ستاسو";
- }
- return undefined;
+/**
+ * Finds the contraction string for a pronoun NP, if it has one.
+ *
+ * Non-pronoun selections never produce a contraction. For pronouns, the first
+ * and second persons (singular and plural) each map to a fixed string; every
+ * other person yields `undefined`.
+ *
+ * @param np - rendered NP whose `selection` is inspected
+ * @param script - `"f"` returns the phonetic spelling; any other value returns
+ *   the Pashto-script spelling
+ * @returns the contraction, or `undefined` when there is none
+ */
+function checkForContraction(
+  np: T.Rendered,
+  script: "p" | "f"
+): string | undefined {
+  const { selection } = np;
+  if (selection.type !== "pronoun") {
+    return undefined;
+  }
+  const inScript = (f: string, p: string): string => (script === "f" ? f : p);
+  // the male and female variants of a person share one contraction
+  switch (selection.person) {
+    case T.Person.FirstSingMale:
+    case T.Person.FirstSingFemale:
+      return inScript("zmaa", "زما");
+    case T.Person.SecondSingMale:
+    case T.Person.SecondSingFemale:
+      return inScript("staa", "ستا");
+    case T.Person.FirstPlurMale:
+    case T.Person.FirstPlurFemale:
+      return inScript("zmoonG", "زمونږ");
+    case T.Person.SecondPlurMale:
+    case T.Person.SecondPlurFemale:
+      return inScript("staaso", "ستاسو");
+    default:
+      return undefined;
+  }
 }
-
diff --git a/src/lib/src/diacritics-helpers.test.ts b/src/lib/src/diacritics-helpers.test.ts
index a827ee0..9e4f247 100644
--- a/src/lib/src/diacritics-helpers.test.ts
+++ b/src/lib/src/diacritics-helpers.test.ts
@@ -1,131 +1,133 @@
import {
- splitFIntoPhonemes,
- last,
- addP,
- lastNonWhitespace,
- advanceP,
- reverseP,
- overwriteP,
- advanceForHamza,
- advanceForHamzaMid,
+ splitFIntoPhonemes,
+ last,
+ addP,
+ lastNonWhitespace,
+ reverseP,
} from "./diacritics-helpers";
const phonemeSplits: Array<{
- in: string,
- out: string[],
+ in: string;
+ out: string[];
}> = [
- {
- in: "kor",
- out: ["k", "o", "r"],
- },
- {
- in: "raaghey",
- out: ["r", "aa", "gh", "ey"],
- },
- {
- in: "ist'imaal",
- out: ["i", "s", "t", "'", "i", "m", "aa", "l"],
- },
- {
- in: "hatsa",
- out: ["h", "a", "ts", "a"],
- },
- {
- in: "ba",
- out: ["b", "a"],
- },
- {
- in: "peydáa",
- out: ["p", "ey", "d", "aa"],
- },
- {
- in: "be kaar",
- out: ["b", "e", "k", "aa", "r"],
- },
- {
- in: "raadzeyy",
- out: ["r", "aa", "dz", "eyy"],
- },
- {
- in: "badanuy ??",
- out: ["b", "a", "d", "a", "n", "uy"],
- },
- {
- in: "tur ... pore",
- out: ["t", "u", "r", "p", "o", "r", "e"],
- },
- {
- in: "daar-Ul-iqaama",
- out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
- },
+ {
+ in: "kor",
+ out: ["k", "o", "r"],
+ },
+ {
+ in: "raaghay",
+ out: ["r", "aa", "gh", "ay"],
+ },
+ {
+ in: "ist'imaal",
+ out: ["i", "s", "t", "'", "i", "m", "aa", "l"],
+ },
+ {
+ in: "hatsa",
+ out: ["h", "a", "ts", "a"],
+ },
+ {
+ in: "ba",
+ out: ["b", "a"],
+ },
+ {
+ in: "paydáa",
+ out: ["p", "ay", "d", "aa"],
+ },
+ {
+ in: "be kaar",
+ out: ["b", "e", "k", "aa", "r"],
+ },
+ {
+ in: "raadzey",
+ out: ["r", "aa", "dz", "ey"],
+ },
+ {
+ in: "badanuy ??",
+ out: ["b", "a", "d", "a", "n", "uy"],
+ },
+ {
+ in: "tur ... pore",
+ out: ["t", "u", "r", "p", "o", "r", "e"],
+ },
+ {
+ in: "daar-Ul-iqaama",
+ out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
+ },
];
phonemeSplits.forEach((s) => {
- test(`${s.in} should split properly`, () => {
- const result = splitFIntoPhonemes(s.in);
- expect(result).toEqual(s.out);
- });
+ test(`${s.in} should split properly`, () => {
+ const result = splitFIntoPhonemes(s.in);
+ expect(result).toEqual(s.out);
+ });
});
const badPhonetics: Array<{
- in: string,
- problem: string,
+ in: string;
+ problem: string;
}> = [
- {
- in: "acar",
- problem: "c",
- },
- {
- in: "a7am",
- problem: "7",
- },
+ {
+ in: "acar",
+ problem: "c",
+ },
+ {
+ in: "a7am",
+ problem: "7",
+ },
];
test("bad phonetic characters should throw an error", () => {
- badPhonetics.forEach((s) => {
- expect(() => {
- splitFIntoPhonemes(s.in);
- }).toThrow(`illegal phonetic character: ${s.problem}`);
- });
+ badPhonetics.forEach((s) => {
+ expect(() => {
+ splitFIntoPhonemes(s.in);
+ }).toThrow(`illegal phonetic character: ${s.problem}`);
+ });
});
test("last should work", () => {
- expect(last("this")).toBe("s");
+ expect(last("this")).toBe("s");
});
test("addP should work", () => {
- expect(addP("ت")({ pIn: "", pOut: "کر" })).toEqual({
- pIn: "",
- pOut: "کرت",
- });
+ expect(addP("ت")({ pIn: "", pOut: "کر" })).toEqual({
+ pIn: "",
+ pOut: "کرت",
+ });
});
test("lastNonWhiteSpace should work", () => {
- expect(lastNonWhitespace("تورن")).toBe("ن");
- expect(lastNonWhitespace("وست .. ")).toBe("ت");
- expect(lastNonWhitespace("د ... ")).toBe("د");
+ expect(lastNonWhitespace("تورن")).toBe("ن");
+ expect(lastNonWhitespace("وست .. ")).toBe("ت");
+ expect(lastNonWhitespace("د ... ")).toBe("د");
});
test("reverseP should work", () => {
- expect(reverseP({
- pIn: "کور",
- pOut: "تور ",
- })).toEqual({
- pIn: " کور",
- pOut: "تور",
- });
- expect(reverseP({
- pIn: "کور",
- pOut: "تور ... ",
- })).toEqual({
- pIn: " ... کور",
- pOut: "تور",
- });
- expect(reverseP({
- pIn: "کور",
- pOut: "تور . ",
- })).toEqual({
- pIn: " . کور",
- pOut: "تور",
- });
-})
\ No newline at end of file
+ expect(
+ reverseP({
+ pIn: "کور",
+ pOut: "تور ",
+ })
+ ).toEqual({
+ pIn: " کور",
+ pOut: "تور",
+ });
+ expect(
+ reverseP({
+ pIn: "کور",
+ pOut: "تور ... ",
+ })
+ ).toEqual({
+ pIn: " ... کور",
+ pOut: "تور",
+ });
+ expect(
+ reverseP({
+ pIn: "کور",
+ pOut: "تور . ",
+ })
+ ).toEqual({
+ pIn: " . کور",
+ pOut: "تور",
+ });
+});
diff --git a/src/lib/src/diacritics-helpers.ts b/src/lib/src/diacritics-helpers.ts
index ee6c487..347eb40 100644
--- a/src/lib/src/diacritics-helpers.ts
+++ b/src/lib/src/diacritics-helpers.ts
@@ -8,31 +8,62 @@
import { removeAccents } from "./accent-helpers";
-export type DiacriticsAccumulator = { pIn: string, pOut: string };
+export type DiacriticsAccumulator = { pIn: string; pOut: string };
-type Consonant = "b" | "p" | "t" | "T" | "s" | "j" | "ch" | "kh" | "ts" | "dz" | "d" | "D" | "r" | "R" | "z" | "jz" | "G" | "sh" | "x" | "gh" | "f" | "q" | "k" | "g" | "l" | "m" | "n" | "N" | "h" | "w" | "y";
-type Ain = "'"
-type JoiningVowel = "-i-" | "-U-" | "-Ul-";
-type LongVowel = "aa" | "ee" | "e" | "oo" | "o" | "ey" | "uy" | "eyy";
+type Consonant =
+ | "b"
+ | "p"
+ | "t"
+ | "T"
+ | "s"
+ | "j"
+ | "ch"
+ | "kh"
+ | "ts"
+ | "dz"
+ | "d"
+ | "D"
+ | "r"
+ | "R"
+ | "z"
+ | "jz"
+ | "G"
+ | "sh"
+ | "x"
+ | "gh"
+ | "f"
+ | "q"
+ | "k"
+ | "g"
+ | "l"
+ | "m"
+ | "n"
+ | "N"
+ | "h"
+ | "w"
+ | "y";
+type Ain = "'";
+type JoiningVowel = "-i-" | "-U-" | "-Ul-";
+type LongVowel = "aa" | "ee" | "e" | "oo" | "o" | "ay" | "uy" | "ey";
type ShortVowel = "a" | "i" | "u" | "U";
export type Phoneme = Consonant | Ain | LongVowel | ShortVowel | JoiningVowel;
type PhonemeInfo = {
- matches?: string[],
- beginningMatches?: string[],
- endingMatches?: string[],
- consonant?: true,
- diacritic?: string,
- endingOnly?: true,
- takesSukunOnEnding?: true,
- longVowel?: true,
- canStartWithAynBefore?: true,
- useEndingDiacritic?: true,
- ainBlendDiacritic?: string,
-}
+ matches?: string[];
+ beginningMatches?: string[];
+ endingMatches?: string[];
+ consonant?: true;
+ diacritic?: string;
+ endingOnly?: true;
+ takesSukunOnEnding?: true;
+ longVowel?: true;
+ canStartWithAynBefore?: true;
+ useEndingDiacritic?: true;
+ ainBlendDiacritic?: string;
+};
export const zwar = "َ";
-export const zwarakey = "ٙ";
+export const zwarakay = "ٙ";
export const zer = "ِ";
export const pesh = "ُ";
export const sukun = "ْ";
@@ -43,513 +74,677 @@ export const daggerAlif = "ٰ";
export const fathahan = "ً";
export const phonemeTable: Record = {
- // Consonants
- "b": {
- matches: ["ب"],
- consonant: true,
- },
- "p": {
- matches: ["پ"],
- consonant: true,
- },
- "t": {
- matches: ["ت", "ط"],
- consonant: true,
- },
- "T": {
- matches: ["ټ"],
- consonant: true,
- },
- "s": {
- matches: ["س", "ص", "ث"],
- consonant: true,
- },
- "j": {
- matches: ["ج"],
- consonant: true,
- },
- "ch": {
- matches: ["چ"],
- consonant: true,
- },
- "kh": {
- matches: ["خ"],
- consonant: true,
- },
- "ts": {
- matches: ["څ"],
- consonant: true,
- },
- "dz": {
- matches: ["ځ"],
- consonant: true,
- },
- "d": {
- matches: ["د"],
- consonant: true,
- },
- "D": {
- matches: ["ډ"],
- consonant: true,
- },
- "r": {
- matches: ["ر"],
- consonant: true,
- },
- "R": {
- matches: ["ړ"],
- consonant: true,
- },
- "z": {
- matches: ["ز", "ذ", "ظ", "ض"],
- consonant: true,
- },
- "jz": {
- matches: ["ژ"],
- consonant: true,
- },
- "G": {
- matches: ["ږ"],
- consonant: true,
- },
- "sh": {
- matches: ["ش"],
- consonant: true,
- },
- "x": {
- matches: ["ښ"],
- consonant: true,
- },
- "gh": {
- matches: ["غ"],
- consonant: true,
- },
- "f": {
- matches: ["ف"],
- consonant: true,
- },
- "q": {
- matches: ["ق"],
- consonant: true,
- },
- "k": {
- matches: ["ک"],
- consonant: true,
- },
- "g": {
- matches: ["ګ"],
- consonant: true,
- },
- "l": {
- matches: ["ل"],
- consonant: true,
- },
- "m": {
- matches: ["م"],
- consonant: true,
- },
- "n": {
- matches: ["ن"],
- consonant: true,
- },
- "N": {
- matches: ["ڼ"],
- consonant: true,
- },
- "h": {
- matches: ["ه", "ح"],
- consonant: true,
- takesSukunOnEnding: true,
- },
- "w": {
- matches: ["و"],
- consonant: true,
- },
- "y": {
- matches: ["ی"],
- consonant: true,
- },
- // Ain
- "'": {
- matches: ["ع", "ئ"],
- consonant: true,
- },
- // Joining Vowels
- "-i-": {
- },
- "-U-": {
- matches: [" و ", "و"],
- },
- "-Ul-": {
- matches: ["ال"],
- },
- // Long Vowels
- "aa": {
- matches: ["ا", "أ"],
- beginningMatches: ["آ", "ا"],
- endingMatches: ["ا", "یٰ"],
- longVowel: true,
- ainBlendDiacritic: zwar,
- },
- "ee": {
- matches: ["ی"],
- longVowel: true,
- endingMatches: ["ي"],
- diacritic: zer,
- canStartWithAynBefore: true,
- ainBlendDiacritic: zer,
- },
- "e": {
- matches: ["ې"],
- longVowel: true,
- },
- "o": {
- matches: ["و"],
- longVowel: true,
- },
- "oo": {
- matches: ["و"],
- longVowel: true,
- diacritic: pesh,
- useEndingDiacritic: true,
- ainBlendDiacritic: pesh,
- },
- "ey": {
- matches: ["ی"],
- longVowel: true,
- endingMatches: ["ی"],
- },
- "uy": {
- matches: ["ۍ"],
- longVowel: true,
- endingOnly: true,
- },
- "eyy": {
- matches: ["ئ"],
- longVowel: true,
- endingOnly: true,
- },
- // Short Vowels
- "a": {
- diacritic: zwar,
- endingMatches: ["ه"],
- beginningMatches: ["ا", "ع"],
- // canComeAfterHeyEnding: true,
- },
- "u": {
- diacritic: zwarakey,
- endingMatches: ["ه"],
- },
- "i": {
- diacritic: zer,
- endingMatches: ["ه"],
- beginningMatches: ["ا", "ع"],
- // takesDiacriticBeforeGurdaHeyEnding: true,
- // canBeWasla: true,
- },
- "U": {
- diacritic: pesh,
- endingMatches: ["ه"],
- // takesDiacriticBeforeGurdaHeyEnding: true,
- beginningMatches: ["ا", "ع"],
- },
-}
+ // Consonants
+ b: {
+ matches: ["ب"],
+ consonant: true,
+ },
+ p: {
+ matches: ["پ"],
+ consonant: true,
+ },
+ t: {
+ matches: ["ت", "ط"],
+ consonant: true,
+ },
+ T: {
+ matches: ["ټ"],
+ consonant: true,
+ },
+ s: {
+ matches: ["س", "ص", "ث"],
+ consonant: true,
+ },
+ j: {
+ matches: ["ج"],
+ consonant: true,
+ },
+ ch: {
+ matches: ["چ"],
+ consonant: true,
+ },
+ kh: {
+ matches: ["خ"],
+ consonant: true,
+ },
+ ts: {
+ matches: ["څ"],
+ consonant: true,
+ },
+ dz: {
+ matches: ["ځ"],
+ consonant: true,
+ },
+ d: {
+ matches: ["د"],
+ consonant: true,
+ },
+ D: {
+ matches: ["ډ"],
+ consonant: true,
+ },
+ r: {
+ matches: ["ر"],
+ consonant: true,
+ },
+ R: {
+ matches: ["ړ"],
+ consonant: true,
+ },
+ z: {
+ matches: ["ز", "ذ", "ظ", "ض"],
+ consonant: true,
+ },
+ jz: {
+ matches: ["ژ"],
+ consonant: true,
+ },
+ G: {
+ matches: ["ږ"],
+ consonant: true,
+ },
+ sh: {
+ matches: ["ش"],
+ consonant: true,
+ },
+ x: {
+ matches: ["ښ"],
+ consonant: true,
+ },
+ gh: {
+ matches: ["غ"],
+ consonant: true,
+ },
+ f: {
+ matches: ["ف"],
+ consonant: true,
+ },
+ q: {
+ matches: ["ق"],
+ consonant: true,
+ },
+ k: {
+ matches: ["ک"],
+ consonant: true,
+ },
+ g: {
+ matches: ["ګ"],
+ consonant: true,
+ },
+ l: {
+ matches: ["ل"],
+ consonant: true,
+ },
+ m: {
+ matches: ["م"],
+ consonant: true,
+ },
+ n: {
+ matches: ["ن"],
+ consonant: true,
+ },
+ N: {
+ matches: ["ڼ"],
+ consonant: true,
+ },
+ h: {
+ matches: ["ه", "ح"],
+ consonant: true,
+ takesSukunOnEnding: true,
+ },
+ w: {
+ matches: ["و"],
+ consonant: true,
+ },
+ y: {
+ matches: ["ی"],
+ consonant: true,
+ },
+ // Ain
+ "'": {
+ matches: ["ع", "ئ"],
+ consonant: true,
+ },
+ // Joining Vowels
+ "-i-": {},
+ "-U-": {
+ matches: [" و ", "و"],
+ },
+ "-Ul-": {
+ matches: ["ال"],
+ },
+ // Long Vowels
+ aa: {
+ matches: ["ا", "أ"],
+ beginningMatches: ["آ", "ا"],
+ endingMatches: ["ا", "یٰ"],
+ longVowel: true,
+ ainBlendDiacritic: zwar,
+ },
+ ee: {
+ matches: ["ی"],
+ longVowel: true,
+ endingMatches: ["ي"],
+ diacritic: zer,
+ canStartWithAynBefore: true,
+ ainBlendDiacritic: zer,
+ },
+ e: {
+ matches: ["ې"],
+ longVowel: true,
+ },
+ o: {
+ matches: ["و"],
+ longVowel: true,
+ },
+ oo: {
+ matches: ["و"],
+ longVowel: true,
+ diacritic: pesh,
+ useEndingDiacritic: true,
+ ainBlendDiacritic: pesh,
+ },
+ ay: {
+ matches: ["ی"],
+ longVowel: true,
+ endingMatches: ["ی"],
+ },
+ uy: {
+ matches: ["ۍ"],
+ longVowel: true,
+ endingOnly: true,
+ },
+ ey: {
+ matches: ["ئ"],
+ longVowel: true,
+ endingOnly: true,
+ },
+ // Short Vowels
+ a: {
+ diacritic: zwar,
+ endingMatches: ["ه"],
+ beginningMatches: ["ا", "ع"],
+ // canComeAfterHayEnding: true,
+ },
+ u: {
+ diacritic: zwarakay,
+ endingMatches: ["ه"],
+ },
+ i: {
+ diacritic: zer,
+ endingMatches: ["ه"],
+ beginningMatches: ["ا", "ع"],
+ // takesDiacriticBeforeGurdaHayEnding: true,
+ // canBeWasla: true,
+ },
+ U: {
+ diacritic: pesh,
+ endingMatches: ["ه"],
+ // takesDiacriticBeforeGurdaHayEnding: true,
+ beginningMatches: ["ا", "ع"],
+ },
+};
/**
* splits a phonetics string into an array of Phonemes
- *
+ *
 * will error if there is an illegal phonetics character
- *
+ *
* @param fIn a phonetics string
* @returns an array of phonemes
*/
- export function splitFIntoPhonemes(fIn: string): Phoneme[] {
- const singleLetterPhonemes: Phoneme[] = ["a", "i", "u", "o", "e", "U", "b", "p", "t", "T", "s", "j", "d", "D", "r", "R", "z", "G", "x", "f", "q", "k", "g", "l", "m", "n", "N", "h", "w", "y", "'"];
-
- const quadrigraphs: Phoneme[] = ["-Ul-"];
- const trigraphs: Phoneme[] = ["eyy", "-i-", "-U-"];
- const digraphs: Phoneme[] = ["aa", "ee", "ey", "oo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
- const endingDigraphs: Phoneme[] = ["uy"];
- const willIgnore = ["?", " ", "`", ".", "…", ",", "-"];
-
- const result: Phoneme[] = [];
- const f = removeAccents(fIn).replace(/ă/g, "a");
- let index = 0;
- while (index < f.length) {
- const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
- const threeLetterChunk = f.slice(index, index + 3) as Phoneme;
- const fourLetterChunk = f.slice(index, index + 4) as Phoneme;
- if (quadrigraphs.includes(fourLetterChunk)) {
- result.push(fourLetterChunk);
- index += 4;
- continue;
- }
- if (trigraphs.includes(threeLetterChunk)) {
- result.push(threeLetterChunk);
- index += 3;
- continue;
- }
- const twoLetterChunk = f.slice(index, index + 2) as Phoneme;
- if (
- digraphs.includes(twoLetterChunk) ||
- (isLastTwoLetters && endingDigraphs.includes(twoLetterChunk))
- ) {
- result.push(twoLetterChunk);
- index += 2;
- continue;
- }
- const singleLetter = f.slice(index, index + 1) as Phoneme;
- if (!willIgnore.includes(singleLetter)) {
- if (!singleLetterPhonemes.includes(singleLetter)) {
- throw new Error(`illegal phonetic character: ${singleLetter}`);
- }
- result.push(singleLetter);
- }
- index++;
+export function splitFIntoPhonemes(fIn: string): Phoneme[] {
+  // Chunks are tried longest-first at each position (4, 3, 2, then 1
+  // characters) so that multi-letter phonemes win over their single letters.
+  const singleLetterPhonemes: Phoneme[] = [
+    "a",
+    "i",
+    "u",
+    "o",
+    "e",
+    "U",
+    "b",
+    "p",
+    "t",
+    "T",
+    "s",
+    "j",
+    "d",
+    "D",
+    "r",
+    "R",
+    "z",
+    "G",
+    "x",
+    "f",
+    "q",
+    "k",
+    "g",
+    "l",
+    "m",
+    "n",
+    "N",
+    "h",
+    "w",
+    "y",
+    "'",
+  ];
+
+  const quadrigraphs: Phoneme[] = ["-Ul-"];
+  const trigraphs: Phoneme[] = ["-i-", "-U-"];
+  const digraphs: Phoneme[] = [
+    "aa",
+    "ee",
+    "ay",
+    "oo",
+    "kh",
+    "gh",
+    "ts",
+    "dz",
+    "jz",
+    "ch",
+    "sh",
+  ];
+  // "ey" is only two characters, so listing it with the trigraphs made it match
+  // solely when slice() was truncated at the very end of the string; a
+  // word-final "ey" followed by a space was split into "e" + "y". It is an
+  // ending-only phoneme (like "uy"), so it is recognised here instead.
+  const endingDigraphs: Phoneme[] = ["uy", "ey"];
+  const willIgnore = ["?", " ", "`", ".", "…", ",", "-"];
+
+  const result: Phoneme[] = [];
+  const f = removeAccents(fIn).replace(/ă/g, "a");
+  let index = 0;
+  while (index < f.length) {
+    // true when exactly two characters remain in the string or before a space
+    const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
+    const threeLetterChunk = f.slice(index, index + 3) as Phoneme;
+    const fourLetterChunk = f.slice(index, index + 4) as Phoneme;
+    if (quadrigraphs.includes(fourLetterChunk)) {
+      result.push(fourLetterChunk);
+      index += 4;
+      continue;
     }
- return result;
+    if (trigraphs.includes(threeLetterChunk)) {
+      result.push(threeLetterChunk);
+      index += 3;
+      continue;
+    }
+    const twoLetterChunk = f.slice(index, index + 2) as Phoneme;
+    if (
+      digraphs.includes(twoLetterChunk) ||
+      (isLastTwoLetters && endingDigraphs.includes(twoLetterChunk))
+    ) {
+      result.push(twoLetterChunk);
+      index += 2;
+      continue;
+    }
+    const singleLetter = f.slice(index, index + 1) as Phoneme;
+    // punctuation and spacing are skipped; anything else must be a known phoneme
+    if (!willIgnore.includes(singleLetter)) {
+      if (!singleLetterPhonemes.includes(singleLetter)) {
+        throw new Error(`illegal phonetic character: ${singleLetter}`);
+      }
+      result.push(singleLetter);
+    }
+    index++;
+  }
+  return result;
 }
export enum PhonemeStatus {
- LeadingLongVowel,
- LeadingConsonantOrShortVowel,
- DoubleConsonantTashdeed,
- EndingWithHeyHim,
- DirectMatch,
- DirectMatchAfterSukun,
- EndingWithHeyHimFromSukun,
- ShortVowel,
- PersianSilentWWithAa,
- ArabicWasla,
- Izafe,
- EndOfDuParticle,
- ShortAEndingAfterHeem,
- AlefDaggarEnding,
- SilentAinAfterAlef,
- AinWithLongAAtBeginning,
- LongAinVowelMissingComma,
- ShortAinVowelMissingComma,
- ShortAinVowelMissingCommaAfterAlefStart,
- AinBeginningAfterShortVowel,
- AlefWithHamza,
- AlefWithHamzaWithGlottalStop,
- WoEndingO,
- ShortAForAlefBeforeFathatan,
- NOnFathatan,
- HamzaOnWow,
- ArabicDefiniteArticleUl,
- OoPrefix,
- AlefHamzaBeg,
- GlottalStopBeforeOo,
- OoAfterGlottalStopOo,
- EndingSmallH,
+ LeadingLongVowel,
+ LeadingConsonantOrShortVowel,
+ DoubleConsonantTashdeed,
+ EndingWithHayHim,
+ DirectMatch,
+ DirectMatchAfterSukun,
+ EndingWithHayHimFromSukun,
+ ShortVowel,
+ PersianSilentWWithAa,
+ ArabicWasla,
+ Izafe,
+ EndOfDuParticle,
+ ShortAEndingAfterHeem,
+ AlefDaggarEnding,
+ SilentAinAfterAlef,
+ AinWithLongAAtBeginning,
+ LongAinVowelMissingComma,
+ ShortAinVowelMissingComma,
+ ShortAinVowelMissingCommaAfterAlefStart,
+ AinBeginningAfterShortVowel,
+ AlefWithHamza,
+ AlefWithHamzaWithGlottalStop,
+ WoEndingO,
+ ShortAForAlefBeforeFathatan,
+ NOnFathatan,
+ HamzaOnWow,
+ ArabicDefiniteArticleUl,
+ OoPrefix,
+ AlefHamzaBeg,
+ GlottalStopBeforeOo,
+ OoAfterGlottalStopOo,
+ EndingSmallH,
}
-export function stateInfo({ state, i, phonemes, phoneme }: {
- state: DiacriticsAccumulator,
- i: number,
- phonemes: Phoneme[],
- phoneme: Phoneme,
+export function stateInfo({
+ state,
+ i,
+ phonemes,
+ phoneme,
+}: {
+ state: DiacriticsAccumulator;
+ i: number;
+ phonemes: Phoneme[];
+ phoneme: Phoneme;
}) {
- const isOutOfWord = (char: string) => !char || char === " ";
- const prevPLetter = last(state.pOut);
- const currentPLetter = state.pIn[0];
- const nextPLetter = state.pIn[1];
- const nextPhoneme = phonemes[i+1];
- const previousPhoneme = i > 0 && phonemes[i-1];
- const lastThreePLetters = last(state.pOut, 3) + last(state.pOut, 2) + prevPLetter;
- const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل") || (["دَر", "وَر"].includes(lastThreePLetters) || (last(state.pOut, 2) + prevPLetter) === "را");
- const isEndOfWord = isOutOfWord(nextPLetter);
- const phonemeInfo = phonemeTable[phoneme];
- const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
- // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
- // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
- const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
- const needsSukun = (doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter))) // || (isEndOfWord && phonemeInfo.takesSukunOnEnding);
- const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
- const diacritic = useAinBlendDiacritics
+ const isOutOfWord = (char: string) => !char || char === " ";
+ const prevPLetter = last(state.pOut);
+ const currentPLetter = state.pIn[0];
+ const nextPLetter = state.pIn[1];
+ const nextPhoneme = phonemes[i + 1];
+ const previousPhoneme = i > 0 && phonemes[i - 1];
+ const lastThreePLetters =
+ last(state.pOut, 3) + last(state.pOut, 2) + prevPLetter;
+ const isBeginningOfWord =
+ state.pOut === "" ||
+ prevPLetter === " " ||
+ (previousPhoneme === "-Ul-" && prevPLetter === "ل") ||
+ ["دَر", "وَر"].includes(lastThreePLetters) ||
+ last(state.pOut, 2) + prevPLetter === "را";
+ const isEndOfWord = isOutOfWord(nextPLetter);
+ const phonemeInfo = phonemeTable[phoneme];
+ const previousPhonemeInfo =
+ !isBeginningOfWord && i > 0 && phonemeTable[phonemes[i - 1]];
+ // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
+ // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
+ const doubleConsonant =
+ previousPhonemeInfo &&
+ phonemeInfo.consonant &&
+ previousPhonemeInfo.consonant;
+ const needsSukun =
+ doubleConsonant &&
+ (previousPhoneme !== phoneme ||
+ phonemeInfo.matches?.includes(currentPLetter)); // || (isEndOfWord && phonemeInfo.takesSukunOnEnding);
+ const useAinBlendDiacritics =
+ !isBeginningOfWord &&
+ phonemeInfo.ainBlendDiacritic &&
+ currentPLetter === "ع";
+ const diacritic = useAinBlendDiacritics
? phonemeInfo.ainBlendDiacritic
- : isEndOfWord
- ? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
-
- const lastWordEndedW = (char: string) => ((prevPLetter === char && !currentPLetter) || (prevPLetter === " " && last(state.pOut, 2) === char));
+ : isEndOfWord
+ ? !phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic
+ ? phonemeInfo.diacritic
+ : undefined
+ : phonemeInfo.diacritic;
- function getPhonemeState(): PhonemeStatus {
- if (isBeginningOfWord && phoneme === "aa" && phonemeInfo.beginningMatches?.includes(currentPLetter)) {
- return PhonemeStatus.DirectMatch;
- }
- if (isBeginningOfWord && phoneme === "oo" && currentPLetter === "و") {
- return PhonemeStatus.OoPrefix;
- }
- if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
- if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
- throw Error("phonetics error - needs alef prefix");
- }
- return PhonemeStatus.LeadingLongVowel;
- }
- if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
- return PhonemeStatus.LeadingConsonantOrShortVowel;
- }
- if (isBeginningOfWord && phoneme === "aa" && currentPLetter === "ع" && nextPLetter === "ا") {
- return PhonemeStatus.AinWithLongAAtBeginning;
- }
- if (currentPLetter === "ا" && nextPLetter === "ع" && phoneme === "aa" && nextPhoneme !== "'") {
- return PhonemeStatus.SilentAinAfterAlef;
- }
- // console.log("------");
- // console.log("phoneme", phoneme);
- // console.log("state", state);
- // console.log("prevPLetter is space", prevPLetter === " ");
- // console.log("------");
- if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
- return PhonemeStatus.EndOfDuParticle
- }
- if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") {
- return PhonemeStatus.ArabicDefiniteArticleUl;
- }
- if (phoneme === "a" && nextPhoneme === "'" && phonemes[i+2] === "a" && currentPLetter === "أ") {
- return PhonemeStatus.AlefHamzaBeg;
- }
- if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
- return PhonemeStatus.HamzaOnWow;
- }
- if (phoneme === "a" && currentPLetter === "ا" && nextPLetter === fathahan) {
- return PhonemeStatus.ShortAForAlefBeforeFathatan;
- }
- if (phoneme === "'" && currentPLetter === "و" && nextPLetter === "و") {
- return PhonemeStatus.GlottalStopBeforeOo;
- }
- if (phoneme === "oo" && previousPhoneme === "'" && currentPLetter === "و" && prevPLetter === hamzaAbove) {
- return PhonemeStatus.OoAfterGlottalStopOo;
- }
- if (phoneme === "'" && last(state.pOut, 2) === "ع" && isOutOfWord(last(state.pOut, 3))) {
- return PhonemeStatus.AinBeginningAfterShortVowel;
- }
- if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
- return PhonemeStatus.PersianSilentWWithAa;
- }
- if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
- return PhonemeStatus.ArabicWasla;
- }
- if (phoneme === "-i-" && isBeginningOfWord) {
- return PhonemeStatus.Izafe;
- }
- if (phoneme === "a" && currentPLetter === "أ") {
- return PhonemeStatus.AlefWithHamza;
- }
- if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
- return PhonemeStatus.AlefWithHamzaWithGlottalStop;
- }
- if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'") {
- if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
- return PhonemeStatus.ShortAinVowelMissingComma;
- }
- if ((last(state.pOut, 2) === "ا") && isOutOfWord(last(state.pOut, 3))) {
- return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
- }
- }
- if (useAinBlendDiacritics) {
- return PhonemeStatus.LongAinVowelMissingComma;
- }
- if (((!isBeginningOfWord && doubleConsonant) || prevPLetter === " ") && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter)) {
- return PhonemeStatus.DoubleConsonantTashdeed;
- }
- if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
- return PhonemeStatus.AlefDaggarEnding;
- }
- if (phoneme === "a" && lastWordEndedW("ح")) {
- return PhonemeStatus.ShortAEndingAfterHeem;
- }
- if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
- return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
- }
- if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
- return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
- }
- if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
- return PhonemeStatus.ShortVowel;
- }
- if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
- return PhonemeStatus.WoEndingO;
- }
- if (isEndOfWord && phoneme === "n" && currentPLetter === fathahan && prevPLetter === "ا") {
- return PhonemeStatus.NOnFathatan;
- }
- // console.log("errored", "current", phoneme, "next", nextPhoneme);
- // console.log("bad phoneme is ", phoneme);
- throw new Error("phonetics error - no status found for phoneme: " + phoneme);
+ const lastWordEndedW = (char: string) =>
+ (prevPLetter === char && !currentPLetter) ||
+ (prevPLetter === " " && last(state.pOut, 2) === char);
+
+ function getPhonemeState(): PhonemeStatus {
+ if (
+ isBeginningOfWord &&
+ phoneme === "aa" &&
+ phonemeInfo.beginningMatches?.includes(currentPLetter)
+ ) {
+ return PhonemeStatus.DirectMatch;
}
+ if (isBeginningOfWord && phoneme === "oo" && currentPLetter === "و") {
+ return PhonemeStatus.OoPrefix;
+ }
+ if (isBeginningOfWord && phonemeInfo.longVowel && !phonemeInfo.endingOnly) {
+ if (
+ phoneme !== "aa" &&
+ currentPLetter !== "ا" &&
+ !phonemeInfo.matches?.includes(nextPLetter)
+ ) {
+ throw Error("phonetics error - needs alef prefix");
+ }
+ return PhonemeStatus.LeadingLongVowel;
+ }
+ if (
+ isBeginningOfWord &&
+ (phonemeInfo.beginningMatches?.includes(currentPLetter) ||
+ phonemeInfo.matches?.includes(currentPLetter))
+ ) {
+ return PhonemeStatus.LeadingConsonantOrShortVowel;
+ }
+ if (
+ isBeginningOfWord &&
+ phoneme === "aa" &&
+ currentPLetter === "ع" &&
+ nextPLetter === "ا"
+ ) {
+ return PhonemeStatus.AinWithLongAAtBeginning;
+ }
+ if (
+ currentPLetter === "ا" &&
+ nextPLetter === "ع" &&
+ phoneme === "aa" &&
+ nextPhoneme !== "'"
+ ) {
+ return PhonemeStatus.SilentAinAfterAlef;
+ }
+ // console.log("------");
+ // console.log("phoneme", phoneme);
+ // console.log("state", state);
+ // console.log("prevPLetter is space", prevPLetter === " ");
+ // console.log("------");
+ if (
+ isBeginningOfWord &&
+ phoneme === "u" &&
+ prevPLetter === " " &&
+ lastNonWhitespace(state.pOut) === "د"
+ ) {
+ return PhonemeStatus.EndOfDuParticle;
+ }
+ if (
+ isBeginningOfWord &&
+ phoneme === "-Ul-" &&
+ currentPLetter === "ا" &&
+ nextPLetter === "ل"
+ ) {
+ return PhonemeStatus.ArabicDefiniteArticleUl;
+ }
+ if (
+ phoneme === "a" &&
+ nextPhoneme === "'" &&
+ phonemes[i + 2] === "a" &&
+ currentPLetter === "أ"
+ ) {
+ return PhonemeStatus.AlefHamzaBeg;
+ }
+ if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
+ return PhonemeStatus.HamzaOnWow;
+ }
+ if (phoneme === "a" && currentPLetter === "ا" && nextPLetter === fathahan) {
+ return PhonemeStatus.ShortAForAlefBeforeFathatan;
+ }
+ if (phoneme === "'" && currentPLetter === "و" && nextPLetter === "و") {
+ return PhonemeStatus.GlottalStopBeforeOo;
+ }
+ if (
+ phoneme === "oo" &&
+ previousPhoneme === "'" &&
+ currentPLetter === "و" &&
+ prevPLetter === hamzaAbove
+ ) {
+ return PhonemeStatus.OoAfterGlottalStopOo;
+ }
+ if (
+ phoneme === "'" &&
+ last(state.pOut, 2) === "ع" &&
+ isOutOfWord(last(state.pOut, 3))
+ ) {
+ return PhonemeStatus.AinBeginningAfterShortVowel;
+ }
+ if (
+ !isBeginningOfWord &&
+ phoneme === "aa" &&
+ currentPLetter === "و" &&
+ nextPLetter === "ا"
+ ) {
+ return PhonemeStatus.PersianSilentWWithAa;
+ }
+ if (
+ !isBeginningOfWord &&
+ phoneme === "i" &&
+ currentPLetter === "ا" &&
+ nextPLetter === "ل"
+ ) {
+ return PhonemeStatus.ArabicWasla;
+ }
+ if (phoneme === "-i-" && isBeginningOfWord) {
+ return PhonemeStatus.Izafe;
+ }
+ if (phoneme === "a" && currentPLetter === "أ") {
+ return PhonemeStatus.AlefWithHamza;
+ }
+ if (phoneme === "'" && nextPhoneme === "a" && currentPLetter === "أ") {
+ return PhonemeStatus.AlefWithHamzaWithGlottalStop;
+ }
+ if (currentPLetter === "ع" && phoneme !== "'" && nextPhoneme !== "'") {
+ if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
+ return PhonemeStatus.ShortAinVowelMissingComma;
+ }
+ if (last(state.pOut, 2) === "ا" && isOutOfWord(last(state.pOut, 3))) {
+ return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
+ }
+ }
+ if (useAinBlendDiacritics) {
+ return PhonemeStatus.LongAinVowelMissingComma;
+ }
+ if (
+ ((!isBeginningOfWord && doubleConsonant) || prevPLetter === " ") &&
+ previousPhoneme === phoneme &&
+ !phonemeInfo.matches?.includes(currentPLetter)
+ ) {
+ return PhonemeStatus.DoubleConsonantTashdeed;
+ }
+ if (
+ phoneme === "aa" &&
+ currentPLetter === "ی" &&
+ nextPLetter === daggerAlif
+ ) {
+ return PhonemeStatus.AlefDaggarEnding;
+ }
+ if (phoneme === "a" && lastWordEndedW("ح")) {
+ return PhonemeStatus.ShortAEndingAfterHeem;
+ }
+ if (
+ isEndOfWord &&
+ ((phoneme === "u" && currentPLetter === "ه") ||
+ (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))
+ ) {
+ return needsSukun
+ ? PhonemeStatus.EndingWithHayHimFromSukun
+ : PhonemeStatus.EndingWithHayHim;
+ }
+ if (
+ phonemeInfo.matches?.includes(currentPLetter) ||
+ (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) ||
+ (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب")
+ ) {
+ return needsSukun
+ ? PhonemeStatus.DirectMatchAfterSukun
+ : PhonemeStatus.DirectMatch;
+ }
+ if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
+ return PhonemeStatus.ShortVowel;
+ }
+ if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
+ return PhonemeStatus.WoEndingO;
+ }
+ if (
+ isEndOfWord &&
+ phoneme === "n" &&
+ currentPLetter === fathahan &&
+ prevPLetter === "ا"
+ ) {
+ return PhonemeStatus.NOnFathatan;
+ }
+ // console.log("errored", "current", phoneme, "next", nextPhoneme);
+ // console.log("bad phoneme is ", phoneme);
+ throw new Error(
+ "phonetics error - no status found for phoneme: " + phoneme
+ );
+ }
- const phs = getPhonemeState();
-
- return {
- phs, phonemeInfo, diacritic, prevPLetter,
- };
-};
+ const phs = getPhonemeState();
+ return {
+ phs,
+ phonemeInfo,
+ diacritic,
+ prevPLetter,
+ };
+}
/**
* returns the nth last character of a string
- *
- * @param s
+ *
+ * @param s
*/
export function last(s: string, n = 1) {
- return s[s.length - n];
+ return s[s.length - n];
}
-export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
- return {
- pIn: state.pIn.slice(n),
- pOut: state.pOut + state.pIn.slice(0, n),
- };
+export function advanceP(
+ state: DiacriticsAccumulator,
+ n: number = 1
+): DiacriticsAccumulator {
+ return {
+ pIn: state.pIn.slice(n),
+ pOut: state.pOut + state.pIn.slice(0, n),
+ };
}
/**
* moves back to the last character that wasn't a " " or "."
- *
- * @param state
- * @returns
+ *
+ * @param state
+ * @returns
*/
export function reverseP(state: DiacriticsAccumulator): DiacriticsAccumulator {
- const reversed = [...state.pOut].reverse();
- const howFar = reversed.findIndex((c) => ![" ", "."].includes(c));
- return {
- pIn: state.pOut.slice(-howFar) + state.pIn,
- pOut: state.pOut.slice(0, -howFar),
- };
+ const reversed = [...state.pOut].reverse();
+ const howFar = reversed.findIndex((c) => ![" ", "."].includes(c));
+ return {
+ pIn: state.pOut.slice(-howFar) + state.pIn,
+ pOut: state.pOut.slice(0, -howFar),
+ };
}
-export const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
+export const addP =
+ (toAdd: string | undefined) =>
+ (state: DiacriticsAccumulator): DiacriticsAccumulator => {
return {
- ...state,
- pOut: toAdd ? (state.pOut + toAdd) : state.pOut,
+ ...state,
+ pOut: toAdd ? state.pOut + toAdd : state.pOut,
};
-};
+ };
-export const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
+export const overwriteP =
+ (toWrite: string) =>
+ (state: DiacriticsAccumulator): DiacriticsAccumulator => {
return {
- pIn: state.pIn.slice(1),
- pOut: state.pOut + toWrite,
+ pIn: state.pIn.slice(1),
+ pOut: state.pOut + toWrite,
};
-};
+ };
/**
* returns the last letter before any whitespace (" " / ".")
- *
- * @param s
- * @returns
+ *
+ * @param s
+ * @returns
*/
export function lastNonWhitespace(s: string): string {
- const reversed = [...s].reverse();
- const lastIndex = reversed.findIndex((c) => ![" ", "."].includes(c));
- const penultimateChar = reversed[lastIndex];
- return penultimateChar;
+ const reversed = [...s].reverse();
+ const lastIndex = reversed.findIndex((c) => ![" ", "."].includes(c));
+ const penultimateChar = reversed[lastIndex];
+ return penultimateChar;
}
-export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
- return {
- current: state.pIn[0],
- next: state.pIn[1],
- };
+export function getCurrentNext(state: DiacriticsAccumulator): {
+ current: string;
+ next: string;
+} {
+ return {
+ current: state.pIn[0],
+ next: state.pIn[1],
+ };
}
// export function advanceForAin(state: DiacriticsAccumulator): DiacriticsAccumulator {
@@ -557,22 +752,25 @@ export function getCurrentNext(state: DiacriticsAccumulator): { current: string,
// return (current === "ع") ? advanceP(state) : state;
// }
-export function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccumulator {
- const { current, next } = getCurrentNext(state);
- if (current === "ئ" && next && next !== "ئ") {
- return advanceP(state);
- }
- return state;
+export function advanceForHamzaMid(
+ state: DiacriticsAccumulator
+): DiacriticsAccumulator {
+ const { current, next } = getCurrentNext(state);
+ if (current === "ئ" && next && next !== "ئ") {
+ return advanceP(state);
+ }
+ return state;
}
-export function advanceForHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
- const { current, next } = getCurrentNext(state);
- if (current === "ه" && (!next || next === " ")) {
- return advanceP(state);
- }
- // if (current === "ع") {
- // return advanceP(state);
- // }
- return state;
+export function advanceForHamza(
+ state: DiacriticsAccumulator
+): DiacriticsAccumulator {
+ const { current, next } = getCurrentNext(state);
+ if (current === "ه" && (!next || next === " ")) {
+ return advanceP(state);
+ }
+ // if (current === "ع") {
+ // return advanceP(state);
+ // }
+ return state;
}
-
diff --git a/src/lib/src/diacritics.test.ts b/src/lib/src/diacritics.test.ts
index 47fa109..36dd8b4 100644
--- a/src/lib/src/diacritics.test.ts
+++ b/src/lib/src/diacritics.test.ts
@@ -6,1286 +6,1280 @@
*
*/
-import {
- addDiacritics,
-} from "./diacritics";
-import {
- zwar,
- zwarakey,
- sukun,
- tashdeed,
-} from "./diacritics-helpers";
+import { addDiacritics } from "./diacritics";
+import { zwar, zwarakay, sukun, tashdeed } from "./diacritics-helpers";
import * as T from "../../types";
const diacriticsSections: {
- describe: string,
- tests: {
- in: T.PsString,
- out: string | null,
- }[],
+ describe: string;
+ tests: {
+ in: T.PsString;
+ out: string | null;
+ }[];
}[] = [
- {
- describe: "regular, native Pashto script/sounds",
- tests: [
- {
- in: {
- p: "کور",
- f: "kor",
- },
- out: "کور",
- },
- {
- in: {
- p: "کور",
- f: "koor",
- },
- out: "کُور",
- },
- {
- in: {
- p: "کور کور",
- f: "kor koor",
- },
- out: "کور کُور",
- },
- {
- in: {
- p: "تب",
- f: "tib",
- },
- out: "تِب",
- },
- {
- in: {
- p: "تب",
- f: "tab",
- },
- out: "تَب",
- },
- {
- in: {
- p: "تب",
- f: "tUb",
- },
- out: "تُب",
- },
- {
- in: {
- p: "تب",
- f: "tub",
- },
- out: "تٙب",
- },
- {
- in: {
- p: "تب",
- f: "tb",
- },
- out: "تْب",
- },
- {
- in: {
- p: "تلب",
- f: "tilab",
- },
- out: "تِلَب",
- },
- {
- in: {
- p: "تشناب",
- f: "tashnaab",
- },
- out: "تَشْناب",
- },
- {
- in: {
- p: "پسته",
- f: "pasta",
- },
- out: "پَسْتَه",
- },
- // working with ئ as vowel at end
- {
- in: {
- p: "شئ",
- f: "sheyy",
- },
- out: "شئ",
- },
- {
- in: {
- p: "کار کوئ چې لاړ شئ",
- f: "kaar kawéyy che laaR sheyy",
- },
- out: "کار کَوئ چې لاړ شئ",
- },
- // working with وs
- {
- in: {
- p: "کول",
- f: "kwal",
- },
- out: "کْوَل",
- },
- {
- in: {
- p: "تول",
- f: "tool",
- },
- out: "تُول",
- },
- {
- in: {
- p: "مقبول",
- f: "maqbool",
- },
- out: "مَقْبُول",
- },
- {
- in: {
- p: "کول",
- f: "kawul",
- },
- out: "کَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kiwul",
- },
- out: "کِو" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kUwul",
- },
- out: "کُو" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kuwul",
- },
- out: "ک" + zwarakey + "و" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kawal",
- },
- out: "کَوَل",
- },
- {
- in: {
- p: "کول",
- f: "kUwal",
- },
- out: "کُوَل",
- },
- {
- in: {
- p: "پشتګرد",
- f: "pishtgird",
- },
- out: "پِشْتْګِرْد",
- },
- {
- in: {
- p: "سپین",
- f: "speen",
- },
- out: "سْپِین",
- },
- {
- in: {
- p: "سپین",
- f: "speyn",
- },
- out: "سْپین",
- },
- {
- in: {
- p: "پېش",
- f: "pesh",
- },
- out: "پېش",
- },
- {
- in: {
- p: "لیک",
- f: "leek",
- },
- out: "لِیک",
- },
- {
- in: {
- p: "ماضی",
- f: "maazee",
- },
- out: null,
- },
- {
- in: {
- p: "وسېدل",
- f: "osedul",
- },
- out: null,
- },
- {
- in: {
- p: "يست",
- f: "eest",
- },
- out: null,
- },
- {
- in: {
- p: "ست",
- f: "ist",
- },
- out: null,
- },
- {
- in: {
- p: "haca",
- f: "هځه",
- },
- out: null,
- },
- {
- in: {
- p: "تشناب",
- f: "peshnaab",
- },
- out: null,
- },
- {
- in: {
- p: "وسېدل",
- f: "osedul",
- },
- out: null,
- },
- {
- in: {
- p: "رغېدل",
- f: "raghedul",
- },
- out: "رَغېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "کارول",
- f: "kaarawul",
- },
- out: "کارَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "پېښېدل",
- f: "pexedul",
- },
- out: "پېښېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "مین",
- f: "mayín",
- },
- out: "مَیِن",
- },
- {
- in: {
- p: "سړی",
- f: "saRey",
- },
- out: "سَړی",
- },
- {
- in: {
- p: "سړي",
- f: "saRee",
- },
- out: "سَړي",
- },
- {
- in: {
- p: "زه",
- f: "zu",
- },
- out: "زهٔ",
- },
- {
- in: {
- p: "زه",
- f: "za",
- },
- out: "زَه",
- },
- {
- in: {
- p: "پېشنهاد",
- f: "peshniháad",
- },
- out: "پېشْنِهاد",
- },
- {
- in: {
- p: "ایستل",
- f: "eestul",
- },
- out: "اِیسْت" + zwarakey + "ل",
- },
- {
- in: {
- p: "ایستل",
- f: "eystul",
- },
- out: "ایسْت" + zwarakey + "ل",
- },
- {
- in: {
- p: "اېسېدل",
- f: "esedul",
- },
- out: "اېسېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "اوسېدل",
- f: "osedul",
- },
- out: "اوسېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "اواز",
- f: "awaaz",
- },
- out: "اَواز",
- },
- {
- in: {
- p: "اسلام",
- f: "islaam",
- },
- out: "اِسْلام",
- },
- {
- in: {
- p: "واردول",
- f: "waaridawul",
- },
- out: "وارِدَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "غاړه",
- f: "ghaaRa",
- },
- out: "غاړَه",
- },
- {
- in: {
- p: "اوتر",
- f: "awtár",
- },
- out: "اَوْتَر",
- },
- {
- in: {
- p: "اختیار",
- f: "ikhtiyáar",
- },
- out: "اِخْتِیار",
- },
- {
- in: {
- p: "فریاد",
- f: "faryáad",
- },
- out: "فَرْیاد",
- },
- {
- in: {
- p: "کارغه",
- f: "kaarghu",
- },
- out: "کارْغهٔ",
- },
- {
- in: {
- p: "بې کار",
- f: "be kaar",
- },
- out: "بې کار",
- },
- {
- in: {
- p: "بې کار",
- f: "bekaar",
- },
- out: "بې کار",
- },
- {
- in: {
- p: "ارغون",
- f: "arghóon",
- },
- out: "اَرْغُون",
- },
- {
- in: {
- p: "ارمټه",
- f: "armaTa",
- },
- out: "اَرْمَټَه",
- },
- {
- in: {
- p: "اروا پوه",
- f: "arwaa poh",
- },
- out: "اَرْوا پوهْ",
- },
- // starting alefs
- {
- in: {
- p: "اسلام",
- f: "islaam",
- },
- out: "اِسْلام",
- },
- // starting long vowels with ا
- {
- in: {
- p: "ایسار",
- f: "eesaar",
- },
- out: "اِیسار",
- },
- // double consonant / tashdeed
- {
- in: {
- p: "بتن",
- f: "battan",
- },
- out: "ب" + zwar + "ت" + tashdeed + zwar + "ن",
- },
- {
- in: {
- p: "بتطن",
- f: "battan",
- },
- out: "ب" + zwar + "ت" + sukun + "ط" + zwar + "ن",
- },
- // vowel endings working
- {
- in: {
- p: "بته",
- f: "bata",
- },
- out: "بَتَه",
- },
- {
- in: {
- p: "بته",
- f: "bati",
- },
- out: "بَتِه",
- },
- {
- in: {
- p: "پرمختیا",
- f: "parmakhtyaa",
- },
- out: "پَرْمَخْتْیا",
- },
- {
- in: {
- p: "پته",
- f: "patta",
- },
- out: "پَتَّه",
- },
- {
- in: {
- p: "پته تور",
- f: "patta toor",
- },
- out: "پَتَّه تُور",
- },
- {
- in: {
- p: "لکۍ وال",
- f: "lakuy waal",
- },
- out: "لَکۍ وال",
- },
- // avoid false double consonant
- {
- in: {
- p: "ازل لیک",
- f: "azalléek",
- },
- out: "اَزَل لِیک",
- },
- {
- in: {
- p: "سه",
- f: "si",
- },
- out: "سِه",
- },
- {
- in: {
- p: "سه شنبه",
- f: "sishamba",
- },
- out: "سِه شَنْبَه",
- },
- {
- in: {
- p: "توجه",
- f: "tawajÚ",
- },
- out: "تَوَجُه",
- },
- {
- in: {
- p: "توجه کول",
- f: "tawajU kawul",
- },
- out: "تَوَجُه کَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "با استعداد",
- f: "baa isti'dáad",
- },
- out: "با اِسْتِعْداد",
- },
- {
- in: {
- p: "آدم",
- f: "aadam",
- },
- out: "آدَم",
- },
- {
- in: {
- p: "آسان",
- f: "aasáan",
- },
- out: "آسان",
- },
- {
- in: {
- p: "آسان",
- f: "asáan",
- },
- out: null,
- },
- {
- in: {
- p: "یدام",
- f: "aadam",
- },
- out: null,
- },
- {
- in: {
- p: "سختسری",
- f: "sakht sărey",
- },
- out: "سَخْتْسَری",
- },
- {
- in: {
- p: " سپین کړه",
- f: " speen kRu",
- },
- out: "سْپِین کْړهٔ",
- },
- {
- in: {
- p: "اوب",
- f: "ob",
- },
- out: "اوب",
- },
- {
- in: {
- p: "قطعه بازي",
- f: "qit'a baazee",
- },
- out: "قِطْعَه بازي",
- },
- {
- in: {
- p: "مقرر",
- f: "mUqarrár",
- },
- out: "مُقَرٌَر",
- },
- {
- in: {
- p: "متردد",
- f: "mUtariddíd",
- },
- out: "مُتَرِدِّد",
- },
- {
- in: {
- p: "زره",
- f: "zirih",
- },
- out: "زِرِهْ",
- },
- {
- in: {
- p: "وری",
- f: "waréy",
- },
- out: "وَری",
- },
- {
- in: {
- p: "فلاح",
- f: "faláa",
- },
- out: "فَلاح",
- },
- {
- in: {
- p: "امزری",
- f: "umzaréy",
- },
- out: zwarakey + "مْزَری",
- },
- ],
- },
- {
- describe: "ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی",
- tests: [
- {
- in: {
- p: "پتېیل",
- f: "pateyúl",
- },
- out: null,
- },
- {
- in: {
- p: "پتېیل",
- f: "pate`yúl",
- },
- out: "پَتېی" + zwarakey + "ل",
- },
- {
- in: {
- p: "درېیم",
- f: "dre`yum",
- },
- out: "دْرېی" + zwarakey + "م",
- },
- ],
- },
- {
- describe: "handle circumpositions",
- tests: [
- {
- in: {
- p: "تر ... پورې",
- f: "tur ... pore",
- },
- out: "ت" + zwarakey + "ر ... پورې",
- },
- ],
- },
- {
- describe: "nm - mb thing",
- tests: [
- {
- in: {
- p: "انبار",
- f: "ambáar",
- },
- out: "اَنْبار",
- },
- ],
- },
- {
- describe: "excetption for و - wo",
- tests: [
- {
- in: {
- p: "و",
- f: "wo",
- },
- out: "و",
- },
- {
- in: {
- p: "سړی و",
- f: "saRey wo",
- },
- out: "سَړی و",
- },
- ],
- },
- {
- describe: "alef with hamza above",
- tests: [
- {
- in: {
- p: "جرأت",
- f: "jUrát",
- },
- out: "جُرأت",
- },
- {
- in: {
- p: "جرأت",
- f: "jUr'át",
- },
- out: "جُرأت",
- },
- ],
- },
- {
- describe: "ayn stuff",
- tests: [
- {
- in: {
- p: "بعد",
- f: "ba'd",
- },
- out: "بَعْد",
- },
- {
- in: {
- p: "بعد",
- f: "b'ad",
- },
- out: "بْعَد",
- },
- {
- in: {
- p: "بعد",
- f: "ba'ad",
- },
- out: "بَعَد",
- },
- {
- in: {
- p: "بعد",
- f: "baad",
- },
- out: "بَعَد",
- },
- {
- in: {
- p: "بعد",
- f: "bad",
- },
- out: "بَعد",
- },
- {
- in: {
- p: "معلوم",
- f: "maaloom",
- },
- out: "مَعَلُوم",
- },
- {
- in: {
- p: "منبع",
- f: "manbi'",
- },
- out: "مَنْبِع",
- },
- {
- in: {
- p: "منبع",
- f: "manb'i",
- },
- out: "مَنْبْعِ"
- },
- {
- in: {
- p: "منبع",
- f: "manbee",
- },
- out: "مَنْبِعِ",
- },
- {
- in: {
- p: "منبع",
- f: "manbi",
- },
- out: "مَنْبِع"
- },
- {
- in: {
- p: "معنا",
- f: "ma'náa",
- },
- out: "مَعْنا",
- },
- {
- in: {
- p: "معنا",
- f: "maanáa",
- },
- out: "مَعَنا",
- },
- {
- in: {
- p: "طمع استعمال",
- f: "tama istimaal",
- },
- out: "طَمَع اِسْتِعمال",
- },
- {
- in: {
- p: "مربع",
- f: "mUraba'",
- },
- out: "مُرَبَع",
- },
- {
- in: {
- p: "مربع جذر",
- f: "mUraba' jazúr",
- },
- out: "مُرَبَع جَذ" + zwarakey + "ر",
- },
- {
- in: {
- p: "عام",
- f: "'aam",
- },
- out: "عام",
- },
- {
- in: {
- p: "قتل عام",
- f: "qatl-i-aam",
- },
- out: "قَتْلِ عام",
- },
- {
- in: {
- p: "توقع",
- f: "tawaqqÚ",
- },
- out: "تَوَقُّع",
- },
- {
- in: {
- p: "راجع کېدل",
- f: "raaji kedul",
- },
- out: "راجِع کېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "ربیع",
- f: "rabee'",
- },
- out: "رَبِیع",
- },
- ],
- },
- {
- describe: "ayn at the beginning",
- tests: [
- // as a short vowel at the beginning
- {
- in: {
- p: "عزت",
- f: "izzat",
- },
- out: "عِزَّت",
- },
- {
- in: {
- p: "عزت",
- f: "i'zzat",
- },
- out: "عِْزَّت",
- },
- {
- in: {
- p: "عذر",
- f: "Uzar",
- },
- out: "عُذَر",
- },
- {
- in: {
- p: "عذر",
- f: "U'zar",
- },
- out: "عُْذَر",
- },
- // as a short i with an alef
- {
- in: {
- p: "اعتصاب شکن",
- f: "itisaab shakan",
- },
- out: "اِعتِصاب شَکَن",
- },
- {
- in: {
- p: "اعتصاب شکن",
- f: "i'tisaab shakan",
- },
- out: "اِعْتِصاب شَکَن",
- },
- // as a long aa at beginning
- {
- in: {
- p: "عادل",
- f: "aadíl",
- },
- out: "عادِل",
- },
- {
- in: {
- p: "عید",
- f: "eed",
- },
- out: "عِید",
- },
- ],
- },
- {
- describe: "ayn at the end",
- tests: [
- {
- in: {
- p: "اجماع",
- f: "ijmaa",
- },
- out: "اِجْماع",
- },
- {
- in: {
- p: "اجماع",
- f: "ijmaa'",
- },
- out: "اِجْماع",
- }
- ],
- },
- {
- describe: "ئ in the middle",
- tests: [
- {
- in: {
- p: "برائت",
- f: "baraa'at",
- },
- out: "بَرائَت",
- },
- {
- in: {
- p: "فائده",
- f: "faaida",
- },
- out: "فائِدَه",
- },
- ],
- },
- {
- describe: "واخ being khaa in the middle of a word",
- tests: [
- {
- in: {
- p: "استخوان",
- f: "UstUkháan",
- },
- out: "اُسْتُخ(و)ان",
- },
- ],
- },
- {
- describe: "Arabic wasla",
- tests: [
- {
- in: {
- p: "بالکل",
- f: "bilkUl",
- },
- out: "بِٱلْکُل",
- },
- ],
- },
- {
- describe: "izafe",
- tests: [
- {
- in: {
- p: "ایصال ثواب",
- f: "eesaal-i-sawaab",
- },
- out: "اِیصالِ ثَواب",
- },
- ],
- },
- {
- describe: "joiner و",
- tests: [
- {
- in: {
- p: "کار و بار",
- f: "kaar-U-baar",
- },
- out: "کار و بار",
- },
- {
- in: {
- p: "کاروبار",
- f: "kaar-U-baar",
- },
- out: "کاروبار",
- },
- ],
- },
- {
- describe: "special behaviour with د",
- tests: [
- {
- in: {
- p: "د",
- f: "du",
- },
- out: "د" + zwarakey,
- },
- {
- in: {
- p: "د لاس",
- f: "du laas",
- },
- out: "د" + zwarakey + " لاس",
- },
- {
- in: {
- p: "د ... په شان",
- f: "du ... pu shaan",
- },
- out: "د" + zwarakey + " ... پهٔ شان",
- },
- ],
- },
- {
- describe: "ha ending with ح",
- tests: [
- {
- in: {
- p: "ذبح",
- f: "zabha",
- },
- out: "ذَبْحَ",
- },
- {
- in: {
- p: "ذبح کول",
- f: "zabha kawul",
- },
- out: "ذَبْحَ کَو" + zwarakey + "ل",
- },
- ],
- },
- {
- describe: "require dagger alif on words ending with یٰ",
- tests: [
- {
- in: {
- p: "یحیی",
- f: "yahyaa",
- },
- out: null,
- },
- {
- in: {
- p: "یحییٰ",
- f: "yahyaa",
- },
- out: "یَحْییٰ",
- },
- {
- in: {
- p: "یحییٰ چېرته",
- f: "yahyaa cherta",
- },
- out: "یَحْییٰ چېرْتَه",
- },
- {
- in: {
- p: "معنیٰ",
- f: "ma'anaa",
- },
- out: "مَعَنیٰ",
- },
- ],
- },
- {
- describe: "require fathatan on words ending in اً ",
- tests: [
- {
- in: {
- p: "دقیقا",
- f: "daqeeqan",
- },
- out: null,
- },
- {
- in: {
- p: "دقیقاً",
- f: "daqeeqan",
- },
- out: "دَقِیقاً",
- },
- ],
- },
- {
- describe: "Ua ؤ",
- tests: [
- {
- in: {
- p: "مودب",
- f: "mUaddab",
- },
- out: "مُؤَدَّب",
- },
- ],
- },
- {
- describe: "With Arabic definate article -Ul- ال",
- tests: [
- {
- in: {
- p: "حق الاجاره",
- f: "haq-Ul-ijaara",
- },
- out: "حَق اُلاِجارَه",
- },
- {
- in: {
- p: "دار العلوم",
- f: "daar-Ul-Ulóom",
- },
- out: "دار اُلعُلُوم",
- },
- ],
- },
- {
- describe: "double consonants on end of words",
- tests: [
- {
- in: {
- p: "حق",
- f: "haqq",
- },
- out: "حَقّ",
- },
- {
- in: {
- p: "حق پر",
- f: "haqq par",
- },
- out: "حَقّ پَر",
- },
- ],
- },
- {
- describe: "أ in the middle of the word",
- tests: [
- {
- in: {
- p: "متأسف",
- f: "mUtaassif",
- },
- out: "مُتأسِّف",
- },
- {
- in: {
- p: "متأسف",
- f: "mUta'assif",
- },
- out: "مُتأسِّف",
- },
- ],
- },
- {
- describe: "ؤو in middle of the word",
- tests: [
- {
- in: {
- p: "مسوول",
- f: "mas'ool",
- },
- out: "مَسؤول", // TODO: Is this best??
- },
- ],
- },
- {
- describe: "allow for beginnings prefixed with ور در را",
- tests: [
- {
- in: {
- p: "وراوږد",
- f: "wăr-ooGad",
- },
- out: "وَراُوږَد",
- },
- {
- in: {
- p: "دراوږد",
- f: "dăr-ooGad",
- },
- out: "دَراُوږَد",
- },
- {
- in: {
- p: "رااوږد",
- f: "raa-ooGad",
- },
- out: "رااُوږَد",
- },
- ],
- },
- {
- describe: "allow oo at start with و prefix",
- tests: [
- {
- in: {
- p: "وباسي",
- f: "oobaasee",
- },
- out: "وُباسي",
- },
- {
- in: {
- p: "وځم",
- f: "oodzum",
- },
- out: "وُځ" + zwarakey + "م",
- },
- {
- in: {
- p: "وځم",
- f: "wUdzum",
- },
- out: "وُځ" + zwarakey + "م",
- },
- ],
- },
+ {
+ describe: "regular, native Pashto script/sounds",
+ tests: [
+ {
+ in: {
+ p: "کور",
+ f: "kor",
+ },
+ out: "کور",
+ },
+ {
+ in: {
+ p: "کور",
+ f: "koor",
+ },
+ out: "کُور",
+ },
+ {
+ in: {
+ p: "کور کور",
+ f: "kor koor",
+ },
+ out: "کور کُور",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tib",
+ },
+ out: "تِب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tab",
+ },
+ out: "تَب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tUb",
+ },
+ out: "تُب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tub",
+ },
+ out: "تٙب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tb",
+ },
+ out: "تْب",
+ },
+ {
+ in: {
+ p: "تلب",
+ f: "tilab",
+ },
+ out: "تِلَب",
+ },
+ {
+ in: {
+ p: "تشناب",
+ f: "tashnaab",
+ },
+ out: "تَشْناب",
+ },
+ {
+ in: {
+ p: "پسته",
+ f: "pasta",
+ },
+ out: "پَسْتَه",
+ },
+ // working with ئ as vowel at end
+ {
+ in: {
+ p: "شئ",
+ f: "shey",
+ },
+ out: "شئ",
+ },
+ {
+ in: {
+ p: "کار کوئ چې لاړ شئ",
+ f: "kaar kawéy che laaR shey",
+ },
+ out: "کار کَوئ چې لاړ شئ",
+ },
+ // working with وs
+ {
+ in: {
+ p: "کول",
+ f: "kwal",
+ },
+ out: "کْوَل",
+ },
+ {
+ in: {
+ p: "تول",
+ f: "tool",
+ },
+ out: "تُول",
+ },
+ {
+ in: {
+ p: "مقبول",
+ f: "maqbool",
+ },
+ out: "مَقْبُول",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kawul",
+ },
+ out: "کَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kiwul",
+ },
+ out: "کِو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kUwul",
+ },
+ out: "کُو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kuwul",
+ },
+ out: "ک" + zwarakay + "و" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kawal",
+ },
+ out: "کَوَل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kUwal",
+ },
+ out: "کُوَل",
+ },
+ {
+ in: {
+ p: "پشتګرد",
+ f: "pishtgird",
+ },
+ out: "پِشْتْګِرْد",
+ },
+ {
+ in: {
+ p: "سپین",
+ f: "speen",
+ },
+ out: "سْپِین",
+ },
+ {
+ in: {
+ p: "سپین",
+ f: "spayn",
+ },
+ out: "سْپین",
+ },
+ {
+ in: {
+ p: "پېش",
+ f: "pesh",
+ },
+ out: "پېش",
+ },
+ {
+ in: {
+ p: "لیک",
+ f: "leek",
+ },
+ out: "لِیک",
+ },
+ {
+ in: {
+ p: "ماضی",
+ f: "maazee",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "وسېدل",
+ f: "osedul",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "يست",
+ f: "eest",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "ست",
+ f: "ist",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "haca",
+ f: "هځه",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "تشناب",
+ f: "peshnaab",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "وسېدل",
+ f: "osedul",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "رغېدل",
+ f: "raghedul",
+ },
+ out: "رَغېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کارول",
+ f: "kaarawul",
+ },
+ out: "کارَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "پېښېدل",
+ f: "pexedul",
+ },
+ out: "پېښېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "مین",
+ f: "mayín",
+ },
+ out: "مَیِن",
+ },
+ {
+ in: {
+ p: "سړی",
+ f: "saRay",
+ },
+ out: "سَړی",
+ },
+ {
+ in: {
+ p: "سړي",
+ f: "saRee",
+ },
+ out: "سَړي",
+ },
+ {
+ in: {
+ p: "زه",
+ f: "zu",
+ },
+ out: "زهٔ",
+ },
+ {
+ in: {
+ p: "زه",
+ f: "za",
+ },
+ out: "زَه",
+ },
+ {
+ in: {
+ p: "پېشنهاد",
+ f: "peshniháad",
+ },
+ out: "پېشْنِهاد",
+ },
+ {
+ in: {
+ p: "ایستل",
+ f: "eestul",
+ },
+ out: "اِیسْت" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "ایستل",
+ f: "aystul",
+ },
+ out: "ایسْت" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "اېسېدل",
+ f: "esedul",
+ },
+ out: "اېسېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "اوسېدل",
+ f: "osedul",
+ },
+ out: "اوسېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "اواز",
+ f: "awaaz",
+ },
+ out: "اَواز",
+ },
+ {
+ in: {
+ p: "اسلام",
+ f: "islaam",
+ },
+ out: "اِسْلام",
+ },
+ {
+ in: {
+ p: "واردول",
+ f: "waaridawul",
+ },
+ out: "وارِدَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "غاړه",
+ f: "ghaaRa",
+ },
+ out: "غاړَه",
+ },
+ {
+ in: {
+ p: "اوتر",
+ f: "awtár",
+ },
+ out: "اَوْتَر",
+ },
+ {
+ in: {
+ p: "اختیار",
+ f: "ikhtiyáar",
+ },
+ out: "اِخْتِیار",
+ },
+ {
+ in: {
+ p: "فریاد",
+ f: "faryáad",
+ },
+ out: "فَرْیاد",
+ },
+ {
+ in: {
+ p: "کارغه",
+ f: "kaarghu",
+ },
+ out: "کارْغهٔ",
+ },
+ {
+ in: {
+ p: "بې کار",
+ f: "be kaar",
+ },
+ out: "بې کار",
+ },
+ {
+ in: {
+ p: "بې کار",
+ f: "bekaar",
+ },
+ out: "بې کار",
+ },
+ {
+ in: {
+ p: "ارغون",
+ f: "arghóon",
+ },
+ out: "اَرْغُون",
+ },
+ {
+ in: {
+ p: "ارمټه",
+ f: "armaTa",
+ },
+ out: "اَرْمَټَه",
+ },
+ {
+ in: {
+ p: "اروا پوه",
+ f: "arwaa poh",
+ },
+ out: "اَرْوا پوهْ",
+ },
+ // starting alefs
+ {
+ in: {
+ p: "اسلام",
+ f: "islaam",
+ },
+ out: "اِسْلام",
+ },
+ // starting long vowels with ا
+ {
+ in: {
+ p: "ایسار",
+ f: "eesaar",
+ },
+ out: "اِیسار",
+ },
+ // double consonant / tashdeed
+ {
+ in: {
+ p: "بتن",
+ f: "battan",
+ },
+ out: "ب" + zwar + "ت" + tashdeed + zwar + "ن",
+ },
+ {
+ in: {
+ p: "بتطن",
+ f: "battan",
+ },
+ out: "ب" + zwar + "ت" + sukun + "ط" + zwar + "ن",
+ },
+ // vowel endings working
+ {
+ in: {
+ p: "بته",
+ f: "bata",
+ },
+ out: "بَتَه",
+ },
+ {
+ in: {
+ p: "بته",
+ f: "bati",
+ },
+ out: "بَتِه",
+ },
+ {
+ in: {
+ p: "پرمختیا",
+ f: "parmakhtyaa",
+ },
+ out: "پَرْمَخْتْیا",
+ },
+ {
+ in: {
+ p: "پته",
+ f: "patta",
+ },
+ out: "پَتَّه",
+ },
+ {
+ in: {
+ p: "پته تور",
+ f: "patta toor",
+ },
+ out: "پَتَّه تُور",
+ },
+ {
+ in: {
+ p: "لکۍ وال",
+ f: "lakuy waal",
+ },
+ out: "لَکۍ وال",
+ },
+ // avoid false double consonant
+ {
+ in: {
+ p: "ازل لیک",
+ f: "azalléek",
+ },
+ out: "اَزَل لِیک",
+ },
+ {
+ in: {
+ p: "سه",
+ f: "si",
+ },
+ out: "سِه",
+ },
+ {
+ in: {
+ p: "سه شنبه",
+ f: "sishamba",
+ },
+ out: "سِه شَنْبَه",
+ },
+ {
+ in: {
+ p: "توجه",
+ f: "tawajÚ",
+ },
+ out: "تَوَجُه",
+ },
+ {
+ in: {
+ p: "توجه کول",
+ f: "tawajU kawul",
+ },
+ out: "تَوَجُه کَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "با استعداد",
+ f: "baa isti'dáad",
+ },
+ out: "با اِسْتِعْداد",
+ },
+ {
+ in: {
+ p: "آدم",
+ f: "aadam",
+ },
+ out: "آدَم",
+ },
+ {
+ in: {
+ p: "آسان",
+ f: "aasáan",
+ },
+ out: "آسان",
+ },
+ {
+ in: {
+ p: "آسان",
+ f: "asáan",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "یدام",
+ f: "aadam",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "سختسری",
+ f: "sakht săray",
+ },
+ out: "سَخْتْسَری",
+ },
+ {
+ in: {
+ p: " سپین کړه",
+ f: " speen kRu",
+ },
+ out: "سْپِین کْړهٔ",
+ },
+ {
+ in: {
+ p: "اوب",
+ f: "ob",
+ },
+ out: "اوب",
+ },
+ {
+ in: {
+ p: "قطعه بازي",
+ f: "qit'a baazee",
+ },
+ out: "قِطْعَه بازي",
+ },
+ {
+ in: {
+ p: "مقرر",
+ f: "mUqarrár",
+ },
+ out: "مُقَرٌَر",
+ },
+ {
+ in: {
+ p: "متردد",
+ f: "mUtariddíd",
+ },
+ out: "مُتَرِدِّد",
+ },
+ {
+ in: {
+ p: "زره",
+ f: "zirih",
+ },
+ out: "زِرِهْ",
+ },
+ {
+ in: {
+ p: "وری",
+ f: "waráy",
+ },
+ out: "وَری",
+ },
+ {
+ in: {
+ p: "فلاح",
+ f: "faláa",
+ },
+ out: "فَلاح",
+ },
+ {
+ in: {
+ p: "امزری",
+ f: "umzaráy",
+ },
+ out: zwarakay + "مْزَری",
+ },
+ ],
+ },
+ {
+ describe:
+ "ې followed by ی - y needs to be written as e`y to be distinguished from ay - ی",
+ tests: [
+ {
+ in: {
+ p: "پتېیل",
+ f: "patayúl",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "پتېیل",
+ f: "pate`yúl",
+ },
+ out: "پَتېی" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "درېیم",
+ f: "dre`yum",
+ },
+ out: "دْرېی" + zwarakay + "م",
+ },
+ ],
+ },
+ {
+ describe: "handle circumpositions",
+ tests: [
+ {
+ in: {
+ p: "تر ... پورې",
+ f: "tur ... pore",
+ },
+ out: "ت" + zwarakay + "ر ... پورې",
+ },
+ ],
+ },
+ {
+ describe: "nm - mb thing",
+ tests: [
+ {
+ in: {
+ p: "انبار",
+ f: "ambáar",
+ },
+ out: "اَنْبار",
+ },
+ ],
+ },
+ {
+ describe: "excetption for و - wo",
+ tests: [
+ {
+ in: {
+ p: "و",
+ f: "wo",
+ },
+ out: "و",
+ },
+ {
+ in: {
+ p: "سړی و",
+ f: "saRay wo",
+ },
+ out: "سَړی و",
+ },
+ ],
+ },
+ {
+ describe: "alef with hamza above",
+ tests: [
+ {
+ in: {
+ p: "جرأت",
+ f: "jUrát",
+ },
+ out: "جُرأت",
+ },
+ {
+ in: {
+ p: "جرأت",
+ f: "jUr'át",
+ },
+ out: "جُرأت",
+ },
+ ],
+ },
+ {
+ describe: "ayn stuff",
+ tests: [
+ {
+ in: {
+ p: "بعد",
+ f: "ba'd",
+ },
+ out: "بَعْد",
+ },
+ {
+ in: {
+ p: "بعد",
+ f: "b'ad",
+ },
+ out: "بْعَد",
+ },
+ {
+ in: {
+ p: "بعد",
+ f: "ba'ad",
+ },
+ out: "بَعَد",
+ },
+ {
+ in: {
+ p: "بعد",
+ f: "baad",
+ },
+ out: "بَعَد",
+ },
+ {
+ in: {
+ p: "بعد",
+ f: "bad",
+ },
+ out: "بَعد",
+ },
+ {
+ in: {
+ p: "معلوم",
+ f: "maaloom",
+ },
+ out: "مَعَلُوم",
+ },
+ {
+ in: {
+ p: "منبع",
+ f: "manbi'",
+ },
+ out: "مَنْبِع",
+ },
+ {
+ in: {
+ p: "منبع",
+ f: "manb'i",
+ },
+ out: "مَنْبْعِ",
+ },
+ {
+ in: {
+ p: "منبع",
+ f: "manbee",
+ },
+ out: "مَنْبِعِ",
+ },
+ {
+ in: {
+ p: "منبع",
+ f: "manbi",
+ },
+ out: "مَنْبِع",
+ },
+ {
+ in: {
+ p: "معنا",
+ f: "ma'náa",
+ },
+ out: "مَعْنا",
+ },
+ {
+ in: {
+ p: "معنا",
+ f: "maanáa",
+ },
+ out: "مَعَنا",
+ },
+ {
+ in: {
+ p: "طمع استعمال",
+ f: "tama istimaal",
+ },
+ out: "طَمَع اِسْتِعمال",
+ },
+ {
+ in: {
+ p: "مربع",
+ f: "mUraba'",
+ },
+ out: "مُرَبَع",
+ },
+ {
+ in: {
+ p: "مربع جذر",
+ f: "mUraba' jazúr",
+ },
+ out: "مُرَبَع جَذ" + zwarakay + "ر",
+ },
+ {
+ in: {
+ p: "عام",
+ f: "'aam",
+ },
+ out: "عام",
+ },
+ {
+ in: {
+ p: "قتل عام",
+ f: "qatl-i-aam",
+ },
+ out: "قَتْلِ عام",
+ },
+ {
+ in: {
+ p: "توقع",
+ f: "tawaqqÚ",
+ },
+ out: "تَوَقُّع",
+ },
+ {
+ in: {
+ p: "راجع کېدل",
+ f: "raaji kedul",
+ },
+ out: "راجِع کېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "ربیع",
+ f: "rabee'",
+ },
+ out: "رَبِیع",
+ },
+ ],
+ },
+ {
+ describe: "ayn at the beginning",
+ tests: [
+ // as a short vowel at the beginning
+ {
+ in: {
+ p: "عزت",
+ f: "izzat",
+ },
+ out: "عِزَّت",
+ },
+ {
+ in: {
+ p: "عزت",
+ f: "i'zzat",
+ },
+ out: "عِْزَّت",
+ },
+ {
+ in: {
+ p: "عذر",
+ f: "Uzar",
+ },
+ out: "عُذَر",
+ },
+ {
+ in: {
+ p: "عذر",
+ f: "U'zar",
+ },
+ out: "عُْذَر",
+ },
+ // as a short i with an alef
+ {
+ in: {
+ p: "اعتصاب شکن",
+ f: "itisaab shakan",
+ },
+ out: "اِعتِصاب شَکَن",
+ },
+ {
+ in: {
+ p: "اعتصاب شکن",
+ f: "i'tisaab shakan",
+ },
+ out: "اِعْتِصاب شَکَن",
+ },
+ // as a long aa at beginning
+ {
+ in: {
+ p: "عادل",
+ f: "aadíl",
+ },
+ out: "عادِل",
+ },
+ {
+ in: {
+ p: "عید",
+ f: "eed",
+ },
+ out: "عِید",
+ },
+ ],
+ },
+ {
+ describe: "ayn at the end",
+ tests: [
+ {
+ in: {
+ p: "اجماع",
+ f: "ijmaa",
+ },
+ out: "اِجْماع",
+ },
+ {
+ in: {
+ p: "اجماع",
+ f: "ijmaa'",
+ },
+ out: "اِجْماع",
+ },
+ ],
+ },
+ {
+ describe: "ئ in the middle",
+ tests: [
+ {
+ in: {
+ p: "برائت",
+ f: "baraa'at",
+ },
+ out: "بَرائَت",
+ },
+ {
+ in: {
+ p: "فائده",
+ f: "faaida",
+ },
+ out: "فائِدَه",
+ },
+ ],
+ },
+ {
+ describe: "واخ being khaa in the middle of a word",
+ tests: [
+ {
+ in: {
+ p: "استخوان",
+ f: "UstUkháan",
+ },
+ out: "اُسْتُخ(و)ان",
+ },
+ ],
+ },
+ {
+ describe: "Arabic wasla",
+ tests: [
+ {
+ in: {
+ p: "بالکل",
+ f: "bilkUl",
+ },
+ out: "بِٱلْکُل",
+ },
+ ],
+ },
+ {
+ describe: "izafe",
+ tests: [
+ {
+ in: {
+ p: "ایصال ثواب",
+ f: "eesaal-i-sawaab",
+ },
+ out: "اِیصالِ ثَواب",
+ },
+ ],
+ },
+ {
+ describe: "joiner و",
+ tests: [
+ {
+ in: {
+ p: "کار و بار",
+ f: "kaar-U-baar",
+ },
+ out: "کار و بار",
+ },
+ {
+ in: {
+ p: "کاروبار",
+ f: "kaar-U-baar",
+ },
+ out: "کاروبار",
+ },
+ ],
+ },
+ {
+ describe: "special behaviour with د",
+ tests: [
+ {
+ in: {
+ p: "د",
+ f: "du",
+ },
+ out: "د" + zwarakay,
+ },
+ {
+ in: {
+ p: "د لاس",
+ f: "du laas",
+ },
+ out: "د" + zwarakay + " لاس",
+ },
+ {
+ in: {
+ p: "د ... په شان",
+ f: "du ... pu shaan",
+ },
+ out: "د" + zwarakay + " ... پهٔ شان",
+ },
+ ],
+ },
+ {
+ describe: "ha ending with ح",
+ tests: [
+ {
+ in: {
+ p: "ذبح",
+ f: "zabha",
+ },
+ out: "ذَبْحَ",
+ },
+ {
+ in: {
+ p: "ذبح کول",
+ f: "zabha kawul",
+ },
+ out: "ذَبْحَ کَو" + zwarakay + "ل",
+ },
+ ],
+ },
+ {
+ describe: "require dagger alif on words ending with یٰ",
+ tests: [
+ {
+ in: {
+ p: "یحیی",
+ f: "yahyaa",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "یحییٰ",
+ f: "yahyaa",
+ },
+ out: "یَحْییٰ",
+ },
+ {
+ in: {
+ p: "یحییٰ چېرته",
+ f: "yahyaa cherta",
+ },
+ out: "یَحْییٰ چېرْتَه",
+ },
+ {
+ in: {
+ p: "معنیٰ",
+ f: "ma'anaa",
+ },
+ out: "مَعَنیٰ",
+ },
+ ],
+ },
+ {
+ describe: "require fathatan on words ending in اً ",
+ tests: [
+ {
+ in: {
+ p: "دقیقا",
+ f: "daqeeqan",
+ },
+ out: null,
+ },
+ {
+ in: {
+ p: "دقیقاً",
+ f: "daqeeqan",
+ },
+ out: "دَقِیقاً",
+ },
+ ],
+ },
+ {
+ describe: "Ua ؤ",
+ tests: [
+ {
+ in: {
+ p: "مودب",
+ f: "mUaddab",
+ },
+ out: "مُؤَدَّب",
+ },
+ ],
+ },
+ {
+ describe: "With Arabic definate article -Ul- ال",
+ tests: [
+ {
+ in: {
+ p: "حق الاجاره",
+ f: "haq-Ul-ijaara",
+ },
+ out: "حَق اُلاِجارَه",
+ },
+ {
+ in: {
+ p: "دار العلوم",
+ f: "daar-Ul-Ulóom",
+ },
+ out: "دار اُلعُلُوم",
+ },
+ ],
+ },
+ {
+ describe: "double consonants on end of words",
+ tests: [
+ {
+ in: {
+ p: "حق",
+ f: "haqq",
+ },
+ out: "حَقّ",
+ },
+ {
+ in: {
+ p: "حق پر",
+ f: "haqq par",
+ },
+ out: "حَقّ پَر",
+ },
+ ],
+ },
+ {
+ describe: "أ in the middle of the word",
+ tests: [
+ {
+ in: {
+ p: "متأسف",
+ f: "mUtaassif",
+ },
+ out: "مُتأسِّف",
+ },
+ {
+ in: {
+ p: "متأسف",
+ f: "mUta'assif",
+ },
+ out: "مُتأسِّف",
+ },
+ ],
+ },
+ {
+ describe: "ؤو in middle of the word",
+ tests: [
+ {
+ in: {
+ p: "مسوول",
+ f: "mas'ool",
+ },
+ out: "مَسؤول", // TODO: Is this best??
+ },
+ ],
+ },
+ {
+ describe: "allow for beginnings prefixed with ور در را",
+ tests: [
+ {
+ in: {
+ p: "وراوږد",
+ f: "wăr-ooGad",
+ },
+ out: "وَراُوږَد",
+ },
+ {
+ in: {
+ p: "دراوږد",
+ f: "dăr-ooGad",
+ },
+ out: "دَراُوږَد",
+ },
+ {
+ in: {
+ p: "رااوږد",
+ f: "raa-ooGad",
+ },
+ out: "رااُوږَد",
+ },
+ ],
+ },
+ {
+ describe: "allow oo at start with و prefix",
+ tests: [
+ {
+ in: {
+ p: "وباسي",
+ f: "oobaasee",
+ },
+ out: "وُباسي",
+ },
+ {
+ in: {
+ p: "وځم",
+ f: "oodzum",
+ },
+ out: "وُځ" + zwarakay + "م",
+ },
+ {
+ in: {
+ p: "وځم",
+ f: "wUdzum",
+ },
+ out: "وُځ" + zwarakay + "م",
+ },
+ ],
+ },
];
// diacriticsSections.forEach((section) => {
@@ -1306,15 +1300,13 @@ const diacriticsSections: {
// });
test("ending with left over Pashto script will throw an error", () => {
- expect(() => {
- addDiacritics({ p: "کور ته", f: "kor" });
- }).toThrow(`phonetics error - phonetics shorter than pashto script`);
+ expect(() => {
+ addDiacritics({ p: "کور ته", f: "kor" });
+ }).toThrow(`phonetics error - phonetics shorter than pashto script`);
});
test("ending with left over phonetics will throw an error", () => {
- expect(() => {
- addDiacritics({ p: "کار", f: "kaar kawul" });
- }).toThrow();
+ expect(() => {
+ addDiacritics({ p: "کار", f: "kaar kawul" });
+ }).toThrow();
});
-
-
diff --git a/src/lib/src/diacritics.ts b/src/lib/src/diacritics.ts
index f806890..d73a266 100644
--- a/src/lib/src/diacritics.ts
+++ b/src/lib/src/diacritics.ts
@@ -8,25 +8,25 @@
import * as T from "../../types";
import {
- splitFIntoPhonemes,
- Phoneme,
- zwar,
- zwarakey,
- zer,
- pesh,
- sukun,
- hamzaAbove,
- tashdeed,
- wasla,
- addP,
- advanceP,
- reverseP,
- overwriteP,
- advanceForHamza,
- advanceForHamzaMid,
- DiacriticsAccumulator,
- stateInfo,
- PhonemeStatus,
+ splitFIntoPhonemes,
+ Phoneme,
+ zwar,
+ zwarakay,
+ zer,
+ pesh,
+ sukun,
+ hamzaAbove,
+ tashdeed,
+ wasla,
+ addP,
+ advanceP,
+ reverseP,
+ overwriteP,
+ advanceForHamza,
+ advanceForHamzaMid,
+ DiacriticsAccumulator,
+ stateInfo,
+ PhonemeStatus,
} from "./diacritics-helpers";
import { removeFVarients } from "./accent-and-ps-utils";
@@ -35,176 +35,107 @@ import { pipe } from "rambda";
/**
* Adds diacritics to a given PsString.
* Errors if the phonetics and script don't line up.
+ *
+ * IN PROGRESS - This will hopefully get done and replace the messy, unmaintainable phonetics-to-diacritics.ts currently in use
*/
- export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
- const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? removeFVarients(f) : f);
- const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() });
- if (pIn !== "") {
- throw new Error("phonetics error - phonetics shorter than pashto script");
- }
- return {
- p: pOut,
- f,
- };
+export function addDiacritics(
+ { p, f }: T.PsString,
+ ignoreCommas?: true
+): T.PsString {
+ const phonemes: Phoneme[] = splitFIntoPhonemes(
+ !ignoreCommas ? removeFVarients(f) : f
+ );
+ const { pIn, pOut } = phonemes.reduce(processPhoneme, {
+ pOut: "",
+ pIn: p.trim(),
+ });
+ if (pIn !== "") {
+ throw new Error("phonetics error - phonetics shorter than pashto script");
+ }
+ return {
+ p: pOut,
+ f,
+ };
}
function processPhoneme(
- acc: DiacriticsAccumulator,
- phoneme: Phoneme,
- i: number,
- phonemes: Phoneme[],
+ acc: DiacriticsAccumulator,
+ phoneme: Phoneme,
+ i: number,
+ phonemes: Phoneme[]
): DiacriticsAccumulator {
- const state = acc.pIn.slice(0, 5) === " ... "
- ? advanceP(acc, 5)
- : acc.pIn[0] === " "
- ? advanceP(acc)
- : acc;
+ const state =
+ acc.pIn.slice(0, 5) === " ... "
+ ? advanceP(acc, 5)
+ : acc.pIn[0] === " "
+ ? advanceP(acc)
+ : acc;
- const {
- phonemeInfo,
- diacritic,
- phs,
- prevPLetter,
- } = stateInfo({ state, i, phoneme, phonemes });
+ const { phonemeInfo, diacritic, phs, prevPLetter } = stateInfo({
+ state,
+ i,
+ phoneme,
+ phonemes,
+ });
- return (phs === PhonemeStatus.LeadingLongVowel) ?
- pipe(
- advanceP,
- addP(phonemeInfo.diacritic),
- advanceP,
- )(state)
- : (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
- pipe(
- advanceP,
- addP(diacritic),
- )(state)
- : (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
- pipe(
- prevPLetter === " " ? reverseP : addP(""),
- addP(tashdeed)
- )(state)
- : (phs === PhonemeStatus.EndingWithHeyHim) ?
- pipe(
- advanceP,
- addP(phoneme === "u" ? hamzaAbove : sukun),
- )(state)
- : (phs === PhonemeStatus.DirectMatch) ?
- pipe(
- addP(diacritic),
- advanceP,
- )(state)
- : (phs === PhonemeStatus.DirectMatchAfterSukun) ?
- pipe(
- addP(sukun),
- advanceP,
- )(state)
- : (phs === PhonemeStatus.PersianSilentWWithAa) ?
- pipe(
- addP("("),
- advanceP,
- addP(")"),
- advanceP,
- )(state)
- : (phs === PhonemeStatus.ArabicWasla) ?
- pipe(
- addP(zer),
- overwriteP(wasla),
- )(state)
- : (phs === PhonemeStatus.Izafe) ?
- pipe(
- reverseP,
- addP(zer),
- )(state)
- : (phs === PhonemeStatus.EndOfDuParticle) ?
- pipe(
- reverseP,
- addP(zwarakey),
- )(state)
- : (phs === PhonemeStatus.ShortAEndingAfterHeem) ?
- pipe(
- prevPLetter === " " ? reverseP : addP(""),
- addP(zwar),
- )(state)
- : (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
- pipe(
- addP(sukun),
- advanceP,
- )(state)
- : (phs === PhonemeStatus.AlefDaggarEnding) ?
- pipe(
- advanceP,
- advanceP,
- )(state)
- : (phs === PhonemeStatus.LongAinVowelMissingComma) ?
- pipe(
- addP(diacritic),
- advanceP,
- addP(diacritic)
- )(state)
- : (phs === PhonemeStatus.ShortAinVowelMissingComma) ?
- pipe(
- addP(diacritic),
- advanceP,
- )(state)
- : (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
- pipe(
- advanceP,
- advanceP,
- )(state)
- : (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
- pipe(
- advanceP,
- advanceP,
- )(state)
- : (phs === PhonemeStatus.AlefWithHamza) ?
- pipe(
- advanceP,
- )(state)
- : (phs === PhonemeStatus.ShortVowel) ?
- pipe(
- advanceForHamzaMid,
- addP(phonemeInfo.diacritic),
- // TODO THIS?
- advanceForHamza,
- )(state)
- : (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
- pipe(
- advanceP,
- )(state)
- : (phs === PhonemeStatus.NOnFathatan) ?
- pipe(
- advanceP,
- )(state)
- : (phs === PhonemeStatus.HamzaOnWow) ?
- pipe(
- advanceP,
- addP(hamzaAbove),
- addP(diacritic),
- )(state)
- : (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
- pipe(
- advanceP,
- addP(pesh),
- advanceP,
- )(state)
- : (phs === PhonemeStatus.OoPrefix) ?
- pipe(
- advanceP,
- addP(pesh),
- )(state)
- : (phs === PhonemeStatus.GlottalStopBeforeOo) ?
- pipe(
- advanceP,
- addP(hamzaAbove),
- )(state)
- : (phs === PhonemeStatus.OoAfterGlottalStopOo) ?
- pipe(
- advanceP,
- )(state)
- : (phs === PhonemeStatus.SilentAinAfterAlef) ?
- pipe(
- advanceP,
- advanceP,
- )(state)
- : state;
+ return phs === PhonemeStatus.LeadingLongVowel
+ ? pipe(advanceP, addP(phonemeInfo.diacritic), advanceP)(state)
+ : phs === PhonemeStatus.LeadingConsonantOrShortVowel
+ ? pipe(advanceP, addP(diacritic))(state)
+ : phs === PhonemeStatus.DoubleConsonantTashdeed
+ ? pipe(prevPLetter === " " ? reverseP : addP(""), addP(tashdeed))(state)
+ : phs === PhonemeStatus.EndingWithHayHim
+ ? pipe(advanceP, addP(phoneme === "u" ? hamzaAbove : sukun))(state)
+ : phs === PhonemeStatus.DirectMatch
+ ? pipe(addP(diacritic), advanceP)(state)
+ : phs === PhonemeStatus.DirectMatchAfterSukun
+ ? pipe(addP(sukun), advanceP)(state)
+ : phs === PhonemeStatus.PersianSilentWWithAa
+ ? pipe(addP("("), advanceP, addP(")"), advanceP)(state)
+ : phs === PhonemeStatus.ArabicWasla
+ ? pipe(addP(zer), overwriteP(wasla))(state)
+ : phs === PhonemeStatus.Izafe
+ ? pipe(reverseP, addP(zer))(state)
+ : phs === PhonemeStatus.EndOfDuParticle
+ ? pipe(reverseP, addP(zwarakay))(state)
+ : phs === PhonemeStatus.ShortAEndingAfterHeem
+ ? pipe(prevPLetter === " " ? reverseP : addP(""), addP(zwar))(state)
+ : phs === PhonemeStatus.EndingWithHayHimFromSukun
+ ? pipe(addP(sukun), advanceP)(state)
+ : phs === PhonemeStatus.AlefDaggarEnding
+ ? pipe(advanceP, advanceP)(state)
+ : phs === PhonemeStatus.LongAinVowelMissingComma
+ ? pipe(addP(diacritic), advanceP, addP(diacritic))(state)
+ : phs === PhonemeStatus.ShortAinVowelMissingComma
+ ? pipe(addP(diacritic), advanceP)(state)
+ : phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart
+ ? pipe(advanceP, advanceP)(state)
+ : phs === PhonemeStatus.AinWithLongAAtBeginning
+ ? pipe(advanceP, advanceP)(state)
+ : phs === PhonemeStatus.AlefWithHamza
+ ? pipe(advanceP)(state)
+ : phs === PhonemeStatus.ShortVowel
+ ? pipe(
+ advanceForHamzaMid,
+ addP(phonemeInfo.diacritic),
+ // TODO THIS?
+ advanceForHamza
+ )(state)
+ : phs === PhonemeStatus.ShortAForAlefBeforeFathatan
+ ? pipe(advanceP)(state)
+ : phs === PhonemeStatus.NOnFathatan
+ ? pipe(advanceP)(state)
+ : phs === PhonemeStatus.HamzaOnWow
+ ? pipe(advanceP, addP(hamzaAbove), addP(diacritic))(state)
+ : phs === PhonemeStatus.ArabicDefiniteArticleUl
+ ? pipe(advanceP, addP(pesh), advanceP)(state)
+ : phs === PhonemeStatus.OoPrefix
+ ? pipe(advanceP, addP(pesh))(state)
+ : phs === PhonemeStatus.GlottalStopBeforeOo
+ ? pipe(advanceP, addP(hamzaAbove))(state)
+ : phs === PhonemeStatus.OoAfterGlottalStopOo
+ ? pipe(advanceP)(state)
+ : phs === PhonemeStatus.SilentAinAfterAlef
+ ? pipe(advanceP, advanceP)(state)
+ : state;
}
diff --git a/src/lib/src/phonetics-to-diacritics.test.ts b/src/lib/src/phonetics-to-diacritics.test.ts
index c123a14..3d58668 100644
--- a/src/lib/src/phonetics-to-diacritics.test.ts
+++ b/src/lib/src/phonetics-to-diacritics.test.ts
@@ -7,1109 +7,1110 @@
*/
import {
- phoneticsToDiacritics,
- splitFIntoPhonemes,
+ phoneticsToDiacritics,
+ splitFIntoPhonemes,
} from "./phonetics-to-diacritics";
-const zwarakey = "ٙ";
+const zwarakay = "ٙ";
const phonemeSplits: Array<{
- in: string,
- out: string[],
+ in: string;
+ out: string[];
}> = [
- {
- in: "kor",
- out: ["k", "o", "r"],
- },
- {
- in: "raaghey",
- out: ["r", "aa", "gh", "ey"],
- },
- {
- in: "hatsa",
- out: ["h", "a", "ts", "a"],
- },
- {
- in: "ba",
- out: ["b", "a"],
- },
- {
- in: "peydáa",
- out: ["p", "ey", "d", "áa"],
- },
- {
- in: "be kaar",
- out: ["b", "e", "k", "aa", "r"],
- },
- {
- in: "raadzeyy",
- out: ["r", "aa", "dz", "eyy"],
- },
- {
- in: "badanuy ??",
- out: ["b", "a", "d", "a", "n", "uy"],
- },
- {
- in: "tur ... pore",
- out: ["t", "u", "r", "p", "o", "r", "e"],
- },
- {
- in: "daar-Ul-iqaama",
- out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
- },
+ {
+ in: "kor",
+ out: ["k", "o", "r"],
+ },
+ {
+ in: "raaghay",
+ out: ["r", "aa", "gh", "ay"],
+ },
+ {
+ in: "hatsa",
+ out: ["h", "a", "ts", "a"],
+ },
+ {
+ in: "ba",
+ out: ["b", "a"],
+ },
+ {
+ in: "paydáa",
+ out: ["p", "ay", "d", "áa"],
+ },
+ {
+ in: "be kaar",
+ out: ["b", "e", "k", "aa", "r"],
+ },
+ {
+ in: "raadzey",
+ out: ["r", "aa", "dz", "ey"],
+ },
+ {
+ in: "badanuy ??",
+ out: ["b", "a", "d", "a", "n", "uy"],
+ },
+ {
+ in: "tur ... pore",
+ out: ["t", "u", "r", "p", "o", "r", "e"],
+ },
+ {
+ in: "daar-Ul-iqaama",
+ out: ["d", "aa", "r", "-Ul-", "i", "q", "aa", "m", "a"],
+ },
];
phonemeSplits.forEach((s) => {
- test(`${s.in} should split properly`, () => {
- const result = splitFIntoPhonemes(s.in);
- expect(result).toEqual(s.out);
- });
+ test(`${s.in} should split properly`, () => {
+ const result = splitFIntoPhonemes(s.in);
+ expect(result).toEqual(s.out);
+ });
});
const toTest: Array<{
- in: { p: string, f: string },
- out: string | undefined,
+ in: { p: string; f: string };
+ out: string | undefined;
}> = [
- {
- in: {
- p: "کور",
- f: "kor",
- },
- out: "کور",
- },
- {
- in: {
- p: "کور",
- f: "koor",
- },
- out: "کُور",
- },
- {
- in: {
- p: "تب",
- f: "tib",
- },
- out: "تِب",
- },
- {
- in: {
- p: "تب",
- f: "tab",
- },
- out: "تَب",
- },
- {
- in: {
- p: "تب",
- f: "tUb",
- },
- out: "تُب",
- },
- {
- in: {
- p: "تب",
- f: "tub",
- },
- out: "تٙب",
- },
- {
- in: {
- p: "تب",
- f: "tb",
- },
- out: "تْب",
- },
- {
- in: {
- p: "تلب",
- f: "tilab",
- },
- out: "تِلَب",
- },
- {
- in: {
- p: "تشناب",
- f: "tashnaab",
- },
- out: "تَشْناب",
- },
- // broken phonetics will return undefined
- {
- in: {
- p: "تشناب",
- f: "peshnaab",
- },
- out: undefined,
- },
- // working with وs
- {
- in: {
- p: "کول",
- f: "kwal",
- },
- out: "کْوَل",
- },
- {
- in: {
- p: "تول",
- f: "tool",
- },
- out: "تُول",
- },
- {
- in: {
- p: "مقبول",
- f: "maqbool",
- },
- out: "مَقْبُول",
- },
- {
- in: {
- p: "کول",
- f: "kawul",
- },
- out: "کَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kiwul",
- },
- out: "کِو" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kUwul",
- },
- out: "کُو" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kuwul",
- },
- out: "ک" + zwarakey + "و" + zwarakey + "ل",
- },
- {
- in: {
- p: "کول",
- f: "kawal",
- },
- out: "کَوَل",
- },
- {
- in: {
- p: "کول",
- f: "kUwal",
- },
- out: "کُوَل",
- },
- {
- in: {
- p: "پشتګرد",
- f: "pishtgird",
- },
- out: "پِشْتْګِرْد",
- },
- {
- in: {
- p: "سپین",
- f: "speen",
- },
- out: "سْپِین",
- },
- {
- in: {
- p: "سپین",
- f: "speyn",
- },
- out: "سْپین",
- },
- {
- in: {
- p: "پېش",
- f: "pesh",
- },
- out: "پېش",
- },
- {
- in: {
- p: "پېش",
- f: "peysh",
- },
- out: undefined,
- },
- {
- in: {
- p: "رغېدل",
- f: "raghedul",
- },
- out: "رَغېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "کارول",
- f: "kaarawul",
- },
- out: "کارَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "پېښېدل",
- f: "pexedul",
- },
- out: "پېښېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "مین",
- f: "mayín",
- },
- out: "مَیِن",
- },
- {
- in: {
- p: "سړی",
- f: "saRey",
- },
- out: "سَړی",
- },
- {
- in: {
- p: "سړي",
- f: "saRee",
- },
- out: "سَړي",
- },
- {
- in: {
- p: "زه",
- f: "zu",
- },
- out: "زهٔ",
- },
- {
- in: {
- p: "زه",
- f: "za",
- },
- out: "زه",
- },
- {
- in: {
- p: "پېشنهاد",
- f: "peshniháad",
- },
- out: "پېشْنِهاد",
- },
- {
- in: {
- p: "ایستل",
- f: "eestul",
- },
- out: "اِیسْت" + zwarakey + "ل",
- },
- {
- in: {
- p: "ایستل",
- f: "eystul",
- },
- out: "ایسْت" + zwarakey + "ل",
- },
- {
- in: {
- p: "اېسېدل",
- f: "esedul",
- },
- out: "اېسېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "اوسېدل",
- f: "osedul",
- },
- out: "اوسېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "اواز",
- f: "awaaz",
- },
- out: "اَواز",
- },
- {
- in: {
- p: "اسلام",
- f: "islaam",
- },
- out: "اِسْلام",
- },
- {
- in: {
- p: "واردول",
- f: "waaridawul",
- },
- out: "وارِدَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "غاړه",
- f: "ghaaRa",
- },
- out: "غاړه",
- },
- {
- in: {
- p: "اوتر",
- f: "awtár",
- },
- out: "اَوْتَر",
- },
- {
- in: {
- p: "اختیار",
- f: "ikhtiyáar",
- },
- out: "اِخْتِیار",
- },
- {
- in: {
- p: "فریاد",
- f: "faryáad",
- },
- out: "فَرْیاد",
- },
- {
- in: {
- p: "کارغه",
- f: "kaarghu",
- },
- out: "کارْغهٔ",
- },
- {
- in: {
- p: "بې کار",
- f: "be kaar",
- },
- out: "بې کار",
- },
- {
- in: {
- p: "بې کار",
- f: "bekaar",
- },
- out: "بې کار",
- },
- {
- in: {
- p: "انبار",
- f: "ambáar",
- },
- out: "اَنْبار",
- },
- {
- in: {
- p: "ارغون",
- f: "arghóon",
- },
- out: "اَرْغُون",
- },
- {
- in: {
- p: "ارمټه",
- f: "armaTa",
- },
- out: "اَرْمَټه",
- },
- {
- in: {
- p: "اروا پوه",
- f: "arwaa poh",
- },
- out: "اَرْوا پوهْ",
- },
- {
- in: {
- p: "اسحاق",
- f: "ishaaq",
- },
- out: undefined,
- },
- {
- in: {
- p: "اسحاق",
- f: "is`haaq",
- },
- out: "اِسْحاق",
- },
- {
- in: {
- p: "سعات",
- f: "saat",
- },
- out: "سعات",
- },
- {
- in: {
- p: "سعات",
- f: "sa'aat",
- },
- out: "سَعات",
- },
- {
- in: {
- p: "استعمال",
- f: "ist'imaal",
- },
- out: "اِسْتعِمال",
- },
- {
- in: {
- p: "استعمال",
- f: "istimaal",
- },
- out: "اِسْتعِمال",
- },
- {
- in: {
- p: "اروایي",
- f: "arwaayee",
- },
- out: "اَرْوایي",
- },
- {
- in: {
- p: "اریځ",
- f: "Uryadz",
- },
- out: "اُرْیَځ",
- },
- {
- in: {
- p: "ازغن تار",
- f: "azghun taar",
- },
- out: "اَزْغ" + zwarakey + "ن" + " تار",
- },
- {
- in: {
- p: "اره څکول",
- f: "ara tskawul",
- },
- out: "اَره څْکَو" + zwarakey + "ل",
- },
- {
- in: {
- p: "اږیل",
- f: "aGuyúl",
- },
- out: "اَږ" + zwarakey + "ی" + zwarakey + "ل",
- },
- {
- in: {
- p: "استازندوی",
- f: "astaazandoy",
- },
- out: "اَسْتازَنْدوی",
- },
- // واخ being khaa in the middle of a word
- {
- in: {
- p: "استخوان",
- f: "UstUkháan",
- },
- out: "اُسْتُخ(و)ان",
- },
- {
- in: {
- p: "اسطلاع",
- f: "istilaa",
- },
- out: "اِسْطِلاع",
- },
- {
- in: {
- p: "اسهال",
- f: "is`háal",
- },
- out: "اِسْهال",
- },
- {
- in: {
- p: "اسهامي",
- f: "as`haamee",
- },
- out: "اَسْهامي",
- },
- // avoid false double consonant
- {
- in: {
- p: "ازل لیک",
- f: "azalléek",
- },
- out: "اَزَل لِیک",
- },
- // bad ending test
- {
- in: {
- p: "ماضی",
- f: "maazee",
- },
- out: undefined,
- },
- // bad beginning test
- {
- in: {
- p: "وسېدل",
- f: "osedul",
- },
- out: undefined,
- },
- {
- in: {
- p: "يست",
- f: "eest",
- },
- out: undefined,
- },
- {
- in: {
- p: "ست",
- f: "ist",
- },
- out: undefined,
- },
- {
- in: {
- p: "haca",
- f: "هځه",
- },
- out: undefined,
- },
- // tashdeed
- {
- in: {
- p: "پته",
- f: "patta",
- },
- out: "پَتّه",
- },
- {
- in: {
- p: "اعتصاب شکن",
- f: "itisaabshikan",
- },
- out: "اِعتِصاب شِکَن",
- },
- // Arabic wasla
- {
- in: {
- p: "بالکل",
- f: "bilkUl",
- },
- out: "بِٱلْکُل",
- },
- // izafe
- {
- in: {
- p: "ایصال ثواب",
- f: "eesaal-i-sawaab",
- },
- out: "اِیصالِ ثَواب",
- },
- {
- in: {
- p: "با استعداد",
- f: "baa isti'dáad",
- },
- out: "با اِسْتِعداد",
- },
- // starting with ع
- {
- in: {
- p: "عزت",
- f: "izzat",
- },
- out: "عِزَّت",
- },
- {
- in: {
- p: "عزت",
- f: "i'zzat",
- },
- out: "عِزَّت",
- },
- // ئ in the middle
- {
- in: {
- p: "برائت",
- f: "baraa'at",
- },
- out: "بَرائَت",
- },
- {
- in: {
- p: "فائده",
- f: "faaida",
- },
- out: "فائِده",
- },
- // starting with long aa
- {
- in: {
- p: "آدم",
- f: "aadam",
- },
- out: "آدَم",
- },
- {
- in: {
- p: "یدام",
- f: "aadam",
- },
- out: undefined,
- }, {
- in: {
- p: "منع",
- f: "mán'a",
- },
- out: "مَنعَ",
- },
- {
- in: {
- p: "منع",
- f: "mana",
- },
- out: "مَنعَ",
- },
- {
- in: {
- p: "منابع",
- f: "mUnaabí",
- },
- out: "مُنابعِ",
- },
- {
- // TODO: Is this correct??
- in: {
- p: "اسان",
- f: "aasaan",
- },
- out: "اسان",
- },
- // ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی
- {
- in: {
- p: "پتېیل",
- f: "pateyúl",
- },
- out: undefined,
- },
- {
- in: {
- p: "پتېیل",
- f: "pate`yúl",
- },
- out: "پَتېی" + zwarakey + "ل",
- },
- {
- in: {
- p: "درېیم",
- f: "dre`yum",
- },
- out: "دْرېی" + zwarakey + "م",
- },
- {
- in: {
- p: "تابع دار",
- f: "taabidaar",
- },
- out: "تابعِ دار",
- },
- // handle circumpositions
- {
- in: {
- p: "تر ... پورې",
- f: "tur ... pore",
- },
- out: "ت" + zwarakey + "ر ... پورې",
- },
- // joiner و
- {
- in: {
- p: "کار و بار",
- f: "kaar-U-baar",
- },
- out: "کار و بار",
- },
- {
- in: {
- p: "کاروبار",
- f: "kaar-U-baar",
- },
- out: "کاروبار",
- },
- {
- in: {
- p: "توقع",
- f: "tawaqqÚ",
- },
- out: "تَوَقّعُ",
- },
- // special behaviour with د
- {
- in: {
- p: "د",
- f: "du",
- },
- out: "د" + zwarakey,
- },
- {
- in: {
- p: "د لاس",
- f: "du laas",
- },
- out: "د" + zwarakey + " لاس",
- },
- {
- in: {
- p: "د ... په شان",
- f: "du ... pu shaan",
- },
- out: "د" + zwarakey + " ... پهٔ شان",
- },
- {
- in: {
- p: "ذبح",
- f: "zabha",
- },
- out: "ذَبْحَ",
- },
- {
- in: {
- p: "ذبح",
- f: "zabha",
- },
- out: "ذَبْحَ",
- },
- {
- in: {
- p: "ذبح کول",
- f: "zabha kawul",
- },
- out: "ذَبْحَ کَو" + zwarakey + "ل",
- },
- // require dagger alif on words ending with یٰ
- {
- in: {
- p: "یحیی",
- f: "yahyaa",
- },
- out: undefined,
- },
- {
- in: {
- p: "یحییٰ",
- f: "yahyaa",
- },
- out: "یَحْییٰ",
- },
- {
- in: {
- p: "معنیٰ",
- f: "ma'anaa",
- },
- out: "مَعَنیٰ",
- },
- // require fathatan on words ending in اً
- {
- in: {
- p: "دقیقا",
- f: "daqeeqan",
- },
- out: undefined,
- },
- {
- in: {
- p: "دقیقاً",
- f: "daqeeqan",
- },
- out: "دَقِیقاً",
- },
- // words starting in عا
- {
- in: {
- p: "عام",
- f: "aam",
- },
- out: "عام",
- },
- {
- in: {
- p: "عام",
- f: "'aam",
- },
- out: "عام",
- },
- {
- in: {
- p: "قتل عام",
- f: "qatl-i-aam",
- },
- out: "قَتْلِ عام",
- },
- {
- in: {
- p: "طمع لرل",
- f: "tama larul",
- },
- out: "طَمعَ لَر" + zwarakey + "ل",
- },
- // Ua ؤ
- {
- in: {
- p: "مودب",
- f: "mUaddab",
- },
- out: "مؤدَّب",
- },
- {
- in: {
- p: "لکۍ وال",
- f: "lakuy waal",
- },
- out: "لَکۍ وال",
- },
- // shouldn't skip the ئ at the end
- {
- in: {
- p: "شئ",
- f: "sheyy",
- },
- out: "شئ",
- },
- // excetption for و - wo
- {
- in: {
- p: "و",
- f: "wo",
- },
- out: "و",
- },
- {
- in: {
- p: "سړی و",
- f: "saRey wo",
- },
- out: "سَړی و",
- },
- {
- in: {
- p: "عید",
- f: "eed",
- },
- out: "عِید",
- },
- // i ending can also be i
- {
- in: {
- p: "سه",
- f: "si",
- },
- out: "سِه",
- },
- {
- in: {
- p: "سه شنبه",
- f: "sishamba",
- },
- out: "سِه شَنْبه",
- },
- {
- in: {
- p: "توجه",
- f: "tawajÚ",
- },
- out: "تَوَجُه",
- },
- {
- in: {
- p: "توجه کول",
- f: "tawajU kawul",
- },
- out: "تَوَجُه کَو" + zwarakey + "ل",
- },
- // With Arabic definate article -Ul- ال
- {
- in: {
- p: "حق الاجاره",
- f: "haq-Ul-ijaara",
- },
- out: "حَق اُلاِجاره",
- },
- {
- in: {
- p: "دار العلوم",
- f: "daar-Ul-Ulóom",
- },
- out: "دار اُلعُلُوم",
- },
- // double consonants on end of words
- {
- in: {
- p: "حق",
- f: "haqq",
- },
- out: "حَقّ",
- },
- {
- in: {
- p: "حق پر",
- f: "haqq par",
- },
- out: "حَقّ پَر",
- },
- {
- in: {
- p: "راجع کېدل",
- f: "raaji kedul",
- },
- out: "راجعِ کېد" + zwarakey + "ل",
- },
- {
- in: {
- p: "ربیع",
- f: "rabee'",
- },
- out: "رَبِیع",
- },
- {
- in: {
- p: "سختسری",
- f: "sakht sărey",
- },
- out: "سَخْتْسَری",
- },
- {
- in: {
- p: "معنیٰ",
- f: "ma'naa",
- },
- out: "مَعنیٰ",
- },
- // issue with یٰ ending and then continuing to the next word
- {
- in: {
- p: "معنیٰ دار",
- f: "ma'naa daar",
- },
- out: "مَعنیٰ دار",
- },
- {
- in: {
- p: "اله",
- f: "ilah",
- },
- out: "اِلَهْ",
- },
- // issue with words ending in عه going to the next word
- {
- in: {
- p: "قطعه بازي",
- f: "qit'a baazee",
- },
- out: "قِطعه بازي",
- },
- // أ in the middle of the word
- {
- in: {
- p: "متأسف",
- f: "mUta'assif",
- },
- out: "مُتأسِّف",
- },
- // words ending in ع a' on to the next word
- {
- in: {
- p: "مربع",
- f: "mUraba'",
- },
- out: "مُرَبَع",
- },
- {
- in: {
- p: "مربع جذر",
- f: "mUraba' jazúr",
- },
- out: "مُرَبَع جَذ" + zwarakey + "ر",
- },
- {
- in: {
- p: "مسوول",
- f: "mas'ool",
- },
- out: "مَسوُول", // TODO: Is this best??
- },
- // allow for beginnings prefixed with ور در را
- {
- in: {
- p: "وراوږد",
- f: "wăr-ooGad",
- },
- out: "وَراُوږَد",
- },
- {
- in: {
- p: "دراوږد",
- f: "dăr-ooGad",
- },
- out: "دَراُوږَد",
- },
- {
- in: {
- p: "رااوږد",
- f: "raa-ooGad",
- },
- out: "رااُوږَد",
- },
- // allow for spaces at beginning of phonetics etc.
- {
- in: {
- p: " سپین کړه",
- f: " speen kRu",
- },
- out: "سْپِین کْړهٔ",
- },
- {
- in: {
- p: "اوب",
- f: "ob",
- },
- out: "اوب",
- },
- // allow oo at start with و prefix
- {
- in: {
- p: "وباسي",
- f: "oobaasee",
- },
- out: "وباسي",
- },
- {
- in: {
- p: "وځم",
- f: "oodzum",
- },
- out: "وځ" + zwarakey + "م",
- },
- {
- in: {
- p: "وځم",
- f: "wUdzum",
- },
- out: "وُځ" + zwarakey + "م",
- },
+ {
+ in: {
+ p: "کور",
+ f: "kor",
+ },
+ out: "کور",
+ },
+ {
+ in: {
+ p: "کور",
+ f: "koor",
+ },
+ out: "کُور",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tib",
+ },
+ out: "تِب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tab",
+ },
+ out: "تَب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tUb",
+ },
+ out: "تُب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tub",
+ },
+ out: "تٙب",
+ },
+ {
+ in: {
+ p: "تب",
+ f: "tb",
+ },
+ out: "تْب",
+ },
+ {
+ in: {
+ p: "تلب",
+ f: "tilab",
+ },
+ out: "تِلَب",
+ },
+ {
+ in: {
+ p: "تشناب",
+ f: "tashnaab",
+ },
+ out: "تَشْناب",
+ },
+ // broken phonetics will return undefined
+ {
+ in: {
+ p: "تشناب",
+ f: "peshnaab",
+ },
+ out: undefined,
+ },
+ // working with وs
+ {
+ in: {
+ p: "کول",
+ f: "kwal",
+ },
+ out: "کْوَل",
+ },
+ {
+ in: {
+ p: "تول",
+ f: "tool",
+ },
+ out: "تُول",
+ },
+ {
+ in: {
+ p: "مقبول",
+ f: "maqbool",
+ },
+ out: "مَقْبُول",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kawul",
+ },
+ out: "کَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kiwul",
+ },
+ out: "کِو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kUwul",
+ },
+ out: "کُو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kuwul",
+ },
+ out: "ک" + zwarakay + "و" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kawal",
+ },
+ out: "کَوَل",
+ },
+ {
+ in: {
+ p: "کول",
+ f: "kUwal",
+ },
+ out: "کُوَل",
+ },
+ {
+ in: {
+ p: "پشتګرد",
+ f: "pishtgird",
+ },
+ out: "پِشْتْګِرْد",
+ },
+ {
+ in: {
+ p: "سپین",
+ f: "speen",
+ },
+ out: "سْپِین",
+ },
+ {
+ in: {
+ p: "سپین",
+ f: "spayn",
+ },
+ out: "سْپین",
+ },
+ {
+ in: {
+ p: "پېش",
+ f: "pesh",
+ },
+ out: "پېش",
+ },
+ {
+ in: {
+ p: "پېش",
+ f: "paysh",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "رغېدل",
+ f: "raghedul",
+ },
+ out: "رَغېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "کارول",
+ f: "kaarawul",
+ },
+ out: "کارَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "پېښېدل",
+ f: "pexedul",
+ },
+ out: "پېښېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "مین",
+ f: "ma`yín",
+ },
+ out: "مَیِن",
+ },
+ {
+ in: {
+ p: "سړی",
+ f: "saRay",
+ },
+ out: "سَړی",
+ },
+ {
+ in: {
+ p: "سړي",
+ f: "saRee",
+ },
+ out: "سَړي",
+ },
+ {
+ in: {
+ p: "زه",
+ f: "zu",
+ },
+ out: "زهٔ",
+ },
+ {
+ in: {
+ p: "زه",
+ f: "za",
+ },
+ out: "زه",
+ },
+ {
+ in: {
+ p: "پېشنهاد",
+ f: "peshniháad",
+ },
+ out: "پېشْنِهاد",
+ },
+ {
+ in: {
+ p: "ایستل",
+ f: "eestul",
+ },
+ out: "اِیسْت" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "ایستل",
+ f: "aystul",
+ },
+ out: "ایسْت" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "اېسېدل",
+ f: "esedul",
+ },
+ out: "اېسېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "اوسېدل",
+ f: "osedul",
+ },
+ out: "اوسېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "اواز",
+ f: "awaaz",
+ },
+ out: "اَواز",
+ },
+ {
+ in: {
+ p: "اسلام",
+ f: "islaam",
+ },
+ out: "اِسْلام",
+ },
+ {
+ in: {
+ p: "واردول",
+ f: "waaridawul",
+ },
+ out: "وارِدَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "غاړه",
+ f: "ghaaRa",
+ },
+ out: "غاړه",
+ },
+ {
+ in: {
+ p: "اوتر",
+ f: "awtár",
+ },
+ out: "اَوْتَر",
+ },
+ {
+ in: {
+ p: "اختیار",
+ f: "ikhtiyáar",
+ },
+ out: "اِخْتِیار",
+ },
+ {
+ in: {
+ p: "فریاد",
+ f: "faryáad",
+ },
+ out: "فَرْیاد",
+ },
+ {
+ in: {
+ p: "کارغه",
+ f: "kaarghu",
+ },
+ out: "کارْغهٔ",
+ },
+ {
+ in: {
+ p: "بې کار",
+ f: "be kaar",
+ },
+ out: "بې کار",
+ },
+ {
+ in: {
+ p: "بې کار",
+ f: "bekaar",
+ },
+ out: "بې کار",
+ },
+ {
+ in: {
+ p: "انبار",
+ f: "ambáar",
+ },
+ out: "اَنْبار",
+ },
+ {
+ in: {
+ p: "ارغون",
+ f: "arghóon",
+ },
+ out: "اَرْغُون",
+ },
+ {
+ in: {
+ p: "ارمټه",
+ f: "armaTa",
+ },
+ out: "اَرْمَټه",
+ },
+ {
+ in: {
+ p: "اروا پوه",
+ f: "arwaa poh",
+ },
+ out: "اَرْوا پوهْ",
+ },
+ {
+ in: {
+ p: "اسحاق",
+ f: "ishaaq",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "اسحاق",
+ f: "is`haaq",
+ },
+ out: "اِسْحاق",
+ },
+ {
+ in: {
+ p: "سعات",
+ f: "saat",
+ },
+ out: "سعات",
+ },
+ {
+ in: {
+ p: "سعات",
+ f: "sa'aat",
+ },
+ out: "سَعات",
+ },
+ {
+ in: {
+ p: "استعمال",
+ f: "ist'imaal",
+ },
+ out: "اِسْتعِمال",
+ },
+ {
+ in: {
+ p: "استعمال",
+ f: "istimaal",
+ },
+ out: "اِسْتعِمال",
+ },
+ {
+ in: {
+ p: "اروایي",
+ f: "arwaayee",
+ },
+ out: "اَرْوایي",
+ },
+ {
+ in: {
+ p: "اریځ",
+ f: "Uryadz",
+ },
+ out: "اُرْیَځ",
+ },
+ {
+ in: {
+ p: "ازغن تار",
+ f: "azghun taar",
+ },
+ out: "اَزْغ" + zwarakay + "ن" + " تار",
+ },
+ {
+ in: {
+ p: "اره څکول",
+ f: "ara tskawul",
+ },
+ out: "اَره څْکَو" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "اږیل",
+ f: "aGuyúl",
+ },
+ out: "اَږ" + zwarakay + "ی" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "استازندوی",
+ f: "astaazandoy",
+ },
+ out: "اَسْتازَنْدوی",
+ },
+ // واخ being khaa in the middle of a word
+ {
+ in: {
+ p: "استخوان",
+ f: "UstUkháan",
+ },
+ out: "اُسْتُخ(و)ان",
+ },
+ {
+ in: {
+ p: "اسطلاع",
+ f: "istilaa",
+ },
+ out: "اِسْطِلاع",
+ },
+ {
+ in: {
+ p: "اسهال",
+ f: "is`háal",
+ },
+ out: "اِسْهال",
+ },
+ {
+ in: {
+ p: "اسهامي",
+ f: "as`haamee",
+ },
+ out: "اَسْهامي",
+ },
+ // avoid false double consonant
+ {
+ in: {
+ p: "ازل لیک",
+ f: "azalléek",
+ },
+ out: "اَزَل لِیک",
+ },
+ // bad ending test
+ {
+ in: {
+ p: "ماضی",
+ f: "maazee",
+ },
+ out: undefined,
+ },
+ // bad beginning test
+ {
+ in: {
+ p: "وسېدل",
+ f: "osedul",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "يست",
+ f: "eest",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "ست",
+ f: "ist",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "haca",
+ f: "هځه",
+ },
+ out: undefined,
+ },
+ // tashdeed
+ {
+ in: {
+ p: "پته",
+ f: "patta",
+ },
+ out: "پَتّه",
+ },
+ {
+ in: {
+ p: "اعتصاب شکن",
+ f: "itisaabshikan",
+ },
+ out: "اِعتِصاب شِکَن",
+ },
+ // Arabic wasla
+ {
+ in: {
+ p: "بالکل",
+ f: "bilkUl",
+ },
+ out: "بِٱلْکُل",
+ },
+ // izafe
+ {
+ in: {
+ p: "ایصال ثواب",
+ f: "eesaal-i-sawaab",
+ },
+ out: "اِیصالِ ثَواب",
+ },
+ {
+ in: {
+ p: "با استعداد",
+ f: "baa isti'dáad",
+ },
+ out: "با اِسْتِعداد",
+ },
+ // starting with ع
+ {
+ in: {
+ p: "عزت",
+ f: "izzat",
+ },
+ out: "عِزَّت",
+ },
+ {
+ in: {
+ p: "عزت",
+ f: "i'zzat",
+ },
+ out: "عِزَّت",
+ },
+ // ئ in the middle
+ {
+ in: {
+ p: "برائت",
+ f: "baraa'at",
+ },
+ out: "بَرائَت",
+ },
+ {
+ in: {
+ p: "فائده",
+ f: "faaida",
+ },
+ out: "فائِده",
+ },
+ // starting with long aa
+ {
+ in: {
+ p: "آدم",
+ f: "aadam",
+ },
+ out: "آدَم",
+ },
+ {
+ in: {
+ p: "یدام",
+ f: "aadam",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "منع",
+ f: "mán'a",
+ },
+ out: "مَنعَ",
+ },
+ {
+ in: {
+ p: "منع",
+ f: "mana",
+ },
+ out: "مَنعَ",
+ },
+ {
+ in: {
+ p: "منابع",
+ f: "mUnaabí",
+ },
+ out: "مُنابعِ",
+ },
+ {
+ // TODO: Is this correct??
+ in: {
+ p: "اسان",
+ f: "aasaan",
+ },
+ out: "اسان",
+ },
+ // ې followed by ی - y needs to be written as e`y to be distinguished from ay - ی
+ {
+ in: {
+ p: "پتېیل",
+ f: "patayúl",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "پتېیل",
+ f: "pate`yúl",
+ },
+ out: "پَتېی" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "درېیم",
+ f: "dre`yum",
+ },
+ out: "دْرېی" + zwarakay + "م",
+ },
+ {
+ in: {
+ p: "تابع دار",
+ f: "taabidaar",
+ },
+ out: "تابعِ دار",
+ },
+ // handle circumpositions
+ {
+ in: {
+ p: "تر ... پورې",
+ f: "tur ... pore",
+ },
+ out: "ت" + zwarakay + "ر ... پورې",
+ },
+ // joiner و
+ {
+ in: {
+ p: "کار و بار",
+ f: "kaar-U-baar",
+ },
+ out: "کار و بار",
+ },
+ {
+ in: {
+ p: "کاروبار",
+ f: "kaar-U-baar",
+ },
+ out: "کاروبار",
+ },
+ {
+ in: {
+ p: "توقع",
+ f: "tawaqqÚ",
+ },
+ out: "تَوَقّعُ",
+ },
+ // special behaviour with د
+ {
+ in: {
+ p: "د",
+ f: "du",
+ },
+ out: "د" + zwarakay,
+ },
+ {
+ in: {
+ p: "د لاس",
+ f: "du laas",
+ },
+ out: "د" + zwarakay + " لاس",
+ },
+ {
+ in: {
+ p: "د ... په شان",
+ f: "du ... pu shaan",
+ },
+ out: "د" + zwarakay + " ... پهٔ شان",
+ },
+ {
+ in: {
+ p: "ذبح",
+ f: "zabha",
+ },
+ out: "ذَبْحَ",
+ },
+ {
+ in: {
+ p: "ذبح",
+ f: "zabha",
+ },
+ out: "ذَبْحَ",
+ },
+ {
+ in: {
+ p: "ذبح کول",
+ f: "zabha kawul",
+ },
+ out: "ذَبْحَ کَو" + zwarakay + "ل",
+ },
+ // require dagger alif on words ending with یٰ
+ {
+ in: {
+ p: "یحیی",
+ f: "yahyaa",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "یحییٰ",
+ f: "yahyaa",
+ },
+ out: "یَحْییٰ",
+ },
+ {
+ in: {
+ p: "معنیٰ",
+ f: "ma'anaa",
+ },
+ out: "مَعَنیٰ",
+ },
+ // require fathatan on words ending in اً
+ {
+ in: {
+ p: "دقیقا",
+ f: "daqeeqan",
+ },
+ out: undefined,
+ },
+ {
+ in: {
+ p: "دقیقاً",
+ f: "daqeeqan",
+ },
+ out: "دَقِیقاً",
+ },
+ // words starting in عا
+ {
+ in: {
+ p: "عام",
+ f: "aam",
+ },
+ out: "عام",
+ },
+ {
+ in: {
+ p: "عام",
+ f: "'aam",
+ },
+ out: "عام",
+ },
+ {
+ in: {
+ p: "قتل عام",
+ f: "qatl-i-aam",
+ },
+ out: "قَتْلِ عام",
+ },
+ {
+ in: {
+ p: "طمع لرل",
+ f: "tama larul",
+ },
+ out: "طَمعَ لَر" + zwarakay + "ل",
+ },
+ // Ua ؤ
+ {
+ in: {
+ p: "مودب",
+ f: "mUaddab",
+ },
+ out: "مؤدَّب",
+ },
+ {
+ in: {
+ p: "لکۍ وال",
+ f: "lakuy waal",
+ },
+ out: "لَکۍ وال",
+ },
+ // shouldn't skip the ئ at the end
+ {
+ in: {
+ p: "شئ",
+ f: "shey",
+ },
+ out: "شئ",
+ },
+ // exception for و - wo
+ {
+ in: {
+ p: "و",
+ f: "wo",
+ },
+ out: "و",
+ },
+ {
+ in: {
+ p: "سړی و",
+ f: "saRay wo",
+ },
+ out: "سَړی و",
+ },
+ {
+ in: {
+ p: "عید",
+ f: "eed",
+ },
+ out: "عِید",
+ },
+ // i ending can also be i
+ {
+ in: {
+ p: "سه",
+ f: "si",
+ },
+ out: "سِه",
+ },
+ {
+ in: {
+ p: "سه شنبه",
+ f: "sishamba",
+ },
+ out: "سِه شَنْبه",
+ },
+ {
+ in: {
+ p: "توجه",
+ f: "tawajÚ",
+ },
+ out: "تَوَجُه",
+ },
+ {
+ in: {
+ p: "توجه کول",
+ f: "tawajU kawul",
+ },
+ out: "تَوَجُه کَو" + zwarakay + "ل",
+ },
+ // With Arabic definite article -Ul- ال
+ {
+ in: {
+ p: "حق الاجاره",
+ f: "haq-Ul-ijaara",
+ },
+ out: "حَق اُلاِجاره",
+ },
+ {
+ in: {
+ p: "دار العلوم",
+ f: "daar-Ul-Ulóom",
+ },
+ out: "دار اُلعُلُوم",
+ },
+ // double consonants on end of words
+ {
+ in: {
+ p: "حق",
+ f: "haqq",
+ },
+ out: "حَقّ",
+ },
+ {
+ in: {
+ p: "حق پر",
+ f: "haqq par",
+ },
+ out: "حَقّ پَر",
+ },
+ {
+ in: {
+ p: "راجع کېدل",
+ f: "raaji kedul",
+ },
+ out: "راجعِ کېد" + zwarakay + "ل",
+ },
+ {
+ in: {
+ p: "ربیع",
+ f: "rabee'",
+ },
+ out: "رَبِیع",
+ },
+ {
+ in: {
+ p: "سختسری",
+ f: "sakht săray",
+ },
+ out: "سَخْتْسَری",
+ },
+ {
+ in: {
+ p: "معنیٰ",
+ f: "ma'naa",
+ },
+ out: "مَعنیٰ",
+ },
+ // issue with یٰ ending and then continuing to the next word
+ {
+ in: {
+ p: "معنیٰ دار",
+ f: "ma'naa daar",
+ },
+ out: "مَعنیٰ دار",
+ },
+ {
+ in: {
+ p: "اله",
+ f: "ilah",
+ },
+ out: "اِلَهْ",
+ },
+ // issue with words ending in عه going to the next word
+ {
+ in: {
+ p: "قطعه بازي",
+ f: "qit'a baazee",
+ },
+ out: "قِطعه بازي",
+ },
+ // أ in the middle of the word
+ {
+ in: {
+ p: "متأسف",
+ f: "mUta'assif",
+ },
+ out: "مُتأسِّف",
+ },
+ // words ending in ع a' on to the next word
+ {
+ in: {
+ p: "مربع",
+ f: "mUraba'",
+ },
+ out: "مُرَبَع",
+ },
+ {
+ in: {
+ p: "مربع جذر",
+ f: "mUraba' jazúr",
+ },
+ out: "مُرَبَع جَذ" + zwarakay + "ر",
+ },
+ {
+ in: {
+ p: "مسوول",
+ f: "mas'ool",
+ },
+ out: "مَسوُول", // TODO: Is this best??
+ },
+ // allow for beginnings prefixed with ور در را
+ {
+ in: {
+ p: "وراوږد",
+ f: "wăr-ooGad",
+ },
+ out: "وَراُوږَد",
+ },
+ {
+ in: {
+ p: "دراوږد",
+ f: "dăr-ooGad",
+ },
+ out: "دَراُوږَد",
+ },
+ {
+ in: {
+ p: "رااوږد",
+ f: "raa-ooGad",
+ },
+ out: "رااُوږَد",
+ },
+ // allow for spaces at beginning of phonetics etc.
+ {
+ in: {
+ p: " سپین کړه",
+ f: " speen kRu",
+ },
+ out: "سْپِین کْړهٔ",
+ },
+ {
+ in: {
+ p: "اوب",
+ f: "ob",
+ },
+ out: "اوب",
+ },
+ // allow oo at start with و prefix
+ {
+ in: {
+ p: "وباسي",
+ f: "oobaasee",
+ },
+ out: "وباسي",
+ },
+ {
+ in: {
+ p: "وځم",
+ f: "oodzum",
+ },
+ out: "وځ" + zwarakay + "م",
+ },
+ {
+ in: {
+ p: "وځم",
+ f: "wUdzum",
+ },
+ out: "وُځ" + zwarakay + "م",
+ },
];
// TODO: قطع کول - qat'a kawul - failing
@@ -1117,13 +1118,13 @@ const toTest: Array<{
// TODO: الله words
toTest.forEach((t) => {
- test(`${t.in.p} given phonetics ${t.in.f} should translate to ${t.out}`, () => {
- const output = phoneticsToDiacritics(t.in.p, t.in.f);
- expect(output).toBe(t.out);
- });
+ test(`${t.in.p} given phonetics ${t.in.f} should translate to ${t.out}`, () => {
+ const output = phoneticsToDiacritics(t.in.p, t.in.f);
+ expect(output).toBe(t.out);
+ });
});
test("should forbid oo prefixes when the option is passed", () => {
- const output = phoneticsToDiacritics("وځم", "oodzum", true);
- expect(output).toBe(undefined);
+ const output = phoneticsToDiacritics("وځم", "oodzum", true);
+ expect(output).toBe(undefined);
});
diff --git a/src/lib/src/phonetics-to-diacritics.ts b/src/lib/src/phonetics-to-diacritics.ts
index 6ab1b2b..9c78917 100644
--- a/src/lib/src/phonetics-to-diacritics.ts
+++ b/src/lib/src/phonetics-to-diacritics.ts
@@ -7,7 +7,7 @@
*/
const zwar = "َ";
-const zwarakey = "ٙ";
+const zwarakay = "ٙ";
const zer = "ِ";
const pesh = "ُ";
const sukun = "ْ";
@@ -19,8 +19,25 @@ const fathahan = "ً";
// TODO: THESE OTHER TRIGRAPHS??
const quadrigraphs = ["-Ul-"];
-const trigraphs = ["eyy", "éyy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
-const digraphs = ["ắ", "aa", "áa", "ee", "ée", "ey", "éy", "oo", "óo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
+const trigraphs = ["-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
+const digraphs = [
+ "ắ",
+ "aa",
+ "áa",
+ "ee",
+ "ée",
+ "ay",
+ "áy",
+ "oo",
+ "óo",
+ "kh",
+ "gh",
+ "ts",
+ "dz",
+ "jz",
+ "ch",
+ "sh",
+];
-const endingDigraphs = ["uy", "úy"];
+const endingDigraphs = ["uy", "úy", "ey", "éy"]; // word-final only; "ey"/"éy" (ئ) are 2 chars, so a 3-char slice only matched them at the very end of the string
const willIgnore = ["?", " ", "`", ".", "…"];
@@ -28,7 +45,7 @@ export function splitFIntoPhonemes(f: string): string[] {
const result: string[] = [];
let index = 0;
while (index < f.length) {
- const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
+ const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
const threeLetterChunk = f.slice(index, index + 3);
const fourLetterChunk = f.slice(index, index + 4);
if (quadrigraphs.includes(fourLetterChunk)) {
@@ -89,43 +106,145 @@ const phonemeTable = [
{ phoneme: "m", possibilities: ["م"], consonant: true },
{ phoneme: "n", possibilities: ["ن"], consonant: true },
{ phoneme: "N", possibilities: ["ڼ"], consonant: true },
- { phoneme: "h", possibilities: ["ه", "ح"], consonant: true, takesSukunOnEnding: true },
+ {
+ phoneme: "h",
+ possibilities: ["ه", "ح"],
+ consonant: true,
+ takesSukunOnEnding: true,
+ },
{ phoneme: "w", possibilities: ["و"], consonant: true },
{ phoneme: "y", possibilities: ["ی"], consonant: true },
{ phoneme: "'", possibilities: ["ع", "ئ"], consonant: true },
{ phoneme: "-i-", isIzafe: true },
- { phoneme: "-U-", possibilities: [" و ", "و"]},
- { phoneme: "-Ul-", possibilities: ["ال"]},
+ { phoneme: "-U-", possibilities: [" و ", "و"] },
+ { phoneme: "-Ul-", possibilities: ["ال"] },
// vowels
- { phoneme: "aa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true },
- { phoneme: "áa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true },
- { phoneme: "ee", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true },
- { phoneme: "ée", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true },
+ {
+ phoneme: "aa",
+ possibilities: ["ا"],
+ beginning: ["آ", "ا"],
+ endingPossibilities: ["ا", "یٰ"],
+ isLongA: true,
+ canStartWithAynBefore: true,
+ },
+ {
+ phoneme: "áa",
+ possibilities: ["ا"],
+ beginning: ["آ", "ا"],
+ endingPossibilities: ["ا", "یٰ"],
+ isLongA: true,
+ canStartWithAynBefore: true,
+ },
+ {
+ phoneme: "ee",
+ possibilities: ["ی"],
+ addAlefOnBeginning: true,
+ endingPossibilities: ["ي"],
+ diacritic: zer,
+ canStartWithAynBefore: true,
+ },
+ {
+ phoneme: "ée",
+ possibilities: ["ی"],
+ addAlefOnBeginning: true,
+ endingPossibilities: ["ي"],
+ diacritic: zer,
+ canStartWithAynBefore: true,
+ },
{ phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true },
{ phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true },
{ phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true },
{ phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true },
- { phoneme: "oo", possibilities: ["و"], addAlefOnBeginning: true, alsoCanBePrefix: true, diacritic: pesh },
- { phoneme: "óo", possibilities: ["و"], addAlefOnBeginning: true, diacritic: pesh },
- { phoneme: "ey", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]},
- { phoneme: "éy", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]},
+ {
+ phoneme: "oo",
+ possibilities: ["و"],
+ addAlefOnBeginning: true,
+ alsoCanBePrefix: true,
+ diacritic: pesh,
+ },
+ {
+ phoneme: "óo",
+ possibilities: ["و"],
+ addAlefOnBeginning: true,
+ diacritic: pesh,
+ },
+ {
+ phoneme: "ay",
+ possibilities: ["ی"],
+ addAlefOnBeginning: true,
+ endingPossibilities: ["ی"],
+ },
+ {
+ phoneme: "áy",
+ possibilities: ["ی"],
+ addAlefOnBeginning: true,
+ endingPossibilities: ["ی"],
+ },
{ phoneme: "uy", possibilities: ["ۍ"], endingOnly: true },
{ phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS
- { phoneme: "eyy", possibilities: ["ئ"], endingOnly: true },
- { phoneme: "éyy", possibilities: ["ئ"], endingOnly: true },
+ { phoneme: "ey", possibilities: ["ئ"], endingOnly: true },
+ { phoneme: "éy", possibilities: ["ئ"], endingOnly: true },
- { phoneme: "a", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true },
- { phoneme: "á", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true },
+ {
+ phoneme: "a",
+ diacritic: zwar,
+ endingPossibilities: ["ه"],
+ canComeAfterHayEnding: true,
+ canBeFirstPartOfFathahanEnding: true,
+ },
+ {
+ phoneme: "á",
+ diacritic: zwar,
+ endingPossibilities: ["ه"],
+ canComeAfterHayEnding: true,
+ canBeFirstPartOfFathahanEnding: true,
+ },
{ phoneme: "ă", diacritic: zwar },
{ phoneme: "ắ", diacritic: zwar },
- { phoneme: "u", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true },
- { phoneme: "ú", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true },
- { phoneme: "i", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] },
- { phoneme: "í", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] },
- { phoneme: "U", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] },
- { phoneme: "Ú", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] },
+ {
+ phoneme: "u",
+ diacritic: zwarakay,
+ endingPossibilities: ["ه"],
+ hamzaOnEnd: true,
+ },
+ {
+ phoneme: "ú",
+ diacritic: zwarakay,
+ endingPossibilities: ["ه"],
+ hamzaOnEnd: true,
+ },
+ {
+ phoneme: "i",
+ diacritic: zer,
+ endingPossibilities: ["ه"],
+ takesDiacriticBeforeGurdaHayEnding: true,
+ canBeWasla: true,
+ beginning: ["ا", "ع"],
+ },
+ {
+ phoneme: "í",
+ diacritic: zer,
+ endingPossibilities: ["ه"],
+ takesDiacriticBeforeGurdaHayEnding: true,
+ canBeWasla: true,
+ beginning: ["ا", "ع"],
+ },
+ {
+ phoneme: "U",
+ diacritic: pesh,
+ endingPossibilities: ["ه"],
+ takesDiacriticBeforeGurdaHayEnding: true,
+ beginning: ["ا", "ع"],
+ },
+ {
+ phoneme: "Ú",
+ diacritic: pesh,
+ endingPossibilities: ["ه"],
+ takesDiacriticBeforeGurdaHayEnding: true,
+ beginning: ["ا", "ع"],
+ },
];
function isSpace(s: string): boolean {
@@ -142,7 +261,11 @@ interface IDiacriticsErrorMessage {
i: number;
}
-function possibilityMatches(p: string, pIndex: number, possibilities: string[] | undefined): boolean {
+function possibilityMatches(
+ p: string,
+ pIndex: number,
+ possibilities: string[] | undefined
+): boolean {
/* istanbul ignore next */
if (!possibilities) {
return false;
@@ -155,10 +278,15 @@ function possibilityMatches(p: string, pIndex: number, possibilities: string[] |
return false;
}
-function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean {
+function isPrefixedByDirectionalPronoun(
+ i: number,
+ phonemes: string[]
+): boolean {
const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join("");
const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join("");
- if (["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)) {
+ if (
+ ["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)
+ ) {
return true;
}
if (potentialPronounThreeCharSlice === "raa-") {
@@ -167,7 +295,11 @@ function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean
return false;
}
-export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes: boolean = false): string | undefined {
+export function phoneticsToDiacritics(
+ ps: string,
+ ph: string,
+ forbidOoPrefixes: boolean = false
+): string | undefined {
const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]);
const p = ps.trim();
let result = "";
@@ -179,58 +311,72 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
if (phoneme === "-") {
return;
}
- const phonemeInfo = phonemeTable.find((element) => element.phoneme === phoneme);
+ const phonemeInfo = phonemeTable.find(
+ (element) => element.phoneme === phoneme
+ );
if (!phonemeInfo) {
errored.push({ error: "phoneme info not found", phoneme, i });
return;
}
- const isDoubleConsonant = (
+ const isDoubleConsonant =
phonemeInfo.consonant &&
phoneme === phonemes[i - 1] &&
// TODO: is this thourough enough to allow double consonants on the ending of the previous word?
!(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek
- ) ? true : false;
- const isBeginning = !isDoubleConsonant && ((i === 0) || isSpace(p[pIndex - 1]) || (phonemes[i - 1] === "-Ul-") || isPrefixedByDirectionalPronoun(i, phonemes));
- const upcomingAEndingAfterHey = (p[pIndex] === "ح" && isSpace(p[pIndex + 1]) && ["a", "á"].includes(phonemes[i + 1]));
-
- // TODO: break this into a seperate function -- why can it sometimes be set to undefined?
- const isEnding = (i === phonemes.length - 1) || ((
- (phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
- (!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
- (
- (!phonemeInfo.possibilities && isSpace(p[pIndex + 1])) &&
- (possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) || (p[pIndex] === "ع" && phonemes[i + 1] !== "'"))
- )
- ) && !upcomingAEndingAfterHey
- && // makes sure the next letter isn't a double consonant like haqq <-
- !(
- phonemeInfo.consonant && phoneme === phonemes[i + 1] // &&
- // !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
- )
- ) || // can be the trailing double consanant on the end of a word
- (
- phonemeInfo.consonant && phoneme === phonemes[i - 1] &&
- !(isEndSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
- ) || // can be یٰ ending
- (
- isEndSpace(p[pIndex + 2]) && (p.slice(pIndex, pIndex + 2) === "یٰ")
- );
+ ? true
+ : false;
+ const isBeginning =
+ !isDoubleConsonant &&
+ (i === 0 ||
+ isSpace(p[pIndex - 1]) ||
+ phonemes[i - 1] === "-Ul-" ||
+ isPrefixedByDirectionalPronoun(i, phonemes));
+ const upcomingAEndingAfterHay =
+ p[pIndex] === "ح" &&
+ isSpace(p[pIndex + 1]) &&
+ ["a", "á"].includes(phonemes[i + 1]);
- const isUofDu = phoneme === "u" && (
- p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
- (p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
- p.slice(pIndex - 6, pIndex) === "د ... " // ... د is as the previous word
- );
+ // TODO: break this into a separate function -- why can it sometimes be set to undefined?
+ const isEnding =
+ i === phonemes.length - 1 ||
+ (((phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
+ (!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
+ (!phonemeInfo.possibilities &&
+ isSpace(p[pIndex + 1]) &&
+ (possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) ||
+ (p[pIndex] === "ع" && phonemes[i + 1] !== "'")))) &&
+ !upcomingAEndingAfterHay && // makes sure the next letter isn't a double consonant like haqq <-
+ !(
+ (phonemeInfo.consonant && phoneme === phonemes[i + 1]) // &&
+ // !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
+ )) || // can be the trailing double consanant on the end of a word
+ (phonemeInfo.consonant &&
+ phoneme === phonemes[i - 1] &&
+ !(
+ isEndSpace(p[pIndex - 1]) &&
+ phonemeInfo.possibilities.includes(p[pIndex])
+ )) || // can be یٰ ending
+ (isEndSpace(p[pIndex + 2]) && p.slice(pIndex, pIndex + 2) === "یٰ");
+
+ const isUofDu =
+ phoneme === "u" &&
+ (p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
+ (p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
+ p.slice(pIndex - 6, pIndex) === "د ... "); // ... د is as the previous word
// TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance
- const isEndingAynVowel = isEnding && phonemeInfo.diacritic && [p[pIndex], p[pIndex - 1]].includes("ع") && p[pIndex] !== "ه";
+ const isEndingAynVowel =
+ isEnding &&
+ phonemeInfo.diacritic &&
+ [p[pIndex], p[pIndex - 1]].includes("ع") &&
+ p[pIndex] !== "ه";
const isMiddle = !isBeginning && !isEnding;
- const isSilentWaw = (
+ const isSilentWaw =
p[pIndex] === "و" &&
p[pIndex - 1] === "خ" &&
p[pIndex + 1] === "ا" &&
- ["áa", "aa"].includes(phoneme)
- );
- const isAnAEndingAfterHey = isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHeyEnding;
+ ["áa", "aa"].includes(phoneme);
+ const isAnAEndingAfterHay =
+ isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHayEnding;
if (isDoubleConsonant) {
pIndex--;
if (isSpace(p[pIndex])) {
@@ -247,14 +393,22 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++;
}
// special check for Arabic wasla
- if (p.slice(0, 3) === "بال" && phonemes[i - 1] === "b" && phonemeInfo.canBeWasla && phonemes[i + 1] === "l") {
+ if (
+ p.slice(0, 3) === "بال" &&
+ phonemes[i - 1] === "b" &&
+ phonemeInfo.canBeWasla &&
+ phonemes[i + 1] === "l"
+ ) {
result += phonemeInfo.diacritic + wasla;
pIndex++;
previousPhonemeWasAConsonant = false;
return;
}
// special check for fathahan ending
- if (phonemeInfo.canBeFirstPartOfFathahanEnding && p.slice(pIndex, pIndex + 2) === "اً") {
+ if (
+ phonemeInfo.canBeFirstPartOfFathahanEnding &&
+ p.slice(pIndex, pIndex + 2) === "اً"
+ ) {
result += "ا";
pIndex++;
return;
@@ -265,7 +419,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return;
}
// special check for words starting with عا or عی
- if (isBeginning && phonemeInfo.canStartWithAynBefore && p[pIndex] === "ع" && phonemeInfo.possibilities.includes(p[pIndex + 1])) {
+ if (
+ isBeginning &&
+ phonemeInfo.canStartWithAynBefore &&
+ p[pIndex] === "ع" &&
+ phonemeInfo.possibilities.includes(p[pIndex + 1])
+ ) {
result += "ع";
result += phonemeInfo.diacritic ? phonemeInfo.diacritic : "";
result += p[pIndex + 1];
@@ -273,23 +432,45 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return;
}
// special check for ؤ Ua
- if (phoneme === "U" && phonemes[i + 1] === "a" && phonemes[i + 2] !== "a" && p[pIndex] === "و") {
+ if (
+ phoneme === "U" &&
+ phonemes[i + 1] === "a" &&
+ phonemes[i + 2] !== "a" &&
+ p[pIndex] === "و"
+ ) {
result += "ؤ";
pIndex++;
return;
}
- if (phoneme === "a" && phonemes[i - 1] === "U" && phonemes[i + 1] !== "a" && result.slice(-2) === "ؤ") {
+ if (
+ phoneme === "a" &&
+ phonemes[i - 1] === "U" &&
+ phonemes[i + 1] !== "a" &&
+ result.slice(-2) === "ؤ"
+ ) {
previousPhonemeWasAConsonant = false;
return;
}
// special check for و wo
- if (isBeginning && phoneme === "w" && phonemes[i + 1] === "o" && p[pIndex] === "و" && isEndSpace(p[pIndex + 1])) {
+ if (
+ isBeginning &&
+ phoneme === "w" &&
+ phonemes[i + 1] === "o" &&
+ p[pIndex] === "و" &&
+ isEndSpace(p[pIndex + 1])
+ ) {
result += "و";
pIndex++;
return;
}
// TODO: isEndSpace here is redundant??
- if (isEnding && phoneme === "o" && phonemes[i - 1] === "w" && p[pIndex - 1] === "و" && isEndSpace(p[pIndex])) {
+ if (
+ isEnding &&
+ phoneme === "o" &&
+ phonemes[i - 1] === "w" &&
+ p[pIndex - 1] === "و" &&
+ isEndSpace(p[pIndex])
+ ) {
pIndex++;
return;
}
@@ -300,38 +481,67 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return;
}
// special check for for أ in the middle of the word
- if (!isBeginning && p[pIndex] === "أ" && phoneme === "a" && phonemes[i + 1] === "'" && phonemes[i + 2] === "a") {
+ if (
+ !isBeginning &&
+ p[pIndex] === "أ" &&
+ phoneme === "a" &&
+ phonemes[i + 1] === "'" &&
+ phonemes[i + 2] === "a"
+ ) {
result += "أ";
pIndex++;
return;
}
- if (p[pIndex - 1] === "أ" && phonemes[i - 1] === "a" && phoneme === "'" && phonemes[i + 1] === "a") {
+ if (
+ p[pIndex - 1] === "أ" &&
+ phonemes[i - 1] === "a" &&
+ phoneme === "'" &&
+ phonemes[i + 1] === "a"
+ ) {
return;
}
- if (p[pIndex - 1] === "أ" && phonemes[i - 2] === "a" && phonemes[i - 1] === "'" && phoneme === "a") {
+ if (
+ p[pIndex - 1] === "أ" &&
+ phonemes[i - 2] === "a" &&
+ phonemes[i - 1] === "'" &&
+ phoneme === "a"
+ ) {
previousPhonemeWasAConsonant = false;
return;
}
// special check for وو 'oo
- if (!isBeginning && p[pIndex] === "و" && p[pIndex + 1] === "و" && phoneme === "'" && phonemes[i + 1] === "oo") {
+ if (
+ !isBeginning &&
+ p[pIndex] === "و" &&
+ p[pIndex + 1] === "و" &&
+ phoneme === "'" &&
+ phonemes[i + 1] === "oo"
+ ) {
result += "وُو";
pIndex += 2;
return;
}
- if (p[pIndex - 2] === "و" && p[pIndex - 1] === "و" && phonemes[i - 1] === "'" && phoneme === "oo") {
+ if (
+ p[pIndex - 2] === "و" &&
+ p[pIndex - 1] === "و" &&
+ phonemes[i - 1] === "'" &&
+ phoneme === "oo"
+ ) {
previousPhonemeWasAConsonant = false;
return;
}
- const prevLetterWasBeginningAyn = (
+ const prevLetterWasBeginningAyn =
p[pIndex - 1] === "ع" &&
// isEndSpace(p[pIndex]) && // This breaks it
- phoneme === "'"
- );
+ phoneme === "'";
// check if the phoneme lines up in the Pashto word
if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) {
// TODO: Maybe a little bad because it doesn't loop through possibilities
- if ((!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) && p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]) {
+ if (
+ (!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) &&
+ p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]
+ ) {
errored.push({ error: "didn't start with an aleph", phoneme, i });
return;
}
@@ -348,18 +558,18 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++;
return;
} else if (
- (isEnding && phonemeInfo.endingPossibilities) &&
+ isEnding &&
+ phonemeInfo.endingPossibilities &&
!isUofDu &&
- (
- !possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
- !isEndingAynVowel && // allowing short vowels on the end of words ending with ع
- !isAnAEndingAfterHey
- )
+ !possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
+ !isEndingAynVowel && // allowing short vowels on the end of words ending with ع
+ !isAnAEndingAfterHay
) {
errored.push({ error: "bad ending", phoneme, i });
return;
} else if (
- (isEnding && !phonemeInfo.endingPossibilities) &&
+ isEnding &&
+ !phonemeInfo.endingPossibilities &&
phonemeInfo.possibilities &&
!phonemeInfo.possibilities.includes(p[pIndex])
) {
@@ -367,14 +577,17 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
errored.push({ error: "bad ending 2", phoneme, i });
return;
} else if (
- (phonemeInfo.possibilities && !isEnding) &&
- (
- !(phonemeInfo.possibilities.includes(p[pIndex])) &&
- !(p[pIndex] === "ن" && (p[pIndex + 1] === "ب" && phoneme === "m")) && // && // exception case with نب === mb
- !prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
- )
+ phonemeInfo.possibilities &&
+ !isEnding &&
+ !phonemeInfo.possibilities.includes(p[pIndex]) &&
+ !(p[pIndex] === "ن" && p[pIndex + 1] === "ب" && phoneme === "m") && // && // exception case with نب === mb
+ !prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
) {
- errored.push({ error: "improper coressponding letter in middle of word", phoneme, i });
+ errored.push({
+ error: "improper coressponding letter in middle of word",
+ phoneme,
+ i,
+ });
return;
}
// console.log(phoneme, pIndex, p[pIndex], isEnding);
@@ -382,7 +595,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
// OK, it lines up with the Pashto word, we're good
// Now continue building the result string
// deal with starting with short vowels and alef
- if (!isUofDu && isBeginning && !phonemeInfo.possibilities && !phonemeInfo.isIzafe) {
+ if (
+ !isUofDu &&
+ isBeginning &&
+ !phonemeInfo.possibilities &&
+ !phonemeInfo.isIzafe
+ ) {
// TODO: WHY IS THIS HERE
if (!["ا", "ع"].includes(p[pIndex])) {
errored.push({ error: "bad beginning 2", phoneme, i });
@@ -392,22 +610,30 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++;
}
// if the phoneme carries a diacritic insert it (before the letter if it's coming)
- const isOoPrefix = (phonemeInfo.alsoCanBePrefix && isBeginning && (p[pIndex - 1] !== "ا"));
+ const isOoPrefix =
+ phonemeInfo.alsoCanBePrefix && isBeginning && p[pIndex - 1] !== "ا";
if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) {
- // using this hack to remove the space and put it after the zwarakey we're going to add after د
- if (isUofDu && result.slice(-5) === " ... ") {
- result = result.slice(0, -5) + zwarakey + " ... ";
+ // using this hack to remove the space and put it after the zwarakay we're going to add after د
+ if (isUofDu && result.slice(-5) === " ... ") {
+ result = result.slice(0, -5) + zwarakay + " ... ";
} else if (isUofDu && result.slice(-1) === " ") {
- result = result.slice(0, -1) + zwarakey + " ";
+ result = result.slice(0, -1) + zwarakay + " ";
} else {
result += phonemeInfo.diacritic;
}
}
// TODO: The middle stuff might be unneccessary/unhelpful
- const isACommaWithoutAyn = (phoneme === "'" && (p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ")));
+ const isACommaWithoutAyn =
+ phoneme === "'" && p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ");
// if the previous phoneme was a consonant insert a sukun
// console.log("Will I go into the adding thing?");
- if (!isBeginning && previousPhonemeWasAConsonant && phonemeInfo.consonant && phonemes[i - 1] !== "'" && p[pIndex] !== "ع") {
+ if (
+ !isBeginning &&
+ previousPhonemeWasAConsonant &&
+ phonemeInfo.consonant &&
+ phonemes[i - 1] !== "'" &&
+ p[pIndex] !== "ع"
+ ) {
result += isDoubleConsonant ? tashdeed : sukun;
}
if (isEnding && isDoubleConsonant) {
@@ -417,30 +643,38 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
}
}
// if there's a pashto letter for the phoneme, insert it
- if (!isEndingAynVowel && !isACommaWithoutAyn && (phonemeInfo.possibilities || isEnding)) {
+ if (
+ !isEndingAynVowel &&
+ !isACommaWithoutAyn &&
+ (phonemeInfo.possibilities || isEnding)
+ ) {
// need the isSpace check to prevent weird behaviour with izafe
if (!isUofDu) {
- if (isAnAEndingAfterHey) {
+ if (isAnAEndingAfterHay) {
result += zwar;
if (p[pIndex] === " ") {
result += " ";
}
} else {
- result += (isDoubleConsonant || isSpace(p[pIndex])) ? "" : p[pIndex];
+ result += isDoubleConsonant || isSpace(p[pIndex]) ? "" : p[pIndex];
}
}
pIndex++;
}
if (isEnding) {
if (isUofDu) {
- result += zwarakey;
+ result += zwarakay;
} else if (phonemeInfo.hamzaOnEnd) {
result += hamzaAbove;
} else if (phonemeInfo.takesSukunOnEnding) {
result += sukun;
} else if (p[pIndex] === daggerAlif) {
result += daggerAlif;
- } else if (isEndSpace(p[pIndex]) && p[pIndex - 1] === "ه" && phonemeInfo.takesDiacriticBeforeGurdaHeyEnding) {
+ } else if (
+ isEndSpace(p[pIndex]) &&
+ p[pIndex - 1] === "ه" &&
+ phonemeInfo.takesDiacriticBeforeGurdaHayEnding
+ ) {
result = result.slice(0, -1) + phonemeInfo.diacritic + "ه";
}
}
@@ -456,13 +690,20 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
}
return;
}
- previousPhonemeWasAConsonant = (!isEnding && phonemeInfo.consonant) ? true : false;
+ previousPhonemeWasAConsonant =
+ !isEnding && phonemeInfo.consonant ? true : false;
// ignore the ع or ئ if there's not a ' in the phonetics
- const nextPhonemeInfo = phonemeTable.find((element) => phonemes[i + 1] === element.phoneme);
+ const nextPhonemeInfo = phonemeTable.find(
+ (element) => phonemes[i + 1] === element.phoneme
+ );
if (
["ع", "ئ"].includes(p[pIndex]) &&
![phonemes[i + 1], phonemes[i + 2]].includes("'") &&
- !(nextPhonemeInfo && nextPhonemeInfo.diacritic && isEndSpace(p[pIndex + 1])) && // don't skip the ع on the end if there's another short letter coming after it
+ !(
+ nextPhonemeInfo &&
+ nextPhonemeInfo.diacritic &&
+ isEndSpace(p[pIndex + 1])
+ ) && // don't skip the ع on the end if there's another short letter coming after it
!(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end
!phonemeInfo.isIzafe
) {
@@ -476,7 +717,11 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return;
}
// if we've arrived at a space in the Pashto, move along before the next iteration
- if (isSpace(p[pIndex]) && phonemes[i + 1] !== "-i-" && !upcomingAEndingAfterHey) {
+ if (
+ isSpace(p[pIndex]) &&
+ phonemes[i + 1] !== "-i-" &&
+ !upcomingAEndingAfterHay
+ ) {
result += " ";
pIndex++;
}
diff --git a/src/lib/src/sandwiches.ts b/src/lib/src/sandwiches.ts
index bd9b4c2..9f72bb2 100644
--- a/src/lib/src/sandwiches.ts
+++ b/src/lib/src/sandwiches.ts
@@ -1,139 +1,139 @@
import * as T from "../../types";
export const sandwiches: T.Sandwich[] = [
- {
- type: "sandwich",
- before: { p: "له", f: "la" },
- after: { p: "نه", f: "na" },
- e: "from",
- },
- {
- type: "sandwich",
- before: { p: "له", f: "la" },
- after: { p: "څخه", f: "tsuxa" },
- e: "from",
- },
- // TODO: Implement mayonaise
- // {
- // type: "sandwich",
- // before: { p: "له", f: "la" },
- // after: "mayonaise",
- // e: "from",
- // },
- {
- type: "sandwich",
- before: { p: "له", f: "la" },
- after: { p: "سره", f: "sara" },
- e: "with",
- },
- {
- type: "sandwich",
- before: undefined,
- after: { p: "ته", f: "ta" },
- e: "to",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "لپاره", f: "lapaara" },
- e: "for",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "دمخې", f: "dumúkhe" },
- e: "before/in front of",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "په څانګ", f: "pu tsaang" },
- e: "beside",
- },
- {
- type: "sandwich",
- before: { p: "پر", f: "pur" },
- after: { p: "باندې", f: "baande" },
- e: "on",
- },
- {
- type: "sandwich",
- before: { p: "په", f: "pu" },
- after: { p: "کې", f: "ke" },
- e: "in",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "دننه", f: "dununa" },
- e: "inside",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "دباندې", f: "dubaande" },
- e: "outside",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "مخې ته", f: "mukhe ta" },
- e: "in front of",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "شا ته", f: "shaa ta" },
- e: "behind",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "لاندې", f: "laande" },
- e: "under",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "په شان", f: "pu shaan" },
- e: "like",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "غوندې", f: "ghwunde" },
- e: "like",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "په حیث", f: "pu heys" },
- e: "as",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "په لور", f: "pu lor" },
- e: "towards",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "په اړه", f: "pu aRa" },
- e: "about",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "په باره کې", f: "pu baara ke" },
- e: "about",
- },
- {
- type: "sandwich",
- before: { p: "د", f: "du" },
- after: { p: "په اړوند", f: "pu aRwand" },
- e: "concerning",
- },
+ {
+ type: "sandwich",
+ before: { p: "له", f: "la" },
+ after: { p: "نه", f: "na" },
+ e: "from",
+ },
+ {
+ type: "sandwich",
+ before: { p: "له", f: "la" },
+ after: { p: "څخه", f: "tsuxa" },
+ e: "from",
+ },
+ // TODO: Implement mayonaise
+ // {
+ // type: "sandwich",
+ // before: { p: "له", f: "la" },
+ // after: "mayonaise",
+ // e: "from",
+ // },
+ {
+ type: "sandwich",
+ before: { p: "له", f: "la" },
+ after: { p: "سره", f: "sara" },
+ e: "with",
+ },
+ {
+ type: "sandwich",
+ before: undefined,
+ after: { p: "ته", f: "ta" },
+ e: "to",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "لپاره", f: "lapaara" },
+ e: "for",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "دمخې", f: "dumúkhe" },
+ e: "before/in front of",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "په څانګ", f: "pu tsaang" },
+ e: "beside",
+ },
+ {
+ type: "sandwich",
+ before: { p: "پر", f: "pur" },
+ after: { p: "باندې", f: "baande" },
+ e: "on",
+ },
+ {
+ type: "sandwich",
+ before: { p: "په", f: "pu" },
+ after: { p: "کې", f: "ke" },
+ e: "in",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "دننه", f: "dununa" },
+ e: "inside",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "دباندې", f: "dubaande" },
+ e: "outside",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "مخې ته", f: "mukhe ta" },
+ e: "in front of",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "شا ته", f: "shaa ta" },
+ e: "behind",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "لاندې", f: "laande" },
+ e: "under",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "په شان", f: "pu shaan" },
+ e: "like",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "غوندې", f: "ghwunde" },
+ e: "like",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "په حیث", f: "pu hays" },
+ e: "as",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "په لور", f: "pu lor" },
+ e: "towards",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "په اړه", f: "pu aRa" },
+ e: "about",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "په باره کې", f: "pu baara ke" },
+ e: "about",
+ },
+ {
+ type: "sandwich",
+ before: { p: "د", f: "du" },
+ after: { p: "په اړوند", f: "pu aRwand" },
+ e: "concerning",
+ },
];
-export default sandwiches;
\ No newline at end of file
+export default sandwiches;
diff --git a/src/lib/src/translate-phonetics-replacer.ts b/src/lib/src/translate-phonetics-replacer.ts
index 3b475d6..1a6b3ee 100644
--- a/src/lib/src/translate-phonetics-replacer.ts
+++ b/src/lib/src/translate-phonetics-replacer.ts
@@ -105,14 +105,14 @@ export const replacerInfo: IReplacerInfoItem[] = [
ipa: "ɪ́",
},
{
- char: "ey",
+ char: "ay",
alalc: "ay",
- ipa: "ai",
+ ipa: "aj", // IPA writes the glide as "j", consistent with the "áy" → "áj" entry; "ay" is the alalc spelling
},
{
- char: "éy",
+ char: "áy",
alalc: "áy",
- ipa: "ái",
+ ipa: "áj",
},
{
char: "ee",
@@ -140,9 +140,9 @@ export const replacerInfo: IReplacerInfoItem[] = [
ipa: "u:j",
},
{
- char: "eyy",
- alalc: "ạy",
- ipa: "ɛ̝j",
+ char: "ey",
+ alalc: "ey",
+ ipa: "ej",
},
{
char: "e",
@@ -351,4 +351,5 @@ export const replacerInfo: IReplacerInfoItem[] = [
];
// tslint:disable-next-line
-export const replacerRegex = /aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ey|éy|e{1,2}|ée|é|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g;
+export const replacerRegex =
+ /aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ay|áy|e{1,2}|ée|é|ey|éy|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g; // NOTE(review): alternatives are tried left to right, so "ay|áy" (listed after "a|á") and "ey|éy" (listed after "e{1,2}" / "é") can never match as a unit — confirm whether they should come before the single-vowel alternatives
diff --git a/src/lib/src/translate-phonetics.test.ts b/src/lib/src/translate-phonetics.test.ts
index 524c96e..b500a11 100644
--- a/src/lib/src/translate-phonetics.test.ts
+++ b/src/lib/src/translate-phonetics.test.ts
@@ -6,9 +6,7 @@
*
*/
-import {
- translatePhonetics,
-} from "./translate-phonetics";
+import { translatePhonetics } from "./translate-phonetics";
const dialects = ["southern", "standard", "peshawer"];
const systems = ["ipa", "alalc"];
@@ -54,11 +52,11 @@ const translations = [
},
},
{
- original: "saRey",
+ original: "saRay",
ipa: {
- southern: "saɻai",
- standard: "saɻai",
- peshawer: "saɻai",
+ southern: "saɻaj",
+ standard: "saɻaj",
+ peshawer: "saɻaj",
},
alalc: {
southern: "saṛay",
@@ -72,20 +70,17 @@ translations.forEach((t) => {
systems.forEach((system) => {
// check each dialect with given system
dialects.forEach((dialect) => {
- test(
- // @ts-ignore
- `${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`,
- () => {
- const translated = translatePhonetics(t.original, {
- // @ts-ignore
- system,
- // @ts-ignore
- dialect,
- });
+ test(// @ts-ignore
+ `${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`, () => {
+ const translated = translatePhonetics(t.original, {
// @ts-ignore
- expect(translated).toBe(t[system][dialect]);
- },
- );
+ system,
+ // @ts-ignore
+ dialect,
+ });
+ // @ts-ignore
+ expect(translated).toBe(t[system][dialect]);
+ });
});
});
});
diff --git a/src/lib/src/validate-entry.test.ts b/src/lib/src/validate-entry.test.ts
index 38a0b08..2ef7c55 100644
--- a/src/lib/src/validate-entry.test.ts
+++ b/src/lib/src/validate-entry.test.ts
@@ -8,234 +8,461 @@
import { standardizeEntry, validateEntry } from "./validate-entry";
import * as T from "../../types";
-import { standardizePhonetics } from "./standardize-pashto";
const toTest: {
- input: any,
- output: T.DictionaryEntryError | { ok: true } | { checkComplement: true },
+ input: any;
+ output: T.DictionaryEntryError | { ok: true } | { checkComplement: true };
}[] = [
- {
- input: { ts: undefined },
- output: {
- errors: ["missing ts", "missing i", "missing p", "missing f", "missing e"],
- p: "",
- f: "",
- e: "",
- erroneousFields: ["ts", "i", "p", "f", "e"],
- ts: 0,
- },
+ {
+ input: { ts: undefined },
+ output: {
+ errors: [
+ "missing ts",
+ "missing i",
+ "missing p",
+ "missing f",
+ "missing e",
+ ],
+ p: "",
+ f: "",
+ e: "",
+ erroneousFields: ["ts", "i", "p", "f", "e"],
+ ts: 0,
},
- {
- input: { ts: 123, p: "کور", e: "house" },
- output: {
- errors: ["missing i", "missing f"],
- p: "کور",
- f: "",
- ts: 123,
- e: "house",
- erroneousFields: ["i", "f"],
- },
+ },
+ {
+ input: { ts: 123, p: "کور", e: "house" },
+ output: {
+ errors: ["missing i", "missing f"],
+ p: "کور",
+ f: "",
+ ts: 123,
+ e: "house",
+ erroneousFields: ["i", "f"],
},
- {
- input: {"i":293,"ts":1527821299,"p":"اخطار","f":"ixtáar","e":"warning, reprimand, admonishment","c":"n. m."},
- output: {
- errors: ["script and phonetics do not match for p and f"],
- p: "اخطار",
- f: "ixtáar",
- e: "warning, reprimand, admonishment",
- ts: 1527821299,
- erroneousFields: ["p", "f"],
- },
+ },
+ {
+ input: {
+ i: 293,
+ ts: 1527821299,
+ p: "اخطار",
+ f: "ixtáar",
+ e: "warning, reprimand, admonishment",
+ c: "n. m.",
},
- {
- input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbf":"puxtan"},
- output: {
- errors: ["missing infbp"],
- p: "پښتون",
- f: "puxtoon",
- e: "Pashtun",
- ts: 1527815197,
- erroneousFields: ["infbp"],
- },
+ output: {
+ errors: ["script and phonetics do not match for p and f"],
+ p: "اخطار",
+ f: "ixtáar",
+ e: "warning, reprimand, admonishment",
+ ts: 1527821299,
+ erroneousFields: ["p", "f"],
},
- {
- input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbp":"پښتن"},
- output: {
- errors: ["missing infbf"],
- p: "پښتون",
- f: "puxtoon",
- e: "Pashtun",
- ts: 1527815197,
- erroneousFields: ["infbf"],
- },
+ },
+ {
+ input: {
+ i: 2433,
+ ts: 1527815197,
+ p: "پښتون",
+ f: "puxtoon",
+ e: "Pashtun",
+ c: "n. m. unisex / adj. irreg.",
+ infap: "پښتانه",
+ infaf: "puxtaanu",
+ infbf: "puxtan",
},
- {
- input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puktaanu","infbp":"پښتن"},
- output: {
- errors: ["script and phonetics do not match for infap and infaf", "missing infbf"],
- p: "پښتون",
- f: "puxtoon",
- e: "Pashtun",
- ts: 1527815197,
- erroneousFields: ["infap", "infaf", "infbf"],
- },
+ output: {
+ errors: ["missing infbp"],
+ p: "پښتون",
+ f: "puxtoon",
+ e: "Pashtun",
+ ts: 1527815197,
+ erroneousFields: ["infbp"],
},
- {
- input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
- output: {
- errors: ["missing separationAtF"],
- p: "څملاستل",
- f: "tsumlaastúl",
- e: "to lie down",
- ts: 1527819674,
- erroneousFields: ["separationAtF"],
- },
+ },
+ {
+ input: {
+ i: 2433,
+ ts: 1527815197,
+ p: "پښتون",
+ f: "puxtoon",
+ e: "Pashtun",
+ c: "n. m. unisex / adj. irreg.",
+ infap: "پښتانه",
+ infaf: "puxtaanu",
+ infbp: "پښتن",
},
- {
- input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"sumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
- output: {
- errors: ["script and phonetics do not match for p and f", "missing separationAtF"],
- p: "څملاستل",
- f: "sumlaastúl",
- e: "to lie down",
- ts: 1527819674,
- erroneousFields: ["p", "f", "separationAtF"],
- },
+ output: {
+ errors: ["missing infbf"],
+ p: "پښتون",
+ f: "puxtoon",
+ e: "Pashtun",
+ ts: 1527815197,
+ erroneousFields: ["infbf"],
},
- {
- input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtF":4,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
- output: {
- errors: ["missing separationAtP"],
- p: "څملاستل",
- f: "tsumlaastúl",
- e: "to lie down",
- ts: 1527819674,
- erroneousFields: ["separationAtP"],
- },
+ },
+ {
+ input: {
+ i: 2433,
+ ts: 1527815197,
+ p: "پښتون",
+ f: "puxtoon",
+ e: "Pashtun",
+ c: "n. m. unisex / adj. irreg.",
+ infap: "پښتانه",
+ infaf: "puktaanu",
+ infbp: "پښتن",
},
- {
- input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","c":"v. stat. comp. trans."},
- output: {
- errors: ["missing complement for compound verb"],
- p: "پخول",
- f: "pakhawul",
- e: "to cook, prepare, to cause to ripen, mature",
- ts: 1571859113828,
- erroneousFields: ["l"],
- },
+ output: {
+ errors: [
+ "script and phonetics do not match for infap and infaf",
+ "missing infbf",
+ ],
+ p: "پښتون",
+ f: "puxtoon",
+ e: "Pashtun",
+ ts: 1527815197,
+ erroneousFields: ["infap", "infaf", "infbf"],
},
- {
- input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","l":1574867531681,"c":"v. stat. comp. trans."},
- output: {
- checkComplement: true,
- },
+ },
+ {
+ input: {
+ i: 5000,
+ ts: 1527819674,
+ p: "څملاستل",
+ f: "tsumlaastúl",
+ e: "to lie down",
+ l: 1596485996977,
+ separationAtP: 2,
+ c: "v. intrans. seperable",
+ psp: "څمل",
+ psf: "tsaml",
+ noOo: true,
},
- {
- input: {"i":2231,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."},
- output: { ok: true },
+ output: {
+ errors: ["missing separationAtF"],
+ p: "څملاستل",
+ f: "tsumlaastúl",
+ e: "to lie down",
+ ts: 1527819674,
+ erroneousFields: ["separationAtF"],
},
- {
- input: {"i":0,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."},
- output: { ok: true },
+ },
+ {
+ input: {
+ i: 5000,
+ ts: 1527819674,
+ p: "څملاستل",
+ f: "sumlaastúl",
+ e: "to lie down",
+ l: 1596485996977,
+ separationAtP: 2,
+ c: "v. intrans. seperable",
+ psp: "څمل",
+ psf: "tsaml",
+ noOo: true,
},
- {
- input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj."},
- output: {
- errors: ["script and phonetics do not match for p and f"],
- p: "آبدار",
- f: "aawdáar",
- e: "watery, damp, humid, juicy",
- ts: 1575058859661,
- erroneousFields: ["p", "f"],
- },
+ output: {
+ errors: [
+ "script and phonetics do not match for p and f",
+ "missing separationAtF",
+ ],
+ p: "څملاستل",
+ f: "sumlaastúl",
+ e: "to lie down",
+ ts: 1527819674,
+ erroneousFields: ["p", "f", "separationAtF"],
},
- {
- input: {"ts":1591033069786,"i":7717,"p":"ستړی کول","f":"stuRey kawul","g":"stuReykedul","e":"to get tired, fatigued","c":"v. stat. comp. intrans.","l":1527815306,"ec":"get","ep":"tired"},
- output: {
- errors: ["wrong ending for intrans. stat. comp"],
- p: "ستړی کول",
- f: "stuRey kawul",
- e: "to get tired, fatigued",
- ts: 1591033069786,
- erroneousFields: ["p", "f"],
- },
+ },
+ {
+ input: {
+ i: 5000,
+ ts: 1527819674,
+ p: "څملاستل",
+ f: "tsumlaastúl",
+ e: "to lie down",
+ l: 1596485996977,
+ separationAtF: 4,
+ c: "v. intrans. seperable",
+ psp: "څمل",
+ psf: "tsaml",
+ noOo: true,
},
- {
- input: {"ts":1591033078746,"i":7716,"p":"ستړی کېدل","f":"stuRey kedul","g":"stuReykawul","e":"to make tired, wear out","c":"v. stat. comp. trans.","l":1527815306,"ec":"make","ep":"tired"},
- output: {
- errors: ["wrong ending for trans. stat. comp"],
- p: "ستړی کېدل",
- f: "stuRey kedul",
- e: "to make tired, wear out",
- ts: 1591033078746,
- erroneousFields: ["p", "f"],
- },
+ output: {
+ errors: ["missing separationAtP"],
+ p: "څملاستل",
+ f: "tsumlaastúl",
+ e: "to lie down",
+ ts: 1527819674,
+ erroneousFields: ["separationAtP"],
},
- {
- input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
- output: { ok: true },
+ },
+ {
+ input: {
+ i: 2222,
+ ts: 1571859113828,
+ p: "پخول",
+ f: "pakhawul",
+ e: "to cook, prepare, to cause to ripen, mature",
+ c: "v. stat. comp. trans.",
},
- {
- input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
- output: { ok: true },
+ output: {
+ errors: ["missing complement for compound verb"],
+ p: "پخول",
+ f: "pakhawul",
+ e: "to cook, prepare, to cause to ripen, mature",
+ ts: 1571859113828,
+ erroneousFields: ["l"],
},
- {
- input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
- output: {
- errors: ["spacing discrepency between p and f"],
- p: "بې چاره",
- f: "bechaara",
- e: "poor thing, pitiful",
- ts: 1527812488,
- erroneousFields: ["p", "f"],
- },
+ },
+ {
+ input: {
+ i: 2222,
+ ts: 1571859113828,
+ p: "پخول",
+ f: "pakhawul",
+ e: "to cook, prepare, to cause to ripen, mature",
+ l: 1574867531681,
+ c: "v. stat. comp. trans.",
},
- {
- input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
- output: {
- errors: ["spacing discrepency between p and f"],
- p: "بېچاره",
- f: "be chaara",
- e: "poor thing, pitiful",
- ts: 1527812488,
- erroneousFields: ["p", "f"],
- },
+ output: {
+ checkComplement: true,
},
- {
- input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
- output: { ok: true }
+ },
+ {
+ input: {
+ i: 2231,
+ ts: 1527812013,
+ p: "پراخ",
+ f: "praakh, paráakh",
+ e: "wide, broad, spacious, vast",
+ c: "adj.",
},
- {
- input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"},
- output: {
- errors: ["spacing discrepency between app and apf"],
- p: "مکتب",
- f: "maktab",
- e: "school",
- ts: 1527814265,
- erroneousFields: ["app", "apf"],
- },
+ output: { ok: true },
+ },
+ {
+ input: {
+ i: 0,
+ ts: 1527812013,
+ p: "پراخ",
+ f: "praakh, paráakh",
+ e: "wide, broad, spacious, vast",
+ c: "adj.",
},
- {
- input: {"ts":1527815870,"i":183,"p":"اثر","f":"asar","g":"asar","e":"influence, impression, tracks, affect","r":4,"c":"n. m.","app":"اثرات, آثار","apf":"asráat"},
- output: {
- errors: ["difference in variation length between app and apf", "script and phonetics do not match for app and apf"],
- p: "اثر",
- f: "asar",
- e: "influence, impression, tracks, affect",
- ts: 1527815870,
- erroneousFields: ["app", "apf"],
- },
+ output: { ok: true },
+ },
+ {
+ input: {
+ i: 12,
+ ts: 1575058859661,
+ p: "آبدار",
+ f: "aawdáar",
+ e: "watery, damp, humid, juicy",
+ c: "adj.",
},
+ output: {
+ errors: ["script and phonetics do not match for p and f"],
+ p: "آبدار",
+ f: "aawdáar",
+ e: "watery, damp, humid, juicy",
+ ts: 1575058859661,
+ erroneousFields: ["p", "f"],
+ },
+ },
+ {
+ input: {
+ ts: 1591033069786,
+ i: 7717,
+ p: "ستړی کول",
+ f: "stuRay kawul",
+ g: "stuRaykedul",
+ e: "to get tired, fatigued",
+ c: "v. stat. comp. intrans.",
+ l: 1527815306,
+ ec: "get",
+ ep: "tired",
+ },
+ output: {
+ errors: ["wrong ending for intrans. stat. comp"],
+ p: "ستړی کول",
+ f: "stuRay kawul",
+ e: "to get tired, fatigued",
+ ts: 1591033069786,
+ erroneousFields: ["p", "f"],
+ },
+ },
+ {
+ input: {
+ ts: 1591033078746,
+ i: 7716,
+ p: "ستړی کېدل",
+ f: "stuRay kedul",
+ g: "stuRaykawul",
+ e: "to make tired, wear out",
+ c: "v. stat. comp. trans.",
+ l: 1527815306,
+ ec: "make",
+ ep: "tired",
+ },
+ output: {
+ errors: ["wrong ending for trans. stat. comp"],
+ p: "ستړی کېدل",
+ f: "stuRay kedul",
+ e: "to make tired, wear out",
+ ts: 1591033078746,
+ erroneousFields: ["p", "f"],
+ },
+ },
+ {
+ input: {
+ i: 12,
+ ts: 1575058859661,
+ p: "آبدار",
+ f: "aawdáar",
+ e: "watery, damp, humid, juicy",
+ c: "adj.",
+ diacExcept: true,
+ },
+ output: { ok: true },
+ },
+ {
+ input: {
+ i: 12,
+ ts: 1575058859661,
+ p: "آبدار",
+ f: "aawdáar",
+ e: "watery, damp, humid, juicy",
+ c: "adj.",
+ diacExcept: true,
+ },
+ output: { ok: true },
+ },
+ {
+ input: {
+ ts: 1527812488,
+ i: 1934,
+ p: "بې چاره",
+ f: "bechaara",
+ g: "bechaara",
+ e: "poor thing, pitiful",
+ r: 3,
+ c: "adj.",
+ },
+ output: {
+ errors: ["spacing discrepency between p and f"],
+ p: "بې چاره",
+ f: "bechaara",
+ e: "poor thing, pitiful",
+ ts: 1527812488,
+ erroneousFields: ["p", "f"],
+ },
+ },
+ {
+ input: {
+ ts: 1527812488,
+ i: 1934,
+ p: "بېچاره",
+ f: "be chaara",
+ g: "bechaara",
+ e: "poor thing, pitiful",
+ r: 3,
+ c: "adj.",
+ },
+ output: {
+ errors: ["spacing discrepency between p and f"],
+ p: "بېچاره",
+ f: "be chaara",
+ e: "poor thing, pitiful",
+ ts: 1527812488,
+ erroneousFields: ["p", "f"],
+ },
+ },
+ {
+ input: {
+ ts: 1527812488,
+ i: 1934,
+ p: "بې چاره",
+ f: "be chaara",
+ g: "bechaara",
+ e: "poor thing, pitiful",
+ r: 3,
+ c: "adj.",
+ },
+ output: { ok: true },
+ },
+ {
+ input: {
+ ts: 1527814265,
+ i: 12969,
+ p: "مکتب",
+ f: "maktab",
+ g: "maktab",
+ e: "school",
+ r: 4,
+ c: "n. m.",
+ app: "مکاتب",
+ apf: "ma kaatib",
+ },
+ output: {
+ errors: ["spacing discrepency between app and apf"],
+ p: "مکتب",
+ f: "maktab",
+ e: "school",
+ ts: 1527814265,
+ erroneousFields: ["app", "apf"],
+ },
+ },
+ {
+ input: {
+ ts: 1527815870,
+ i: 183,
+ p: "اثر",
+ f: "asar",
+ g: "asar",
+ e: "influence, impression, tracks, affect",
+ r: 4,
+ c: "n. m.",
+ app: "اثرات, آثار",
+ apf: "asráat",
+ },
+ output: {
+ errors: [
+ "difference in variation length between app and apf",
+ "script and phonetics do not match for app and apf",
+ ],
+ p: "اثر",
+ f: "asar",
+ e: "influence, impression, tracks, affect",
+ ts: 1527815870,
+ erroneousFields: ["app", "apf"],
+ },
+ },
];
test("validateEntry should work", () => {
- toTest.forEach((t) => {
- expect(validateEntry(t.input as T.DictionaryEntry)).toEqual(t.output);
- });
+ toTest.forEach((t) => {
+ expect(validateEntry(t.input as T.DictionaryEntry)).toEqual(t.output);
+ });
});
test("standardizeEntry", () => {
- expect(standardizeEntry({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa‘ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."}))
- .toEqual({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa'ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."});
+ expect(
+ standardizeEntry({
+ i: 195,
+ ts: 1527822036,
+ p: "اجتماعي",
+ f: "ijtimaa‘ee, ijtimaayee",
+ g: "ijtimaaee,ijtimaayee",
+ e: "public, social, societal",
+ c: "adj.",
+ })
+ ).toEqual({
+ i: 195,
+ ts: 1527822036,
+ p: "اجتماعي",
+ f: "ijtimaa'ee, ijtimaayee",
+ g: "ijtimaaee,ijtimaayee",
+ e: "public, social, societal",
+ c: "adj.",
+ });
});