phonetics conversion done
This commit is contained in:
parent
fc97db0dd3
commit
c0cd34c3d6
|
@ -1,46 +1,93 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import classNames from "classnames";
|
import classNames from "classnames";
|
||||||
|
import { getEnglishFromRendered } from "../../../lib/src/phrase-building/np-tools";
|
||||||
import {
|
import {
|
||||||
getEnglishFromRendered,
|
getEnglishPersonInfo,
|
||||||
} from "../../../lib/src/phrase-building/np-tools";
|
getEnglishParticipleInflection,
|
||||||
import { getEnglishPersonInfo, getEnglishParticipleInflection, getEnglishGenNumInfo } from "../../../lib/src/misc-helpers";
|
getEnglishGenNumInfo,
|
||||||
|
} from "../../../lib/src/misc-helpers";
|
||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
import { getLength } from "../../../lib/src/p-text-helpers";
|
import { getLength } from "../../../lib/src/p-text-helpers";
|
||||||
import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal";
|
import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal";
|
||||||
import { negativeParticle } from "../../../lib/src/grammar-units";
|
import { negativeParticle } from "../../../lib/src/grammar-units";
|
||||||
|
|
||||||
function Block({ opts, block, king, script }: {
|
function Block({
|
||||||
opts: T.TextOptions,
|
opts,
|
||||||
block: T.Block,
|
block,
|
||||||
king?: "subject" | "object" | undefined,
|
king,
|
||||||
|
script,
|
||||||
|
}: {
|
||||||
|
opts: T.TextOptions;
|
||||||
|
block: T.Block;
|
||||||
|
king?: "subject" | "object" | undefined;
|
||||||
script: "p" | "f";
|
script: "p" | "f";
|
||||||
}) {
|
}) {
|
||||||
if ("equative" in block.block) {
|
if ("equative" in block.block) {
|
||||||
return <EquativeBlock opts={opts} eq={block.block.equative} script={script} />;
|
return (
|
||||||
|
<EquativeBlock opts={opts} eq={block.block.equative} script={script} />
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (block.block.type === "AP") {
|
if (block.block.type === "AP") {
|
||||||
const english = getEnglishFromRendered(block.block);
|
const english = getEnglishFromRendered(block.block);
|
||||||
return <APBlock opts={opts} english={english} script={script}>{block.block}</APBlock>
|
return (
|
||||||
|
<APBlock opts={opts} english={english} script={script}>
|
||||||
|
{block.block}
|
||||||
|
</APBlock>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (block.block.type === "subjectSelection") {
|
if (block.block.type === "subjectSelection") {
|
||||||
const role = king === "subject" ? "king" : king === "object" ? "servant" : undefined;
|
const role =
|
||||||
return <SubjectBlock opts={opts} np={block.block.selection} role={role} script={script} />
|
king === "subject" ? "king" : king === "object" ? "servant" : undefined;
|
||||||
|
return (
|
||||||
|
<SubjectBlock
|
||||||
|
opts={opts}
|
||||||
|
np={block.block.selection}
|
||||||
|
role={role}
|
||||||
|
script={script}
|
||||||
|
/>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (block.block.type === "objectSelection") {
|
if (block.block.type === "objectSelection") {
|
||||||
const role = king === "object" ? "king" : king === "subject" ? "servant" : undefined;
|
const role =
|
||||||
return <ObjectBlock opts={opts} obj={block.block.selection} role={role} script={script} />;
|
king === "object" ? "king" : king === "subject" ? "servant" : undefined;
|
||||||
|
return (
|
||||||
|
<ObjectBlock
|
||||||
|
opts={opts}
|
||||||
|
obj={block.block.selection}
|
||||||
|
role={role}
|
||||||
|
script={script}
|
||||||
|
/>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (block.block.type === "predicateSelection") {
|
if (block.block.type === "predicateSelection") {
|
||||||
const english = getEnglishFromRendered(block.block.selection);
|
const english = getEnglishFromRendered(block.block.selection);
|
||||||
return <div className="text-center">
|
return (
|
||||||
<div><strong>Predicate</strong></div>
|
<div className="text-center">
|
||||||
{block.block.selection.type === "complement"
|
<div>
|
||||||
? <ComplementBlock opts={opts} comp={block.block.selection.selection} script={script} />
|
<strong>Predicate</strong>
|
||||||
: <NPBlock opts={opts} english={english} script={script}>{block.block.selection}</NPBlock>}
|
|
||||||
</div>
|
</div>
|
||||||
|
{block.block.selection.type === "complement" ? (
|
||||||
|
<ComplementBlock
|
||||||
|
opts={opts}
|
||||||
|
comp={block.block.selection.selection}
|
||||||
|
script={script}
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<NPBlock opts={opts} english={english} script={script}>
|
||||||
|
{block.block.selection}
|
||||||
|
</NPBlock>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (block.block.type === "negative") {
|
if (block.block.type === "negative") {
|
||||||
return <NegBlock opts={opts} imperative={block.block.imperative} script={script} />
|
return (
|
||||||
|
<NegBlock
|
||||||
|
opts={opts}
|
||||||
|
imperative={block.block.imperative}
|
||||||
|
script={script}
|
||||||
|
/>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (block.block.type === "PH") {
|
if (block.block.type === "PH") {
|
||||||
return <PerfHeadBlock opts={opts} ps={block.block.ps} script={script} />;
|
return <PerfHeadBlock opts={opts} ps={block.block.ps} script={script} />;
|
||||||
|
@ -49,19 +96,36 @@ function Block({ opts, block, king, script }: {
|
||||||
return <VBBlock opts={opts} block={block.block} script={script} />;
|
return <VBBlock opts={opts} block={block.block} script={script} />;
|
||||||
}
|
}
|
||||||
if (block.block.type === "complement") {
|
if (block.block.type === "complement") {
|
||||||
return <ComplementBlock opts={opts} comp={block.block.selection} script={script} />
|
return (
|
||||||
|
<ComplementBlock
|
||||||
|
opts={opts}
|
||||||
|
comp={block.block.selection}
|
||||||
|
script={script}
|
||||||
|
/>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (block.block.type === "NComp") {
|
if (block.block.type === "NComp") {
|
||||||
return <NCompBlock opts={opts} comp={block.block.comp} script={script} />
|
return <NCompBlock opts={opts} comp={block.block.comp} script={script} />;
|
||||||
}
|
}
|
||||||
return <WeldedBlock opts={opts} welded={block.block} script={script} />
|
return <WeldedBlock opts={opts} welded={block.block} script={script} />;
|
||||||
}
|
}
|
||||||
|
|
||||||
export default Block;
|
export default Block;
|
||||||
|
|
||||||
function Border({ children, extraClassName, padding }: { children: JSX.Element | JSX.Element[] | string, extraClassName?: string, padding?: string }) {
|
function Border({
|
||||||
return <div
|
children,
|
||||||
className={`block-border d-flex flex-row justify-content-center align-items-center ${extraClassName ? extraClassName : ""}`}
|
extraClassName,
|
||||||
|
padding,
|
||||||
|
}: {
|
||||||
|
children: JSX.Element | JSX.Element[] | string;
|
||||||
|
extraClassName?: string;
|
||||||
|
padding?: string;
|
||||||
|
}) {
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
className={`block-border d-flex flex-row justify-content-center align-items-center ${
|
||||||
|
extraClassName ? extraClassName : ""
|
||||||
|
}`}
|
||||||
style={{
|
style={{
|
||||||
padding: padding ? padding : "1rem",
|
padding: padding ? padding : "1rem",
|
||||||
textAlign: "center",
|
textAlign: "center",
|
||||||
|
@ -70,65 +134,93 @@ function Border({ children, extraClassName, padding }: { children: JSX.Element |
|
||||||
>
|
>
|
||||||
<>{children}</>
|
<>{children}</>
|
||||||
</div>
|
</div>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Renders one verb block: optional length / version toggles, the verb text
 * in the requested script inside a border, a "VBlock" label, and a sub-line
 * with gender/number or person info when the block carries either.
 *
 * @param opts - text options; accepted for a uniform block interface, not read here
 * @param script - "p" or "f"; picks which field of the displayed PsString is shown
 * @param block - the verb block to display
 */
function VBBlock({
  opts,
  block,
  script,
}: {
  opts: T.TextOptions;
  script: "p" | "f";
  block:
    | T.VBBasic
    | T.VBGenNum
    | (T.VBBasic & {
        person: T.Person;
      });
}) {
  const [length, setLength] = useState<T.Length>("long");
  const [version, setVersion] = useState<number>(0);
  const ps = getLength(block.ps, length);
  // The stored version index survives a length change. If the newly selected
  // length offers fewer variants, fall back to the first one instead of
  // indexing past the end of the list (which would throw on `[script]`).
  const shownVersion = version < ps.length ? version : 0;

  function changeVersion() {
    // Advance from the version that is actually on screen, wrapping around.
    setVersion((o) => ((o < ps.length ? o : 0) + 1) % ps.length);
  }

  function changeLength() {
    // Cycle long -> short -> (mini, only if available) -> long.
    setLength((o) =>
      o === "long"
        ? "short"
        : o === "short" && "mini" in block.ps
        ? "mini"
        : "long"
    );
  }

  const infInfo =
    "gender" in block
      ? getEnglishGenNumInfo(block.gender, block.number)
      : "person" in block
      ? getEnglishPersonInfo(block.person, "short")
      : "";

  return (
    <div className="text-center">
      <div className="d-flex flex-row justify-content-around">
        {"long" in block.ps && (
          <div className="clickable small mb-1" onClick={changeLength}>
            {length}
          </div>
        )}
        {ps.length > 1 && (
          <div className="clickable small mb-1" onClick={changeVersion}>
            v. {shownVersion + 1}
          </div>
        )}
      </div>
      <Border>
        <>{ps[shownVersion][script]}</>
      </Border>
      <div>VBlock</div>
      <SubText>{infInfo}</SubText>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders a welded pair of blocks side by side inside one border.
 * The left part may be an NComp, a VB, or another welded block (rendered
 * recursively); the right part is always rendered with VBBlock.
 * In "p" script the row direction is reversed.
 */
function WeldedBlock({
  opts,
  welded,
  script,
}: {
  opts: T.TextOptions;
  script: "p" | "f";
  welded: T.Welded;
}) {
  const { left, right } = welded;

  // Choose the renderer for the left-hand part by its type tag.
  let leftPart: JSX.Element;
  if (left.type === "NComp") {
    leftPart = <NCompBlock opts={opts} comp={left.comp} script={script} />;
  } else if (left.type === "VB") {
    leftPart = <VBBlock opts={opts} block={left} script={script} />;
  } else {
    leftPart = <WeldedBlock opts={opts} welded={left} script={script} />;
  }

  const direction = script === "p" ? "flex-row-reverse" : "";

  return (
    <div className="text-center">
      <Border padding="0.5rem" extraClassName={direction}>
        {leftPart}
        <VBBlock opts={opts} block={right} script={script} />
      </Border>
    </div>
  );
}
|
||||||
|
|
||||||
// function VerbSBlock({ opts, v, script }: {
|
// function VerbSBlock({ opts, v, script }: {
|
||||||
|
@ -192,19 +284,22 @@ function WeldedBlock({ opts, welded, script }: {
|
||||||
// </div>
|
// </div>
|
||||||
// }
|
// }
|
||||||
|
|
||||||
/**
 * Renders a perfective head: the text for the requested script inside a
 * border, a "perf. head" label, and a sub-line holding a non-breaking space.
 */
function PerfHeadBlock({
  opts,
  ps,
  script,
}: {
  opts: T.TextOptions;
  ps: T.PsString;
  script: "p" | "f";
}) {
  const headText = ps[script];
  return (
    <div className="text-center">
      <Border>{headText}</Border>
      <div>perf. head</div>
      <SubText>{"\u00A0"}</SubText>
    </div>
  );
}
|
||||||
|
|
||||||
// function ModalAuxBlock({ opts, aux, script }: {
|
// function ModalAuxBlock({ opts, aux, script }: {
|
||||||
|
@ -222,249 +317,389 @@ function PerfHeadBlock({ opts, ps, script }: {
|
||||||
// </div>;
|
// </div>;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
/**
 * Renders the negative particle block. The particle is looked up in
 * `negativeParticle` by imperative / non-imperative and by script; the
 * sub-line reads "don't" for imperatives and "not" otherwise.
 */
function NegBlock({
  opts,
  imperative,
  script,
}: {
  opts: T.TextOptions;
  imperative: boolean;
  script: "p" | "f";
}) {
  const mood = imperative ? "imperative" : "nonImperative";
  const gloss = imperative ? "don't" : "not";
  return (
    <div className="text-center">
      <Border>{negativeParticle[mood][script]}</Border>
      <div>Neg.</div>
      <SubText>{gloss}</SubText>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders an equative block: an optional clickable length toggle, the first
 * form for the current length in the requested script, an "Equative" label,
 * and short English person info underneath.
 */
function EquativeBlock({
  opts,
  eq,
  script,
}: {
  opts: T.TextOptions;
  eq: T.EquativeRendered;
  script: "p" | "f";
}) {
  const [length, setLength] = useState<T.Length>("long");

  // Cycle long -> short -> (mini, only when eq.ps has one) -> long.
  function changeLength() {
    setLength((current) => {
      if (current === "long") {
        return "short";
      }
      if (current === "short" && "mini" in eq.ps) {
        return "mini";
      }
      return "long";
    });
  }

  const shown = getLength(eq.ps, length)[0][script];
  const personInfo = getEnglishPersonInfo(eq.person, "short");

  return (
    <div className="text-center">
      {"long" in eq.ps && (
        <div className="clickable small mb-1" onClick={changeLength}>
          {length}
        </div>
      )}
      <Border>{shown}</Border>
      <div>Equative</div>
      <SubText>{personInfo}</SubText>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders the subject: a bold "Subject" heading followed by the role icon
 * (when a role is given), then the NP itself with its English rendering.
 */
function SubjectBlock({
  opts,
  np,
  role,
  script,
}: {
  opts: T.TextOptions;
  np: T.Rendered<T.NPSelection>;
  role: "king" | "servant" | undefined;
  script: "p" | "f";
}) {
  const english = getEnglishFromRendered(np);
  const icon = role ? roleIcon[role] : "";
  return (
    <div className="text-center">
      <div>
        <strong>Subject</strong>
        {icon}
      </div>
      <NPBlock opts={opts} english={english} script={script}>
        {np}
      </NPBlock>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders the object: a bold "Object" heading followed by the role icon
 * (when a role is given), then the NP with its English rendering.
 * Renders nothing when `obj` is not an object value.
 */
function ObjectBlock({
  opts,
  obj,
  role,
  script,
}: {
  opts: T.TextOptions;
  obj: T.Rendered<T.ObjectSelectionComplete>["selection"];
  role: "king" | "servant" | undefined;
  script: "p" | "f";
}) {
  // Non-object selections have no NP to draw.
  if (typeof obj !== "object") {
    return null;
  }
  const english = getEnglishFromRendered(obj);
  const icon = role ? roleIcon[role] : "";
  return (
    <div className="text-center">
      <div>
        <strong>Object</strong>
        {icon}
      </div>
      <NPBlock opts={opts} english={english} script={script}>
        {obj}
      </NPBlock>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders a complement attached to a verb: the complement text in the
 * requested script inside a border and, for "AdjComp" complements only,
 * an "adj." label with gender/number info plus a sub-line of `comp.ps.e`.
 */
function NCompBlock({
  opts,
  comp,
  script,
}: {
  script: "p" | "f";
  opts: T.TextOptions;
  comp: T.Comp;
}) {
  // Only adjective complements get the label / sub-text section.
  const adjDetails =
    comp.type === "AdjComp" ? (
      <div>
        <div>
          adj.{" "}
          <span className="text-muted small">
            {getEnglishGenNumInfo(comp.gender, comp.number)}
          </span>
        </div>
        <SubText>{comp.ps.e}</SubText>
      </div>
    ) : null;

  return (
    <div className="text-center">
      <Border>{comp.ps[script]}</Border>
      {adjDetails}
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders a complement under a "Complement" heading. The body depends on
 * `comp.type`: adjective, locative adverb, noun, an unselected placeholder
 * ("____"), or otherwise a sandwich.
 *
 * @param opts - text options, forwarded to child blocks
 * @param comp - the rendered (or unselected) complement selection
 * @param script - "p" or "f"; picks which field of each PsString is shown
 * @param inside - when true, the unselected placeholder omits its spacer and sub-text
 */
function ComplementBlock({
  opts,
  comp,
  script,
  inside,
}: {
  script: "p" | "f";
  opts: T.TextOptions;
  comp:
    | T.Rendered<T.ComplementSelection["selection"]>
    | T.Rendered<T.UnselectedComplementSelection>["selection"];
  inside?: boolean;
}) {
  // These are plain render helpers, not components. A component declared
  // inside another component's body has a new identity on every render,
  // which makes React unmount and remount its subtree each time.

  /** Bordered adjective text, "Adj." label with inflection info, English sub-line. */
  function renderAdjective(adj: T.Rendered<T.AdjectiveSelection>) {
    return (
      <div className="text-center">
        <Border>{adj.ps[0][script]}</Border>
        <div>
          Adj.{" "}
          <span className="text-muted small">
            ({getEnglishParticipleInflection(adj.person, "short")})
          </span>
        </div>
        <SubText>{adj.e}</SubText>
      </div>
    );
  }

  /** Bordered locative adverb text, "Loc. Adv." label, English sub-line. */
  function renderLocAdv(adv: T.Rendered<T.LocativeAdverbSelection>) {
    return (
      <div className="text-center">
        <Border>{adv.ps[0][script]}</Border>
        <div>Loc. Adv.</div>
        <SubText>{adv.e}</SubText>
      </div>
    );
  }

  return (
    <div className="text-center">
      <div>Complement</div>
      {comp.type === "adjective" ? (
        renderAdjective(comp)
      ) : comp.type === "loc. adv." ? (
        renderLocAdv(comp)
      ) : comp.type === "noun" ? (
        <CompNounBlock opts={opts} noun={comp} script={script} />
      ) : comp.type === "unselected" ? (
        <div>
          <Border>____</Border>
          {!inside && (
            <>
              <div> </div>
              <SubText>{comp.e}</SubText>
            </>
          )}
        </div>
      ) : (
        <div>
          <Sandwich opts={opts} sandwich={comp} script={script} />
          <div>Sandwich</div>
          <SubText>{comp.e}</SubText>
        </div>
      )}
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders an adverbial phrase. An adverb is shown as bordered text in the
 * requested script; any other selection is rendered through Sandwich.
 * Both variants end with an "AP" label and the English sub-line.
 */
export function APBlock({
  opts,
  children,
  english,
  script,
}: {
  opts: T.TextOptions;
  children: T.Rendered<T.APSelection>;
  english?: string;
  script: "p" | "f";
}) {
  const { selection } = children;

  if (selection.type !== "adverb") {
    return (
      <div>
        <Sandwich opts={opts} sandwich={selection} script={script} />
        <div>AP</div>
        <SubText>{english}</SubText>
      </div>
    );
  }

  return (
    <div className="text-center">
      <Border>{selection.ps[0][script]}</Border>
      <div>AP</div>
      <SubText>{english}</SubText>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders a sandwich: possessors of the inner NP (if it is not a pronoun),
 * the "before" particle, the inner NP, and the "after" particle, laid out
 * in a row inside a border. In "p" script the row direction is reversed.
 *
 * @param opts - text options, forwarded to child blocks
 * @param sandwich - the rendered sandwich selection
 * @param script - "p" or "f"; picks which field of each PsString is shown
 */
function Sandwich({
  opts,
  sandwich,
  script,
}: {
  opts: T.TextOptions;
  sandwich: T.Rendered<T.SandwichSelection<T.Sandwich>>;
  script: "p" | "f";
}) {
  const inner = sandwich.inside.selection;
  // Show the particles in the same script as everything else in the block.
  // Previously `.f` was always used, so phonetics appeared in "p" mode.
  // NOTE(review): assumes before/after carry both `p` and `f` — confirm against T.Sandwich.
  const before = sandwich.before ? sandwich.before[script] : "";
  const after = sandwich.after ? sandwich.after[script] : "";
  return (
    <div className="text-center">
      <div className="text-center">Sandwich 🥪</div>
      <Border padding="0.75rem 0.5rem 0.25rem 0.5rem">
        <div
          className={`d-flex flex-row${
            script === "p" ? "-reverse" : ""
          } justify-content-between align-items-end`}
        >
          <Possesors opts={opts} script={script}>
            {inner.type !== "pronoun" ? inner.possesor : undefined}
          </Possesors>
          <div className="mr-2 ml-1 mb-1">
            <strong>{before}</strong>
          </div>
          <div>
            <NPBlock opts={opts} inside script={script}>
              {sandwich.inside}
            </NPBlock>
          </div>
          <div className="ml-2 mr-1 mb-1">
            <strong>{after}</strong>
          </div>
        </div>
      </Border>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders a noun used as a complement: the noun text in the requested
 * script inside a border, a "Comp. Noun" label, and the English sub-line.
 *
 * @param opts - text options; accepted for a uniform block interface, not read here
 * @param noun - the rendered noun selection
 * @param script - "p" or "f"; picks which field of the PsString is shown
 */
function CompNounBlock({
  opts,
  noun,
  script,
}: {
  opts: T.TextOptions;
  noun: T.Rendered<T.NounSelection>;
  script: "p" | "f";
}) {
  // No extraClassName here: the previous value was a backtick-quoted copy of
  // an expression (`!inside && hasPossesor ? ...`), so its source text was
  // emitted as class names, and neither variable exists in this component.
  return (
    <div className="text-center">
      <Border padding={"1rem"}>{noun.ps[0][script]}</Border>
      <div>Comp. Noun</div>
      <SubText>{noun.e}</SubText>
    </div>
  );
}
|
||||||
|
|
||||||
/**
 * Renders a noun phrase: possessors (unless `inside`), adjectives, and the
 * head word inside a border, followed by an "NP" label. When not `inside`,
 * the label also shows short English person info and the English sub-line.
 * In "p" script the order of the bordered elements is reversed.
 *
 * @param opts - text options, forwarded to child blocks
 * @param children - the rendered NP selection to display
 * @param inside - true when nested in another block; gives a compact rendering
 * @param english - English text for the sub-line (only shown when not `inside`)
 * @param script - "p" or "f"; picks which field of each PsString is shown
 */
export function NPBlock({
  opts,
  children,
  inside,
  english,
  script,
}: {
  opts: T.TextOptions;
  children: T.Rendered<T.NPSelection>;
  inside?: boolean;
  english?: string;
  script: "p" | "f";
}) {
  const np = children;
  const hasPossesor = !!(
    np.selection.type !== "pronoun" &&
    np.selection.possesor &&
    !np.selection.possesor.shrunken
  );
  // The elements are passed to React as an array, so each one needs a stable key.
  const elements = [
    ...(!inside
      ? [
          <Possesors key="possesors" opts={opts} script={script}>
            {np.selection.type !== "pronoun"
              ? np.selection.possesor
              : undefined}
          </Possesors>,
        ]
      : []),
    <Adjectives key="adjectives" opts={opts} script={script}>
      {np.selection.adjectives}
    </Adjectives>,
    <div key="head" className={np.selection.adjectives?.length ? "mx-1" : ""}>
      {" "}
      {np.selection.ps[0][script]}
    </div>,
  ];
  // `elements` is a fresh local array, so reversing it in place is safe.
  const el = script === "p" ? elements.reverse() : elements;
  return (
    <div className="text-center">
      <Border
        // Pass the evaluated expression. The earlier backtick-quoted version
        // emitted the expression's source text itself as class names.
        extraClassName={!inside && hasPossesor ? "pt-2" : ""}
        padding={
          inside
            ? "0.3rem"
            : hasPossesor
            ? "0.5rem 0.8rem 0.25rem 0.8rem"
            : "1rem"
        }
      >
        {el}
      </Border>
      <div className={inside ? "small" : ""}>
        NP
        {!inside ? (
          <>
            {` `}
            <span className="text-muted small">
              ({getEnglishPersonInfo(np.selection.person, "short")})
            </span>
          </>
        ) : (
          <></>
        )}
      </div>
      {!inside && <SubText>{english}</SubText>}
    </div>
  );
}
|
||||||
|
|
||||||
function Possesors({ opts, children, script }: {
|
function Possesors({
|
||||||
opts: T.TextOptions,
|
opts,
|
||||||
children: { shrunken: boolean, np: T.Rendered<T.NPSelection> } | undefined,
|
children,
|
||||||
script: "p" | "f",
|
script,
|
||||||
|
}: {
|
||||||
|
opts: T.TextOptions;
|
||||||
|
children: { shrunken: boolean; np: T.Rendered<T.NPSelection> } | undefined;
|
||||||
|
script: "p" | "f";
|
||||||
}) {
|
}) {
|
||||||
if (!children) {
|
if (!children) {
|
||||||
return null;
|
return null;
|
||||||
|
@ -473,62 +708,107 @@ function Possesors({ opts, children, script }: {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
const contraction = checkForContraction(children.np, script);
|
const contraction = checkForContraction(children.np, script);
|
||||||
return <div className={`d-flex flex-row${script === "p" ? "-reverse" : ""} mr-1 align-items-end`} style={{
|
return (
|
||||||
|
<div
|
||||||
|
className={`d-flex flex-row${
|
||||||
|
script === "p" ? "-reverse" : ""
|
||||||
|
} mr-1 align-items-end`}
|
||||||
|
style={{
|
||||||
marginBottom: "0.5rem",
|
marginBottom: "0.5rem",
|
||||||
borderBottom: "1px solid grey",
|
borderBottom: "1px solid grey",
|
||||||
}}>
|
}}
|
||||||
{children.np.selection.type !== "pronoun" && <Possesors opts={opts} script={script}>{children.np.selection.possesor}</Possesors>}
|
>
|
||||||
|
{children.np.selection.type !== "pronoun" && (
|
||||||
|
<Possesors opts={opts} script={script}>
|
||||||
|
{children.np.selection.possesor}
|
||||||
|
</Possesors>
|
||||||
|
)}
|
||||||
<div>
|
<div>
|
||||||
{contraction && <div className="mb-1">({contraction})</div>}
|
{contraction && <div className="mb-1">({contraction})</div>}
|
||||||
<div className={classNames("d-flex", (script === "f" ? "flex-row" : "flex-row-reverse"), "align-items-center", { "text-muted": contraction })}>
|
<div
|
||||||
|
className={classNames(
|
||||||
|
"d-flex",
|
||||||
|
script === "f" ? "flex-row" : "flex-row-reverse",
|
||||||
|
"align-items-center",
|
||||||
|
{ "text-muted": contraction }
|
||||||
|
)}
|
||||||
|
>
|
||||||
<div className="mx-1 pb-2">{script === "p" ? "د" : "du"}</div>
|
<div className="mx-1 pb-2">{script === "p" ? "د" : "du"}</div>
|
||||||
<div>
|
<div>
|
||||||
<NPBlock script={script} opts={opts} inside>{children.np}</NPBlock>
|
<NPBlock script={script} opts={opts} inside>
|
||||||
|
{children.np}
|
||||||
|
</NPBlock>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function Adjectives({ opts, children, script }: {
|
function Adjectives({
|
||||||
opts: T.TextOptions,
|
opts,
|
||||||
children: T.Rendered<T.AdjectiveSelection>[] | undefined,
|
children,
|
||||||
script: "p" | "f",
|
script,
|
||||||
|
}: {
|
||||||
|
opts: T.TextOptions;
|
||||||
|
children: T.Rendered<T.AdjectiveSelection>[] | undefined;
|
||||||
|
script: "p" | "f";
|
||||||
}) {
|
}) {
|
||||||
if (!children) {
|
if (!children) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
const c = script === "p"
|
const c = script === "p" ? children.reverse() : children;
|
||||||
? children.reverse()
|
return (
|
||||||
: children;
|
<em className="mr-1">
|
||||||
return <em className="mr-1">
|
{c.map((a) => a.ps[0][script]).join(" ")}
|
||||||
{c.map(a => a.ps[0][script]).join(" ")}{` `}
|
{` `}
|
||||||
</em>
|
</em>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function SubText({ children: e }: { children: string | undefined }) {
|
function SubText({ children: e }: { children: string | undefined }) {
|
||||||
return <div className="small text-muted text-center" style={{
|
return (
|
||||||
|
<div
|
||||||
|
className="small text-muted text-center"
|
||||||
|
style={{
|
||||||
margin: "0 auto",
|
margin: "0 auto",
|
||||||
maxWidth: "300px",
|
maxWidth: "300px",
|
||||||
height: "1rem",
|
height: "1rem",
|
||||||
}}>{e ? e : ""}</div>;
|
}}
|
||||||
|
>
|
||||||
|
{e ? e : ""}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function checkForContraction(np: T.Rendered<T.NPSelection>, script: "p" | "f"): string | undefined {
|
function checkForContraction(
|
||||||
|
np: T.Rendered<T.NPSelection>,
|
||||||
|
script: "p" | "f"
|
||||||
|
): string | undefined {
|
||||||
if (np.selection.type !== "pronoun") return undefined;
|
if (np.selection.type !== "pronoun") return undefined;
|
||||||
if (np.selection.person === T.Person.FirstSingMale || np.selection.person === T.Person.FirstSingFemale) {
|
if (
|
||||||
|
np.selection.person === T.Person.FirstSingMale ||
|
||||||
|
np.selection.person === T.Person.FirstSingFemale
|
||||||
|
) {
|
||||||
return script === "f" ? "zmaa" : "زما";
|
return script === "f" ? "zmaa" : "زما";
|
||||||
}
|
}
|
||||||
if (np.selection.person === T.Person.SecondSingMale || np.selection.person === T.Person.SecondSingFemale) {
|
if (
|
||||||
|
np.selection.person === T.Person.SecondSingMale ||
|
||||||
|
np.selection.person === T.Person.SecondSingFemale
|
||||||
|
) {
|
||||||
return script === "f" ? "staa" : "ستا";
|
return script === "f" ? "staa" : "ستا";
|
||||||
}
|
}
|
||||||
if (np.selection.person === T.Person.FirstPlurMale || np.selection.person === T.Person.FirstPlurFemale) {
|
if (
|
||||||
|
np.selection.person === T.Person.FirstPlurMale ||
|
||||||
|
np.selection.person === T.Person.FirstPlurFemale
|
||||||
|
) {
|
||||||
return script === "f" ? "zmoonG" : "زمونږ";
|
return script === "f" ? "zmoonG" : "زمونږ";
|
||||||
}
|
}
|
||||||
if (np.selection.person === T.Person.SecondPlurMale || np.selection.person === T.Person.SecondPlurFemale) {
|
if (
|
||||||
|
np.selection.person === T.Person.SecondPlurMale ||
|
||||||
|
np.selection.person === T.Person.SecondPlurFemale
|
||||||
|
) {
|
||||||
return script === "f" ? "staaso" : "ستاسو";
|
return script === "f" ? "staaso" : "ستاسو";
|
||||||
}
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,24 +3,20 @@ import {
|
||||||
last,
|
last,
|
||||||
addP,
|
addP,
|
||||||
lastNonWhitespace,
|
lastNonWhitespace,
|
||||||
advanceP,
|
|
||||||
reverseP,
|
reverseP,
|
||||||
overwriteP,
|
|
||||||
advanceForHamza,
|
|
||||||
advanceForHamzaMid,
|
|
||||||
} from "./diacritics-helpers";
|
} from "./diacritics-helpers";
|
||||||
|
|
||||||
const phonemeSplits: Array<{
|
const phonemeSplits: Array<{
|
||||||
in: string,
|
in: string;
|
||||||
out: string[],
|
out: string[];
|
||||||
}> = [
|
}> = [
|
||||||
{
|
{
|
||||||
in: "kor",
|
in: "kor",
|
||||||
out: ["k", "o", "r"],
|
out: ["k", "o", "r"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "raaghey",
|
in: "raaghay",
|
||||||
out: ["r", "aa", "gh", "ey"],
|
out: ["r", "aa", "gh", "ay"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "ist'imaal",
|
in: "ist'imaal",
|
||||||
|
@ -35,16 +31,16 @@ const phonemeSplits: Array<{
|
||||||
out: ["b", "a"],
|
out: ["b", "a"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "peydáa",
|
in: "paydáa",
|
||||||
out: ["p", "ey", "d", "aa"],
|
out: ["p", "ay", "d", "aa"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "be kaar",
|
in: "be kaar",
|
||||||
out: ["b", "e", "k", "aa", "r"],
|
out: ["b", "e", "k", "aa", "r"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "raadzeyy",
|
in: "raadzey",
|
||||||
out: ["r", "aa", "dz", "eyy"],
|
out: ["r", "aa", "dz", "ey"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "badanuy ??",
|
in: "badanuy ??",
|
||||||
|
@ -68,8 +64,8 @@ phonemeSplits.forEach((s) => {
|
||||||
});
|
});
|
||||||
|
|
||||||
const badPhonetics: Array<{
|
const badPhonetics: Array<{
|
||||||
in: string,
|
in: string;
|
||||||
problem: string,
|
problem: string;
|
||||||
}> = [
|
}> = [
|
||||||
{
|
{
|
||||||
in: "acar",
|
in: "acar",
|
||||||
|
@ -107,25 +103,31 @@ test("lastNonWhiteSpace should work", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
test("reverseP should work", () => {
|
test("reverseP should work", () => {
|
||||||
expect(reverseP({
|
expect(
|
||||||
|
reverseP({
|
||||||
pIn: "کور",
|
pIn: "کور",
|
||||||
pOut: "تور ",
|
pOut: "تور ",
|
||||||
})).toEqual({
|
})
|
||||||
|
).toEqual({
|
||||||
pIn: " کور",
|
pIn: " کور",
|
||||||
pOut: "تور",
|
pOut: "تور",
|
||||||
});
|
});
|
||||||
expect(reverseP({
|
expect(
|
||||||
|
reverseP({
|
||||||
pIn: "کور",
|
pIn: "کور",
|
||||||
pOut: "تور ... ",
|
pOut: "تور ... ",
|
||||||
})).toEqual({
|
})
|
||||||
|
).toEqual({
|
||||||
pIn: " ... کور",
|
pIn: " ... کور",
|
||||||
pOut: "تور",
|
pOut: "تور",
|
||||||
});
|
});
|
||||||
expect(reverseP({
|
expect(
|
||||||
|
reverseP({
|
||||||
pIn: "کور",
|
pIn: "کور",
|
||||||
pOut: "تور . ",
|
pOut: "تور . ",
|
||||||
})).toEqual({
|
})
|
||||||
|
).toEqual({
|
||||||
pIn: " . کور",
|
pIn: " . کور",
|
||||||
pOut: "تور",
|
pOut: "تور",
|
||||||
});
|
});
|
||||||
})
|
});
|
||||||
|
|
|
@ -8,31 +8,62 @@
|
||||||
|
|
||||||
import { removeAccents } from "./accent-helpers";
|
import { removeAccents } from "./accent-helpers";
|
||||||
|
|
||||||
export type DiacriticsAccumulator = { pIn: string, pOut: string };
|
export type DiacriticsAccumulator = { pIn: string; pOut: string };
|
||||||
|
|
||||||
type Consonant = "b" | "p" | "t" | "T" | "s" | "j" | "ch" | "kh" | "ts" | "dz" | "d" | "D" | "r" | "R" | "z" | "jz" | "G" | "sh" | "x" | "gh" | "f" | "q" | "k" | "g" | "l" | "m" | "n" | "N" | "h" | "w" | "y";
|
type Consonant =
|
||||||
type Ain = "'"
|
| "b"
|
||||||
|
| "p"
|
||||||
|
| "t"
|
||||||
|
| "T"
|
||||||
|
| "s"
|
||||||
|
| "j"
|
||||||
|
| "ch"
|
||||||
|
| "kh"
|
||||||
|
| "ts"
|
||||||
|
| "dz"
|
||||||
|
| "d"
|
||||||
|
| "D"
|
||||||
|
| "r"
|
||||||
|
| "R"
|
||||||
|
| "z"
|
||||||
|
| "jz"
|
||||||
|
| "G"
|
||||||
|
| "sh"
|
||||||
|
| "x"
|
||||||
|
| "gh"
|
||||||
|
| "f"
|
||||||
|
| "q"
|
||||||
|
| "k"
|
||||||
|
| "g"
|
||||||
|
| "l"
|
||||||
|
| "m"
|
||||||
|
| "n"
|
||||||
|
| "N"
|
||||||
|
| "h"
|
||||||
|
| "w"
|
||||||
|
| "y";
|
||||||
|
type Ain = "'";
|
||||||
type JoiningVowel = "-i-" | "-U-" | "-Ul-";
|
type JoiningVowel = "-i-" | "-U-" | "-Ul-";
|
||||||
type LongVowel = "aa" | "ee" | "e" | "oo" | "o" | "ey" | "uy" | "eyy";
|
type LongVowel = "aa" | "ee" | "e" | "oo" | "o" | "ay" | "uy" | "ey";
|
||||||
type ShortVowel = "a" | "i" | "u" | "U";
|
type ShortVowel = "a" | "i" | "u" | "U";
|
||||||
export type Phoneme = Consonant | Ain | LongVowel | ShortVowel | JoiningVowel;
|
export type Phoneme = Consonant | Ain | LongVowel | ShortVowel | JoiningVowel;
|
||||||
|
|
||||||
type PhonemeInfo = {
|
type PhonemeInfo = {
|
||||||
matches?: string[],
|
matches?: string[];
|
||||||
beginningMatches?: string[],
|
beginningMatches?: string[];
|
||||||
endingMatches?: string[],
|
endingMatches?: string[];
|
||||||
consonant?: true,
|
consonant?: true;
|
||||||
diacritic?: string,
|
diacritic?: string;
|
||||||
endingOnly?: true,
|
endingOnly?: true;
|
||||||
takesSukunOnEnding?: true,
|
takesSukunOnEnding?: true;
|
||||||
longVowel?: true,
|
longVowel?: true;
|
||||||
canStartWithAynBefore?: true,
|
canStartWithAynBefore?: true;
|
||||||
useEndingDiacritic?: true,
|
useEndingDiacritic?: true;
|
||||||
ainBlendDiacritic?: string,
|
ainBlendDiacritic?: string;
|
||||||
}
|
};
|
||||||
|
|
||||||
export const zwar = "َ";
|
export const zwar = "َ";
|
||||||
export const zwarakey = "ٙ";
|
export const zwarakay = "ٙ";
|
||||||
export const zer = "ِ";
|
export const zer = "ِ";
|
||||||
export const pesh = "ُ";
|
export const pesh = "ُ";
|
||||||
export const sukun = "ْ";
|
export const sukun = "ْ";
|
||||||
|
@ -44,128 +75,128 @@ export const fathahan = "ً";
|
||||||
|
|
||||||
export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
// Consonants
|
// Consonants
|
||||||
"b": {
|
b: {
|
||||||
matches: ["ب"],
|
matches: ["ب"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"p": {
|
p: {
|
||||||
matches: ["پ"],
|
matches: ["پ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"t": {
|
t: {
|
||||||
matches: ["ت", "ط"],
|
matches: ["ت", "ط"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"T": {
|
T: {
|
||||||
matches: ["ټ"],
|
matches: ["ټ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"s": {
|
s: {
|
||||||
matches: ["س", "ص", "ث"],
|
matches: ["س", "ص", "ث"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"j": {
|
j: {
|
||||||
matches: ["ج"],
|
matches: ["ج"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"ch": {
|
ch: {
|
||||||
matches: ["چ"],
|
matches: ["چ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"kh": {
|
kh: {
|
||||||
matches: ["خ"],
|
matches: ["خ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"ts": {
|
ts: {
|
||||||
matches: ["څ"],
|
matches: ["څ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"dz": {
|
dz: {
|
||||||
matches: ["ځ"],
|
matches: ["ځ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"d": {
|
d: {
|
||||||
matches: ["د"],
|
matches: ["د"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"D": {
|
D: {
|
||||||
matches: ["ډ"],
|
matches: ["ډ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"r": {
|
r: {
|
||||||
matches: ["ر"],
|
matches: ["ر"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"R": {
|
R: {
|
||||||
matches: ["ړ"],
|
matches: ["ړ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"z": {
|
z: {
|
||||||
matches: ["ز", "ذ", "ظ", "ض"],
|
matches: ["ز", "ذ", "ظ", "ض"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"jz": {
|
jz: {
|
||||||
matches: ["ژ"],
|
matches: ["ژ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"G": {
|
G: {
|
||||||
matches: ["ږ"],
|
matches: ["ږ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"sh": {
|
sh: {
|
||||||
matches: ["ش"],
|
matches: ["ش"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"x": {
|
x: {
|
||||||
matches: ["ښ"],
|
matches: ["ښ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"gh": {
|
gh: {
|
||||||
matches: ["غ"],
|
matches: ["غ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"f": {
|
f: {
|
||||||
matches: ["ف"],
|
matches: ["ف"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"q": {
|
q: {
|
||||||
matches: ["ق"],
|
matches: ["ق"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"k": {
|
k: {
|
||||||
matches: ["ک"],
|
matches: ["ک"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"g": {
|
g: {
|
||||||
matches: ["ګ"],
|
matches: ["ګ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"l": {
|
l: {
|
||||||
matches: ["ل"],
|
matches: ["ل"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"m": {
|
m: {
|
||||||
matches: ["م"],
|
matches: ["م"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"n": {
|
n: {
|
||||||
matches: ["ن"],
|
matches: ["ن"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"N": {
|
N: {
|
||||||
matches: ["ڼ"],
|
matches: ["ڼ"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"h": {
|
h: {
|
||||||
matches: ["ه", "ح"],
|
matches: ["ه", "ح"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
takesSukunOnEnding: true,
|
takesSukunOnEnding: true,
|
||||||
},
|
},
|
||||||
"w": {
|
w: {
|
||||||
matches: ["و"],
|
matches: ["و"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
"y": {
|
y: {
|
||||||
matches: ["ی"],
|
matches: ["ی"],
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
|
@ -175,8 +206,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
consonant: true,
|
consonant: true,
|
||||||
},
|
},
|
||||||
// Joining Vowels
|
// Joining Vowels
|
||||||
"-i-": {
|
"-i-": {},
|
||||||
},
|
|
||||||
"-U-": {
|
"-U-": {
|
||||||
matches: [" و ", "و"],
|
matches: [" و ", "و"],
|
||||||
},
|
},
|
||||||
|
@ -184,14 +214,14 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
matches: ["ال"],
|
matches: ["ال"],
|
||||||
},
|
},
|
||||||
// Long Vowels
|
// Long Vowels
|
||||||
"aa": {
|
aa: {
|
||||||
matches: ["ا", "أ"],
|
matches: ["ا", "أ"],
|
||||||
beginningMatches: ["آ", "ا"],
|
beginningMatches: ["آ", "ا"],
|
||||||
endingMatches: ["ا", "یٰ"],
|
endingMatches: ["ا", "یٰ"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
ainBlendDiacritic: zwar,
|
ainBlendDiacritic: zwar,
|
||||||
},
|
},
|
||||||
"ee": {
|
ee: {
|
||||||
matches: ["ی"],
|
matches: ["ی"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
endingMatches: ["ي"],
|
endingMatches: ["ي"],
|
||||||
|
@ -199,61 +229,61 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
canStartWithAynBefore: true,
|
canStartWithAynBefore: true,
|
||||||
ainBlendDiacritic: zer,
|
ainBlendDiacritic: zer,
|
||||||
},
|
},
|
||||||
"e": {
|
e: {
|
||||||
matches: ["ې"],
|
matches: ["ې"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
},
|
},
|
||||||
"o": {
|
o: {
|
||||||
matches: ["و"],
|
matches: ["و"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
},
|
},
|
||||||
"oo": {
|
oo: {
|
||||||
matches: ["و"],
|
matches: ["و"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
diacritic: pesh,
|
diacritic: pesh,
|
||||||
useEndingDiacritic: true,
|
useEndingDiacritic: true,
|
||||||
ainBlendDiacritic: pesh,
|
ainBlendDiacritic: pesh,
|
||||||
},
|
},
|
||||||
"ey": {
|
ay: {
|
||||||
matches: ["ی"],
|
matches: ["ی"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
endingMatches: ["ی"],
|
endingMatches: ["ی"],
|
||||||
},
|
},
|
||||||
"uy": {
|
uy: {
|
||||||
matches: ["ۍ"],
|
matches: ["ۍ"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
endingOnly: true,
|
endingOnly: true,
|
||||||
},
|
},
|
||||||
"eyy": {
|
ey: {
|
||||||
matches: ["ئ"],
|
matches: ["ئ"],
|
||||||
longVowel: true,
|
longVowel: true,
|
||||||
endingOnly: true,
|
endingOnly: true,
|
||||||
},
|
},
|
||||||
// Short Vowels
|
// Short Vowels
|
||||||
"a": {
|
a: {
|
||||||
diacritic: zwar,
|
diacritic: zwar,
|
||||||
endingMatches: ["ه"],
|
endingMatches: ["ه"],
|
||||||
beginningMatches: ["ا", "ع"],
|
beginningMatches: ["ا", "ع"],
|
||||||
// canComeAfterHeyEnding: true,
|
// canComeAfterHayEnding: true,
|
||||||
},
|
},
|
||||||
"u": {
|
u: {
|
||||||
diacritic: zwarakey,
|
diacritic: zwarakay,
|
||||||
endingMatches: ["ه"],
|
endingMatches: ["ه"],
|
||||||
},
|
},
|
||||||
"i": {
|
i: {
|
||||||
diacritic: zer,
|
diacritic: zer,
|
||||||
endingMatches: ["ه"],
|
endingMatches: ["ه"],
|
||||||
beginningMatches: ["ا", "ع"],
|
beginningMatches: ["ا", "ع"],
|
||||||
// takesDiacriticBeforeGurdaHeyEnding: true,
|
// takesDiacriticBeforeGurdaHayEnding: true,
|
||||||
// canBeWasla: true,
|
// canBeWasla: true,
|
||||||
},
|
},
|
||||||
"U": {
|
U: {
|
||||||
diacritic: pesh,
|
diacritic: pesh,
|
||||||
endingMatches: ["ه"],
|
endingMatches: ["ه"],
|
||||||
// takesDiacriticBeforeGurdaHeyEnding: true,
|
// takesDiacriticBeforeGurdaHayEnding: true,
|
||||||
beginningMatches: ["ا", "ع"],
|
beginningMatches: ["ا", "ع"],
|
||||||
},
|
},
|
||||||
}
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* splits a phonetics string into an array of Phonemes
|
* splits a phonetics string into an array of Phonemes
|
||||||
|
@ -264,11 +294,55 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
* @returns an array of phonemes
|
* @returns an array of phonemes
|
||||||
*/
|
*/
|
||||||
export function splitFIntoPhonemes(fIn: string): Phoneme[] {
|
export function splitFIntoPhonemes(fIn: string): Phoneme[] {
|
||||||
const singleLetterPhonemes: Phoneme[] = ["a", "i", "u", "o", "e", "U", "b", "p", "t", "T", "s", "j", "d", "D", "r", "R", "z", "G", "x", "f", "q", "k", "g", "l", "m", "n", "N", "h", "w", "y", "'"];
|
const singleLetterPhonemes: Phoneme[] = [
|
||||||
|
"a",
|
||||||
|
"i",
|
||||||
|
"u",
|
||||||
|
"o",
|
||||||
|
"e",
|
||||||
|
"U",
|
||||||
|
"b",
|
||||||
|
"p",
|
||||||
|
"t",
|
||||||
|
"T",
|
||||||
|
"s",
|
||||||
|
"j",
|
||||||
|
"d",
|
||||||
|
"D",
|
||||||
|
"r",
|
||||||
|
"R",
|
||||||
|
"z",
|
||||||
|
"G",
|
||||||
|
"x",
|
||||||
|
"f",
|
||||||
|
"q",
|
||||||
|
"k",
|
||||||
|
"g",
|
||||||
|
"l",
|
||||||
|
"m",
|
||||||
|
"n",
|
||||||
|
"N",
|
||||||
|
"h",
|
||||||
|
"w",
|
||||||
|
"y",
|
||||||
|
"'",
|
||||||
|
];
|
||||||
|
|
||||||
const quadrigraphs: Phoneme[] = ["-Ul-"];
|
const quadrigraphs: Phoneme[] = ["-Ul-"];
|
||||||
const trigraphs: Phoneme[] = ["eyy", "-i-", "-U-"];
|
const trigraphs: Phoneme[] = ["ey", "-i-", "-U-"];
|
||||||
const digraphs: Phoneme[] = ["aa", "ee", "ey", "oo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
|
const digraphs: Phoneme[] = [
|
||||||
|
"aa",
|
||||||
|
"ee",
|
||||||
|
"ay",
|
||||||
|
"oo",
|
||||||
|
"kh",
|
||||||
|
"gh",
|
||||||
|
"ts",
|
||||||
|
"dz",
|
||||||
|
"jz",
|
||||||
|
"ch",
|
||||||
|
"sh",
|
||||||
|
];
|
||||||
const endingDigraphs: Phoneme[] = ["uy"];
|
const endingDigraphs: Phoneme[] = ["uy"];
|
||||||
const willIgnore = ["?", " ", "`", ".", "…", ",", "-"];
|
const willIgnore = ["?", " ", "`", ".", "…", ",", "-"];
|
||||||
|
|
||||||
|
@ -276,7 +350,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
|
||||||
const f = removeAccents(fIn).replace(/ă/g, "a");
|
const f = removeAccents(fIn).replace(/ă/g, "a");
|
||||||
let index = 0;
|
let index = 0;
|
||||||
while (index < f.length) {
|
while (index < f.length) {
|
||||||
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
|
const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
|
||||||
const threeLetterChunk = f.slice(index, index + 3) as Phoneme;
|
const threeLetterChunk = f.slice(index, index + 3) as Phoneme;
|
||||||
const fourLetterChunk = f.slice(index, index + 4) as Phoneme;
|
const fourLetterChunk = f.slice(index, index + 4) as Phoneme;
|
||||||
if (quadrigraphs.includes(fourLetterChunk)) {
|
if (quadrigraphs.includes(fourLetterChunk)) {
|
||||||
|
@ -313,10 +387,10 @@ export enum PhonemeStatus {
|
||||||
LeadingLongVowel,
|
LeadingLongVowel,
|
||||||
LeadingConsonantOrShortVowel,
|
LeadingConsonantOrShortVowel,
|
||||||
DoubleConsonantTashdeed,
|
DoubleConsonantTashdeed,
|
||||||
EndingWithHeyHim,
|
EndingWithHayHim,
|
||||||
DirectMatch,
|
DirectMatch,
|
||||||
DirectMatchAfterSukun,
|
DirectMatchAfterSukun,
|
||||||
EndingWithHeyHimFromSukun,
|
EndingWithHayHimFromSukun,
|
||||||
ShortVowel,
|
ShortVowel,
|
||||||
PersianSilentWWithAa,
|
PersianSilentWWithAa,
|
||||||
ArabicWasla,
|
ArabicWasla,
|
||||||
|
@ -344,11 +418,16 @@ export enum PhonemeStatus {
|
||||||
EndingSmallH,
|
EndingSmallH,
|
||||||
}
|
}
|
||||||
|
|
||||||
export function stateInfo({ state, i, phonemes, phoneme }: {
|
export function stateInfo({
|
||||||
state: DiacriticsAccumulator,
|
state,
|
||||||
i: number,
|
i,
|
||||||
phonemes: Phoneme[],
|
phonemes,
|
||||||
phoneme: Phoneme,
|
phoneme,
|
||||||
|
}: {
|
||||||
|
state: DiacriticsAccumulator;
|
||||||
|
i: number;
|
||||||
|
phonemes: Phoneme[];
|
||||||
|
phoneme: Phoneme;
|
||||||
}) {
|
}) {
|
||||||
const isOutOfWord = (char: string) => !char || char === " ";
|
const isOutOfWord = (char: string) => !char || char === " ";
|
||||||
const prevPLetter = last(state.pOut);
|
const prevPLetter = last(state.pOut);
|
||||||
|
@ -356,43 +435,86 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
const nextPLetter = state.pIn[1];
|
const nextPLetter = state.pIn[1];
|
||||||
const nextPhoneme = phonemes[i + 1];
|
const nextPhoneme = phonemes[i + 1];
|
||||||
const previousPhoneme = i > 0 && phonemes[i - 1];
|
const previousPhoneme = i > 0 && phonemes[i - 1];
|
||||||
const lastThreePLetters = last(state.pOut, 3) + last(state.pOut, 2) + prevPLetter;
|
const lastThreePLetters =
|
||||||
const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل") || (["دَر", "وَر"].includes(lastThreePLetters) || (last(state.pOut, 2) + prevPLetter) === "را");
|
last(state.pOut, 3) + last(state.pOut, 2) + prevPLetter;
|
||||||
|
const isBeginningOfWord =
|
||||||
|
state.pOut === "" ||
|
||||||
|
prevPLetter === " " ||
|
||||||
|
(previousPhoneme === "-Ul-" && prevPLetter === "ل") ||
|
||||||
|
["دَر", "وَر"].includes(lastThreePLetters) ||
|
||||||
|
last(state.pOut, 2) + prevPLetter === "را";
|
||||||
const isEndOfWord = isOutOfWord(nextPLetter);
|
const isEndOfWord = isOutOfWord(nextPLetter);
|
||||||
const phonemeInfo = phonemeTable[phoneme];
|
const phonemeInfo = phonemeTable[phoneme];
|
||||||
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]];
|
const previousPhonemeInfo =
|
||||||
|
!isBeginningOfWord && i > 0 && phonemeTable[phonemes[i - 1]];
|
||||||
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
|
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
|
||||||
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
|
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
|
||||||
const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant);
|
const doubleConsonant =
|
||||||
const needsSukun = (doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter))) // || (isEndOfWord && phonemeInfo.takesSukunOnEnding);
|
previousPhonemeInfo &&
|
||||||
const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع"));
|
phonemeInfo.consonant &&
|
||||||
|
previousPhonemeInfo.consonant;
|
||||||
|
const needsSukun =
|
||||||
|
doubleConsonant &&
|
||||||
|
(previousPhoneme !== phoneme ||
|
||||||
|
phonemeInfo.matches?.includes(currentPLetter)); // || (isEndOfWord && phonemeInfo.takesSukunOnEnding);
|
||||||
|
const useAinBlendDiacritics =
|
||||||
|
!isBeginningOfWord &&
|
||||||
|
phonemeInfo.ainBlendDiacritic &&
|
||||||
|
currentPLetter === "ع";
|
||||||
const diacritic = useAinBlendDiacritics
|
const diacritic = useAinBlendDiacritics
|
||||||
? phonemeInfo.ainBlendDiacritic
|
? phonemeInfo.ainBlendDiacritic
|
||||||
: isEndOfWord
|
: isEndOfWord
|
||||||
? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic;
|
? !phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic
|
||||||
|
? phonemeInfo.diacritic
|
||||||
|
: undefined
|
||||||
|
: phonemeInfo.diacritic;
|
||||||
|
|
||||||
const lastWordEndedW = (char: string) => ((prevPLetter === char && !currentPLetter) || (prevPLetter === " " && last(state.pOut, 2) === char));
|
const lastWordEndedW = (char: string) =>
|
||||||
|
(prevPLetter === char && !currentPLetter) ||
|
||||||
|
(prevPLetter === " " && last(state.pOut, 2) === char);
|
||||||
|
|
||||||
function getPhonemeState(): PhonemeStatus {
|
function getPhonemeState(): PhonemeStatus {
|
||||||
if (isBeginningOfWord && phoneme === "aa" && phonemeInfo.beginningMatches?.includes(currentPLetter)) {
|
if (
|
||||||
|
isBeginningOfWord &&
|
||||||
|
phoneme === "aa" &&
|
||||||
|
phonemeInfo.beginningMatches?.includes(currentPLetter)
|
||||||
|
) {
|
||||||
return PhonemeStatus.DirectMatch;
|
return PhonemeStatus.DirectMatch;
|
||||||
}
|
}
|
||||||
if (isBeginningOfWord && phoneme === "oo" && currentPLetter === "و") {
|
if (isBeginningOfWord && phoneme === "oo" && currentPLetter === "و") {
|
||||||
return PhonemeStatus.OoPrefix;
|
return PhonemeStatus.OoPrefix;
|
||||||
}
|
}
|
||||||
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) {
|
if (isBeginningOfWord && phonemeInfo.longVowel && !phonemeInfo.endingOnly) {
|
||||||
if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) {
|
if (
|
||||||
|
phoneme !== "aa" &&
|
||||||
|
currentPLetter !== "ا" &&
|
||||||
|
!phonemeInfo.matches?.includes(nextPLetter)
|
||||||
|
) {
|
||||||
throw Error("phonetics error - needs alef prefix");
|
throw Error("phonetics error - needs alef prefix");
|
||||||
}
|
}
|
||||||
return PhonemeStatus.LeadingLongVowel;
|
return PhonemeStatus.LeadingLongVowel;
|
||||||
}
|
}
|
||||||
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) {
|
if (
|
||||||
|
isBeginningOfWord &&
|
||||||
|
(phonemeInfo.beginningMatches?.includes(currentPLetter) ||
|
||||||
|
phonemeInfo.matches?.includes(currentPLetter))
|
||||||
|
) {
|
||||||
return PhonemeStatus.LeadingConsonantOrShortVowel;
|
return PhonemeStatus.LeadingConsonantOrShortVowel;
|
||||||
}
|
}
|
||||||
if (isBeginningOfWord && phoneme === "aa" && currentPLetter === "ع" && nextPLetter === "ا") {
|
if (
|
||||||
|
isBeginningOfWord &&
|
||||||
|
phoneme === "aa" &&
|
||||||
|
currentPLetter === "ع" &&
|
||||||
|
nextPLetter === "ا"
|
||||||
|
) {
|
||||||
return PhonemeStatus.AinWithLongAAtBeginning;
|
return PhonemeStatus.AinWithLongAAtBeginning;
|
||||||
}
|
}
|
||||||
if (currentPLetter === "ا" && nextPLetter === "ع" && phoneme === "aa" && nextPhoneme !== "'") {
|
if (
|
||||||
|
currentPLetter === "ا" &&
|
||||||
|
nextPLetter === "ع" &&
|
||||||
|
phoneme === "aa" &&
|
||||||
|
nextPhoneme !== "'"
|
||||||
|
) {
|
||||||
return PhonemeStatus.SilentAinAfterAlef;
|
return PhonemeStatus.SilentAinAfterAlef;
|
||||||
}
|
}
|
||||||
// console.log("------");
|
// console.log("------");
|
||||||
|
@ -400,13 +522,28 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
// console.log("state", state);
|
// console.log("state", state);
|
||||||
// console.log("prevPLetter is space", prevPLetter === " ");
|
// console.log("prevPLetter is space", prevPLetter === " ");
|
||||||
// console.log("------");
|
// console.log("------");
|
||||||
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") {
|
if (
|
||||||
return PhonemeStatus.EndOfDuParticle
|
isBeginningOfWord &&
|
||||||
|
phoneme === "u" &&
|
||||||
|
prevPLetter === " " &&
|
||||||
|
lastNonWhitespace(state.pOut) === "د"
|
||||||
|
) {
|
||||||
|
return PhonemeStatus.EndOfDuParticle;
|
||||||
}
|
}
|
||||||
if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") {
|
if (
|
||||||
|
isBeginningOfWord &&
|
||||||
|
phoneme === "-Ul-" &&
|
||||||
|
currentPLetter === "ا" &&
|
||||||
|
nextPLetter === "ل"
|
||||||
|
) {
|
||||||
return PhonemeStatus.ArabicDefiniteArticleUl;
|
return PhonemeStatus.ArabicDefiniteArticleUl;
|
||||||
}
|
}
|
||||||
if (phoneme === "a" && nextPhoneme === "'" && phonemes[i+2] === "a" && currentPLetter === "أ") {
|
if (
|
||||||
|
phoneme === "a" &&
|
||||||
|
nextPhoneme === "'" &&
|
||||||
|
phonemes[i + 2] === "a" &&
|
||||||
|
currentPLetter === "أ"
|
||||||
|
) {
|
||||||
return PhonemeStatus.AlefHamzaBeg;
|
return PhonemeStatus.AlefHamzaBeg;
|
||||||
}
|
}
|
||||||
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
|
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
|
||||||
|
@ -418,16 +555,35 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
if (phoneme === "'" && currentPLetter === "و" && nextPLetter === "و") {
|
if (phoneme === "'" && currentPLetter === "و" && nextPLetter === "و") {
|
||||||
return PhonemeStatus.GlottalStopBeforeOo;
|
return PhonemeStatus.GlottalStopBeforeOo;
|
||||||
}
|
}
|
||||||
if (phoneme === "oo" && previousPhoneme === "'" && currentPLetter === "و" && prevPLetter === hamzaAbove) {
|
if (
|
||||||
|
phoneme === "oo" &&
|
||||||
|
previousPhoneme === "'" &&
|
||||||
|
currentPLetter === "و" &&
|
||||||
|
prevPLetter === hamzaAbove
|
||||||
|
) {
|
||||||
return PhonemeStatus.OoAfterGlottalStopOo;
|
return PhonemeStatus.OoAfterGlottalStopOo;
|
||||||
}
|
}
|
||||||
if (phoneme === "'" && last(state.pOut, 2) === "ع" && isOutOfWord(last(state.pOut, 3))) {
|
if (
|
||||||
|
phoneme === "'" &&
|
||||||
|
last(state.pOut, 2) === "ع" &&
|
||||||
|
isOutOfWord(last(state.pOut, 3))
|
||||||
|
) {
|
||||||
return PhonemeStatus.AinBeginningAfterShortVowel;
|
return PhonemeStatus.AinBeginningAfterShortVowel;
|
||||||
}
|
}
|
||||||
if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") {
|
if (
|
||||||
|
!isBeginningOfWord &&
|
||||||
|
phoneme === "aa" &&
|
||||||
|
currentPLetter === "و" &&
|
||||||
|
nextPLetter === "ا"
|
||||||
|
) {
|
||||||
return PhonemeStatus.PersianSilentWWithAa;
|
return PhonemeStatus.PersianSilentWWithAa;
|
||||||
}
|
}
|
||||||
if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") {
|
if (
|
||||||
|
!isBeginningOfWord &&
|
||||||
|
phoneme === "i" &&
|
||||||
|
currentPLetter === "ا" &&
|
||||||
|
nextPLetter === "ل"
|
||||||
|
) {
|
||||||
return PhonemeStatus.ArabicWasla;
|
return PhonemeStatus.ArabicWasla;
|
||||||
}
|
}
|
||||||
if (phoneme === "-i-" && isBeginningOfWord) {
|
if (phoneme === "-i-" && isBeginningOfWord) {
|
||||||
|
@ -443,27 +599,47 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
|
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
|
||||||
return PhonemeStatus.ShortAinVowelMissingComma;
|
return PhonemeStatus.ShortAinVowelMissingComma;
|
||||||
}
|
}
|
||||||
if ((last(state.pOut, 2) === "ا") && isOutOfWord(last(state.pOut, 3))) {
|
if (last(state.pOut, 2) === "ا" && isOutOfWord(last(state.pOut, 3))) {
|
||||||
return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
|
return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (useAinBlendDiacritics) {
|
if (useAinBlendDiacritics) {
|
||||||
return PhonemeStatus.LongAinVowelMissingComma;
|
return PhonemeStatus.LongAinVowelMissingComma;
|
||||||
}
|
}
|
||||||
if (((!isBeginningOfWord && doubleConsonant) || prevPLetter === " ") && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter)) {
|
if (
|
||||||
|
((!isBeginningOfWord && doubleConsonant) || prevPLetter === " ") &&
|
||||||
|
previousPhoneme === phoneme &&
|
||||||
|
!phonemeInfo.matches?.includes(currentPLetter)
|
||||||
|
) {
|
||||||
return PhonemeStatus.DoubleConsonantTashdeed;
|
return PhonemeStatus.DoubleConsonantTashdeed;
|
||||||
}
|
}
|
||||||
if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) {
|
if (
|
||||||
|
phoneme === "aa" &&
|
||||||
|
currentPLetter === "ی" &&
|
||||||
|
nextPLetter === daggerAlif
|
||||||
|
) {
|
||||||
return PhonemeStatus.AlefDaggarEnding;
|
return PhonemeStatus.AlefDaggarEnding;
|
||||||
}
|
}
|
||||||
if (phoneme === "a" && lastWordEndedW("ح")) {
|
if (phoneme === "a" && lastWordEndedW("ح")) {
|
||||||
return PhonemeStatus.ShortAEndingAfterHeem;
|
return PhonemeStatus.ShortAEndingAfterHeem;
|
||||||
}
|
}
|
||||||
if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) {
|
if (
|
||||||
return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim;
|
isEndOfWord &&
|
||||||
|
((phoneme === "u" && currentPLetter === "ه") ||
|
||||||
|
(phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))
|
||||||
|
) {
|
||||||
|
return needsSukun
|
||||||
|
? PhonemeStatus.EndingWithHayHimFromSukun
|
||||||
|
: PhonemeStatus.EndingWithHayHim;
|
||||||
}
|
}
|
||||||
if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) {
|
if (
|
||||||
return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch;
|
phonemeInfo.matches?.includes(currentPLetter) ||
|
||||||
|
(isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) ||
|
||||||
|
(phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب")
|
||||||
|
) {
|
||||||
|
return needsSukun
|
||||||
|
? PhonemeStatus.DirectMatchAfterSukun
|
||||||
|
: PhonemeStatus.DirectMatch;
|
||||||
}
|
}
|
||||||
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
|
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
|
||||||
return PhonemeStatus.ShortVowel;
|
return PhonemeStatus.ShortVowel;
|
||||||
|
@ -471,21 +647,30 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
|
||||||
if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
|
if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
|
||||||
return PhonemeStatus.WoEndingO;
|
return PhonemeStatus.WoEndingO;
|
||||||
}
|
}
|
||||||
if (isEndOfWord && phoneme === "n" && currentPLetter === fathahan && prevPLetter === "ا") {
|
if (
|
||||||
|
isEndOfWord &&
|
||||||
|
phoneme === "n" &&
|
||||||
|
currentPLetter === fathahan &&
|
||||||
|
prevPLetter === "ا"
|
||||||
|
) {
|
||||||
return PhonemeStatus.NOnFathatan;
|
return PhonemeStatus.NOnFathatan;
|
||||||
}
|
}
|
||||||
// console.log("errored", "current", phoneme, "next", nextPhoneme);
|
// console.log("errored", "current", phoneme, "next", nextPhoneme);
|
||||||
// console.log("bad phoneme is ", phoneme);
|
// console.log("bad phoneme is ", phoneme);
|
||||||
throw new Error("phonetics error - no status found for phoneme: " + phoneme);
|
throw new Error(
|
||||||
|
"phonetics error - no status found for phoneme: " + phoneme
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const phs = getPhonemeState();
|
const phs = getPhonemeState();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
phs, phonemeInfo, diacritic, prevPLetter,
|
phs,
|
||||||
|
phonemeInfo,
|
||||||
|
diacritic,
|
||||||
|
prevPLetter,
|
||||||
};
|
};
|
||||||
};
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns the nth last character of a string
|
* returns the nth last character of a string
|
||||||
|
@ -496,7 +681,10 @@ export function last(s: string, n = 1) {
|
||||||
return s[s.length - n];
|
return s[s.length - n];
|
||||||
}
|
}
|
||||||
|
|
||||||
export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator {
|
export function advanceP(
|
||||||
|
state: DiacriticsAccumulator,
|
||||||
|
n: number = 1
|
||||||
|
): DiacriticsAccumulator {
|
||||||
return {
|
return {
|
||||||
pIn: state.pIn.slice(n),
|
pIn: state.pIn.slice(n),
|
||||||
pOut: state.pOut + state.pIn.slice(0, n),
|
pOut: state.pOut + state.pIn.slice(0, n),
|
||||||
|
@ -518,14 +706,18 @@ export function reverseP(state: DiacriticsAccumulator): DiacriticsAccumulator {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
export const addP =
|
||||||
|
(toAdd: string | undefined) =>
|
||||||
|
(state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
||||||
return {
|
return {
|
||||||
...state,
|
...state,
|
||||||
pOut: toAdd ? (state.pOut + toAdd) : state.pOut,
|
pOut: toAdd ? state.pOut + toAdd : state.pOut,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
export const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
export const overwriteP =
|
||||||
|
(toWrite: string) =>
|
||||||
|
(state: DiacriticsAccumulator): DiacriticsAccumulator => {
|
||||||
return {
|
return {
|
||||||
pIn: state.pIn.slice(1),
|
pIn: state.pIn.slice(1),
|
||||||
pOut: state.pOut + toWrite,
|
pOut: state.pOut + toWrite,
|
||||||
|
@ -545,7 +737,10 @@ export function lastNonWhitespace(s: string): string {
|
||||||
return penultimateChar;
|
return penultimateChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} {
|
export function getCurrentNext(state: DiacriticsAccumulator): {
|
||||||
|
current: string;
|
||||||
|
next: string;
|
||||||
|
} {
|
||||||
return {
|
return {
|
||||||
current: state.pIn[0],
|
current: state.pIn[0],
|
||||||
next: state.pIn[1],
|
next: state.pIn[1],
|
||||||
|
@ -557,7 +752,9 @@ export function getCurrentNext(state: DiacriticsAccumulator): { current: string,
|
||||||
// return (current === "ع") ? advanceP(state) : state;
|
// return (current === "ع") ? advanceP(state) : state;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
export function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccumulator {
|
export function advanceForHamzaMid(
|
||||||
|
state: DiacriticsAccumulator
|
||||||
|
): DiacriticsAccumulator {
|
||||||
const { current, next } = getCurrentNext(state);
|
const { current, next } = getCurrentNext(state);
|
||||||
if (current === "ئ" && next && next !== "ئ") {
|
if (current === "ئ" && next && next !== "ئ") {
|
||||||
return advanceP(state);
|
return advanceP(state);
|
||||||
|
@ -565,7 +762,9 @@ export function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccu
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function advanceForHamza(state: DiacriticsAccumulator): DiacriticsAccumulator {
|
export function advanceForHamza(
|
||||||
|
state: DiacriticsAccumulator
|
||||||
|
): DiacriticsAccumulator {
|
||||||
const { current, next } = getCurrentNext(state);
|
const { current, next } = getCurrentNext(state);
|
||||||
if (current === "ه" && (!next || next === " ")) {
|
if (current === "ه" && (!next || next === " ")) {
|
||||||
return advanceP(state);
|
return advanceP(state);
|
||||||
|
@ -575,4 +774,3 @@ export function advanceForHamza(state: DiacriticsAccumulator): DiacriticsAccumul
|
||||||
// }
|
// }
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,23 +6,16 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import {
|
import { addDiacritics } from "./diacritics";
|
||||||
addDiacritics,
|
import { zwar, zwarakay, sukun, tashdeed } from "./diacritics-helpers";
|
||||||
} from "./diacritics";
|
|
||||||
import {
|
|
||||||
zwar,
|
|
||||||
zwarakey,
|
|
||||||
sukun,
|
|
||||||
tashdeed,
|
|
||||||
} from "./diacritics-helpers";
|
|
||||||
import * as T from "../../types";
|
import * as T from "../../types";
|
||||||
|
|
||||||
const diacriticsSections: {
|
const diacriticsSections: {
|
||||||
describe: string,
|
describe: string;
|
||||||
tests: {
|
tests: {
|
||||||
in: T.PsString,
|
in: T.PsString;
|
||||||
out: string | null,
|
out: string | null;
|
||||||
}[],
|
}[];
|
||||||
}[] = [
|
}[] = [
|
||||||
{
|
{
|
||||||
describe: "regular, native Pashto script/sounds",
|
describe: "regular, native Pashto script/sounds",
|
||||||
|
@ -108,14 +101,14 @@ const diacriticsSections: {
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "شئ",
|
p: "شئ",
|
||||||
f: "sheyy",
|
f: "shey",
|
||||||
},
|
},
|
||||||
out: "شئ",
|
out: "شئ",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کار کوئ چې لاړ شئ",
|
p: "کار کوئ چې لاړ شئ",
|
||||||
f: "kaar kawéyy che laaR sheyy",
|
f: "kaar kawéy che laaR shey",
|
||||||
},
|
},
|
||||||
out: "کار کَوئ چې لاړ شئ",
|
out: "کار کَوئ چې لاړ شئ",
|
||||||
},
|
},
|
||||||
|
@ -146,28 +139,28 @@ const diacriticsSections: {
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kawul",
|
f: "kawul",
|
||||||
},
|
},
|
||||||
out: "کَو" + zwarakey + "ل",
|
out: "کَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kiwul",
|
f: "kiwul",
|
||||||
},
|
},
|
||||||
out: "کِو" + zwarakey + "ل",
|
out: "کِو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kUwul",
|
f: "kUwul",
|
||||||
},
|
},
|
||||||
out: "کُو" + zwarakey + "ل",
|
out: "کُو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kuwul",
|
f: "kuwul",
|
||||||
},
|
},
|
||||||
out: "ک" + zwarakey + "و" + zwarakey + "ل",
|
out: "ک" + zwarakay + "و" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -200,7 +193,7 @@ const diacriticsSections: {
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سپین",
|
p: "سپین",
|
||||||
f: "speyn",
|
f: "spayn",
|
||||||
},
|
},
|
||||||
out: "سْپین",
|
out: "سْپین",
|
||||||
},
|
},
|
||||||
|
@ -272,21 +265,21 @@ const diacriticsSections: {
|
||||||
p: "رغېدل",
|
p: "رغېدل",
|
||||||
f: "raghedul",
|
f: "raghedul",
|
||||||
},
|
},
|
||||||
out: "رَغېد" + zwarakey + "ل",
|
out: "رَغېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کارول",
|
p: "کارول",
|
||||||
f: "kaarawul",
|
f: "kaarawul",
|
||||||
},
|
},
|
||||||
out: "کارَو" + zwarakey + "ل",
|
out: "کارَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "پېښېدل",
|
p: "پېښېدل",
|
||||||
f: "pexedul",
|
f: "pexedul",
|
||||||
},
|
},
|
||||||
out: "پېښېد" + zwarakey + "ل",
|
out: "پېښېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -298,7 +291,7 @@ const diacriticsSections: {
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سړی",
|
p: "سړی",
|
||||||
f: "saRey",
|
f: "saRay",
|
||||||
},
|
},
|
||||||
out: "سَړی",
|
out: "سَړی",
|
||||||
},
|
},
|
||||||
|
@ -335,28 +328,28 @@ const diacriticsSections: {
|
||||||
p: "ایستل",
|
p: "ایستل",
|
||||||
f: "eestul",
|
f: "eestul",
|
||||||
},
|
},
|
||||||
out: "اِیسْت" + zwarakey + "ل",
|
out: "اِیسْت" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "ایستل",
|
p: "ایستل",
|
||||||
f: "eystul",
|
f: "aystul",
|
||||||
},
|
},
|
||||||
out: "ایسْت" + zwarakey + "ل",
|
out: "ایسْت" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "اېسېدل",
|
p: "اېسېدل",
|
||||||
f: "esedul",
|
f: "esedul",
|
||||||
},
|
},
|
||||||
out: "اېسېد" + zwarakey + "ل",
|
out: "اېسېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "اوسېدل",
|
p: "اوسېدل",
|
||||||
f: "osedul",
|
f: "osedul",
|
||||||
},
|
},
|
||||||
out: "اوسېد" + zwarakey + "ل",
|
out: "اوسېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -377,7 +370,7 @@ const diacriticsSections: {
|
||||||
p: "واردول",
|
p: "واردول",
|
||||||
f: "waaridawul",
|
f: "waaridawul",
|
||||||
},
|
},
|
||||||
out: "وارِدَو" + zwarakey + "ل",
|
out: "وارِدَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -557,7 +550,7 @@ const diacriticsSections: {
|
||||||
p: "توجه کول",
|
p: "توجه کول",
|
||||||
f: "tawajU kawul",
|
f: "tawajU kawul",
|
||||||
},
|
},
|
||||||
out: "تَوَجُه کَو" + zwarakey + "ل",
|
out: "تَوَجُه کَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -597,7 +590,7 @@ const diacriticsSections: {
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سختسری",
|
p: "سختسری",
|
||||||
f: "sakht sărey",
|
f: "sakht săray",
|
||||||
},
|
},
|
||||||
out: "سَخْتْسَری",
|
out: "سَخْتْسَری",
|
||||||
},
|
},
|
||||||
|
@ -646,7 +639,7 @@ const diacriticsSections: {
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "وری",
|
p: "وری",
|
||||||
f: "waréy",
|
f: "waráy",
|
||||||
},
|
},
|
||||||
out: "وَری",
|
out: "وَری",
|
||||||
},
|
},
|
||||||
|
@ -660,19 +653,20 @@ const diacriticsSections: {
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "امزری",
|
p: "امزری",
|
||||||
f: "umzaréy",
|
f: "umzaráy",
|
||||||
},
|
},
|
||||||
out: zwarakey + "مْزَری",
|
out: zwarakay + "مْزَری",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
describe: "ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی",
|
describe:
|
||||||
|
"ې followed by ی - y needs to be written as e`y to be distinguished from ay - ی",
|
||||||
tests: [
|
tests: [
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "پتېیل",
|
p: "پتېیل",
|
||||||
f: "pateyúl",
|
f: "patayúl",
|
||||||
},
|
},
|
||||||
out: null,
|
out: null,
|
||||||
},
|
},
|
||||||
|
@ -681,14 +675,14 @@ const diacriticsSections: {
|
||||||
p: "پتېیل",
|
p: "پتېیل",
|
||||||
f: "pate`yúl",
|
f: "pate`yúl",
|
||||||
},
|
},
|
||||||
out: "پَتېی" + zwarakey + "ل",
|
out: "پَتېی" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "درېیم",
|
p: "درېیم",
|
||||||
f: "dre`yum",
|
f: "dre`yum",
|
||||||
},
|
},
|
||||||
out: "دْرېی" + zwarakey + "م",
|
out: "دْرېی" + zwarakay + "م",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
@ -700,7 +694,7 @@ const diacriticsSections: {
|
||||||
p: "تر ... پورې",
|
p: "تر ... پورې",
|
||||||
f: "tur ... pore",
|
f: "tur ... pore",
|
||||||
},
|
},
|
||||||
out: "ت" + zwarakey + "ر ... پورې",
|
out: "ت" + zwarakay + "ر ... پورې",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
@ -729,7 +723,7 @@ const diacriticsSections: {
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سړی و",
|
p: "سړی و",
|
||||||
f: "saRey wo",
|
f: "saRay wo",
|
||||||
},
|
},
|
||||||
out: "سَړی و",
|
out: "سَړی و",
|
||||||
},
|
},
|
||||||
|
@ -811,7 +805,7 @@ const diacriticsSections: {
|
||||||
p: "منبع",
|
p: "منبع",
|
||||||
f: "manb'i",
|
f: "manb'i",
|
||||||
},
|
},
|
||||||
out: "مَنْبْعِ"
|
out: "مَنْبْعِ",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -825,7 +819,7 @@ const diacriticsSections: {
|
||||||
p: "منبع",
|
p: "منبع",
|
||||||
f: "manbi",
|
f: "manbi",
|
||||||
},
|
},
|
||||||
out: "مَنْبِع"
|
out: "مَنْبِع",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -860,7 +854,7 @@ const diacriticsSections: {
|
||||||
p: "مربع جذر",
|
p: "مربع جذر",
|
||||||
f: "mUraba' jazúr",
|
f: "mUraba' jazúr",
|
||||||
},
|
},
|
||||||
out: "مُرَبَع جَذ" + zwarakey + "ر",
|
out: "مُرَبَع جَذ" + zwarakay + "ر",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -888,7 +882,7 @@ const diacriticsSections: {
|
||||||
p: "راجع کېدل",
|
p: "راجع کېدل",
|
||||||
f: "raaji kedul",
|
f: "raaji kedul",
|
||||||
},
|
},
|
||||||
out: "راجِع کېد" + zwarakey + "ل",
|
out: "راجِع کېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -979,7 +973,7 @@ const diacriticsSections: {
|
||||||
f: "ijmaa'",
|
f: "ijmaa'",
|
||||||
},
|
},
|
||||||
out: "اِجْماع",
|
out: "اِجْماع",
|
||||||
}
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1064,21 +1058,21 @@ const diacriticsSections: {
|
||||||
p: "د",
|
p: "د",
|
||||||
f: "du",
|
f: "du",
|
||||||
},
|
},
|
||||||
out: "د" + zwarakey,
|
out: "د" + zwarakay,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "د لاس",
|
p: "د لاس",
|
||||||
f: "du laas",
|
f: "du laas",
|
||||||
},
|
},
|
||||||
out: "د" + zwarakey + " لاس",
|
out: "د" + zwarakay + " لاس",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "د ... په شان",
|
p: "د ... په شان",
|
||||||
f: "du ... pu shaan",
|
f: "du ... pu shaan",
|
||||||
},
|
},
|
||||||
out: "د" + zwarakey + " ... پهٔ شان",
|
out: "د" + zwarakay + " ... پهٔ شان",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
@ -1097,7 +1091,7 @@ const diacriticsSections: {
|
||||||
p: "ذبح کول",
|
p: "ذبح کول",
|
||||||
f: "zabha kawul",
|
f: "zabha kawul",
|
||||||
},
|
},
|
||||||
out: "ذَبْحَ کَو" + zwarakey + "ل",
|
out: "ذَبْحَ کَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
@ -1275,14 +1269,14 @@ const diacriticsSections: {
|
||||||
p: "وځم",
|
p: "وځم",
|
||||||
f: "oodzum",
|
f: "oodzum",
|
||||||
},
|
},
|
||||||
out: "وُځ" + zwarakey + "م",
|
out: "وُځ" + zwarakay + "م",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "وځم",
|
p: "وځم",
|
||||||
f: "wUdzum",
|
f: "wUdzum",
|
||||||
},
|
},
|
||||||
out: "وُځ" + zwarakey + "م",
|
out: "وُځ" + zwarakay + "م",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
@ -1316,5 +1310,3 @@ test("ending with left over phonetics will throw an error", () => {
|
||||||
addDiacritics({ p: "کار", f: "kaar kawul" });
|
addDiacritics({ p: "کار", f: "kaar kawul" });
|
||||||
}).toThrow();
|
}).toThrow();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ import {
|
||||||
splitFIntoPhonemes,
|
splitFIntoPhonemes,
|
||||||
Phoneme,
|
Phoneme,
|
||||||
zwar,
|
zwar,
|
||||||
zwarakey,
|
zwarakay,
|
||||||
zer,
|
zer,
|
||||||
pesh,
|
pesh,
|
||||||
sukun,
|
sukun,
|
||||||
|
@ -35,10 +35,20 @@ import { pipe } from "rambda";
|
||||||
/**
|
/**
|
||||||
* Adds diacritics to a given PsString.
|
* Adds diacritics to a given PsString.
|
||||||
* Errors if the phonetics and script don't line up.
|
* Errors if the phonetics and script don't line up.
|
||||||
|
*
|
||||||
|
* IN PROGRESS - This will hopefully get done and replace the messy, unmaintainable phonetics-to-diacritics.ts currently in use
|
||||||
*/
|
*/
|
||||||
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
|
export function addDiacritics(
|
||||||
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? removeFVarients(f) : f);
|
{ p, f }: T.PsString,
|
||||||
const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() });
|
ignoreCommas?: true
|
||||||
|
): T.PsString {
|
||||||
|
const phonemes: Phoneme[] = splitFIntoPhonemes(
|
||||||
|
!ignoreCommas ? removeFVarients(f) : f
|
||||||
|
);
|
||||||
|
const { pIn, pOut } = phonemes.reduce(processPhoneme, {
|
||||||
|
pOut: "",
|
||||||
|
pIn: p.trim(),
|
||||||
|
});
|
||||||
if (pIn !== "") {
|
if (pIn !== "") {
|
||||||
throw new Error("phonetics error - phonetics shorter than pashto script");
|
throw new Error("phonetics error - phonetics shorter than pashto script");
|
||||||
}
|
}
|
||||||
|
@ -52,159 +62,80 @@ function processPhoneme(
|
||||||
acc: DiacriticsAccumulator,
|
acc: DiacriticsAccumulator,
|
||||||
phoneme: Phoneme,
|
phoneme: Phoneme,
|
||||||
i: number,
|
i: number,
|
||||||
phonemes: Phoneme[],
|
phonemes: Phoneme[]
|
||||||
): DiacriticsAccumulator {
|
): DiacriticsAccumulator {
|
||||||
const state = acc.pIn.slice(0, 5) === " ... "
|
const state =
|
||||||
|
acc.pIn.slice(0, 5) === " ... "
|
||||||
? advanceP(acc, 5)
|
? advanceP(acc, 5)
|
||||||
: acc.pIn[0] === " "
|
: acc.pIn[0] === " "
|
||||||
? advanceP(acc)
|
? advanceP(acc)
|
||||||
: acc;
|
: acc;
|
||||||
|
|
||||||
const {
|
const { phonemeInfo, diacritic, phs, prevPLetter } = stateInfo({
|
||||||
phonemeInfo,
|
state,
|
||||||
diacritic,
|
i,
|
||||||
phs,
|
phoneme,
|
||||||
prevPLetter,
|
phonemes,
|
||||||
} = stateInfo({ state, i, phoneme, phonemes });
|
});
|
||||||
|
|
||||||
return (phs === PhonemeStatus.LeadingLongVowel) ?
|
return phs === PhonemeStatus.LeadingLongVowel
|
||||||
pipe(
|
? pipe(advanceP, addP(phonemeInfo.diacritic), advanceP)(state)
|
||||||
advanceP,
|
: phs === PhonemeStatus.LeadingConsonantOrShortVowel
|
||||||
addP(phonemeInfo.diacritic),
|
? pipe(advanceP, addP(diacritic))(state)
|
||||||
advanceP,
|
: phs === PhonemeStatus.DoubleConsonantTashdeed
|
||||||
)(state)
|
? pipe(prevPLetter === " " ? reverseP : addP(""), addP(tashdeed))(state)
|
||||||
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ?
|
: phs === PhonemeStatus.EndingWithHayHim
|
||||||
pipe(
|
? pipe(advanceP, addP(phoneme === "u" ? hamzaAbove : sukun))(state)
|
||||||
advanceP,
|
: phs === PhonemeStatus.DirectMatch
|
||||||
addP(diacritic),
|
? pipe(addP(diacritic), advanceP)(state)
|
||||||
)(state)
|
: phs === PhonemeStatus.DirectMatchAfterSukun
|
||||||
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ?
|
? pipe(addP(sukun), advanceP)(state)
|
||||||
pipe(
|
: phs === PhonemeStatus.PersianSilentWWithAa
|
||||||
prevPLetter === " " ? reverseP : addP(""),
|
? pipe(addP("("), advanceP, addP(")"), advanceP)(state)
|
||||||
addP(tashdeed)
|
: phs === PhonemeStatus.ArabicWasla
|
||||||
)(state)
|
? pipe(addP(zer), overwriteP(wasla))(state)
|
||||||
: (phs === PhonemeStatus.EndingWithHeyHim) ?
|
: phs === PhonemeStatus.Izafe
|
||||||
pipe(
|
? pipe(reverseP, addP(zer))(state)
|
||||||
advanceP,
|
: phs === PhonemeStatus.EndOfDuParticle
|
||||||
addP(phoneme === "u" ? hamzaAbove : sukun),
|
? pipe(reverseP, addP(zwarakay))(state)
|
||||||
)(state)
|
: phs === PhonemeStatus.ShortAEndingAfterHeem
|
||||||
: (phs === PhonemeStatus.DirectMatch) ?
|
? pipe(prevPLetter === " " ? reverseP : addP(""), addP(zwar))(state)
|
||||||
pipe(
|
: phs === PhonemeStatus.EndingWithHayHimFromSukun
|
||||||
addP(diacritic),
|
? pipe(addP(sukun), advanceP)(state)
|
||||||
advanceP,
|
: phs === PhonemeStatus.AlefDaggarEnding
|
||||||
)(state)
|
? pipe(advanceP, advanceP)(state)
|
||||||
: (phs === PhonemeStatus.DirectMatchAfterSukun) ?
|
: phs === PhonemeStatus.LongAinVowelMissingComma
|
||||||
pipe(
|
? pipe(addP(diacritic), advanceP, addP(diacritic))(state)
|
||||||
addP(sukun),
|
: phs === PhonemeStatus.ShortAinVowelMissingComma
|
||||||
advanceP,
|
? pipe(addP(diacritic), advanceP)(state)
|
||||||
)(state)
|
: phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart
|
||||||
: (phs === PhonemeStatus.PersianSilentWWithAa) ?
|
? pipe(advanceP, advanceP)(state)
|
||||||
pipe(
|
: phs === PhonemeStatus.AinWithLongAAtBeginning
|
||||||
addP("("),
|
? pipe(advanceP, advanceP)(state)
|
||||||
advanceP,
|
: phs === PhonemeStatus.AlefWithHamza
|
||||||
addP(")"),
|
? pipe(advanceP)(state)
|
||||||
advanceP,
|
: phs === PhonemeStatus.ShortVowel
|
||||||
)(state)
|
? pipe(
|
||||||
: (phs === PhonemeStatus.ArabicWasla) ?
|
|
||||||
pipe(
|
|
||||||
addP(zer),
|
|
||||||
overwriteP(wasla),
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.Izafe) ?
|
|
||||||
pipe(
|
|
||||||
reverseP,
|
|
||||||
addP(zer),
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.EndOfDuParticle) ?
|
|
||||||
pipe(
|
|
||||||
reverseP,
|
|
||||||
addP(zwarakey),
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.ShortAEndingAfterHeem) ?
|
|
||||||
pipe(
|
|
||||||
prevPLetter === " " ? reverseP : addP(""),
|
|
||||||
addP(zwar),
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
|
|
||||||
pipe(
|
|
||||||
addP(sukun),
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.AlefDaggarEnding) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.LongAinVowelMissingComma) ?
|
|
||||||
pipe(
|
|
||||||
addP(diacritic),
|
|
||||||
advanceP,
|
|
||||||
addP(diacritic)
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.ShortAinVowelMissingComma) ?
|
|
||||||
pipe(
|
|
||||||
addP(diacritic),
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.AlefWithHamza) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.ShortVowel) ?
|
|
||||||
pipe(
|
|
||||||
advanceForHamzaMid,
|
advanceForHamzaMid,
|
||||||
addP(phonemeInfo.diacritic),
|
addP(phonemeInfo.diacritic),
|
||||||
// TODO THIS?
|
// TODO THIS?
|
||||||
advanceForHamza,
|
advanceForHamza
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.NOnFathatan) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.HamzaOnWow) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
addP(hamzaAbove),
|
|
||||||
addP(diacritic),
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
addP(pesh),
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.OoPrefix) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
addP(pesh),
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.GlottalStopBeforeOo) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
addP(hamzaAbove),
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.OoAfterGlottalStopOo) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
|
||||||
: (phs === PhonemeStatus.SilentAinAfterAlef) ?
|
|
||||||
pipe(
|
|
||||||
advanceP,
|
|
||||||
advanceP,
|
|
||||||
)(state)
|
)(state)
|
||||||
|
: phs === PhonemeStatus.ShortAForAlefBeforeFathatan
|
||||||
|
? pipe(advanceP)(state)
|
||||||
|
: phs === PhonemeStatus.NOnFathatan
|
||||||
|
? pipe(advanceP)(state)
|
||||||
|
: phs === PhonemeStatus.HamzaOnWow
|
||||||
|
? pipe(advanceP, addP(hamzaAbove), addP(diacritic))(state)
|
||||||
|
: phs === PhonemeStatus.ArabicDefiniteArticleUl
|
||||||
|
? pipe(advanceP, addP(pesh), advanceP)(state)
|
||||||
|
: phs === PhonemeStatus.OoPrefix
|
||||||
|
? pipe(advanceP, addP(pesh))(state)
|
||||||
|
: phs === PhonemeStatus.GlottalStopBeforeOo
|
||||||
|
? pipe(advanceP, addP(hamzaAbove))(state)
|
||||||
|
: phs === PhonemeStatus.OoAfterGlottalStopOo
|
||||||
|
? pipe(advanceP)(state)
|
||||||
|
: phs === PhonemeStatus.SilentAinAfterAlef
|
||||||
|
? pipe(advanceP, advanceP)(state)
|
||||||
: state;
|
: state;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,19 +11,19 @@ import {
|
||||||
splitFIntoPhonemes,
|
splitFIntoPhonemes,
|
||||||
} from "./phonetics-to-diacritics";
|
} from "./phonetics-to-diacritics";
|
||||||
|
|
||||||
const zwarakey = "ٙ";
|
const zwarakay = "ٙ";
|
||||||
|
|
||||||
const phonemeSplits: Array<{
|
const phonemeSplits: Array<{
|
||||||
in: string,
|
in: string;
|
||||||
out: string[],
|
out: string[];
|
||||||
}> = [
|
}> = [
|
||||||
{
|
{
|
||||||
in: "kor",
|
in: "kor",
|
||||||
out: ["k", "o", "r"],
|
out: ["k", "o", "r"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "raaghey",
|
in: "raaghay",
|
||||||
out: ["r", "aa", "gh", "ey"],
|
out: ["r", "aa", "gh", "ay"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "hatsa",
|
in: "hatsa",
|
||||||
|
@ -34,16 +34,16 @@ const phonemeSplits: Array<{
|
||||||
out: ["b", "a"],
|
out: ["b", "a"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "peydáa",
|
in: "paydáa",
|
||||||
out: ["p", "ey", "d", "áa"],
|
out: ["p", "ay", "d", "áa"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "be kaar",
|
in: "be kaar",
|
||||||
out: ["b", "e", "k", "aa", "r"],
|
out: ["b", "e", "k", "aa", "r"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "raadzeyy",
|
in: "raadzey",
|
||||||
out: ["r", "aa", "dz", "eyy"],
|
out: ["r", "aa", "dz", "ey"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: "badanuy ??",
|
in: "badanuy ??",
|
||||||
|
@ -67,8 +67,8 @@ phonemeSplits.forEach((s) => {
|
||||||
});
|
});
|
||||||
|
|
||||||
const toTest: Array<{
|
const toTest: Array<{
|
||||||
in: { p: string, f: string },
|
in: { p: string; f: string };
|
||||||
out: string | undefined,
|
out: string | undefined;
|
||||||
}> = [
|
}> = [
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -168,28 +168,28 @@ const toTest: Array<{
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kawul",
|
f: "kawul",
|
||||||
},
|
},
|
||||||
out: "کَو" + zwarakey + "ل",
|
out: "کَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kiwul",
|
f: "kiwul",
|
||||||
},
|
},
|
||||||
out: "کِو" + zwarakey + "ل",
|
out: "کِو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kUwul",
|
f: "kUwul",
|
||||||
},
|
},
|
||||||
out: "کُو" + zwarakey + "ل",
|
out: "کُو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کول",
|
p: "کول",
|
||||||
f: "kuwul",
|
f: "kuwul",
|
||||||
},
|
},
|
||||||
out: "ک" + zwarakey + "و" + zwarakey + "ل",
|
out: "ک" + zwarakay + "و" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -222,7 +222,7 @@ const toTest: Array<{
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سپین",
|
p: "سپین",
|
||||||
f: "speyn",
|
f: "spayn",
|
||||||
},
|
},
|
||||||
out: "سْپین",
|
out: "سْپین",
|
||||||
},
|
},
|
||||||
|
@ -236,7 +236,7 @@ const toTest: Array<{
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "پېش",
|
p: "پېش",
|
||||||
f: "peysh",
|
f: "paysh",
|
||||||
},
|
},
|
||||||
out: undefined,
|
out: undefined,
|
||||||
},
|
},
|
||||||
|
@ -245,33 +245,33 @@ const toTest: Array<{
|
||||||
p: "رغېدل",
|
p: "رغېدل",
|
||||||
f: "raghedul",
|
f: "raghedul",
|
||||||
},
|
},
|
||||||
out: "رَغېد" + zwarakey + "ل",
|
out: "رَغېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "کارول",
|
p: "کارول",
|
||||||
f: "kaarawul",
|
f: "kaarawul",
|
||||||
},
|
},
|
||||||
out: "کارَو" + zwarakey + "ل",
|
out: "کارَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "پېښېدل",
|
p: "پېښېدل",
|
||||||
f: "pexedul",
|
f: "pexedul",
|
||||||
},
|
},
|
||||||
out: "پېښېد" + zwarakey + "ل",
|
out: "پېښېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "مین",
|
p: "مین",
|
||||||
f: "mayín",
|
f: "ma`yín",
|
||||||
},
|
},
|
||||||
out: "مَیِن",
|
out: "مَیِن",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سړی",
|
p: "سړی",
|
||||||
f: "saRey",
|
f: "saRay",
|
||||||
},
|
},
|
||||||
out: "سَړی",
|
out: "سَړی",
|
||||||
},
|
},
|
||||||
|
@ -308,28 +308,28 @@ const toTest: Array<{
|
||||||
p: "ایستل",
|
p: "ایستل",
|
||||||
f: "eestul",
|
f: "eestul",
|
||||||
},
|
},
|
||||||
out: "اِیسْت" + zwarakey + "ل",
|
out: "اِیسْت" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "ایستل",
|
p: "ایستل",
|
||||||
f: "eystul",
|
f: "aystul",
|
||||||
},
|
},
|
||||||
out: "ایسْت" + zwarakey + "ل",
|
out: "ایسْت" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "اېسېدل",
|
p: "اېسېدل",
|
||||||
f: "esedul",
|
f: "esedul",
|
||||||
},
|
},
|
||||||
out: "اېسېد" + zwarakey + "ل",
|
out: "اېسېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "اوسېدل",
|
p: "اوسېدل",
|
||||||
f: "osedul",
|
f: "osedul",
|
||||||
},
|
},
|
||||||
out: "اوسېد" + zwarakey + "ل",
|
out: "اوسېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -350,7 +350,7 @@ const toTest: Array<{
|
||||||
p: "واردول",
|
p: "واردول",
|
||||||
f: "waaridawul",
|
f: "waaridawul",
|
||||||
},
|
},
|
||||||
out: "وارِدَو" + zwarakey + "ل",
|
out: "وارِدَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -490,21 +490,21 @@ const toTest: Array<{
|
||||||
p: "ازغن تار",
|
p: "ازغن تار",
|
||||||
f: "azghun taar",
|
f: "azghun taar",
|
||||||
},
|
},
|
||||||
out: "اَزْغ" + zwarakey + "ن" + " تار",
|
out: "اَزْغ" + zwarakay + "ن" + " تار",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "اره څکول",
|
p: "اره څکول",
|
||||||
f: "ara tskawul",
|
f: "ara tskawul",
|
||||||
},
|
},
|
||||||
out: "اَره څْکَو" + zwarakey + "ل",
|
out: "اَره څْکَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "اږیل",
|
p: "اږیل",
|
||||||
f: "aGuyúl",
|
f: "aGuyúl",
|
||||||
},
|
},
|
||||||
out: "اَږ" + zwarakey + "ی" + zwarakey + "ل",
|
out: "اَږ" + zwarakay + "ی" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -669,7 +669,8 @@ const toTest: Array<{
|
||||||
f: "aadam",
|
f: "aadam",
|
||||||
},
|
},
|
||||||
out: undefined,
|
out: undefined,
|
||||||
}, {
|
},
|
||||||
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "منع",
|
p: "منع",
|
||||||
f: "mán'a",
|
f: "mán'a",
|
||||||
|
@ -698,11 +699,11 @@ const toTest: Array<{
|
||||||
},
|
},
|
||||||
out: "اسان",
|
out: "اسان",
|
||||||
},
|
},
|
||||||
// ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی
|
// ې followed by ی - y needs to be written as e`y to be distinguished from ay - ی
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "پتېیل",
|
p: "پتېیل",
|
||||||
f: "pateyúl",
|
f: "patayúl",
|
||||||
},
|
},
|
||||||
out: undefined,
|
out: undefined,
|
||||||
},
|
},
|
||||||
|
@ -711,14 +712,14 @@ const toTest: Array<{
|
||||||
p: "پتېیل",
|
p: "پتېیل",
|
||||||
f: "pate`yúl",
|
f: "pate`yúl",
|
||||||
},
|
},
|
||||||
out: "پَتېی" + zwarakey + "ل",
|
out: "پَتېی" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "درېیم",
|
p: "درېیم",
|
||||||
f: "dre`yum",
|
f: "dre`yum",
|
||||||
},
|
},
|
||||||
out: "دْرېی" + zwarakey + "م",
|
out: "دْرېی" + zwarakay + "م",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -733,7 +734,7 @@ const toTest: Array<{
|
||||||
p: "تر ... پورې",
|
p: "تر ... پورې",
|
||||||
f: "tur ... pore",
|
f: "tur ... pore",
|
||||||
},
|
},
|
||||||
out: "ت" + zwarakey + "ر ... پورې",
|
out: "ت" + zwarakay + "ر ... پورې",
|
||||||
},
|
},
|
||||||
// joiner و
|
// joiner و
|
||||||
{
|
{
|
||||||
|
@ -763,21 +764,21 @@ const toTest: Array<{
|
||||||
p: "د",
|
p: "د",
|
||||||
f: "du",
|
f: "du",
|
||||||
},
|
},
|
||||||
out: "د" + zwarakey,
|
out: "د" + zwarakay,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "د لاس",
|
p: "د لاس",
|
||||||
f: "du laas",
|
f: "du laas",
|
||||||
},
|
},
|
||||||
out: "د" + zwarakey + " لاس",
|
out: "د" + zwarakay + " لاس",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "د ... په شان",
|
p: "د ... په شان",
|
||||||
f: "du ... pu shaan",
|
f: "du ... pu shaan",
|
||||||
},
|
},
|
||||||
out: "د" + zwarakey + " ... پهٔ شان",
|
out: "د" + zwarakay + " ... پهٔ شان",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -798,7 +799,7 @@ const toTest: Array<{
|
||||||
p: "ذبح کول",
|
p: "ذبح کول",
|
||||||
f: "zabha kawul",
|
f: "zabha kawul",
|
||||||
},
|
},
|
||||||
out: "ذَبْحَ کَو" + zwarakey + "ل",
|
out: "ذَبْحَ کَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
// require dagger alif on words ending with یٰ
|
// require dagger alif on words ending with یٰ
|
||||||
{
|
{
|
||||||
|
@ -864,7 +865,7 @@ const toTest: Array<{
|
||||||
p: "طمع لرل",
|
p: "طمع لرل",
|
||||||
f: "tama larul",
|
f: "tama larul",
|
||||||
},
|
},
|
||||||
out: "طَمعَ لَر" + zwarakey + "ل",
|
out: "طَمعَ لَر" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
// Ua ؤ
|
// Ua ؤ
|
||||||
{
|
{
|
||||||
|
@ -885,7 +886,7 @@ const toTest: Array<{
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "شئ",
|
p: "شئ",
|
||||||
f: "sheyy",
|
f: "shey",
|
||||||
},
|
},
|
||||||
out: "شئ",
|
out: "شئ",
|
||||||
},
|
},
|
||||||
|
@ -900,7 +901,7 @@ const toTest: Array<{
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سړی و",
|
p: "سړی و",
|
||||||
f: "saRey wo",
|
f: "saRay wo",
|
||||||
},
|
},
|
||||||
out: "سَړی و",
|
out: "سَړی و",
|
||||||
},
|
},
|
||||||
|
@ -938,7 +939,7 @@ const toTest: Array<{
|
||||||
p: "توجه کول",
|
p: "توجه کول",
|
||||||
f: "tawajU kawul",
|
f: "tawajU kawul",
|
||||||
},
|
},
|
||||||
out: "تَوَجُه کَو" + zwarakey + "ل",
|
out: "تَوَجُه کَو" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
// With Arabic definate article -Ul- ال
|
// With Arabic definate article -Ul- ال
|
||||||
{
|
{
|
||||||
|
@ -975,7 +976,7 @@ const toTest: Array<{
|
||||||
p: "راجع کېدل",
|
p: "راجع کېدل",
|
||||||
f: "raaji kedul",
|
f: "raaji kedul",
|
||||||
},
|
},
|
||||||
out: "راجعِ کېد" + zwarakey + "ل",
|
out: "راجعِ کېد" + zwarakay + "ل",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -987,7 +988,7 @@ const toTest: Array<{
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "سختسری",
|
p: "سختسری",
|
||||||
f: "sakht sărey",
|
f: "sakht săray",
|
||||||
},
|
},
|
||||||
out: "سَخْتْسَری",
|
out: "سَخْتْسَری",
|
||||||
},
|
},
|
||||||
|
@ -1042,7 +1043,7 @@ const toTest: Array<{
|
||||||
p: "مربع جذر",
|
p: "مربع جذر",
|
||||||
f: "mUraba' jazúr",
|
f: "mUraba' jazúr",
|
||||||
},
|
},
|
||||||
out: "مُرَبَع جَذ" + zwarakey + "ر",
|
out: "مُرَبَع جَذ" + zwarakay + "ر",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
|
@ -1101,14 +1102,14 @@ const toTest: Array<{
|
||||||
p: "وځم",
|
p: "وځم",
|
||||||
f: "oodzum",
|
f: "oodzum",
|
||||||
},
|
},
|
||||||
out: "وځ" + zwarakey + "م",
|
out: "وځ" + zwarakay + "م",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
in: {
|
in: {
|
||||||
p: "وځم",
|
p: "وځم",
|
||||||
f: "wUdzum",
|
f: "wUdzum",
|
||||||
},
|
},
|
||||||
out: "وُځ" + zwarakey + "م",
|
out: "وُځ" + zwarakay + "م",
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const zwar = "َ";
|
const zwar = "َ";
|
||||||
const zwarakey = "ٙ";
|
const zwarakay = "ٙ";
|
||||||
const zer = "ِ";
|
const zer = "ِ";
|
||||||
const pesh = "ُ";
|
const pesh = "ُ";
|
||||||
const sukun = "ْ";
|
const sukun = "ْ";
|
||||||
|
@ -19,8 +19,25 @@ const fathahan = "ً";
|
||||||
|
|
||||||
// TODO: THESE OTHER TRIGRAPHS??
|
// TODO: THESE OTHER TRIGRAPHS??
|
||||||
const quadrigraphs = ["-Ul-"];
|
const quadrigraphs = ["-Ul-"];
|
||||||
const trigraphs = ["eyy", "éyy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
|
const trigraphs = ["ey", "éy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
|
||||||
const digraphs = ["ắ", "aa", "áa", "ee", "ée", "ey", "éy", "oo", "óo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"];
|
const digraphs = [
|
||||||
|
"ắ",
|
||||||
|
"aa",
|
||||||
|
"áa",
|
||||||
|
"ee",
|
||||||
|
"ée",
|
||||||
|
"ay",
|
||||||
|
"áy",
|
||||||
|
"oo",
|
||||||
|
"óo",
|
||||||
|
"kh",
|
||||||
|
"gh",
|
||||||
|
"ts",
|
||||||
|
"dz",
|
||||||
|
"jz",
|
||||||
|
"ch",
|
||||||
|
"sh",
|
||||||
|
];
|
||||||
const endingDigraphs = ["uy", "úy"];
|
const endingDigraphs = ["uy", "úy"];
|
||||||
const willIgnore = ["?", " ", "`", ".", "…"];
|
const willIgnore = ["?", " ", "`", ".", "…"];
|
||||||
|
|
||||||
|
@ -28,7 +45,7 @@ export function splitFIntoPhonemes(f: string): string[] {
|
||||||
const result: string[] = [];
|
const result: string[] = [];
|
||||||
let index = 0;
|
let index = 0;
|
||||||
while (index < f.length) {
|
while (index < f.length) {
|
||||||
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " ");
|
const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
|
||||||
const threeLetterChunk = f.slice(index, index + 3);
|
const threeLetterChunk = f.slice(index, index + 3);
|
||||||
const fourLetterChunk = f.slice(index, index + 4);
|
const fourLetterChunk = f.slice(index, index + 4);
|
||||||
if (quadrigraphs.includes(fourLetterChunk)) {
|
if (quadrigraphs.includes(fourLetterChunk)) {
|
||||||
|
@ -89,7 +106,12 @@ const phonemeTable = [
|
||||||
{ phoneme: "m", possibilities: ["م"], consonant: true },
|
{ phoneme: "m", possibilities: ["م"], consonant: true },
|
||||||
{ phoneme: "n", possibilities: ["ن"], consonant: true },
|
{ phoneme: "n", possibilities: ["ن"], consonant: true },
|
||||||
{ phoneme: "N", possibilities: ["ڼ"], consonant: true },
|
{ phoneme: "N", possibilities: ["ڼ"], consonant: true },
|
||||||
{ phoneme: "h", possibilities: ["ه", "ح"], consonant: true, takesSukunOnEnding: true },
|
{
|
||||||
|
phoneme: "h",
|
||||||
|
possibilities: ["ه", "ح"],
|
||||||
|
consonant: true,
|
||||||
|
takesSukunOnEnding: true,
|
||||||
|
},
|
||||||
{ phoneme: "w", possibilities: ["و"], consonant: true },
|
{ phoneme: "w", possibilities: ["و"], consonant: true },
|
||||||
{ phoneme: "y", possibilities: ["ی"], consonant: true },
|
{ phoneme: "y", possibilities: ["ی"], consonant: true },
|
||||||
|
|
||||||
|
@ -99,33 +121,130 @@ const phonemeTable = [
|
||||||
{ phoneme: "-Ul-", possibilities: ["ال"] },
|
{ phoneme: "-Ul-", possibilities: ["ال"] },
|
||||||
|
|
||||||
// vowels
|
// vowels
|
||||||
{ phoneme: "aa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true },
|
{
|
||||||
{ phoneme: "áa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true },
|
phoneme: "aa",
|
||||||
{ phoneme: "ee", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true },
|
possibilities: ["ا"],
|
||||||
{ phoneme: "ée", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true },
|
beginning: ["آ", "ا"],
|
||||||
|
endingPossibilities: ["ا", "یٰ"],
|
||||||
|
isLongA: true,
|
||||||
|
canStartWithAynBefore: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "áa",
|
||||||
|
possibilities: ["ا"],
|
||||||
|
beginning: ["آ", "ا"],
|
||||||
|
endingPossibilities: ["ا", "یٰ"],
|
||||||
|
isLongA: true,
|
||||||
|
canStartWithAynBefore: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "ee",
|
||||||
|
possibilities: ["ی"],
|
||||||
|
addAlefOnBeginning: true,
|
||||||
|
endingPossibilities: ["ي"],
|
||||||
|
diacritic: zer,
|
||||||
|
canStartWithAynBefore: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "ée",
|
||||||
|
possibilities: ["ی"],
|
||||||
|
addAlefOnBeginning: true,
|
||||||
|
endingPossibilities: ["ي"],
|
||||||
|
diacritic: zer,
|
||||||
|
canStartWithAynBefore: true,
|
||||||
|
},
|
||||||
{ phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true },
|
{ phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true },
|
||||||
{ phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true },
|
{ phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true },
|
||||||
{ phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true },
|
{ phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true },
|
||||||
{ phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true },
|
{ phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true },
|
||||||
{ phoneme: "oo", possibilities: ["و"], addAlefOnBeginning: true, alsoCanBePrefix: true, diacritic: pesh },
|
{
|
||||||
{ phoneme: "óo", possibilities: ["و"], addAlefOnBeginning: true, diacritic: pesh },
|
phoneme: "oo",
|
||||||
{ phoneme: "ey", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]},
|
possibilities: ["و"],
|
||||||
{ phoneme: "éy", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]},
|
addAlefOnBeginning: true,
|
||||||
|
alsoCanBePrefix: true,
|
||||||
|
diacritic: pesh,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "óo",
|
||||||
|
possibilities: ["و"],
|
||||||
|
addAlefOnBeginning: true,
|
||||||
|
diacritic: pesh,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "ay",
|
||||||
|
possibilities: ["ی"],
|
||||||
|
addAlefOnBeginning: true,
|
||||||
|
endingPossibilities: ["ی"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "áy",
|
||||||
|
possibilities: ["ی"],
|
||||||
|
addAlefOnBeginning: true,
|
||||||
|
endingPossibilities: ["ی"],
|
||||||
|
},
|
||||||
{ phoneme: "uy", possibilities: ["ۍ"], endingOnly: true },
|
{ phoneme: "uy", possibilities: ["ۍ"], endingOnly: true },
|
||||||
{ phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS
|
{ phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS
|
||||||
{ phoneme: "eyy", possibilities: ["ئ"], endingOnly: true },
|
{ phoneme: "ey", possibilities: ["ئ"], endingOnly: true },
|
||||||
{ phoneme: "éyy", possibilities: ["ئ"], endingOnly: true },
|
{ phoneme: "éy", possibilities: ["ئ"], endingOnly: true },
|
||||||
|
|
||||||
{ phoneme: "a", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true },
|
{
|
||||||
{ phoneme: "á", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true },
|
phoneme: "a",
|
||||||
|
diacritic: zwar,
|
||||||
|
endingPossibilities: ["ه"],
|
||||||
|
canComeAfterHayEnding: true,
|
||||||
|
canBeFirstPartOfFathahanEnding: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "á",
|
||||||
|
diacritic: zwar,
|
||||||
|
endingPossibilities: ["ه"],
|
||||||
|
canComeAfterHayEnding: true,
|
||||||
|
canBeFirstPartOfFathahanEnding: true,
|
||||||
|
},
|
||||||
{ phoneme: "ă", diacritic: zwar },
|
{ phoneme: "ă", diacritic: zwar },
|
||||||
{ phoneme: "ắ", diacritic: zwar },
|
{ phoneme: "ắ", diacritic: zwar },
|
||||||
{ phoneme: "u", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true },
|
{
|
||||||
{ phoneme: "ú", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true },
|
phoneme: "u",
|
||||||
{ phoneme: "i", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] },
|
diacritic: zwarakay,
|
||||||
{ phoneme: "í", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] },
|
endingPossibilities: ["ه"],
|
||||||
{ phoneme: "U", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] },
|
hamzaOnEnd: true,
|
||||||
{ phoneme: "Ú", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] },
|
},
|
||||||
|
{
|
||||||
|
phoneme: "ú",
|
||||||
|
diacritic: zwarakay,
|
||||||
|
endingPossibilities: ["ه"],
|
||||||
|
hamzaOnEnd: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "i",
|
||||||
|
diacritic: zer,
|
||||||
|
endingPossibilities: ["ه"],
|
||||||
|
takesDiacriticBeforeGurdaHayEnding: true,
|
||||||
|
canBeWasla: true,
|
||||||
|
beginning: ["ا", "ع"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "í",
|
||||||
|
diacritic: zer,
|
||||||
|
endingPossibilities: ["ه"],
|
||||||
|
takesDiacriticBeforeGurdaHayEnding: true,
|
||||||
|
canBeWasla: true,
|
||||||
|
beginning: ["ا", "ع"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "U",
|
||||||
|
diacritic: pesh,
|
||||||
|
endingPossibilities: ["ه"],
|
||||||
|
takesDiacriticBeforeGurdaHayEnding: true,
|
||||||
|
beginning: ["ا", "ع"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
phoneme: "Ú",
|
||||||
|
diacritic: pesh,
|
||||||
|
endingPossibilities: ["ه"],
|
||||||
|
takesDiacriticBeforeGurdaHayEnding: true,
|
||||||
|
beginning: ["ا", "ع"],
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
function isSpace(s: string): boolean {
|
function isSpace(s: string): boolean {
|
||||||
|
@ -142,7 +261,11 @@ interface IDiacriticsErrorMessage {
|
||||||
i: number;
|
i: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
function possibilityMatches(p: string, pIndex: number, possibilities: string[] | undefined): boolean {
|
function possibilityMatches(
|
||||||
|
p: string,
|
||||||
|
pIndex: number,
|
||||||
|
possibilities: string[] | undefined
|
||||||
|
): boolean {
|
||||||
/* istanbul ignore next */
|
/* istanbul ignore next */
|
||||||
if (!possibilities) {
|
if (!possibilities) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -155,10 +278,15 @@ function possibilityMatches(p: string, pIndex: number, possibilities: string[] |
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean {
|
function isPrefixedByDirectionalPronoun(
|
||||||
|
i: number,
|
||||||
|
phonemes: string[]
|
||||||
|
): boolean {
|
||||||
const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join("");
|
const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join("");
|
||||||
const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join("");
|
const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join("");
|
||||||
if (["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)) {
|
if (
|
||||||
|
["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)
|
||||||
|
) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (potentialPronounThreeCharSlice === "raa-") {
|
if (potentialPronounThreeCharSlice === "raa-") {
|
||||||
|
@ -167,7 +295,11 @@ function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes: boolean = false): string | undefined {
|
export function phoneticsToDiacritics(
|
||||||
|
ps: string,
|
||||||
|
ph: string,
|
||||||
|
forbidOoPrefixes: boolean = false
|
||||||
|
): string | undefined {
|
||||||
const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]);
|
const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]);
|
||||||
const p = ps.trim();
|
const p = ps.trim();
|
||||||
let result = "";
|
let result = "";
|
||||||
|
@ -179,58 +311,72 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
if (phoneme === "-") {
|
if (phoneme === "-") {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const phonemeInfo = phonemeTable.find((element) => element.phoneme === phoneme);
|
const phonemeInfo = phonemeTable.find(
|
||||||
|
(element) => element.phoneme === phoneme
|
||||||
|
);
|
||||||
if (!phonemeInfo) {
|
if (!phonemeInfo) {
|
||||||
errored.push({ error: "phoneme info not found", phoneme, i });
|
errored.push({ error: "phoneme info not found", phoneme, i });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const isDoubleConsonant = (
|
const isDoubleConsonant =
|
||||||
phonemeInfo.consonant &&
|
phonemeInfo.consonant &&
|
||||||
phoneme === phonemes[i - 1] &&
|
phoneme === phonemes[i - 1] &&
|
||||||
// TODO: is this thourough enough to allow double consonants on the ending of the previous word?
|
// TODO: is this thourough enough to allow double consonants on the ending of the previous word?
|
||||||
!(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek
|
!(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek
|
||||||
) ? true : false;
|
? true
|
||||||
const isBeginning = !isDoubleConsonant && ((i === 0) || isSpace(p[pIndex - 1]) || (phonemes[i - 1] === "-Ul-") || isPrefixedByDirectionalPronoun(i, phonemes));
|
: false;
|
||||||
const upcomingAEndingAfterHey = (p[pIndex] === "ح" && isSpace(p[pIndex + 1]) && ["a", "á"].includes(phonemes[i + 1]));
|
const isBeginning =
|
||||||
|
!isDoubleConsonant &&
|
||||||
|
(i === 0 ||
|
||||||
|
isSpace(p[pIndex - 1]) ||
|
||||||
|
phonemes[i - 1] === "-Ul-" ||
|
||||||
|
isPrefixedByDirectionalPronoun(i, phonemes));
|
||||||
|
const upcomingAEndingAfterHay =
|
||||||
|
p[pIndex] === "ح" &&
|
||||||
|
isSpace(p[pIndex + 1]) &&
|
||||||
|
["a", "á"].includes(phonemes[i + 1]);
|
||||||
|
|
||||||
// TODO: break this into a seperate function -- why can it sometimes be set to undefined?
|
// TODO: break this into a seperate function -- why can it sometimes be set to undefined?
|
||||||
const isEnding = (i === phonemes.length - 1) || ((
|
const isEnding =
|
||||||
(phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
|
i === phonemes.length - 1 ||
|
||||||
|
(((phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
|
||||||
(!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
|
(!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
|
||||||
(
|
(!phonemeInfo.possibilities &&
|
||||||
(!phonemeInfo.possibilities && isSpace(p[pIndex + 1])) &&
|
isSpace(p[pIndex + 1]) &&
|
||||||
(possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) || (p[pIndex] === "ع" && phonemes[i + 1] !== "'"))
|
(possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) ||
|
||||||
)
|
(p[pIndex] === "ع" && phonemes[i + 1] !== "'")))) &&
|
||||||
) && !upcomingAEndingAfterHey
|
!upcomingAEndingAfterHay && // makes sure the next letter isn't a double consonant like haqq <-
|
||||||
&& // makes sure the next letter isn't a double consonant like haqq <-
|
|
||||||
!(
|
!(
|
||||||
phonemeInfo.consonant && phoneme === phonemes[i + 1] // &&
|
(phonemeInfo.consonant && phoneme === phonemes[i + 1]) // &&
|
||||||
// !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
|
// !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
|
||||||
)
|
)) || // can be the trailing double consanant on the end of a word
|
||||||
) || // can be the trailing double consanant on the end of a word
|
(phonemeInfo.consonant &&
|
||||||
(
|
phoneme === phonemes[i - 1] &&
|
||||||
phonemeInfo.consonant && phoneme === phonemes[i - 1] &&
|
!(
|
||||||
!(isEndSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
|
isEndSpace(p[pIndex - 1]) &&
|
||||||
) || // can be یٰ ending
|
phonemeInfo.possibilities.includes(p[pIndex])
|
||||||
(
|
)) || // can be یٰ ending
|
||||||
isEndSpace(p[pIndex + 2]) && (p.slice(pIndex, pIndex + 2) === "یٰ")
|
(isEndSpace(p[pIndex + 2]) && p.slice(pIndex, pIndex + 2) === "یٰ");
|
||||||
);
|
|
||||||
|
|
||||||
const isUofDu = phoneme === "u" && (
|
const isUofDu =
|
||||||
p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
|
phoneme === "u" &&
|
||||||
|
(p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
|
||||||
(p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
|
(p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
|
||||||
p.slice(pIndex - 6, pIndex) === "د ... " // ... د is as the previous word
|
p.slice(pIndex - 6, pIndex) === "د ... "); // ... د is as the previous word
|
||||||
);
|
|
||||||
// TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance
|
// TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance
|
||||||
const isEndingAynVowel = isEnding && phonemeInfo.diacritic && [p[pIndex], p[pIndex - 1]].includes("ع") && p[pIndex] !== "ه";
|
const isEndingAynVowel =
|
||||||
|
isEnding &&
|
||||||
|
phonemeInfo.diacritic &&
|
||||||
|
[p[pIndex], p[pIndex - 1]].includes("ع") &&
|
||||||
|
p[pIndex] !== "ه";
|
||||||
const isMiddle = !isBeginning && !isEnding;
|
const isMiddle = !isBeginning && !isEnding;
|
||||||
const isSilentWaw = (
|
const isSilentWaw =
|
||||||
p[pIndex] === "و" &&
|
p[pIndex] === "و" &&
|
||||||
p[pIndex - 1] === "خ" &&
|
p[pIndex - 1] === "خ" &&
|
||||||
p[pIndex + 1] === "ا" &&
|
p[pIndex + 1] === "ا" &&
|
||||||
["áa", "aa"].includes(phoneme)
|
["áa", "aa"].includes(phoneme);
|
||||||
);
|
const isAnAEndingAfterHay =
|
||||||
const isAnAEndingAfterHey = isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHeyEnding;
|
isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHayEnding;
|
||||||
if (isDoubleConsonant) {
|
if (isDoubleConsonant) {
|
||||||
pIndex--;
|
pIndex--;
|
||||||
if (isSpace(p[pIndex])) {
|
if (isSpace(p[pIndex])) {
|
||||||
|
@ -247,14 +393,22 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
pIndex++;
|
pIndex++;
|
||||||
}
|
}
|
||||||
// special check for Arabic wasla
|
// special check for Arabic wasla
|
||||||
if (p.slice(0, 3) === "بال" && phonemes[i - 1] === "b" && phonemeInfo.canBeWasla && phonemes[i + 1] === "l") {
|
if (
|
||||||
|
p.slice(0, 3) === "بال" &&
|
||||||
|
phonemes[i - 1] === "b" &&
|
||||||
|
phonemeInfo.canBeWasla &&
|
||||||
|
phonemes[i + 1] === "l"
|
||||||
|
) {
|
||||||
result += phonemeInfo.diacritic + wasla;
|
result += phonemeInfo.diacritic + wasla;
|
||||||
pIndex++;
|
pIndex++;
|
||||||
previousPhonemeWasAConsonant = false;
|
previousPhonemeWasAConsonant = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// special check for fathahan ending
|
// special check for fathahan ending
|
||||||
if (phonemeInfo.canBeFirstPartOfFathahanEnding && p.slice(pIndex, pIndex + 2) === "اً") {
|
if (
|
||||||
|
phonemeInfo.canBeFirstPartOfFathahanEnding &&
|
||||||
|
p.slice(pIndex, pIndex + 2) === "اً"
|
||||||
|
) {
|
||||||
result += "ا";
|
result += "ا";
|
||||||
pIndex++;
|
pIndex++;
|
||||||
return;
|
return;
|
||||||
|
@ -265,7 +419,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// special check for words starting with عا or عی
|
// special check for words starting with عا or عی
|
||||||
if (isBeginning && phonemeInfo.canStartWithAynBefore && p[pIndex] === "ع" && phonemeInfo.possibilities.includes(p[pIndex + 1])) {
|
if (
|
||||||
|
isBeginning &&
|
||||||
|
phonemeInfo.canStartWithAynBefore &&
|
||||||
|
p[pIndex] === "ع" &&
|
||||||
|
phonemeInfo.possibilities.includes(p[pIndex + 1])
|
||||||
|
) {
|
||||||
result += "ع";
|
result += "ع";
|
||||||
result += phonemeInfo.diacritic ? phonemeInfo.diacritic : "";
|
result += phonemeInfo.diacritic ? phonemeInfo.diacritic : "";
|
||||||
result += p[pIndex + 1];
|
result += p[pIndex + 1];
|
||||||
|
@ -273,23 +432,45 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// special check for ؤ Ua
|
// special check for ؤ Ua
|
||||||
if (phoneme === "U" && phonemes[i + 1] === "a" && phonemes[i + 2] !== "a" && p[pIndex] === "و") {
|
if (
|
||||||
|
phoneme === "U" &&
|
||||||
|
phonemes[i + 1] === "a" &&
|
||||||
|
phonemes[i + 2] !== "a" &&
|
||||||
|
p[pIndex] === "و"
|
||||||
|
) {
|
||||||
result += "ؤ";
|
result += "ؤ";
|
||||||
pIndex++;
|
pIndex++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (phoneme === "a" && phonemes[i - 1] === "U" && phonemes[i + 1] !== "a" && result.slice(-2) === "ؤ") {
|
if (
|
||||||
|
phoneme === "a" &&
|
||||||
|
phonemes[i - 1] === "U" &&
|
||||||
|
phonemes[i + 1] !== "a" &&
|
||||||
|
result.slice(-2) === "ؤ"
|
||||||
|
) {
|
||||||
previousPhonemeWasAConsonant = false;
|
previousPhonemeWasAConsonant = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// special check for و wo
|
// special check for و wo
|
||||||
if (isBeginning && phoneme === "w" && phonemes[i + 1] === "o" && p[pIndex] === "و" && isEndSpace(p[pIndex + 1])) {
|
if (
|
||||||
|
isBeginning &&
|
||||||
|
phoneme === "w" &&
|
||||||
|
phonemes[i + 1] === "o" &&
|
||||||
|
p[pIndex] === "و" &&
|
||||||
|
isEndSpace(p[pIndex + 1])
|
||||||
|
) {
|
||||||
result += "و";
|
result += "و";
|
||||||
pIndex++;
|
pIndex++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// TODO: isEndSpace here is redundant??
|
// TODO: isEndSpace here is redundant??
|
||||||
if (isEnding && phoneme === "o" && phonemes[i - 1] === "w" && p[pIndex - 1] === "و" && isEndSpace(p[pIndex])) {
|
if (
|
||||||
|
isEnding &&
|
||||||
|
phoneme === "o" &&
|
||||||
|
phonemes[i - 1] === "w" &&
|
||||||
|
p[pIndex - 1] === "و" &&
|
||||||
|
isEndSpace(p[pIndex])
|
||||||
|
) {
|
||||||
pIndex++;
|
pIndex++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -300,38 +481,67 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// special check for for أ in the middle of the word
|
// special check for for أ in the middle of the word
|
||||||
if (!isBeginning && p[pIndex] === "أ" && phoneme === "a" && phonemes[i + 1] === "'" && phonemes[i + 2] === "a") {
|
if (
|
||||||
|
!isBeginning &&
|
||||||
|
p[pIndex] === "أ" &&
|
||||||
|
phoneme === "a" &&
|
||||||
|
phonemes[i + 1] === "'" &&
|
||||||
|
phonemes[i + 2] === "a"
|
||||||
|
) {
|
||||||
result += "أ";
|
result += "أ";
|
||||||
pIndex++;
|
pIndex++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (p[pIndex - 1] === "أ" && phonemes[i - 1] === "a" && phoneme === "'" && phonemes[i + 1] === "a") {
|
if (
|
||||||
|
p[pIndex - 1] === "أ" &&
|
||||||
|
phonemes[i - 1] === "a" &&
|
||||||
|
phoneme === "'" &&
|
||||||
|
phonemes[i + 1] === "a"
|
||||||
|
) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (p[pIndex - 1] === "أ" && phonemes[i - 2] === "a" && phonemes[i - 1] === "'" && phoneme === "a") {
|
if (
|
||||||
|
p[pIndex - 1] === "أ" &&
|
||||||
|
phonemes[i - 2] === "a" &&
|
||||||
|
phonemes[i - 1] === "'" &&
|
||||||
|
phoneme === "a"
|
||||||
|
) {
|
||||||
previousPhonemeWasAConsonant = false;
|
previousPhonemeWasAConsonant = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// special check for وو 'oo
|
// special check for وو 'oo
|
||||||
if (!isBeginning && p[pIndex] === "و" && p[pIndex + 1] === "و" && phoneme === "'" && phonemes[i + 1] === "oo") {
|
if (
|
||||||
|
!isBeginning &&
|
||||||
|
p[pIndex] === "و" &&
|
||||||
|
p[pIndex + 1] === "و" &&
|
||||||
|
phoneme === "'" &&
|
||||||
|
phonemes[i + 1] === "oo"
|
||||||
|
) {
|
||||||
result += "وُو";
|
result += "وُو";
|
||||||
pIndex += 2;
|
pIndex += 2;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (p[pIndex - 2] === "و" && p[pIndex - 1] === "و" && phonemes[i - 1] === "'" && phoneme === "oo") {
|
if (
|
||||||
|
p[pIndex - 2] === "و" &&
|
||||||
|
p[pIndex - 1] === "و" &&
|
||||||
|
phonemes[i - 1] === "'" &&
|
||||||
|
phoneme === "oo"
|
||||||
|
) {
|
||||||
previousPhonemeWasAConsonant = false;
|
previousPhonemeWasAConsonant = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const prevLetterWasBeginningAyn = (
|
const prevLetterWasBeginningAyn =
|
||||||
p[pIndex - 1] === "ع" &&
|
p[pIndex - 1] === "ع" &&
|
||||||
// isEndSpace(p[pIndex]) && // This breaks it
|
// isEndSpace(p[pIndex]) && // This breaks it
|
||||||
phoneme === "'"
|
phoneme === "'";
|
||||||
);
|
|
||||||
// check if the phoneme lines up in the Pashto word
|
// check if the phoneme lines up in the Pashto word
|
||||||
if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) {
|
if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) {
|
||||||
// TODO: Maybe a little bad because it doesn't loop through possibilities
|
// TODO: Maybe a little bad because it doesn't loop through possibilities
|
||||||
if ((!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) && p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]) {
|
if (
|
||||||
|
(!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) &&
|
||||||
|
p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]
|
||||||
|
) {
|
||||||
errored.push({ error: "didn't start with an aleph", phoneme, i });
|
errored.push({ error: "didn't start with an aleph", phoneme, i });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -348,18 +558,18 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
pIndex++;
|
pIndex++;
|
||||||
return;
|
return;
|
||||||
} else if (
|
} else if (
|
||||||
(isEnding && phonemeInfo.endingPossibilities) &&
|
isEnding &&
|
||||||
|
phonemeInfo.endingPossibilities &&
|
||||||
!isUofDu &&
|
!isUofDu &&
|
||||||
(
|
|
||||||
!possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
|
!possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
|
||||||
!isEndingAynVowel && // allowing short vowels on the end of words ending with ع
|
!isEndingAynVowel && // allowing short vowels on the end of words ending with ع
|
||||||
!isAnAEndingAfterHey
|
!isAnAEndingAfterHay
|
||||||
)
|
|
||||||
) {
|
) {
|
||||||
errored.push({ error: "bad ending", phoneme, i });
|
errored.push({ error: "bad ending", phoneme, i });
|
||||||
return;
|
return;
|
||||||
} else if (
|
} else if (
|
||||||
(isEnding && !phonemeInfo.endingPossibilities) &&
|
isEnding &&
|
||||||
|
!phonemeInfo.endingPossibilities &&
|
||||||
phonemeInfo.possibilities &&
|
phonemeInfo.possibilities &&
|
||||||
!phonemeInfo.possibilities.includes(p[pIndex])
|
!phonemeInfo.possibilities.includes(p[pIndex])
|
||||||
) {
|
) {
|
||||||
|
@ -367,14 +577,17 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
errored.push({ error: "bad ending 2", phoneme, i });
|
errored.push({ error: "bad ending 2", phoneme, i });
|
||||||
return;
|
return;
|
||||||
} else if (
|
} else if (
|
||||||
(phonemeInfo.possibilities && !isEnding) &&
|
phonemeInfo.possibilities &&
|
||||||
(
|
!isEnding &&
|
||||||
!(phonemeInfo.possibilities.includes(p[pIndex])) &&
|
!phonemeInfo.possibilities.includes(p[pIndex]) &&
|
||||||
!(p[pIndex] === "ن" && (p[pIndex + 1] === "ب" && phoneme === "m")) && // && // exception case with نب === mb
|
!(p[pIndex] === "ن" && p[pIndex + 1] === "ب" && phoneme === "m") && // && // exception case with نب === mb
|
||||||
!prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
|
!prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
|
||||||
)
|
|
||||||
) {
|
) {
|
||||||
errored.push({ error: "improper coressponding letter in middle of word", phoneme, i });
|
errored.push({
|
||||||
|
error: "improper coressponding letter in middle of word",
|
||||||
|
phoneme,
|
||||||
|
i,
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// console.log(phoneme, pIndex, p[pIndex], isEnding);
|
// console.log(phoneme, pIndex, p[pIndex], isEnding);
|
||||||
|
@ -382,7 +595,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
// OK, it lines up with the Pashto word, we're good
|
// OK, it lines up with the Pashto word, we're good
|
||||||
// Now continue building the result string
|
// Now continue building the result string
|
||||||
// deal with starting with short vowels and alef
|
// deal with starting with short vowels and alef
|
||||||
if (!isUofDu && isBeginning && !phonemeInfo.possibilities && !phonemeInfo.isIzafe) {
|
if (
|
||||||
|
!isUofDu &&
|
||||||
|
isBeginning &&
|
||||||
|
!phonemeInfo.possibilities &&
|
||||||
|
!phonemeInfo.isIzafe
|
||||||
|
) {
|
||||||
// TODO: WHY IS THIS HERE
|
// TODO: WHY IS THIS HERE
|
||||||
if (!["ا", "ع"].includes(p[pIndex])) {
|
if (!["ا", "ع"].includes(p[pIndex])) {
|
||||||
errored.push({ error: "bad beginning 2", phoneme, i });
|
errored.push({ error: "bad beginning 2", phoneme, i });
|
||||||
|
@ -392,22 +610,30 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
pIndex++;
|
pIndex++;
|
||||||
}
|
}
|
||||||
// if the phoneme carries a diacritic insert it (before the letter if it's coming)
|
// if the phoneme carries a diacritic insert it (before the letter if it's coming)
|
||||||
const isOoPrefix = (phonemeInfo.alsoCanBePrefix && isBeginning && (p[pIndex - 1] !== "ا"));
|
const isOoPrefix =
|
||||||
|
phonemeInfo.alsoCanBePrefix && isBeginning && p[pIndex - 1] !== "ا";
|
||||||
if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) {
|
if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) {
|
||||||
// using this hack to remove the space and put it after the zwarakey we're going to add after د
|
// using this hack to remove the space and put it after the zwarakay we're going to add after د
|
||||||
if (isUofDu && result.slice(-5) === " ... ") {
|
if (isUofDu && result.slice(-5) === " ... ") {
|
||||||
result = result.slice(0, -5) + zwarakey + " ... ";
|
result = result.slice(0, -5) + zwarakay + " ... ";
|
||||||
} else if (isUofDu && result.slice(-1) === " ") {
|
} else if (isUofDu && result.slice(-1) === " ") {
|
||||||
result = result.slice(0, -1) + zwarakey + " ";
|
result = result.slice(0, -1) + zwarakay + " ";
|
||||||
} else {
|
} else {
|
||||||
result += phonemeInfo.diacritic;
|
result += phonemeInfo.diacritic;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO: The middle stuff might be unneccessary/unhelpful
|
// TODO: The middle stuff might be unneccessary/unhelpful
|
||||||
const isACommaWithoutAyn = (phoneme === "'" && (p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ")));
|
const isACommaWithoutAyn =
|
||||||
|
phoneme === "'" && p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ");
|
||||||
// if the previous phoneme was a consonant insert a sukun
|
// if the previous phoneme was a consonant insert a sukun
|
||||||
// console.log("Will I go into the adding thing?");
|
// console.log("Will I go into the adding thing?");
|
||||||
if (!isBeginning && previousPhonemeWasAConsonant && phonemeInfo.consonant && phonemes[i - 1] !== "'" && p[pIndex] !== "ع") {
|
if (
|
||||||
|
!isBeginning &&
|
||||||
|
previousPhonemeWasAConsonant &&
|
||||||
|
phonemeInfo.consonant &&
|
||||||
|
phonemes[i - 1] !== "'" &&
|
||||||
|
p[pIndex] !== "ع"
|
||||||
|
) {
|
||||||
result += isDoubleConsonant ? tashdeed : sukun;
|
result += isDoubleConsonant ? tashdeed : sukun;
|
||||||
}
|
}
|
||||||
if (isEnding && isDoubleConsonant) {
|
if (isEnding && isDoubleConsonant) {
|
||||||
|
@ -417,30 +643,38 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if there's a pashto letter for the phoneme, insert it
|
// if there's a pashto letter for the phoneme, insert it
|
||||||
if (!isEndingAynVowel && !isACommaWithoutAyn && (phonemeInfo.possibilities || isEnding)) {
|
if (
|
||||||
|
!isEndingAynVowel &&
|
||||||
|
!isACommaWithoutAyn &&
|
||||||
|
(phonemeInfo.possibilities || isEnding)
|
||||||
|
) {
|
||||||
// need the isSpace check to prevent weird behaviour with izafe
|
// need the isSpace check to prevent weird behaviour with izafe
|
||||||
if (!isUofDu) {
|
if (!isUofDu) {
|
||||||
if (isAnAEndingAfterHey) {
|
if (isAnAEndingAfterHay) {
|
||||||
result += zwar;
|
result += zwar;
|
||||||
if (p[pIndex] === " ") {
|
if (p[pIndex] === " ") {
|
||||||
result += " ";
|
result += " ";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
result += (isDoubleConsonant || isSpace(p[pIndex])) ? "" : p[pIndex];
|
result += isDoubleConsonant || isSpace(p[pIndex]) ? "" : p[pIndex];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pIndex++;
|
pIndex++;
|
||||||
}
|
}
|
||||||
if (isEnding) {
|
if (isEnding) {
|
||||||
if (isUofDu) {
|
if (isUofDu) {
|
||||||
result += zwarakey;
|
result += zwarakay;
|
||||||
} else if (phonemeInfo.hamzaOnEnd) {
|
} else if (phonemeInfo.hamzaOnEnd) {
|
||||||
result += hamzaAbove;
|
result += hamzaAbove;
|
||||||
} else if (phonemeInfo.takesSukunOnEnding) {
|
} else if (phonemeInfo.takesSukunOnEnding) {
|
||||||
result += sukun;
|
result += sukun;
|
||||||
} else if (p[pIndex] === daggerAlif) {
|
} else if (p[pIndex] === daggerAlif) {
|
||||||
result += daggerAlif;
|
result += daggerAlif;
|
||||||
} else if (isEndSpace(p[pIndex]) && p[pIndex - 1] === "ه" && phonemeInfo.takesDiacriticBeforeGurdaHeyEnding) {
|
} else if (
|
||||||
|
isEndSpace(p[pIndex]) &&
|
||||||
|
p[pIndex - 1] === "ه" &&
|
||||||
|
phonemeInfo.takesDiacriticBeforeGurdaHayEnding
|
||||||
|
) {
|
||||||
result = result.slice(0, -1) + phonemeInfo.diacritic + "ه";
|
result = result.slice(0, -1) + phonemeInfo.diacritic + "ه";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -456,13 +690,20 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
previousPhonemeWasAConsonant = (!isEnding && phonemeInfo.consonant) ? true : false;
|
previousPhonemeWasAConsonant =
|
||||||
|
!isEnding && phonemeInfo.consonant ? true : false;
|
||||||
// ignore the ع or ئ if there's not a ' in the phonetics
|
// ignore the ع or ئ if there's not a ' in the phonetics
|
||||||
const nextPhonemeInfo = phonemeTable.find((element) => phonemes[i + 1] === element.phoneme);
|
const nextPhonemeInfo = phonemeTable.find(
|
||||||
|
(element) => phonemes[i + 1] === element.phoneme
|
||||||
|
);
|
||||||
if (
|
if (
|
||||||
["ع", "ئ"].includes(p[pIndex]) &&
|
["ع", "ئ"].includes(p[pIndex]) &&
|
||||||
![phonemes[i + 1], phonemes[i + 2]].includes("'") &&
|
![phonemes[i + 1], phonemes[i + 2]].includes("'") &&
|
||||||
!(nextPhonemeInfo && nextPhonemeInfo.diacritic && isEndSpace(p[pIndex + 1])) && // don't skip the ع on the end if there's another short letter coming after it
|
!(
|
||||||
|
nextPhonemeInfo &&
|
||||||
|
nextPhonemeInfo.diacritic &&
|
||||||
|
isEndSpace(p[pIndex + 1])
|
||||||
|
) && // don't skip the ع on the end if there's another short letter coming after it
|
||||||
!(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end
|
!(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end
|
||||||
!phonemeInfo.isIzafe
|
!phonemeInfo.isIzafe
|
||||||
) {
|
) {
|
||||||
|
@ -476,7 +717,11 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// if we've arrived at a space in the Pashto, move along before the next iteration
|
// if we've arrived at a space in the Pashto, move along before the next iteration
|
||||||
if (isSpace(p[pIndex]) && phonemes[i + 1] !== "-i-" && !upcomingAEndingAfterHey) {
|
if (
|
||||||
|
isSpace(p[pIndex]) &&
|
||||||
|
phonemes[i + 1] !== "-i-" &&
|
||||||
|
!upcomingAEndingAfterHay
|
||||||
|
) {
|
||||||
result += " ";
|
result += " ";
|
||||||
pIndex++;
|
pIndex++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,7 +107,7 @@ export const sandwiches: T.Sandwich[] = [
|
||||||
{
|
{
|
||||||
type: "sandwich",
|
type: "sandwich",
|
||||||
before: { p: "د", f: "du" },
|
before: { p: "د", f: "du" },
|
||||||
after: { p: "په حیث", f: "pu heys" },
|
after: { p: "په حیث", f: "pu hays" },
|
||||||
e: "as",
|
e: "as",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -105,14 +105,14 @@ export const replacerInfo: IReplacerInfoItem[] = [
|
||||||
ipa: "ɪ́",
|
ipa: "ɪ́",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
char: "ey",
|
char: "ay",
|
||||||
alalc: "ay",
|
alalc: "ay",
|
||||||
ipa: "ai",
|
ipa: "ay",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
char: "éy",
|
char: "áy",
|
||||||
alalc: "áy",
|
alalc: "áy",
|
||||||
ipa: "ái",
|
ipa: "áj",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
char: "ee",
|
char: "ee",
|
||||||
|
@ -140,9 +140,9 @@ export const replacerInfo: IReplacerInfoItem[] = [
|
||||||
ipa: "u:j",
|
ipa: "u:j",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
char: "eyy",
|
char: "ey",
|
||||||
alalc: "ạy",
|
alalc: "ey",
|
||||||
ipa: "ɛ̝j",
|
ipa: "ej",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
char: "e",
|
char: "e",
|
||||||
|
@ -351,4 +351,5 @@ export const replacerInfo: IReplacerInfoItem[] = [
|
||||||
];
|
];
|
||||||
|
|
||||||
// tslint:disable-next-line
|
// tslint:disable-next-line
|
||||||
export const replacerRegex = /aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ey|éy|e{1,2}|ée|é|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g;
|
export const replacerRegex =
|
||||||
|
/aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ay|áy|e{1,2}|ée|é|ey|éy|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g;
|
||||||
|
|
|
@ -6,9 +6,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import {
|
import { translatePhonetics } from "./translate-phonetics";
|
||||||
translatePhonetics,
|
|
||||||
} from "./translate-phonetics";
|
|
||||||
|
|
||||||
const dialects = ["southern", "standard", "peshawer"];
|
const dialects = ["southern", "standard", "peshawer"];
|
||||||
const systems = ["ipa", "alalc"];
|
const systems = ["ipa", "alalc"];
|
||||||
|
@ -54,11 +52,11 @@ const translations = [
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
original: "saRey",
|
original: "saRay",
|
||||||
ipa: {
|
ipa: {
|
||||||
southern: "saɻai",
|
southern: "saɻaj",
|
||||||
standard: "saɻai",
|
standard: "saɻaj",
|
||||||
peshawer: "saɻai",
|
peshawer: "saɻaj",
|
||||||
},
|
},
|
||||||
alalc: {
|
alalc: {
|
||||||
southern: "saṛay",
|
southern: "saṛay",
|
||||||
|
@ -72,10 +70,8 @@ translations.forEach((t) => {
|
||||||
systems.forEach((system) => {
|
systems.forEach((system) => {
|
||||||
// check each dialect with given system
|
// check each dialect with given system
|
||||||
dialects.forEach((dialect) => {
|
dialects.forEach((dialect) => {
|
||||||
test(
|
test(// @ts-ignore
|
||||||
// @ts-ignore
|
`${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`, () => {
|
||||||
`${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`,
|
|
||||||
() => {
|
|
||||||
const translated = translatePhonetics(t.original, {
|
const translated = translatePhonetics(t.original, {
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
system,
|
system,
|
||||||
|
@ -84,8 +80,7 @@ translations.forEach((t) => {
|
||||||
});
|
});
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
expect(translated).toBe(t[system][dialect]);
|
expect(translated).toBe(t[system][dialect]);
|
||||||
},
|
});
|
||||||
);
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -8,16 +8,21 @@
|
||||||
|
|
||||||
import { standardizeEntry, validateEntry } from "./validate-entry";
|
import { standardizeEntry, validateEntry } from "./validate-entry";
|
||||||
import * as T from "../../types";
|
import * as T from "../../types";
|
||||||
import { standardizePhonetics } from "./standardize-pashto";
|
|
||||||
|
|
||||||
const toTest: {
|
const toTest: {
|
||||||
input: any,
|
input: any;
|
||||||
output: T.DictionaryEntryError | { ok: true } | { checkComplement: true },
|
output: T.DictionaryEntryError | { ok: true } | { checkComplement: true };
|
||||||
}[] = [
|
}[] = [
|
||||||
{
|
{
|
||||||
input: { ts: undefined },
|
input: { ts: undefined },
|
||||||
output: {
|
output: {
|
||||||
errors: ["missing ts", "missing i", "missing p", "missing f", "missing e"],
|
errors: [
|
||||||
|
"missing ts",
|
||||||
|
"missing i",
|
||||||
|
"missing p",
|
||||||
|
"missing f",
|
||||||
|
"missing e",
|
||||||
|
],
|
||||||
p: "",
|
p: "",
|
||||||
f: "",
|
f: "",
|
||||||
e: "",
|
e: "",
|
||||||
|
@ -37,7 +42,14 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":293,"ts":1527821299,"p":"اخطار","f":"ixtáar","e":"warning, reprimand, admonishment","c":"n. m."},
|
input: {
|
||||||
|
i: 293,
|
||||||
|
ts: 1527821299,
|
||||||
|
p: "اخطار",
|
||||||
|
f: "ixtáar",
|
||||||
|
e: "warning, reprimand, admonishment",
|
||||||
|
c: "n. m.",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["script and phonetics do not match for p and f"],
|
errors: ["script and phonetics do not match for p and f"],
|
||||||
p: "اخطار",
|
p: "اخطار",
|
||||||
|
@ -48,7 +60,17 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbf":"puxtan"},
|
input: {
|
||||||
|
i: 2433,
|
||||||
|
ts: 1527815197,
|
||||||
|
p: "پښتون",
|
||||||
|
f: "puxtoon",
|
||||||
|
e: "Pashtun",
|
||||||
|
c: "n. m. unisex / adj. irreg.",
|
||||||
|
infap: "پښتانه",
|
||||||
|
infaf: "puxtaanu",
|
||||||
|
infbf: "puxtan",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["missing infbp"],
|
errors: ["missing infbp"],
|
||||||
p: "پښتون",
|
p: "پښتون",
|
||||||
|
@ -59,7 +81,17 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbp":"پښتن"},
|
input: {
|
||||||
|
i: 2433,
|
||||||
|
ts: 1527815197,
|
||||||
|
p: "پښتون",
|
||||||
|
f: "puxtoon",
|
||||||
|
e: "Pashtun",
|
||||||
|
c: "n. m. unisex / adj. irreg.",
|
||||||
|
infap: "پښتانه",
|
||||||
|
infaf: "puxtaanu",
|
||||||
|
infbp: "پښتن",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["missing infbf"],
|
errors: ["missing infbf"],
|
||||||
p: "پښتون",
|
p: "پښتون",
|
||||||
|
@ -70,9 +102,22 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puktaanu","infbp":"پښتن"},
|
input: {
|
||||||
|
i: 2433,
|
||||||
|
ts: 1527815197,
|
||||||
|
p: "پښتون",
|
||||||
|
f: "puxtoon",
|
||||||
|
e: "Pashtun",
|
||||||
|
c: "n. m. unisex / adj. irreg.",
|
||||||
|
infap: "پښتانه",
|
||||||
|
infaf: "puktaanu",
|
||||||
|
infbp: "پښتن",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["script and phonetics do not match for infap and infaf", "missing infbf"],
|
errors: [
|
||||||
|
"script and phonetics do not match for infap and infaf",
|
||||||
|
"missing infbf",
|
||||||
|
],
|
||||||
p: "پښتون",
|
p: "پښتون",
|
||||||
f: "puxtoon",
|
f: "puxtoon",
|
||||||
e: "Pashtun",
|
e: "Pashtun",
|
||||||
|
@ -81,7 +126,19 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
|
input: {
|
||||||
|
i: 5000,
|
||||||
|
ts: 1527819674,
|
||||||
|
p: "څملاستل",
|
||||||
|
f: "tsumlaastúl",
|
||||||
|
e: "to lie down",
|
||||||
|
l: 1596485996977,
|
||||||
|
separationAtP: 2,
|
||||||
|
c: "v. intrans. seperable",
|
||||||
|
psp: "څمل",
|
||||||
|
psf: "tsaml",
|
||||||
|
noOo: true,
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["missing separationAtF"],
|
errors: ["missing separationAtF"],
|
||||||
p: "څملاستل",
|
p: "څملاستل",
|
||||||
|
@ -92,9 +149,24 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"sumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
|
input: {
|
||||||
|
i: 5000,
|
||||||
|
ts: 1527819674,
|
||||||
|
p: "څملاستل",
|
||||||
|
f: "sumlaastúl",
|
||||||
|
e: "to lie down",
|
||||||
|
l: 1596485996977,
|
||||||
|
separationAtP: 2,
|
||||||
|
c: "v. intrans. seperable",
|
||||||
|
psp: "څمل",
|
||||||
|
psf: "tsaml",
|
||||||
|
noOo: true,
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["script and phonetics do not match for p and f", "missing separationAtF"],
|
errors: [
|
||||||
|
"script and phonetics do not match for p and f",
|
||||||
|
"missing separationAtF",
|
||||||
|
],
|
||||||
p: "څملاستل",
|
p: "څملاستل",
|
||||||
f: "sumlaastúl",
|
f: "sumlaastúl",
|
||||||
e: "to lie down",
|
e: "to lie down",
|
||||||
|
@ -103,7 +175,19 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtF":4,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true},
|
input: {
|
||||||
|
i: 5000,
|
||||||
|
ts: 1527819674,
|
||||||
|
p: "څملاستل",
|
||||||
|
f: "tsumlaastúl",
|
||||||
|
e: "to lie down",
|
||||||
|
l: 1596485996977,
|
||||||
|
separationAtF: 4,
|
||||||
|
c: "v. intrans. seperable",
|
||||||
|
psp: "څمل",
|
||||||
|
psf: "tsaml",
|
||||||
|
noOo: true,
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["missing separationAtP"],
|
errors: ["missing separationAtP"],
|
||||||
p: "څملاستل",
|
p: "څملاستل",
|
||||||
|
@ -114,7 +198,14 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","c":"v. stat. comp. trans."},
|
input: {
|
||||||
|
i: 2222,
|
||||||
|
ts: 1571859113828,
|
||||||
|
p: "پخول",
|
||||||
|
f: "pakhawul",
|
||||||
|
e: "to cook, prepare, to cause to ripen, mature",
|
||||||
|
c: "v. stat. comp. trans.",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["missing complement for compound verb"],
|
errors: ["missing complement for compound verb"],
|
||||||
p: "پخول",
|
p: "پخول",
|
||||||
|
@ -125,21 +216,50 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","l":1574867531681,"c":"v. stat. comp. trans."},
|
input: {
|
||||||
|
i: 2222,
|
||||||
|
ts: 1571859113828,
|
||||||
|
p: "پخول",
|
||||||
|
f: "pakhawul",
|
||||||
|
e: "to cook, prepare, to cause to ripen, mature",
|
||||||
|
l: 1574867531681,
|
||||||
|
c: "v. stat. comp. trans.",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
checkComplement: true,
|
checkComplement: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":2231,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."},
|
input: {
|
||||||
|
i: 2231,
|
||||||
|
ts: 1527812013,
|
||||||
|
p: "پراخ",
|
||||||
|
f: "praakh, paráakh",
|
||||||
|
e: "wide, broad, spacious, vast",
|
||||||
|
c: "adj.",
|
||||||
|
},
|
||||||
output: { ok: true },
|
output: { ok: true },
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":0,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."},
|
input: {
|
||||||
|
i: 0,
|
||||||
|
ts: 1527812013,
|
||||||
|
p: "پراخ",
|
||||||
|
f: "praakh, paráakh",
|
||||||
|
e: "wide, broad, spacious, vast",
|
||||||
|
c: "adj.",
|
||||||
|
},
|
||||||
output: { ok: true },
|
output: { ok: true },
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj."},
|
input: {
|
||||||
|
i: 12,
|
||||||
|
ts: 1575058859661,
|
||||||
|
p: "آبدار",
|
||||||
|
f: "aawdáar",
|
||||||
|
e: "watery, damp, humid, juicy",
|
||||||
|
c: "adj.",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["script and phonetics do not match for p and f"],
|
errors: ["script and phonetics do not match for p and f"],
|
||||||
p: "آبدار",
|
p: "آبدار",
|
||||||
|
@ -150,37 +270,84 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"ts":1591033069786,"i":7717,"p":"ستړی کول","f":"stuRey kawul","g":"stuReykedul","e":"to get tired, fatigued","c":"v. stat. comp. intrans.","l":1527815306,"ec":"get","ep":"tired"},
|
input: {
|
||||||
|
ts: 1591033069786,
|
||||||
|
i: 7717,
|
||||||
|
p: "ستړی کول",
|
||||||
|
f: "stuRay kawul",
|
||||||
|
g: "stuRaykedul",
|
||||||
|
e: "to get tired, fatigued",
|
||||||
|
c: "v. stat. comp. intrans.",
|
||||||
|
l: 1527815306,
|
||||||
|
ec: "get",
|
||||||
|
ep: "tired",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["wrong ending for intrans. stat. comp"],
|
errors: ["wrong ending for intrans. stat. comp"],
|
||||||
p: "ستړی کول",
|
p: "ستړی کول",
|
||||||
f: "stuRey kawul",
|
f: "stuRay kawul",
|
||||||
e: "to get tired, fatigued",
|
e: "to get tired, fatigued",
|
||||||
ts: 1591033069786,
|
ts: 1591033069786,
|
||||||
erroneousFields: ["p", "f"],
|
erroneousFields: ["p", "f"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"ts":1591033078746,"i":7716,"p":"ستړی کېدل","f":"stuRey kedul","g":"stuReykawul","e":"to make tired, wear out","c":"v. stat. comp. trans.","l":1527815306,"ec":"make","ep":"tired"},
|
input: {
|
||||||
|
ts: 1591033078746,
|
||||||
|
i: 7716,
|
||||||
|
p: "ستړی کېدل",
|
||||||
|
f: "stuRay kedul",
|
||||||
|
g: "stuRaykawul",
|
||||||
|
e: "to make tired, wear out",
|
||||||
|
c: "v. stat. comp. trans.",
|
||||||
|
l: 1527815306,
|
||||||
|
ec: "make",
|
||||||
|
ep: "tired",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["wrong ending for trans. stat. comp"],
|
errors: ["wrong ending for trans. stat. comp"],
|
||||||
p: "ستړی کېدل",
|
p: "ستړی کېدل",
|
||||||
f: "stuRey kedul",
|
f: "stuRay kedul",
|
||||||
e: "to make tired, wear out",
|
e: "to make tired, wear out",
|
||||||
ts: 1591033078746,
|
ts: 1591033078746,
|
||||||
erroneousFields: ["p", "f"],
|
erroneousFields: ["p", "f"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
input: {
|
||||||
|
i: 12,
|
||||||
|
ts: 1575058859661,
|
||||||
|
p: "آبدار",
|
||||||
|
f: "aawdáar",
|
||||||
|
e: "watery, damp, humid, juicy",
|
||||||
|
c: "adj.",
|
||||||
|
diacExcept: true,
|
||||||
|
},
|
||||||
output: { ok: true },
|
output: { ok: true },
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
input: {
|
||||||
|
i: 12,
|
||||||
|
ts: 1575058859661,
|
||||||
|
p: "آبدار",
|
||||||
|
f: "aawdáar",
|
||||||
|
e: "watery, damp, humid, juicy",
|
||||||
|
c: "adj.",
|
||||||
|
diacExcept: true,
|
||||||
|
},
|
||||||
output: { ok: true },
|
output: { ok: true },
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
input: {
|
||||||
|
ts: 1527812488,
|
||||||
|
i: 1934,
|
||||||
|
p: "بې چاره",
|
||||||
|
f: "bechaara",
|
||||||
|
g: "bechaara",
|
||||||
|
e: "poor thing, pitiful",
|
||||||
|
r: 3,
|
||||||
|
c: "adj.",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["spacing discrepency between p and f"],
|
errors: ["spacing discrepency between p and f"],
|
||||||
p: "بې چاره",
|
p: "بې چاره",
|
||||||
|
@ -191,7 +358,16 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
input: {
|
||||||
|
ts: 1527812488,
|
||||||
|
i: 1934,
|
||||||
|
p: "بېچاره",
|
||||||
|
f: "be chaara",
|
||||||
|
g: "bechaara",
|
||||||
|
e: "poor thing, pitiful",
|
||||||
|
r: 3,
|
||||||
|
c: "adj.",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["spacing discrepency between p and f"],
|
errors: ["spacing discrepency between p and f"],
|
||||||
p: "بېچاره",
|
p: "بېچاره",
|
||||||
|
@ -202,11 +378,31 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
input: {
|
||||||
output: { ok: true }
|
ts: 1527812488,
|
||||||
|
i: 1934,
|
||||||
|
p: "بې چاره",
|
||||||
|
f: "be chaara",
|
||||||
|
g: "bechaara",
|
||||||
|
e: "poor thing, pitiful",
|
||||||
|
r: 3,
|
||||||
|
c: "adj.",
|
||||||
|
},
|
||||||
|
output: { ok: true },
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"},
|
input: {
|
||||||
|
ts: 1527814265,
|
||||||
|
i: 12969,
|
||||||
|
p: "مکتب",
|
||||||
|
f: "maktab",
|
||||||
|
g: "maktab",
|
||||||
|
e: "school",
|
||||||
|
r: 4,
|
||||||
|
c: "n. m.",
|
||||||
|
app: "مکاتب",
|
||||||
|
apf: "ma kaatib",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["spacing discrepency between app and apf"],
|
errors: ["spacing discrepency between app and apf"],
|
||||||
p: "مکتب",
|
p: "مکتب",
|
||||||
|
@ -217,9 +413,23 @@ const toTest: {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: {"ts":1527815870,"i":183,"p":"اثر","f":"asar","g":"asar","e":"influence, impression, tracks, affect","r":4,"c":"n. m.","app":"اثرات, آثار","apf":"asráat"},
|
input: {
|
||||||
|
ts: 1527815870,
|
||||||
|
i: 183,
|
||||||
|
p: "اثر",
|
||||||
|
f: "asar",
|
||||||
|
g: "asar",
|
||||||
|
e: "influence, impression, tracks, affect",
|
||||||
|
r: 4,
|
||||||
|
c: "n. m.",
|
||||||
|
app: "اثرات, آثار",
|
||||||
|
apf: "asráat",
|
||||||
|
},
|
||||||
output: {
|
output: {
|
||||||
errors: ["difference in variation length between app and apf", "script and phonetics do not match for app and apf"],
|
errors: [
|
||||||
|
"difference in variation length between app and apf",
|
||||||
|
"script and phonetics do not match for app and apf",
|
||||||
|
],
|
||||||
p: "اثر",
|
p: "اثر",
|
||||||
f: "asar",
|
f: "asar",
|
||||||
e: "influence, impression, tracks, affect",
|
e: "influence, impression, tracks, affect",
|
||||||
|
@ -236,6 +446,23 @@ test("validateEntry should work", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
test("standardizeEntry", () => {
|
test("standardizeEntry", () => {
|
||||||
expect(standardizeEntry({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa‘ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."}))
|
expect(
|
||||||
.toEqual({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa'ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."});
|
standardizeEntry({
|
||||||
|
i: 195,
|
||||||
|
ts: 1527822036,
|
||||||
|
p: "اجتماعي",
|
||||||
|
f: "ijtimaa‘ee, ijtimaayee",
|
||||||
|
g: "ijtimaaee,ijtimaayee",
|
||||||
|
e: "public, social, societal",
|
||||||
|
c: "adj.",
|
||||||
|
})
|
||||||
|
).toEqual({
|
||||||
|
i: 195,
|
||||||
|
ts: 1527822036,
|
||||||
|
p: "اجتماعي",
|
||||||
|
f: "ijtimaa'ee, ijtimaayee",
|
||||||
|
g: "ijtimaaee,ijtimaayee",
|
||||||
|
e: "public, social, societal",
|
||||||
|
c: "adj.",
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in New Issue