phonetics conversion done

This commit is contained in:
adueck 2023-07-27 12:28:50 +04:00
parent fc97db0dd3
commit c0cd34c3d6
11 changed files with 4890 additions and 4018 deletions

View File

@ -1,46 +1,93 @@
import * as T from "../../../types"; import * as T from "../../../types";
import classNames from "classnames"; import classNames from "classnames";
import { getEnglishFromRendered } from "../../../lib/src/phrase-building/np-tools";
import { import {
getEnglishFromRendered, getEnglishPersonInfo,
} from "../../../lib/src/phrase-building/np-tools"; getEnglishParticipleInflection,
import { getEnglishPersonInfo, getEnglishParticipleInflection, getEnglishGenNumInfo } from "../../../lib/src/misc-helpers"; getEnglishGenNumInfo,
} from "../../../lib/src/misc-helpers";
import { useState } from "react"; import { useState } from "react";
import { getLength } from "../../../lib/src/p-text-helpers"; import { getLength } from "../../../lib/src/p-text-helpers";
import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal"; import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal";
import { negativeParticle } from "../../../lib/src/grammar-units"; import { negativeParticle } from "../../../lib/src/grammar-units";
function Block({ opts, block, king, script }: { function Block({
opts: T.TextOptions, opts,
block: T.Block, block,
king?: "subject" | "object" | undefined, king,
script,
}: {
opts: T.TextOptions;
block: T.Block;
king?: "subject" | "object" | undefined;
script: "p" | "f"; script: "p" | "f";
}) { }) {
if ("equative" in block.block) { if ("equative" in block.block) {
return <EquativeBlock opts={opts} eq={block.block.equative} script={script} />; return (
<EquativeBlock opts={opts} eq={block.block.equative} script={script} />
);
} }
if (block.block.type === "AP") { if (block.block.type === "AP") {
const english = getEnglishFromRendered(block.block); const english = getEnglishFromRendered(block.block);
return <APBlock opts={opts} english={english} script={script}>{block.block}</APBlock> return (
<APBlock opts={opts} english={english} script={script}>
{block.block}
</APBlock>
);
} }
if (block.block.type === "subjectSelection") { if (block.block.type === "subjectSelection") {
const role = king === "subject" ? "king" : king === "object" ? "servant" : undefined; const role =
return <SubjectBlock opts={opts} np={block.block.selection} role={role} script={script} /> king === "subject" ? "king" : king === "object" ? "servant" : undefined;
return (
<SubjectBlock
opts={opts}
np={block.block.selection}
role={role}
script={script}
/>
);
} }
if (block.block.type === "objectSelection") { if (block.block.type === "objectSelection") {
const role = king === "object" ? "king" : king === "subject" ? "servant" : undefined; const role =
return <ObjectBlock opts={opts} obj={block.block.selection} role={role} script={script} />; king === "object" ? "king" : king === "subject" ? "servant" : undefined;
return (
<ObjectBlock
opts={opts}
obj={block.block.selection}
role={role}
script={script}
/>
);
} }
if (block.block.type === "predicateSelection") { if (block.block.type === "predicateSelection") {
const english = getEnglishFromRendered(block.block.selection); const english = getEnglishFromRendered(block.block.selection);
return <div className="text-center"> return (
<div><strong>Predicate</strong></div> <div className="text-center">
{block.block.selection.type === "complement" <div>
? <ComplementBlock opts={opts} comp={block.block.selection.selection} script={script} /> <strong>Predicate</strong>
: <NPBlock opts={opts} english={english} script={script}>{block.block.selection}</NPBlock>}
</div> </div>
{block.block.selection.type === "complement" ? (
<ComplementBlock
opts={opts}
comp={block.block.selection.selection}
script={script}
/>
) : (
<NPBlock opts={opts} english={english} script={script}>
{block.block.selection}
</NPBlock>
)}
</div>
);
} }
if (block.block.type === "negative") { if (block.block.type === "negative") {
return <NegBlock opts={opts} imperative={block.block.imperative} script={script} /> return (
<NegBlock
opts={opts}
imperative={block.block.imperative}
script={script}
/>
);
} }
if (block.block.type === "PH") { if (block.block.type === "PH") {
return <PerfHeadBlock opts={opts} ps={block.block.ps} script={script} />; return <PerfHeadBlock opts={opts} ps={block.block.ps} script={script} />;
@ -49,19 +96,36 @@ function Block({ opts, block, king, script }: {
return <VBBlock opts={opts} block={block.block} script={script} />; return <VBBlock opts={opts} block={block.block} script={script} />;
} }
if (block.block.type === "complement") { if (block.block.type === "complement") {
return <ComplementBlock opts={opts} comp={block.block.selection} script={script} /> return (
<ComplementBlock
opts={opts}
comp={block.block.selection}
script={script}
/>
);
} }
if (block.block.type === "NComp") { if (block.block.type === "NComp") {
return <NCompBlock opts={opts} comp={block.block.comp} script={script} /> return <NCompBlock opts={opts} comp={block.block.comp} script={script} />;
} }
return <WeldedBlock opts={opts} welded={block.block} script={script} /> return <WeldedBlock opts={opts} welded={block.block} script={script} />;
} }
export default Block; export default Block;
function Border({ children, extraClassName, padding }: { children: JSX.Element | JSX.Element[] | string, extraClassName?: string, padding?: string }) { function Border({
return <div children,
className={`block-border d-flex flex-row justify-content-center align-items-center ${extraClassName ? extraClassName : ""}`} extraClassName,
padding,
}: {
children: JSX.Element | JSX.Element[] | string;
extraClassName?: string;
padding?: string;
}) {
return (
<div
className={`block-border d-flex flex-row justify-content-center align-items-center ${
extraClassName ? extraClassName : ""
}`}
style={{ style={{
padding: padding ? padding : "1rem", padding: padding ? padding : "1rem",
textAlign: "center", textAlign: "center",
@ -70,65 +134,93 @@ function Border({ children, extraClassName, padding }: { children: JSX.Element |
> >
<>{children}</> <>{children}</>
</div> </div>
);
} }
function VBBlock({ opts, block, script }: { function VBBlock({
opts: T.TextOptions, opts,
script: "p" | "f", block,
block: T.VBBasic | T.VBGenNum | (T.VBBasic & { script,
}: {
opts: T.TextOptions;
script: "p" | "f";
block:
| T.VBBasic
| T.VBGenNum
| (T.VBBasic & {
person: T.Person; person: T.Person;
}), });
}) { }) {
const [length, setLength] = useState<T.Length>("long"); const [length, setLength] = useState<T.Length>("long");
const [version, setVersion] = useState<number>(0); const [version, setVersion] = useState<number>(0);
const ps = getLength(block.ps, length); const ps = getLength(block.ps, length);
function changeVersion() { function changeVersion() {
setVersion(o => (o + 1) % ps.length); setVersion((o) => (o + 1) % ps.length);
} }
function changeLength() { function changeLength() {
setLength(o => ( setLength((o) =>
o === "long" o === "long"
? "short" ? "short"
: o === "short" && "mini" in block.ps : o === "short" && "mini" in block.ps
? "mini" ? "mini"
: "long" : "long"
)); );
} }
const infInfo = "gender" in block const infInfo =
"gender" in block
? getEnglishGenNumInfo(block.gender, block.number) ? getEnglishGenNumInfo(block.gender, block.number)
: "person" in block : "person" in block
? getEnglishPersonInfo(block.person, "short") ? getEnglishPersonInfo(block.person, "short")
: ""; : "";
return <div className="text-center"> return (
<div className="text-center">
<div className="d-flex flex-row justify-content-around"> <div className="d-flex flex-row justify-content-around">
{"long" in block.ps && <div className="clickable small mb-1" onClick={changeLength}>{length}</div>} {"long" in block.ps && (
{ps.length > 1 && <div className="clickable small mb-1" onClick={changeVersion}>v. {version + 1}</div>} <div className="clickable small mb-1" onClick={changeLength}>
{length}
</div>
)}
{ps.length > 1 && (
<div className="clickable small mb-1" onClick={changeVersion}>
v. {version + 1}
</div>
)}
</div> </div>
<Border> <Border>
<> <>{ps[version][script]}</>
{ps[version][script]}
</>
</Border> </Border>
<div>VBlock</div> <div>VBlock</div>
<SubText>{infInfo}</SubText> <SubText>{infInfo}</SubText>
</div> </div>
);
} }
function WeldedBlock({ opts, welded, script }: { function WeldedBlock({
opts: T.TextOptions, opts,
script: "p" | "f", welded,
welded: T.Welded, script,
}: {
opts: T.TextOptions;
script: "p" | "f";
welded: T.Welded;
}) { }) {
return <div className="text-center"> return (
<Border padding="0.5rem" extraClassName={script === "p" ? "flex-row-reverse" : ""}> <div className="text-center">
{welded.left.type === "NComp" <Border
? <NCompBlock opts={opts} comp={welded.left.comp} script={script} /> padding="0.5rem"
: welded.left.type === "VB" extraClassName={script === "p" ? "flex-row-reverse" : ""}
? <VBBlock opts={opts} block={welded.left} script={script} /> >
: <WeldedBlock opts={opts} welded={welded.left} script={script} />} {welded.left.type === "NComp" ? (
<NCompBlock opts={opts} comp={welded.left.comp} script={script} />
) : welded.left.type === "VB" ? (
<VBBlock opts={opts} block={welded.left} script={script} />
) : (
<WeldedBlock opts={opts} welded={welded.left} script={script} />
)}
<VBBlock opts={opts} block={welded.right} script={script} /> <VBBlock opts={opts} block={welded.right} script={script} />
</Border> </Border>
</div> </div>
);
} }
// function VerbSBlock({ opts, v, script }: { // function VerbSBlock({ opts, v, script }: {
@ -192,19 +284,22 @@ function WeldedBlock({ opts, welded, script }: {
// </div> // </div>
// } // }
function PerfHeadBlock({ opts, ps, script }: { function PerfHeadBlock({
opts: T.TextOptions, opts,
ps: T.PsString, ps,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
ps: T.PsString;
script: "p" | "f";
}) { }) {
return <div className="text-center"> return (
<Border> <div className="text-center">
{ps[script]} <Border>{ps[script]}</Border>
</Border>
<div>perf. head</div> <div>perf. head</div>
<SubText>{'\u00A0'}</SubText> <SubText>{"\u00A0"}</SubText>
</div>; </div>
);
} }
// function ModalAuxBlock({ opts, aux, script }: { // function ModalAuxBlock({ opts, aux, script }: {
@ -222,249 +317,389 @@ function PerfHeadBlock({ opts, ps, script }: {
// </div>; // </div>;
// } // }
function NegBlock({ opts, imperative, script }: { function NegBlock({
opts: T.TextOptions, opts,
imperative: boolean, imperative,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
imperative: boolean;
script: "p" | "f";
}) { }) {
return <div className="text-center"> return (
<div className="text-center">
<Border> <Border>
{negativeParticle[imperative ? "imperative" : "nonImperative"][script]} {negativeParticle[imperative ? "imperative" : "nonImperative"][script]}
</Border> </Border>
<div>Neg.</div> <div>Neg.</div>
<SubText>{imperative ? "don't" : "not"}</SubText> <SubText>{imperative ? "don't" : "not"}</SubText>
</div>; </div>
);
} }
function EquativeBlock({ opts, eq, script }: { function EquativeBlock({
opts: T.TextOptions, opts,
eq: T.EquativeRendered, eq,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
eq: T.EquativeRendered;
script: "p" | "f";
}) { }) {
const [length, setLength] = useState<T.Length>("long"); const [length, setLength] = useState<T.Length>("long");
function changeLength() { function changeLength() {
setLength(o => ( setLength((o) =>
o === "long" o === "long"
? "short" ? "short"
: o === "short" && "mini" in eq.ps : o === "short" && "mini" in eq.ps
? "mini" ? "mini"
: "long" : "long"
)); );
} }
return <div className="text-center"> return (
{"long" in eq.ps && <div className="clickable small mb-1" onClick={changeLength}>{length}</div>} <div className="text-center">
<Border> {"long" in eq.ps && (
{getLength(eq.ps, length)[0][script]} <div className="clickable small mb-1" onClick={changeLength}>
</Border> {length}
</div>
)}
<Border>{getLength(eq.ps, length)[0][script]}</Border>
<div>Equative</div> <div>Equative</div>
<SubText>{getEnglishPersonInfo(eq.person, "short")}</SubText> <SubText>{getEnglishPersonInfo(eq.person, "short")}</SubText>
</div>; </div>
);
} }
function SubjectBlock({ opts, np, role, script }: { function SubjectBlock({
opts: T.TextOptions, opts,
np: T.Rendered<T.NPSelection>, np,
role: "king" | "servant" | undefined, role,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
np: T.Rendered<T.NPSelection>;
role: "king" | "servant" | undefined;
script: "p" | "f";
}) { }) {
const english = getEnglishFromRendered(np); const english = getEnglishFromRendered(np);
return <div className="text-center"> return (
<div><strong>Subject</strong>{role ? roleIcon[role] : ""}</div> <div className="text-center">
<NPBlock opts={opts} english={english} script={script}>{np}</NPBlock> <div>
</div>; <strong>Subject</strong>
{role ? roleIcon[role] : ""}
</div>
<NPBlock opts={opts} english={english} script={script}>
{np}
</NPBlock>
</div>
);
} }
function ObjectBlock({ opts, obj, role, script }: { function ObjectBlock({
opts: T.TextOptions, opts,
obj: T.Rendered<T.ObjectSelectionComplete>["selection"], obj,
role: "king" | "servant" | undefined, role,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
obj: T.Rendered<T.ObjectSelectionComplete>["selection"];
role: "king" | "servant" | undefined;
script: "p" | "f";
}) { }) {
if (typeof obj !== "object") { if (typeof obj !== "object") {
return null; return null;
} }
const english = getEnglishFromRendered(obj); const english = getEnglishFromRendered(obj);
return <div className="text-center"> return (
<div><strong>Object</strong>{role ? roleIcon[role] : ""}</div> <div className="text-center">
<NPBlock opts={opts} english={english} script={script}>{obj}</NPBlock> <div>
</div>; <strong>Object</strong>
{role ? roleIcon[role] : ""}
</div>
<NPBlock opts={opts} english={english} script={script}>
{obj}
</NPBlock>
</div>
);
} }
function NCompBlock({ opts, comp, script }: { function NCompBlock({
script: "p" | "f", opts,
opts: T.TextOptions, comp,
comp: T.Comp, script,
}: {
script: "p" | "f";
opts: T.TextOptions;
comp: T.Comp;
}) { }) {
return <div className="text-center"> return (
<Border> <div className="text-center">
{comp.ps[script]} <Border>{comp.ps[script]}</Border>
</Border> {comp.type === "AdjComp" && (
{comp.type === "AdjComp" <div>
? <div>adj. <span className="text-muted small">{getEnglishGenNumInfo(comp.gender, comp.number)}</span></div> <div>
: <div>TODO</div>} adj.{" "}
<SubText> <span className="text-muted small">
todo {getEnglishGenNumInfo(comp.gender, comp.number)}
{/* {adj.e} */} </span>
</SubText> </div>
</div>; <SubText>{comp.ps.e}</SubText>
</div>
)}
</div>
);
} }
function ComplementBlock({ opts, comp, script, inside }: { function ComplementBlock({
script: "p" | "f", opts,
opts: T.TextOptions, comp,
comp: T.Rendered<T.ComplementSelection["selection"]> | T.Rendered<T.UnselectedComplementSelection>["selection"], script,
inside?: boolean, inside,
}: {
script: "p" | "f";
opts: T.TextOptions;
comp:
| T.Rendered<T.ComplementSelection["selection"]>
| T.Rendered<T.UnselectedComplementSelection>["selection"];
inside?: boolean;
}) { }) {
function AdjectiveBlock({ opts, adj }: { function AdjectiveBlock({
opts: T.TextOptions, opts,
adj: T.Rendered<T.AdjectiveSelection>, adj,
}: {
opts: T.TextOptions;
adj: T.Rendered<T.AdjectiveSelection>;
}) { }) {
return <div className="text-center"> return (
<Border> <div className="text-center">
{adj.ps[0][script]} <Border>{adj.ps[0][script]}</Border>
</Border> <div>
<div>Adj. <span className="text-muted small">({getEnglishParticipleInflection(adj.person, "short")})</span></div> Adj.{" "}
<span className="text-muted small">
({getEnglishParticipleInflection(adj.person, "short")})
</span>
</div>
<SubText>{adj.e}</SubText> <SubText>{adj.e}</SubText>
</div>; </div>
);
} }
function LocAdvBlock({ opts, adv }: { function LocAdvBlock({
opts: T.TextOptions, opts,
adv: T.Rendered<T.LocativeAdverbSelection>, adv,
}: {
opts: T.TextOptions;
adv: T.Rendered<T.LocativeAdverbSelection>;
}) { }) {
return <div className="text-center"> return (
<Border> <div className="text-center">
{adv.ps[0][script]} <Border>{adv.ps[0][script]}</Border>
</Border>
<div>Loc. Adv.</div> <div>Loc. Adv.</div>
<SubText>{adv.e}</SubText> <SubText>{adv.e}</SubText>
</div>; </div>
);
} }
return <div className="text-center"> return (
<div className="text-center">
<div>Complement</div> <div>Complement</div>
{comp.type === "adjective" {comp.type === "adjective" ? (
? <AdjectiveBlock opts={opts} adj={comp} /> <AdjectiveBlock opts={opts} adj={comp} />
: comp.type === "loc. adv." ) : comp.type === "loc. adv." ? (
? <LocAdvBlock opts={opts} adv={comp} /> <LocAdvBlock opts={opts} adv={comp} />
: comp.type === "noun" ) : comp.type === "noun" ? (
? <CompNounBlock opts={opts} noun={comp} script={script} /> <CompNounBlock opts={opts} noun={comp} script={script} />
: comp.type === "unselected" ) : comp.type === "unselected" ? (
? <div> <div>
<Border> <Border>____</Border>
____ {!inside && (
</Border> <>
{!inside && <>
<div>&nbsp;</div> <div>&nbsp;</div>
<SubText>{comp.e}</SubText> <SubText>{comp.e}</SubText>
</>} </>
)}
</div> </div>
: <div> ) : (
<div>
<Sandwich opts={opts} sandwich={comp} script={script} /> <Sandwich opts={opts} sandwich={comp} script={script} />
<div>Sandwich</div> <div>Sandwich</div>
<SubText>{comp.e}</SubText> <SubText>{comp.e}</SubText>
</div>} </div>
</div>; )}
</div>
);
} }
export function APBlock({ opts, children, english, script }: { export function APBlock({
opts: T.TextOptions, opts,
children: T.Rendered<T.APSelection>, children,
english?: string, english,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
children: T.Rendered<T.APSelection>;
english?: string;
script: "p" | "f";
}) { }) {
const ap = children; const ap = children;
if (ap.selection.type === "adverb") { if (ap.selection.type === "adverb") {
return <div className="text-center"> return (
<Border> <div className="text-center">
{ap.selection.ps[0][script]} <Border>{ap.selection.ps[0][script]}</Border>
</Border>
<div>AP</div> <div>AP</div>
<SubText>{english}</SubText> <SubText>{english}</SubText>
</div>; </div>
);
} }
return <div> return (
<div>
<Sandwich opts={opts} sandwich={ap.selection} script={script} /> <Sandwich opts={opts} sandwich={ap.selection} script={script} />
<div>AP</div> <div>AP</div>
<SubText>{english}</SubText> <SubText>{english}</SubText>
</div>; </div>
);
} }
function Sandwich({ opts, sandwich, script }: { function Sandwich({
opts: T.TextOptions, opts,
sandwich: T.Rendered<T.SandwichSelection<T.Sandwich>>, sandwich,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
sandwich: T.Rendered<T.SandwichSelection<T.Sandwich>>;
script: "p" | "f";
}) { }) {
return <div className="text-center"> return (
<div className="text-center">
<div className="text-center">Sandwich 🥪</div> <div className="text-center">Sandwich 🥪</div>
<Border padding="0.75rem 0.5rem 0.25rem 0.5rem"> <Border padding="0.75rem 0.5rem 0.25rem 0.5rem">
<div className={`d-flex flex-row${script === "p" ? "-reverse" : ""} justify-content-between align-items-end`}> <div
<Possesors opts={opts} script={script}>{sandwich.inside.selection.type !== "pronoun" ? sandwich.inside.selection.possesor : undefined}</Possesors> className={`d-flex flex-row${
<div className="mr-2 ml-1 mb-1"><strong>{sandwich.before ? sandwich.before.f : ""}</strong></div> script === "p" ? "-reverse" : ""
<div> } justify-content-between align-items-end`}
<NPBlock opts={opts} inside script={script}>{sandwich.inside}</NPBlock> >
<Possesors opts={opts} script={script}>
{sandwich.inside.selection.type !== "pronoun"
? sandwich.inside.selection.possesor
: undefined}
</Possesors>
<div className="mr-2 ml-1 mb-1">
<strong>{sandwich.before ? sandwich.before.f : ""}</strong>
</div>
<div>
<NPBlock opts={opts} inside script={script}>
{sandwich.inside}
</NPBlock>
</div>
<div className="ml-2 mr-1 mb-1">
<strong>{sandwich.after ? sandwich.after.f : ""}</strong>
</div> </div>
<div className="ml-2 mr-1 mb-1"><strong>{sandwich.after ? sandwich.after.f : ""}</strong></div>
</div> </div>
</Border> </Border>
</div>; </div>
);
} }
function CompNounBlock({ opts, noun, script }: { function CompNounBlock({
opts: T.TextOptions, opts,
noun: T.Rendered<T.NounSelection>, noun,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
noun: T.Rendered<T.NounSelection>;
script: "p" | "f";
}) { }) {
return <div className="text-center"> return (
<div className="text-center">
<Border <Border
extraClassName={`!inside && hasPossesor ? "pt-2" : ""`} extraClassName={`!inside && hasPossesor ? "pt-2" : ""`}
padding={"1rem"} padding={"1rem"}
> >
{noun.ps[0][script]} {noun.ps[0][script]}
</Border> </Border>
<div> <div>Comp. Noun</div>
Comp. Noun
</div>
<SubText>{noun.e}</SubText> <SubText>{noun.e}</SubText>
</div> </div>
);
} }
export function NPBlock({ opts, children, inside, english, script }: { export function NPBlock({
opts: T.TextOptions, opts,
children: T.Rendered<T.NPSelection>, children,
inside?: boolean, inside,
english?: string, english,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
children: T.Rendered<T.NPSelection>;
inside?: boolean;
english?: string;
script: "p" | "f";
}) { }) {
const np = children; const np = children;
const hasPossesor = !!(np.selection.type !== "pronoun" && np.selection.possesor && !np.selection.possesor.shrunken); const hasPossesor = !!(
np.selection.type !== "pronoun" &&
np.selection.possesor &&
!np.selection.possesor.shrunken
);
const elements = [ const elements = [
...!inside ? [<Possesors opts={opts} script={script}>{np.selection.type !== "pronoun" ? np.selection.possesor : undefined}</Possesors>] : [], ...(!inside
<Adjectives opts={opts} script={script}>{np.selection.adjectives}</Adjectives>, ? [
<div className={np.selection.adjectives?.length ? "mx-1" : ""}> {np.selection.ps[0][script]}</div>, <Possesors opts={opts} script={script}>
{np.selection.type !== "pronoun"
? np.selection.possesor
: undefined}
</Possesors>,
]
: []),
<Adjectives opts={opts} script={script}>
{np.selection.adjectives}
</Adjectives>,
<div className={np.selection.adjectives?.length ? "mx-1" : ""}>
{" "}
{np.selection.ps[0][script]}
</div>,
]; ];
const el = script === "p" ? elements.reverse() : elements; const el = script === "p" ? elements.reverse() : elements;
return <div className="text-center"> return (
<div className="text-center">
<Border <Border
extraClassName={`!inside && hasPossesor ? "pt-2" : ""`} extraClassName={`!inside && hasPossesor ? "pt-2" : ""`}
padding={inside ? "0.3rem" : hasPossesor ? "0.5rem 0.8rem 0.25rem 0.8rem" : "1rem"} padding={
inside
? "0.3rem"
: hasPossesor
? "0.5rem 0.8rem 0.25rem 0.8rem"
: "1rem"
}
> >
{el} {el}
</Border> </Border>
<div className={inside ? "small" : ""}> <div className={inside ? "small" : ""}>
NP NP
{!inside ? <> {!inside ? (
<>
{` `} {` `}
<span className="text-muted small">({getEnglishPersonInfo(np.selection.person, "short")})</span> <span className="text-muted small">
</> : <></>} ({getEnglishPersonInfo(np.selection.person, "short")})
</span>
</>
) : (
<></>
)}
</div> </div>
{!inside && <SubText>{english}</SubText>} {!inside && <SubText>{english}</SubText>}
</div> </div>
);
} }
function Possesors({ opts, children, script }: { function Possesors({
opts: T.TextOptions, opts,
children: { shrunken: boolean, np: T.Rendered<T.NPSelection> } | undefined, children,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
children: { shrunken: boolean; np: T.Rendered<T.NPSelection> } | undefined;
script: "p" | "f";
}) { }) {
if (!children) { if (!children) {
return null; return null;
@ -473,62 +708,107 @@ function Possesors({ opts, children, script }: {
return null; return null;
} }
const contraction = checkForContraction(children.np, script); const contraction = checkForContraction(children.np, script);
return <div className={`d-flex flex-row${script === "p" ? "-reverse" : ""} mr-1 align-items-end`} style={{ return (
<div
className={`d-flex flex-row${
script === "p" ? "-reverse" : ""
} mr-1 align-items-end`}
style={{
marginBottom: "0.5rem", marginBottom: "0.5rem",
borderBottom: "1px solid grey", borderBottom: "1px solid grey",
}}> }}
{children.np.selection.type !== "pronoun" && <Possesors opts={opts} script={script}>{children.np.selection.possesor}</Possesors>} >
{children.np.selection.type !== "pronoun" && (
<Possesors opts={opts} script={script}>
{children.np.selection.possesor}
</Possesors>
)}
<div> <div>
{contraction && <div className="mb-1">({contraction})</div>} {contraction && <div className="mb-1">({contraction})</div>}
<div className={classNames("d-flex", (script === "f" ? "flex-row" : "flex-row-reverse"), "align-items-center", { "text-muted": contraction })}> <div
className={classNames(
"d-flex",
script === "f" ? "flex-row" : "flex-row-reverse",
"align-items-center",
{ "text-muted": contraction }
)}
>
<div className="mx-1 pb-2">{script === "p" ? "د" : "du"}</div> <div className="mx-1 pb-2">{script === "p" ? "د" : "du"}</div>
<div> <div>
<NPBlock script={script} opts={opts} inside>{children.np}</NPBlock> <NPBlock script={script} opts={opts} inside>
{children.np}
</NPBlock>
</div> </div>
</div> </div>
</div> </div>
</div> </div>
);
} }
function Adjectives({ opts, children, script }: { function Adjectives({
opts: T.TextOptions, opts,
children: T.Rendered<T.AdjectiveSelection>[] | undefined, children,
script: "p" | "f", script,
}: {
opts: T.TextOptions;
children: T.Rendered<T.AdjectiveSelection>[] | undefined;
script: "p" | "f";
}) { }) {
if (!children) { if (!children) {
return null; return null;
} }
const c = script === "p" const c = script === "p" ? children.reverse() : children;
? children.reverse() return (
: children; <em className="mr-1">
return <em className="mr-1"> {c.map((a) => a.ps[0][script]).join(" ")}
{c.map(a => a.ps[0][script]).join(" ")}{` `} {` `}
</em> </em>
);
} }
function SubText({ children: e }: { children: string | undefined }) { function SubText({ children: e }: { children: string | undefined }) {
return <div className="small text-muted text-center" style={{ return (
<div
className="small text-muted text-center"
style={{
margin: "0 auto", margin: "0 auto",
maxWidth: "300px", maxWidth: "300px",
height: "1rem", height: "1rem",
}}>{e ? e : ""}</div>; }}
>
{e ? e : ""}
</div>
);
} }
function checkForContraction(np: T.Rendered<T.NPSelection>, script: "p" | "f"): string | undefined { function checkForContraction(
np: T.Rendered<T.NPSelection>,
script: "p" | "f"
): string | undefined {
if (np.selection.type !== "pronoun") return undefined; if (np.selection.type !== "pronoun") return undefined;
if (np.selection.person === T.Person.FirstSingMale || np.selection.person === T.Person.FirstSingFemale) { if (
np.selection.person === T.Person.FirstSingMale ||
np.selection.person === T.Person.FirstSingFemale
) {
return script === "f" ? "zmaa" : "زما"; return script === "f" ? "zmaa" : "زما";
} }
if (np.selection.person === T.Person.SecondSingMale || np.selection.person === T.Person.SecondSingFemale) { if (
np.selection.person === T.Person.SecondSingMale ||
np.selection.person === T.Person.SecondSingFemale
) {
return script === "f" ? "staa" : "ستا"; return script === "f" ? "staa" : "ستا";
} }
if (np.selection.person === T.Person.FirstPlurMale || np.selection.person === T.Person.FirstPlurFemale) { if (
np.selection.person === T.Person.FirstPlurMale ||
np.selection.person === T.Person.FirstPlurFemale
) {
return script === "f" ? "zmoonG" : "زمونږ"; return script === "f" ? "zmoonG" : "زمونږ";
} }
if (np.selection.person === T.Person.SecondPlurMale || np.selection.person === T.Person.SecondPlurFemale) { if (
np.selection.person === T.Person.SecondPlurMale ||
np.selection.person === T.Person.SecondPlurFemale
) {
return script === "f" ? "staaso" : "ستاسو"; return script === "f" ? "staaso" : "ستاسو";
} }
return undefined; return undefined;
} }

View File

@ -3,24 +3,20 @@ import {
last, last,
addP, addP,
lastNonWhitespace, lastNonWhitespace,
advanceP,
reverseP, reverseP,
overwriteP,
advanceForHamza,
advanceForHamzaMid,
} from "./diacritics-helpers"; } from "./diacritics-helpers";
const phonemeSplits: Array<{ const phonemeSplits: Array<{
in: string, in: string;
out: string[], out: string[];
}> = [ }> = [
{ {
in: "kor", in: "kor",
out: ["k", "o", "r"], out: ["k", "o", "r"],
}, },
{ {
in: "raaghey", in: "raaghay",
out: ["r", "aa", "gh", "ey"], out: ["r", "aa", "gh", "ay"],
}, },
{ {
in: "ist'imaal", in: "ist'imaal",
@ -35,16 +31,16 @@ const phonemeSplits: Array<{
out: ["b", "a"], out: ["b", "a"],
}, },
{ {
in: "peydáa", in: "paydáa",
out: ["p", "ey", "d", "aa"], out: ["p", "ay", "d", "aa"],
}, },
{ {
in: "be kaar", in: "be kaar",
out: ["b", "e", "k", "aa", "r"], out: ["b", "e", "k", "aa", "r"],
}, },
{ {
in: "raadzeyy", in: "raadzey",
out: ["r", "aa", "dz", "eyy"], out: ["r", "aa", "dz", "ey"],
}, },
{ {
in: "badanuy ??", in: "badanuy ??",
@ -68,8 +64,8 @@ phonemeSplits.forEach((s) => {
}); });
const badPhonetics: Array<{ const badPhonetics: Array<{
in: string, in: string;
problem: string, problem: string;
}> = [ }> = [
{ {
in: "acar", in: "acar",
@ -107,25 +103,31 @@ test("lastNonWhiteSpace should work", () => {
}); });
test("reverseP should work", () => { test("reverseP should work", () => {
expect(reverseP({ expect(
reverseP({
pIn: "کور", pIn: "کور",
pOut: "تور ", pOut: "تور ",
})).toEqual({ })
).toEqual({
pIn: " کور", pIn: " کور",
pOut: "تور", pOut: "تور",
}); });
expect(reverseP({ expect(
reverseP({
pIn: "کور", pIn: "کور",
pOut: "تور ... ", pOut: "تور ... ",
})).toEqual({ })
).toEqual({
pIn: " ... کور", pIn: " ... کور",
pOut: "تور", pOut: "تور",
}); });
expect(reverseP({ expect(
reverseP({
pIn: "کور", pIn: "کور",
pOut: "تور . ", pOut: "تور . ",
})).toEqual({ })
).toEqual({
pIn: " . کور", pIn: " . کور",
pOut: "تور", pOut: "تور",
}); });
}) });

View File

@ -8,31 +8,62 @@
import { removeAccents } from "./accent-helpers"; import { removeAccents } from "./accent-helpers";
export type DiacriticsAccumulator = { pIn: string, pOut: string }; export type DiacriticsAccumulator = { pIn: string; pOut: string };
type Consonant = "b" | "p" | "t" | "T" | "s" | "j" | "ch" | "kh" | "ts" | "dz" | "d" | "D" | "r" | "R" | "z" | "jz" | "G" | "sh" | "x" | "gh" | "f" | "q" | "k" | "g" | "l" | "m" | "n" | "N" | "h" | "w" | "y"; type Consonant =
type Ain = "'" | "b"
| "p"
| "t"
| "T"
| "s"
| "j"
| "ch"
| "kh"
| "ts"
| "dz"
| "d"
| "D"
| "r"
| "R"
| "z"
| "jz"
| "G"
| "sh"
| "x"
| "gh"
| "f"
| "q"
| "k"
| "g"
| "l"
| "m"
| "n"
| "N"
| "h"
| "w"
| "y";
type Ain = "'";
type JoiningVowel = "-i-" | "-U-" | "-Ul-"; type JoiningVowel = "-i-" | "-U-" | "-Ul-";
type LongVowel = "aa" | "ee" | "e" | "oo" | "o" | "ey" | "uy" | "eyy"; type LongVowel = "aa" | "ee" | "e" | "oo" | "o" | "ay" | "uy" | "ey";
type ShortVowel = "a" | "i" | "u" | "U"; type ShortVowel = "a" | "i" | "u" | "U";
export type Phoneme = Consonant | Ain | LongVowel | ShortVowel | JoiningVowel; export type Phoneme = Consonant | Ain | LongVowel | ShortVowel | JoiningVowel;
type PhonemeInfo = { type PhonemeInfo = {
matches?: string[], matches?: string[];
beginningMatches?: string[], beginningMatches?: string[];
endingMatches?: string[], endingMatches?: string[];
consonant?: true, consonant?: true;
diacritic?: string, diacritic?: string;
endingOnly?: true, endingOnly?: true;
takesSukunOnEnding?: true, takesSukunOnEnding?: true;
longVowel?: true, longVowel?: true;
canStartWithAynBefore?: true, canStartWithAynBefore?: true;
useEndingDiacritic?: true, useEndingDiacritic?: true;
ainBlendDiacritic?: string, ainBlendDiacritic?: string;
} };
export const zwar = "َ"; export const zwar = "َ";
export const zwarakey = "ٙ"; export const zwarakay = "ٙ";
export const zer = "ِ"; export const zer = "ِ";
export const pesh = "ُ"; export const pesh = "ُ";
export const sukun = "ْ"; export const sukun = "ْ";
@ -44,128 +75,128 @@ export const fathahan = "ً";
export const phonemeTable: Record<Phoneme, PhonemeInfo> = { export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
// Consonants // Consonants
"b": { b: {
matches: ["ب"], matches: ["ب"],
consonant: true, consonant: true,
}, },
"p": { p: {
matches: ["پ"], matches: ["پ"],
consonant: true, consonant: true,
}, },
"t": { t: {
matches: ["ت", "ط"], matches: ["ت", "ط"],
consonant: true, consonant: true,
}, },
"T": { T: {
matches: ["ټ"], matches: ["ټ"],
consonant: true, consonant: true,
}, },
"s": { s: {
matches: ["س", "ص", "ث"], matches: ["س", "ص", "ث"],
consonant: true, consonant: true,
}, },
"j": { j: {
matches: ["ج"], matches: ["ج"],
consonant: true, consonant: true,
}, },
"ch": { ch: {
matches: ["چ"], matches: ["چ"],
consonant: true, consonant: true,
}, },
"kh": { kh: {
matches: ["خ"], matches: ["خ"],
consonant: true, consonant: true,
}, },
"ts": { ts: {
matches: ["څ"], matches: ["څ"],
consonant: true, consonant: true,
}, },
"dz": { dz: {
matches: ["ځ"], matches: ["ځ"],
consonant: true, consonant: true,
}, },
"d": { d: {
matches: ["د"], matches: ["د"],
consonant: true, consonant: true,
}, },
"D": { D: {
matches: ["ډ"], matches: ["ډ"],
consonant: true, consonant: true,
}, },
"r": { r: {
matches: ["ر"], matches: ["ر"],
consonant: true, consonant: true,
}, },
"R": { R: {
matches: ["ړ"], matches: ["ړ"],
consonant: true, consonant: true,
}, },
"z": { z: {
matches: ["ز", "ذ", "ظ", "ض"], matches: ["ز", "ذ", "ظ", "ض"],
consonant: true, consonant: true,
}, },
"jz": { jz: {
matches: ["ژ"], matches: ["ژ"],
consonant: true, consonant: true,
}, },
"G": { G: {
matches: ["ږ"], matches: ["ږ"],
consonant: true, consonant: true,
}, },
"sh": { sh: {
matches: ["ش"], matches: ["ش"],
consonant: true, consonant: true,
}, },
"x": { x: {
matches: ["ښ"], matches: ["ښ"],
consonant: true, consonant: true,
}, },
"gh": { gh: {
matches: ["غ"], matches: ["غ"],
consonant: true, consonant: true,
}, },
"f": { f: {
matches: ["ف"], matches: ["ف"],
consonant: true, consonant: true,
}, },
"q": { q: {
matches: ["ق"], matches: ["ق"],
consonant: true, consonant: true,
}, },
"k": { k: {
matches: ["ک"], matches: ["ک"],
consonant: true, consonant: true,
}, },
"g": { g: {
matches: ["ګ"], matches: ["ګ"],
consonant: true, consonant: true,
}, },
"l": { l: {
matches: ["ل"], matches: ["ل"],
consonant: true, consonant: true,
}, },
"m": { m: {
matches: ["م"], matches: ["م"],
consonant: true, consonant: true,
}, },
"n": { n: {
matches: ["ن"], matches: ["ن"],
consonant: true, consonant: true,
}, },
"N": { N: {
matches: ["ڼ"], matches: ["ڼ"],
consonant: true, consonant: true,
}, },
"h": { h: {
matches: ["ه", "ح"], matches: ["ه", "ح"],
consonant: true, consonant: true,
takesSukunOnEnding: true, takesSukunOnEnding: true,
}, },
"w": { w: {
matches: ["و"], matches: ["و"],
consonant: true, consonant: true,
}, },
"y": { y: {
matches: ["ی"], matches: ["ی"],
consonant: true, consonant: true,
}, },
@ -175,8 +206,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
consonant: true, consonant: true,
}, },
// Joining Vowels // Joining Vowels
"-i-": { "-i-": {},
},
"-U-": { "-U-": {
matches: [" و ", "و"], matches: [" و ", "و"],
}, },
@ -184,14 +214,14 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
matches: ["ال"], matches: ["ال"],
}, },
// Long Vowels // Long Vowels
"aa": { aa: {
matches: ["ا", "أ"], matches: ["ا", "أ"],
beginningMatches: ["آ", "ا"], beginningMatches: ["آ", "ا"],
endingMatches: ["ا", "یٰ"], endingMatches: ["ا", "یٰ"],
longVowel: true, longVowel: true,
ainBlendDiacritic: zwar, ainBlendDiacritic: zwar,
}, },
"ee": { ee: {
matches: ["ی"], matches: ["ی"],
longVowel: true, longVowel: true,
endingMatches: ["ي"], endingMatches: ["ي"],
@ -199,61 +229,61 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
canStartWithAynBefore: true, canStartWithAynBefore: true,
ainBlendDiacritic: zer, ainBlendDiacritic: zer,
}, },
"e": { e: {
matches: ["ې"], matches: ["ې"],
longVowel: true, longVowel: true,
}, },
"o": { o: {
matches: ["و"], matches: ["و"],
longVowel: true, longVowel: true,
}, },
"oo": { oo: {
matches: ["و"], matches: ["و"],
longVowel: true, longVowel: true,
diacritic: pesh, diacritic: pesh,
useEndingDiacritic: true, useEndingDiacritic: true,
ainBlendDiacritic: pesh, ainBlendDiacritic: pesh,
}, },
"ey": { ay: {
matches: ["ی"], matches: ["ی"],
longVowel: true, longVowel: true,
endingMatches: ["ی"], endingMatches: ["ی"],
}, },
"uy": { uy: {
matches: ["ۍ"], matches: ["ۍ"],
longVowel: true, longVowel: true,
endingOnly: true, endingOnly: true,
}, },
"eyy": { ey: {
matches: ["ئ"], matches: ["ئ"],
longVowel: true, longVowel: true,
endingOnly: true, endingOnly: true,
}, },
// Short Vowels // Short Vowels
"a": { a: {
diacritic: zwar, diacritic: zwar,
endingMatches: ["ه"], endingMatches: ["ه"],
beginningMatches: ["ا", "ع"], beginningMatches: ["ا", "ع"],
// canComeAfterHeyEnding: true, // canComeAfterHayEnding: true,
}, },
"u": { u: {
diacritic: zwarakey, diacritic: zwarakay,
endingMatches: ["ه"], endingMatches: ["ه"],
}, },
"i": { i: {
diacritic: zer, diacritic: zer,
endingMatches: ["ه"], endingMatches: ["ه"],
beginningMatches: ["ا", "ع"], beginningMatches: ["ا", "ع"],
// takesDiacriticBeforeGurdaHeyEnding: true, // takesDiacriticBeforeGurdaHayEnding: true,
// canBeWasla: true, // canBeWasla: true,
}, },
"U": { U: {
diacritic: pesh, diacritic: pesh,
endingMatches: ["ه"], endingMatches: ["ه"],
// takesDiacriticBeforeGurdaHeyEnding: true, // takesDiacriticBeforeGurdaHayEnding: true,
beginningMatches: ["ا", "ع"], beginningMatches: ["ا", "ع"],
}, },
} };
/** /**
* splits a phonetics string into an array of Phonemes * splits a phonetics string into an array of Phonemes
@ -264,11 +294,55 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
* @returns an array of phonemes * @returns an array of phonemes
*/ */
export function splitFIntoPhonemes(fIn: string): Phoneme[] { export function splitFIntoPhonemes(fIn: string): Phoneme[] {
const singleLetterPhonemes: Phoneme[] = ["a", "i", "u", "o", "e", "U", "b", "p", "t", "T", "s", "j", "d", "D", "r", "R", "z", "G", "x", "f", "q", "k", "g", "l", "m", "n", "N", "h", "w", "y", "'"]; const singleLetterPhonemes: Phoneme[] = [
"a",
"i",
"u",
"o",
"e",
"U",
"b",
"p",
"t",
"T",
"s",
"j",
"d",
"D",
"r",
"R",
"z",
"G",
"x",
"f",
"q",
"k",
"g",
"l",
"m",
"n",
"N",
"h",
"w",
"y",
"'",
];
const quadrigraphs: Phoneme[] = ["-Ul-"]; const quadrigraphs: Phoneme[] = ["-Ul-"];
const trigraphs: Phoneme[] = ["eyy", "-i-", "-U-"]; const trigraphs: Phoneme[] = ["ey", "-i-", "-U-"];
const digraphs: Phoneme[] = ["aa", "ee", "ey", "oo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"]; const digraphs: Phoneme[] = [
"aa",
"ee",
"ay",
"oo",
"kh",
"gh",
"ts",
"dz",
"jz",
"ch",
"sh",
];
const endingDigraphs: Phoneme[] = ["uy"]; const endingDigraphs: Phoneme[] = ["uy"];
const willIgnore = ["?", " ", "`", ".", "…", ",", "-"]; const willIgnore = ["?", " ", "`", ".", "…", ",", "-"];
@ -276,7 +350,7 @@ export const phonemeTable: Record<Phoneme, PhonemeInfo> = {
const f = removeAccents(fIn).replace(/ă/g, "a"); const f = removeAccents(fIn).replace(/ă/g, "a");
let index = 0; let index = 0;
while (index < f.length) { while (index < f.length) {
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " "); const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
const threeLetterChunk = f.slice(index, index + 3) as Phoneme; const threeLetterChunk = f.slice(index, index + 3) as Phoneme;
const fourLetterChunk = f.slice(index, index + 4) as Phoneme; const fourLetterChunk = f.slice(index, index + 4) as Phoneme;
if (quadrigraphs.includes(fourLetterChunk)) { if (quadrigraphs.includes(fourLetterChunk)) {
@ -313,10 +387,10 @@ export enum PhonemeStatus {
LeadingLongVowel, LeadingLongVowel,
LeadingConsonantOrShortVowel, LeadingConsonantOrShortVowel,
DoubleConsonantTashdeed, DoubleConsonantTashdeed,
EndingWithHeyHim, EndingWithHayHim,
DirectMatch, DirectMatch,
DirectMatchAfterSukun, DirectMatchAfterSukun,
EndingWithHeyHimFromSukun, EndingWithHayHimFromSukun,
ShortVowel, ShortVowel,
PersianSilentWWithAa, PersianSilentWWithAa,
ArabicWasla, ArabicWasla,
@ -344,11 +418,16 @@ export enum PhonemeStatus {
EndingSmallH, EndingSmallH,
} }
export function stateInfo({ state, i, phonemes, phoneme }: { export function stateInfo({
state: DiacriticsAccumulator, state,
i: number, i,
phonemes: Phoneme[], phonemes,
phoneme: Phoneme, phoneme,
}: {
state: DiacriticsAccumulator;
i: number;
phonemes: Phoneme[];
phoneme: Phoneme;
}) { }) {
const isOutOfWord = (char: string) => !char || char === " "; const isOutOfWord = (char: string) => !char || char === " ";
const prevPLetter = last(state.pOut); const prevPLetter = last(state.pOut);
@ -356,43 +435,86 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
const nextPLetter = state.pIn[1]; const nextPLetter = state.pIn[1];
const nextPhoneme = phonemes[i + 1]; const nextPhoneme = phonemes[i + 1];
const previousPhoneme = i > 0 && phonemes[i - 1]; const previousPhoneme = i > 0 && phonemes[i - 1];
const lastThreePLetters = last(state.pOut, 3) + last(state.pOut, 2) + prevPLetter; const lastThreePLetters =
const isBeginningOfWord = (state.pOut === "" || prevPLetter === " ") || (previousPhoneme === "-Ul-" && prevPLetter === "ل") || (["دَر", "وَر"].includes(lastThreePLetters) || (last(state.pOut, 2) + prevPLetter) === "را"); last(state.pOut, 3) + last(state.pOut, 2) + prevPLetter;
const isBeginningOfWord =
state.pOut === "" ||
prevPLetter === " " ||
(previousPhoneme === "-Ul-" && prevPLetter === "ل") ||
["دَر", "وَر"].includes(lastThreePLetters) ||
last(state.pOut, 2) + prevPLetter === "را";
const isEndOfWord = isOutOfWord(nextPLetter); const isEndOfWord = isOutOfWord(nextPLetter);
const phonemeInfo = phonemeTable[phoneme]; const phonemeInfo = phonemeTable[phoneme];
const previousPhonemeInfo = (!isBeginningOfWord && i > 0) && phonemeTable[phonemes[i-1]]; const previousPhonemeInfo =
!isBeginningOfWord && i > 0 && phonemeTable[phonemes[i - 1]];
// const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1]; // const nextPhoneme = (phonemes.length > (i + 1)) && phonemes[i+1];
// const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined; // const nextPhonemeInfo = nextPhoneme ? phonemeTable[nextPhoneme] : undefined;
const doubleConsonant = previousPhonemeInfo && (phonemeInfo.consonant && previousPhonemeInfo.consonant); const doubleConsonant =
const needsSukun = (doubleConsonant && ((previousPhoneme !== phoneme) || phonemeInfo.matches?.includes(currentPLetter))) // || (isEndOfWord && phonemeInfo.takesSukunOnEnding); previousPhonemeInfo &&
const useAinBlendDiacritics = (!isBeginningOfWord && (phonemeInfo.ainBlendDiacritic && currentPLetter === "ع")); phonemeInfo.consonant &&
previousPhonemeInfo.consonant;
const needsSukun =
doubleConsonant &&
(previousPhoneme !== phoneme ||
phonemeInfo.matches?.includes(currentPLetter)); // || (isEndOfWord && phonemeInfo.takesSukunOnEnding);
const useAinBlendDiacritics =
!isBeginningOfWord &&
phonemeInfo.ainBlendDiacritic &&
currentPLetter === "ع";
const diacritic = useAinBlendDiacritics const diacritic = useAinBlendDiacritics
? phonemeInfo.ainBlendDiacritic ? phonemeInfo.ainBlendDiacritic
: isEndOfWord : isEndOfWord
? ((!phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic) ? phonemeInfo.diacritic : undefined) : phonemeInfo.diacritic; ? !phonemeInfo.longVowel || phonemeInfo.useEndingDiacritic
? phonemeInfo.diacritic
: undefined
: phonemeInfo.diacritic;
const lastWordEndedW = (char: string) => ((prevPLetter === char && !currentPLetter) || (prevPLetter === " " && last(state.pOut, 2) === char)); const lastWordEndedW = (char: string) =>
(prevPLetter === char && !currentPLetter) ||
(prevPLetter === " " && last(state.pOut, 2) === char);
function getPhonemeState(): PhonemeStatus { function getPhonemeState(): PhonemeStatus {
if (isBeginningOfWord && phoneme === "aa" && phonemeInfo.beginningMatches?.includes(currentPLetter)) { if (
isBeginningOfWord &&
phoneme === "aa" &&
phonemeInfo.beginningMatches?.includes(currentPLetter)
) {
return PhonemeStatus.DirectMatch; return PhonemeStatus.DirectMatch;
} }
if (isBeginningOfWord && phoneme === "oo" && currentPLetter === "و") { if (isBeginningOfWord && phoneme === "oo" && currentPLetter === "و") {
return PhonemeStatus.OoPrefix; return PhonemeStatus.OoPrefix;
} }
if (isBeginningOfWord && (phonemeInfo.longVowel && !phonemeInfo.endingOnly)) { if (isBeginningOfWord && phonemeInfo.longVowel && !phonemeInfo.endingOnly) {
if (phoneme !== "aa" && currentPLetter !== "ا" && !phonemeInfo.matches?.includes(nextPLetter)) { if (
phoneme !== "aa" &&
currentPLetter !== "ا" &&
!phonemeInfo.matches?.includes(nextPLetter)
) {
throw Error("phonetics error - needs alef prefix"); throw Error("phonetics error - needs alef prefix");
} }
return PhonemeStatus.LeadingLongVowel; return PhonemeStatus.LeadingLongVowel;
} }
if (isBeginningOfWord && (phonemeInfo.beginningMatches?.includes(currentPLetter) || phonemeInfo.matches?.includes(currentPLetter))) { if (
isBeginningOfWord &&
(phonemeInfo.beginningMatches?.includes(currentPLetter) ||
phonemeInfo.matches?.includes(currentPLetter))
) {
return PhonemeStatus.LeadingConsonantOrShortVowel; return PhonemeStatus.LeadingConsonantOrShortVowel;
} }
if (isBeginningOfWord && phoneme === "aa" && currentPLetter === "ع" && nextPLetter === "ا") { if (
isBeginningOfWord &&
phoneme === "aa" &&
currentPLetter === "ع" &&
nextPLetter === "ا"
) {
return PhonemeStatus.AinWithLongAAtBeginning; return PhonemeStatus.AinWithLongAAtBeginning;
} }
if (currentPLetter === "ا" && nextPLetter === "ع" && phoneme === "aa" && nextPhoneme !== "'") { if (
currentPLetter === "ا" &&
nextPLetter === "ع" &&
phoneme === "aa" &&
nextPhoneme !== "'"
) {
return PhonemeStatus.SilentAinAfterAlef; return PhonemeStatus.SilentAinAfterAlef;
} }
// console.log("------"); // console.log("------");
@ -400,13 +522,28 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
// console.log("state", state); // console.log("state", state);
// console.log("prevPLetter is space", prevPLetter === " "); // console.log("prevPLetter is space", prevPLetter === " ");
// console.log("------"); // console.log("------");
if (isBeginningOfWord && phoneme === "u" && prevPLetter === " " && lastNonWhitespace(state.pOut) === "د") { if (
return PhonemeStatus.EndOfDuParticle isBeginningOfWord &&
phoneme === "u" &&
prevPLetter === " " &&
lastNonWhitespace(state.pOut) === "د"
) {
return PhonemeStatus.EndOfDuParticle;
} }
if (isBeginningOfWord && phoneme === "-Ul-" && currentPLetter === "ا" && nextPLetter === "ل") { if (
isBeginningOfWord &&
phoneme === "-Ul-" &&
currentPLetter === "ا" &&
nextPLetter === "ل"
) {
return PhonemeStatus.ArabicDefiniteArticleUl; return PhonemeStatus.ArabicDefiniteArticleUl;
} }
if (phoneme === "a" && nextPhoneme === "'" && phonemes[i+2] === "a" && currentPLetter === "أ") { if (
phoneme === "a" &&
nextPhoneme === "'" &&
phonemes[i + 2] === "a" &&
currentPLetter === "أ"
) {
return PhonemeStatus.AlefHamzaBeg; return PhonemeStatus.AlefHamzaBeg;
} }
if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") { if (phoneme === "a" && previousPhoneme === "U" && currentPLetter === "و") {
@ -418,16 +555,35 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
if (phoneme === "'" && currentPLetter === "و" && nextPLetter === "و") { if (phoneme === "'" && currentPLetter === "و" && nextPLetter === "و") {
return PhonemeStatus.GlottalStopBeforeOo; return PhonemeStatus.GlottalStopBeforeOo;
} }
if (phoneme === "oo" && previousPhoneme === "'" && currentPLetter === "و" && prevPLetter === hamzaAbove) { if (
phoneme === "oo" &&
previousPhoneme === "'" &&
currentPLetter === "و" &&
prevPLetter === hamzaAbove
) {
return PhonemeStatus.OoAfterGlottalStopOo; return PhonemeStatus.OoAfterGlottalStopOo;
} }
if (phoneme === "'" && last(state.pOut, 2) === "ع" && isOutOfWord(last(state.pOut, 3))) { if (
phoneme === "'" &&
last(state.pOut, 2) === "ع" &&
isOutOfWord(last(state.pOut, 3))
) {
return PhonemeStatus.AinBeginningAfterShortVowel; return PhonemeStatus.AinBeginningAfterShortVowel;
} }
if (!isBeginningOfWord && phoneme === "aa" && currentPLetter === "و" && nextPLetter === "ا") { if (
!isBeginningOfWord &&
phoneme === "aa" &&
currentPLetter === "و" &&
nextPLetter === "ا"
) {
return PhonemeStatus.PersianSilentWWithAa; return PhonemeStatus.PersianSilentWWithAa;
} }
if (!isBeginningOfWord && phoneme === "i" && currentPLetter === "ا" && nextPLetter === "ل") { if (
!isBeginningOfWord &&
phoneme === "i" &&
currentPLetter === "ا" &&
nextPLetter === "ل"
) {
return PhonemeStatus.ArabicWasla; return PhonemeStatus.ArabicWasla;
} }
if (phoneme === "-i-" && isBeginningOfWord) { if (phoneme === "-i-" && isBeginningOfWord) {
@ -443,27 +599,47 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
return PhonemeStatus.ShortAinVowelMissingComma; return PhonemeStatus.ShortAinVowelMissingComma;
} }
if ((last(state.pOut, 2) === "ا") && isOutOfWord(last(state.pOut, 3))) { if (last(state.pOut, 2) === "ا" && isOutOfWord(last(state.pOut, 3))) {
return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart; return PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart;
} }
} }
if (useAinBlendDiacritics) { if (useAinBlendDiacritics) {
return PhonemeStatus.LongAinVowelMissingComma; return PhonemeStatus.LongAinVowelMissingComma;
} }
if (((!isBeginningOfWord && doubleConsonant) || prevPLetter === " ") && (previousPhoneme === phoneme) && !phonemeInfo.matches?.includes(currentPLetter)) { if (
((!isBeginningOfWord && doubleConsonant) || prevPLetter === " ") &&
previousPhoneme === phoneme &&
!phonemeInfo.matches?.includes(currentPLetter)
) {
return PhonemeStatus.DoubleConsonantTashdeed; return PhonemeStatus.DoubleConsonantTashdeed;
} }
if (phoneme === "aa" && currentPLetter === "ی" && nextPLetter === daggerAlif) { if (
phoneme === "aa" &&
currentPLetter === "ی" &&
nextPLetter === daggerAlif
) {
return PhonemeStatus.AlefDaggarEnding; return PhonemeStatus.AlefDaggarEnding;
} }
if (phoneme === "a" && lastWordEndedW("ح")) { if (phoneme === "a" && lastWordEndedW("ح")) {
return PhonemeStatus.ShortAEndingAfterHeem; return PhonemeStatus.ShortAEndingAfterHeem;
} }
if (isEndOfWord && ((phoneme === "u" && currentPLetter === "ه") || (phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))) { if (
return needsSukun ? PhonemeStatus.EndingWithHeyHimFromSukun : PhonemeStatus.EndingWithHeyHim; isEndOfWord &&
((phoneme === "u" && currentPLetter === "ه") ||
(phoneme === "h" && ["ه", "ح"].includes(currentPLetter)))
) {
return needsSukun
? PhonemeStatus.EndingWithHayHimFromSukun
: PhonemeStatus.EndingWithHayHim;
} }
if ((phonemeInfo.matches?.includes(currentPLetter) || (isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) || (phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب"))) { if (
return needsSukun ? PhonemeStatus.DirectMatchAfterSukun : PhonemeStatus.DirectMatch; phonemeInfo.matches?.includes(currentPLetter) ||
(isEndOfWord && phonemeInfo.endingMatches?.includes(currentPLetter)) ||
(phoneme === "m" && currentPLetter === "ن" && nextPLetter === "ب")
) {
return needsSukun
? PhonemeStatus.DirectMatchAfterSukun
: PhonemeStatus.DirectMatch;
} }
if (phonemeInfo.diacritic && !phonemeInfo.longVowel) { if (phonemeInfo.diacritic && !phonemeInfo.longVowel) {
return PhonemeStatus.ShortVowel; return PhonemeStatus.ShortVowel;
@ -471,21 +647,30 @@ export function stateInfo({ state, i, phonemes, phoneme }: {
if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) { if (phoneme === "o" && previousPhoneme === "w" && lastWordEndedW("و")) {
return PhonemeStatus.WoEndingO; return PhonemeStatus.WoEndingO;
} }
if (isEndOfWord && phoneme === "n" && currentPLetter === fathahan && prevPLetter === "ا") { if (
isEndOfWord &&
phoneme === "n" &&
currentPLetter === fathahan &&
prevPLetter === "ا"
) {
return PhonemeStatus.NOnFathatan; return PhonemeStatus.NOnFathatan;
} }
// console.log("errored", "current", phoneme, "next", nextPhoneme); // console.log("errored", "current", phoneme, "next", nextPhoneme);
// console.log("bad phoneme is ", phoneme); // console.log("bad phoneme is ", phoneme);
throw new Error("phonetics error - no status found for phoneme: " + phoneme); throw new Error(
"phonetics error - no status found for phoneme: " + phoneme
);
} }
const phs = getPhonemeState(); const phs = getPhonemeState();
return { return {
phs, phonemeInfo, diacritic, prevPLetter, phs,
phonemeInfo,
diacritic,
prevPLetter,
}; };
}; }
/** /**
* returns the nth last character of a string * returns the nth last character of a string
@ -496,7 +681,10 @@ export function last(s: string, n = 1) {
return s[s.length - n]; return s[s.length - n];
} }
export function advanceP(state: DiacriticsAccumulator, n: number = 1): DiacriticsAccumulator { export function advanceP(
state: DiacriticsAccumulator,
n: number = 1
): DiacriticsAccumulator {
return { return {
pIn: state.pIn.slice(n), pIn: state.pIn.slice(n),
pOut: state.pOut + state.pIn.slice(0, n), pOut: state.pOut + state.pIn.slice(0, n),
@ -518,14 +706,18 @@ export function reverseP(state: DiacriticsAccumulator): DiacriticsAccumulator {
}; };
} }
export const addP = (toAdd: string | undefined) => (state: DiacriticsAccumulator): DiacriticsAccumulator => { export const addP =
(toAdd: string | undefined) =>
(state: DiacriticsAccumulator): DiacriticsAccumulator => {
return { return {
...state, ...state,
pOut: toAdd ? (state.pOut + toAdd) : state.pOut, pOut: toAdd ? state.pOut + toAdd : state.pOut,
}; };
}; };
export const overwriteP = (toWrite: string) => (state: DiacriticsAccumulator): DiacriticsAccumulator => { export const overwriteP =
(toWrite: string) =>
(state: DiacriticsAccumulator): DiacriticsAccumulator => {
return { return {
pIn: state.pIn.slice(1), pIn: state.pIn.slice(1),
pOut: state.pOut + toWrite, pOut: state.pOut + toWrite,
@ -545,7 +737,10 @@ export function lastNonWhitespace(s: string): string {
return penultimateChar; return penultimateChar;
} }
export function getCurrentNext(state: DiacriticsAccumulator): { current: string, next: string} { export function getCurrentNext(state: DiacriticsAccumulator): {
current: string;
next: string;
} {
return { return {
current: state.pIn[0], current: state.pIn[0],
next: state.pIn[1], next: state.pIn[1],
@ -557,7 +752,9 @@ export function getCurrentNext(state: DiacriticsAccumulator): { current: string,
// return (current === "ع") ? advanceP(state) : state; // return (current === "ع") ? advanceP(state) : state;
// } // }
export function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccumulator { export function advanceForHamzaMid(
state: DiacriticsAccumulator
): DiacriticsAccumulator {
const { current, next } = getCurrentNext(state); const { current, next } = getCurrentNext(state);
if (current === "ئ" && next && next !== "ئ") { if (current === "ئ" && next && next !== "ئ") {
return advanceP(state); return advanceP(state);
@ -565,7 +762,9 @@ export function advanceForHamzaMid(state: DiacriticsAccumulator): DiacriticsAccu
return state; return state;
} }
export function advanceForHamza(state: DiacriticsAccumulator): DiacriticsAccumulator { export function advanceForHamza(
state: DiacriticsAccumulator
): DiacriticsAccumulator {
const { current, next } = getCurrentNext(state); const { current, next } = getCurrentNext(state);
if (current === "ه" && (!next || next === " ")) { if (current === "ه" && (!next || next === " ")) {
return advanceP(state); return advanceP(state);
@ -575,4 +774,3 @@ export function advanceForHamza(state: DiacriticsAccumulator): DiacriticsAccumul
// } // }
return state; return state;
} }

View File

@ -6,23 +6,16 @@
* *
*/ */
import { import { addDiacritics } from "./diacritics";
addDiacritics, import { zwar, zwarakay, sukun, tashdeed } from "./diacritics-helpers";
} from "./diacritics";
import {
zwar,
zwarakey,
sukun,
tashdeed,
} from "./diacritics-helpers";
import * as T from "../../types"; import * as T from "../../types";
const diacriticsSections: { const diacriticsSections: {
describe: string, describe: string;
tests: { tests: {
in: T.PsString, in: T.PsString;
out: string | null, out: string | null;
}[], }[];
}[] = [ }[] = [
{ {
describe: "regular, native Pashto script/sounds", describe: "regular, native Pashto script/sounds",
@ -108,14 +101,14 @@ const diacriticsSections: {
{ {
in: { in: {
p: "شئ", p: "شئ",
f: "sheyy", f: "shey",
}, },
out: "شئ", out: "شئ",
}, },
{ {
in: { in: {
p: "کار کوئ چې لاړ شئ", p: "کار کوئ چې لاړ شئ",
f: "kaar kawéyy che laaR sheyy", f: "kaar kawéy che laaR shey",
}, },
out: "کار کَوئ چې لاړ شئ", out: "کار کَوئ چې لاړ شئ",
}, },
@ -146,28 +139,28 @@ const diacriticsSections: {
p: "کول", p: "کول",
f: "kawul", f: "kawul",
}, },
out: "کَو" + zwarakey + "ل", out: "کَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کول", p: "کول",
f: "kiwul", f: "kiwul",
}, },
out: "کِو" + zwarakey + "ل", out: "کِو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کول", p: "کول",
f: "kUwul", f: "kUwul",
}, },
out: "کُو" + zwarakey + "ل", out: "کُو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کول", p: "کول",
f: "kuwul", f: "kuwul",
}, },
out: "ک" + zwarakey + "و" + zwarakey + "ل", out: "ک" + zwarakay + "و" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -200,7 +193,7 @@ const diacriticsSections: {
{ {
in: { in: {
p: "سپین", p: "سپین",
f: "speyn", f: "spayn",
}, },
out: "سْپین", out: "سْپین",
}, },
@ -272,21 +265,21 @@ const diacriticsSections: {
p: "رغېدل", p: "رغېدل",
f: "raghedul", f: "raghedul",
}, },
out: "رَغېد" + zwarakey + "ل", out: "رَغېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کارول", p: "کارول",
f: "kaarawul", f: "kaarawul",
}, },
out: "کارَو" + zwarakey + "ل", out: "کارَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "پېښېدل", p: "پېښېدل",
f: "pexedul", f: "pexedul",
}, },
out: "پېښېد" + zwarakey + "ل", out: "پېښېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -298,7 +291,7 @@ const diacriticsSections: {
{ {
in: { in: {
p: "سړی", p: "سړی",
f: "saRey", f: "saRay",
}, },
out: "سَړی", out: "سَړی",
}, },
@ -335,28 +328,28 @@ const diacriticsSections: {
p: "ایستل", p: "ایستل",
f: "eestul", f: "eestul",
}, },
out: "اِیسْت" + zwarakey + "ل", out: "اِیسْت" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "ایستل", p: "ایستل",
f: "eystul", f: "aystul",
}, },
out: "ایسْت" + zwarakey + "ل", out: "ایسْت" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "اېسېدل", p: "اېسېدل",
f: "esedul", f: "esedul",
}, },
out: "اېسېد" + zwarakey + "ل", out: "اېسېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "اوسېدل", p: "اوسېدل",
f: "osedul", f: "osedul",
}, },
out: "اوسېد" + zwarakey + "ل", out: "اوسېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -377,7 +370,7 @@ const diacriticsSections: {
p: "واردول", p: "واردول",
f: "waaridawul", f: "waaridawul",
}, },
out: "وارِدَو" + zwarakey + "ل", out: "وارِدَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -557,7 +550,7 @@ const diacriticsSections: {
p: "توجه کول", p: "توجه کول",
f: "tawajU kawul", f: "tawajU kawul",
}, },
out: "تَوَجُه کَو" + zwarakey + "ل", out: "تَوَجُه کَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -597,7 +590,7 @@ const diacriticsSections: {
{ {
in: { in: {
p: "سختسری", p: "سختسری",
f: "sakht sărey", f: "sakht săray",
}, },
out: "سَخْتْسَری", out: "سَخْتْسَری",
}, },
@ -646,7 +639,7 @@ const diacriticsSections: {
{ {
in: { in: {
p: "وری", p: "وری",
f: "waréy", f: "waráy",
}, },
out: "وَری", out: "وَری",
}, },
@ -660,19 +653,20 @@ const diacriticsSections: {
{ {
in: { in: {
p: "امزری", p: "امزری",
f: "umzaréy", f: "umzaráy",
}, },
out: zwarakey + "مْزَری", out: zwarakay + "مْزَری",
}, },
], ],
}, },
{ {
describe: "ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی", describe:
"ې followed by ی - y needs to be written as e`y to be distinguished from ay - ی",
tests: [ tests: [
{ {
in: { in: {
p: "پتېیل", p: "پتېیل",
f: "pateyúl", f: "patayúl",
}, },
out: null, out: null,
}, },
@ -681,14 +675,14 @@ const diacriticsSections: {
p: "پتېیل", p: "پتېیل",
f: "pate`yúl", f: "pate`yúl",
}, },
out: "پَتېی" + zwarakey + "ل", out: "پَتېی" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "درېیم", p: "درېیم",
f: "dre`yum", f: "dre`yum",
}, },
out: "دْرېی" + zwarakey + "م", out: "دْرېی" + zwarakay + "م",
}, },
], ],
}, },
@ -700,7 +694,7 @@ const diacriticsSections: {
p: "تر ... پورې", p: "تر ... پورې",
f: "tur ... pore", f: "tur ... pore",
}, },
out: "ت" + zwarakey + "ر ... پورې", out: "ت" + zwarakay + "ر ... پورې",
}, },
], ],
}, },
@ -729,7 +723,7 @@ const diacriticsSections: {
{ {
in: { in: {
p: "سړی و", p: "سړی و",
f: "saRey wo", f: "saRay wo",
}, },
out: "سَړی و", out: "سَړی و",
}, },
@ -811,7 +805,7 @@ const diacriticsSections: {
p: "منبع", p: "منبع",
f: "manb'i", f: "manb'i",
}, },
out: "مَنْبْعِ" out: "مَنْبْعِ",
}, },
{ {
in: { in: {
@ -825,7 +819,7 @@ const diacriticsSections: {
p: "منبع", p: "منبع",
f: "manbi", f: "manbi",
}, },
out: "مَنْبِع" out: "مَنْبِع",
}, },
{ {
in: { in: {
@ -860,7 +854,7 @@ const diacriticsSections: {
p: "مربع جذر", p: "مربع جذر",
f: "mUraba' jazúr", f: "mUraba' jazúr",
}, },
out: "مُرَبَع جَذ" + zwarakey + "ر", out: "مُرَبَع جَذ" + zwarakay + "ر",
}, },
{ {
in: { in: {
@ -888,7 +882,7 @@ const diacriticsSections: {
p: "راجع کېدل", p: "راجع کېدل",
f: "raaji kedul", f: "raaji kedul",
}, },
out: "راجِع کېد" + zwarakey + "ل", out: "راجِع کېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -979,7 +973,7 @@ const diacriticsSections: {
f: "ijmaa'", f: "ijmaa'",
}, },
out: "اِجْماع", out: "اِجْماع",
} },
], ],
}, },
{ {
@ -1064,21 +1058,21 @@ const diacriticsSections: {
p: "د", p: "د",
f: "du", f: "du",
}, },
out: "د" + zwarakey, out: "د" + zwarakay,
}, },
{ {
in: { in: {
p: "د لاس", p: "د لاس",
f: "du laas", f: "du laas",
}, },
out: "د" + zwarakey + " لاس", out: "د" + zwarakay + " لاس",
}, },
{ {
in: { in: {
p: "د ... په شان", p: "د ... په شان",
f: "du ... pu shaan", f: "du ... pu shaan",
}, },
out: "د" + zwarakey + " ... پهٔ شان", out: "د" + zwarakay + " ... پهٔ شان",
}, },
], ],
}, },
@ -1097,7 +1091,7 @@ const diacriticsSections: {
p: "ذبح کول", p: "ذبح کول",
f: "zabha kawul", f: "zabha kawul",
}, },
out: "ذَبْحَ کَو" + zwarakey + "ل", out: "ذَبْحَ کَو" + zwarakay + "ل",
}, },
], ],
}, },
@ -1275,14 +1269,14 @@ const diacriticsSections: {
p: "وځم", p: "وځم",
f: "oodzum", f: "oodzum",
}, },
out: "وُځ" + zwarakey + "م", out: "وُځ" + zwarakay + "م",
}, },
{ {
in: { in: {
p: "وځم", p: "وځم",
f: "wUdzum", f: "wUdzum",
}, },
out: "وُځ" + zwarakey + "م", out: "وُځ" + zwarakay + "م",
}, },
], ],
}, },
@ -1316,5 +1310,3 @@ test("ending with left over phonetics will throw an error", () => {
addDiacritics({ p: "کار", f: "kaar kawul" }); addDiacritics({ p: "کار", f: "kaar kawul" });
}).toThrow(); }).toThrow();
}); });

View File

@ -11,7 +11,7 @@ import {
splitFIntoPhonemes, splitFIntoPhonemes,
Phoneme, Phoneme,
zwar, zwar,
zwarakey, zwarakay,
zer, zer,
pesh, pesh,
sukun, sukun,
@ -35,10 +35,20 @@ import { pipe } from "rambda";
/** /**
* Adds diacritics to a given PsString. * Adds diacritics to a given PsString.
* Errors if the phonetics and script don't line up. * Errors if the phonetics and script don't line up.
*
* IN PROGRESS - This will hopefully get done and replace the messy, unmaintainable phonetics-to-diacritics.ts currently in use
*/ */
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString { export function addDiacritics(
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? removeFVarients(f) : f); { p, f }: T.PsString,
const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() }); ignoreCommas?: true
): T.PsString {
const phonemes: Phoneme[] = splitFIntoPhonemes(
!ignoreCommas ? removeFVarients(f) : f
);
const { pIn, pOut } = phonemes.reduce(processPhoneme, {
pOut: "",
pIn: p.trim(),
});
if (pIn !== "") { if (pIn !== "") {
throw new Error("phonetics error - phonetics shorter than pashto script"); throw new Error("phonetics error - phonetics shorter than pashto script");
} }
@ -52,159 +62,80 @@ function processPhoneme(
acc: DiacriticsAccumulator, acc: DiacriticsAccumulator,
phoneme: Phoneme, phoneme: Phoneme,
i: number, i: number,
phonemes: Phoneme[], phonemes: Phoneme[]
): DiacriticsAccumulator { ): DiacriticsAccumulator {
const state = acc.pIn.slice(0, 5) === " ... " const state =
acc.pIn.slice(0, 5) === " ... "
? advanceP(acc, 5) ? advanceP(acc, 5)
: acc.pIn[0] === " " : acc.pIn[0] === " "
? advanceP(acc) ? advanceP(acc)
: acc; : acc;
const { const { phonemeInfo, diacritic, phs, prevPLetter } = stateInfo({
phonemeInfo, state,
diacritic, i,
phs, phoneme,
prevPLetter, phonemes,
} = stateInfo({ state, i, phoneme, phonemes }); });
return (phs === PhonemeStatus.LeadingLongVowel) ? return phs === PhonemeStatus.LeadingLongVowel
pipe( ? pipe(advanceP, addP(phonemeInfo.diacritic), advanceP)(state)
advanceP, : phs === PhonemeStatus.LeadingConsonantOrShortVowel
addP(phonemeInfo.diacritic), ? pipe(advanceP, addP(diacritic))(state)
advanceP, : phs === PhonemeStatus.DoubleConsonantTashdeed
)(state) ? pipe(prevPLetter === " " ? reverseP : addP(""), addP(tashdeed))(state)
: (phs === PhonemeStatus.LeadingConsonantOrShortVowel) ? : phs === PhonemeStatus.EndingWithHayHim
pipe( ? pipe(advanceP, addP(phoneme === "u" ? hamzaAbove : sukun))(state)
advanceP, : phs === PhonemeStatus.DirectMatch
addP(diacritic), ? pipe(addP(diacritic), advanceP)(state)
)(state) : phs === PhonemeStatus.DirectMatchAfterSukun
: (phs === PhonemeStatus.DoubleConsonantTashdeed) ? ? pipe(addP(sukun), advanceP)(state)
pipe( : phs === PhonemeStatus.PersianSilentWWithAa
prevPLetter === " " ? reverseP : addP(""), ? pipe(addP("("), advanceP, addP(")"), advanceP)(state)
addP(tashdeed) : phs === PhonemeStatus.ArabicWasla
)(state) ? pipe(addP(zer), overwriteP(wasla))(state)
: (phs === PhonemeStatus.EndingWithHeyHim) ? : phs === PhonemeStatus.Izafe
pipe( ? pipe(reverseP, addP(zer))(state)
advanceP, : phs === PhonemeStatus.EndOfDuParticle
addP(phoneme === "u" ? hamzaAbove : sukun), ? pipe(reverseP, addP(zwarakay))(state)
)(state) : phs === PhonemeStatus.ShortAEndingAfterHeem
: (phs === PhonemeStatus.DirectMatch) ? ? pipe(prevPLetter === " " ? reverseP : addP(""), addP(zwar))(state)
pipe( : phs === PhonemeStatus.EndingWithHayHimFromSukun
addP(diacritic), ? pipe(addP(sukun), advanceP)(state)
advanceP, : phs === PhonemeStatus.AlefDaggarEnding
)(state) ? pipe(advanceP, advanceP)(state)
: (phs === PhonemeStatus.DirectMatchAfterSukun) ? : phs === PhonemeStatus.LongAinVowelMissingComma
pipe( ? pipe(addP(diacritic), advanceP, addP(diacritic))(state)
addP(sukun), : phs === PhonemeStatus.ShortAinVowelMissingComma
advanceP, ? pipe(addP(diacritic), advanceP)(state)
)(state) : phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart
: (phs === PhonemeStatus.PersianSilentWWithAa) ? ? pipe(advanceP, advanceP)(state)
pipe( : phs === PhonemeStatus.AinWithLongAAtBeginning
addP("("), ? pipe(advanceP, advanceP)(state)
advanceP, : phs === PhonemeStatus.AlefWithHamza
addP(")"), ? pipe(advanceP)(state)
advanceP, : phs === PhonemeStatus.ShortVowel
)(state) ? pipe(
: (phs === PhonemeStatus.ArabicWasla) ?
pipe(
addP(zer),
overwriteP(wasla),
)(state)
: (phs === PhonemeStatus.Izafe) ?
pipe(
reverseP,
addP(zer),
)(state)
: (phs === PhonemeStatus.EndOfDuParticle) ?
pipe(
reverseP,
addP(zwarakey),
)(state)
: (phs === PhonemeStatus.ShortAEndingAfterHeem) ?
pipe(
prevPLetter === " " ? reverseP : addP(""),
addP(zwar),
)(state)
: (phs === PhonemeStatus.EndingWithHeyHimFromSukun) ?
pipe(
addP(sukun),
advanceP,
)(state)
: (phs === PhonemeStatus.AlefDaggarEnding) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.LongAinVowelMissingComma) ?
pipe(
addP(diacritic),
advanceP,
addP(diacritic)
)(state)
: (phs === PhonemeStatus.ShortAinVowelMissingComma) ?
pipe(
addP(diacritic),
advanceP,
)(state)
: (phs === PhonemeStatus.ShortAinVowelMissingCommaAfterAlefStart) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.AinWithLongAAtBeginning) ?
pipe(
advanceP,
advanceP,
)(state)
: (phs === PhonemeStatus.AlefWithHamza) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.ShortVowel) ?
pipe(
advanceForHamzaMid, advanceForHamzaMid,
addP(phonemeInfo.diacritic), addP(phonemeInfo.diacritic),
// TODO THIS? // TODO THIS?
advanceForHamza, advanceForHamza
)(state)
: (phs === PhonemeStatus.ShortAForAlefBeforeFathatan) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.NOnFathatan) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.HamzaOnWow) ?
pipe(
advanceP,
addP(hamzaAbove),
addP(diacritic),
)(state)
: (phs === PhonemeStatus.ArabicDefiniteArticleUl) ?
pipe(
advanceP,
addP(pesh),
advanceP,
)(state)
: (phs === PhonemeStatus.OoPrefix) ?
pipe(
advanceP,
addP(pesh),
)(state)
: (phs === PhonemeStatus.GlottalStopBeforeOo) ?
pipe(
advanceP,
addP(hamzaAbove),
)(state)
: (phs === PhonemeStatus.OoAfterGlottalStopOo) ?
pipe(
advanceP,
)(state)
: (phs === PhonemeStatus.SilentAinAfterAlef) ?
pipe(
advanceP,
advanceP,
)(state) )(state)
: phs === PhonemeStatus.ShortAForAlefBeforeFathatan
? pipe(advanceP)(state)
: phs === PhonemeStatus.NOnFathatan
? pipe(advanceP)(state)
: phs === PhonemeStatus.HamzaOnWow
? pipe(advanceP, addP(hamzaAbove), addP(diacritic))(state)
: phs === PhonemeStatus.ArabicDefiniteArticleUl
? pipe(advanceP, addP(pesh), advanceP)(state)
: phs === PhonemeStatus.OoPrefix
? pipe(advanceP, addP(pesh))(state)
: phs === PhonemeStatus.GlottalStopBeforeOo
? pipe(advanceP, addP(hamzaAbove))(state)
: phs === PhonemeStatus.OoAfterGlottalStopOo
? pipe(advanceP)(state)
: phs === PhonemeStatus.SilentAinAfterAlef
? pipe(advanceP, advanceP)(state)
: state; : state;
} }

View File

@ -11,19 +11,19 @@ import {
splitFIntoPhonemes, splitFIntoPhonemes,
} from "./phonetics-to-diacritics"; } from "./phonetics-to-diacritics";
const zwarakey = "ٙ"; const zwarakay = "ٙ";
const phonemeSplits: Array<{ const phonemeSplits: Array<{
in: string, in: string;
out: string[], out: string[];
}> = [ }> = [
{ {
in: "kor", in: "kor",
out: ["k", "o", "r"], out: ["k", "o", "r"],
}, },
{ {
in: "raaghey", in: "raaghay",
out: ["r", "aa", "gh", "ey"], out: ["r", "aa", "gh", "ay"],
}, },
{ {
in: "hatsa", in: "hatsa",
@ -34,16 +34,16 @@ const phonemeSplits: Array<{
out: ["b", "a"], out: ["b", "a"],
}, },
{ {
in: "peydáa", in: "paydáa",
out: ["p", "ey", "d", "áa"], out: ["p", "ay", "d", "áa"],
}, },
{ {
in: "be kaar", in: "be kaar",
out: ["b", "e", "k", "aa", "r"], out: ["b", "e", "k", "aa", "r"],
}, },
{ {
in: "raadzeyy", in: "raadzey",
out: ["r", "aa", "dz", "eyy"], out: ["r", "aa", "dz", "ey"],
}, },
{ {
in: "badanuy ??", in: "badanuy ??",
@ -67,8 +67,8 @@ phonemeSplits.forEach((s) => {
}); });
const toTest: Array<{ const toTest: Array<{
in: { p: string, f: string }, in: { p: string; f: string };
out: string | undefined, out: string | undefined;
}> = [ }> = [
{ {
in: { in: {
@ -168,28 +168,28 @@ const toTest: Array<{
p: "کول", p: "کول",
f: "kawul", f: "kawul",
}, },
out: "کَو" + zwarakey + "ل", out: "کَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کول", p: "کول",
f: "kiwul", f: "kiwul",
}, },
out: "کِو" + zwarakey + "ل", out: "کِو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کول", p: "کول",
f: "kUwul", f: "kUwul",
}, },
out: "کُو" + zwarakey + "ل", out: "کُو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کول", p: "کول",
f: "kuwul", f: "kuwul",
}, },
out: "ک" + zwarakey + "و" + zwarakey + "ل", out: "ک" + zwarakay + "و" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -222,7 +222,7 @@ const toTest: Array<{
{ {
in: { in: {
p: "سپین", p: "سپین",
f: "speyn", f: "spayn",
}, },
out: "سْپین", out: "سْپین",
}, },
@ -236,7 +236,7 @@ const toTest: Array<{
{ {
in: { in: {
p: "پېش", p: "پېش",
f: "peysh", f: "paysh",
}, },
out: undefined, out: undefined,
}, },
@ -245,33 +245,33 @@ const toTest: Array<{
p: "رغېدل", p: "رغېدل",
f: "raghedul", f: "raghedul",
}, },
out: "رَغېد" + zwarakey + "ل", out: "رَغېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "کارول", p: "کارول",
f: "kaarawul", f: "kaarawul",
}, },
out: "کارَو" + zwarakey + "ل", out: "کارَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "پېښېدل", p: "پېښېدل",
f: "pexedul", f: "pexedul",
}, },
out: "پېښېد" + zwarakey + "ل", out: "پېښېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "مین", p: "مین",
f: "mayín", f: "ma`yín",
}, },
out: "مَیِن", out: "مَیِن",
}, },
{ {
in: { in: {
p: "سړی", p: "سړی",
f: "saRey", f: "saRay",
}, },
out: "سَړی", out: "سَړی",
}, },
@ -308,28 +308,28 @@ const toTest: Array<{
p: "ایستل", p: "ایستل",
f: "eestul", f: "eestul",
}, },
out: "اِیسْت" + zwarakey + "ل", out: "اِیسْت" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "ایستل", p: "ایستل",
f: "eystul", f: "aystul",
}, },
out: "ایسْت" + zwarakey + "ل", out: "ایسْت" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "اېسېدل", p: "اېسېدل",
f: "esedul", f: "esedul",
}, },
out: "اېسېد" + zwarakey + "ل", out: "اېسېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "اوسېدل", p: "اوسېدل",
f: "osedul", f: "osedul",
}, },
out: "اوسېد" + zwarakey + "ل", out: "اوسېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -350,7 +350,7 @@ const toTest: Array<{
p: "واردول", p: "واردول",
f: "waaridawul", f: "waaridawul",
}, },
out: "وارِدَو" + zwarakey + "ل", out: "وارِدَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -490,21 +490,21 @@ const toTest: Array<{
p: "ازغن تار", p: "ازغن تار",
f: "azghun taar", f: "azghun taar",
}, },
out: "اَزْغ" + zwarakey + "ن" + " تار", out: "اَزْغ" + zwarakay + "ن" + " تار",
}, },
{ {
in: { in: {
p: "اره څکول", p: "اره څکول",
f: "ara tskawul", f: "ara tskawul",
}, },
out: "اَره څْکَو" + zwarakey + "ل", out: "اَره څْکَو" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "اږیل", p: "اږیل",
f: "aGuyúl", f: "aGuyúl",
}, },
out: "اَږ" + zwarakey + "ی" + zwarakey + "ل", out: "اَږ" + zwarakay + "ی" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -669,7 +669,8 @@ const toTest: Array<{
f: "aadam", f: "aadam",
}, },
out: undefined, out: undefined,
}, { },
{
in: { in: {
p: "منع", p: "منع",
f: "mán'a", f: "mán'a",
@ -698,11 +699,11 @@ const toTest: Array<{
}, },
out: "اسان", out: "اسان",
}, },
// ې followed by ی - y needs to be written as e`y to be distinguished from ey - ی // ې followed by ی - y needs to be written as e`y to be distinguished from ay - ی
{ {
in: { in: {
p: "پتېیل", p: "پتېیل",
f: "pateyúl", f: "patayúl",
}, },
out: undefined, out: undefined,
}, },
@ -711,14 +712,14 @@ const toTest: Array<{
p: "پتېیل", p: "پتېیل",
f: "pate`yúl", f: "pate`yúl",
}, },
out: "پَتېی" + zwarakey + "ل", out: "پَتېی" + zwarakay + "ل",
}, },
{ {
in: { in: {
p: "درېیم", p: "درېیم",
f: "dre`yum", f: "dre`yum",
}, },
out: "دْرېی" + zwarakey + "م", out: "دْرېی" + zwarakay + "م",
}, },
{ {
in: { in: {
@ -733,7 +734,7 @@ const toTest: Array<{
p: "تر ... پورې", p: "تر ... پورې",
f: "tur ... pore", f: "tur ... pore",
}, },
out: "ت" + zwarakey + "ر ... پورې", out: "ت" + zwarakay + "ر ... پورې",
}, },
// joiner و // joiner و
{ {
@ -763,21 +764,21 @@ const toTest: Array<{
p: "د", p: "د",
f: "du", f: "du",
}, },
out: "د" + zwarakey, out: "د" + zwarakay,
}, },
{ {
in: { in: {
p: "د لاس", p: "د لاس",
f: "du laas", f: "du laas",
}, },
out: "د" + zwarakey + " لاس", out: "د" + zwarakay + " لاس",
}, },
{ {
in: { in: {
p: "د ... په شان", p: "د ... په شان",
f: "du ... pu shaan", f: "du ... pu shaan",
}, },
out: "د" + zwarakey + " ... پهٔ شان", out: "د" + zwarakay + " ... پهٔ شان",
}, },
{ {
in: { in: {
@ -798,7 +799,7 @@ const toTest: Array<{
p: "ذبح کول", p: "ذبح کول",
f: "zabha kawul", f: "zabha kawul",
}, },
out: "ذَبْحَ کَو" + zwarakey + "ل", out: "ذَبْحَ کَو" + zwarakay + "ل",
}, },
// require dagger alif on words ending with یٰ // require dagger alif on words ending with یٰ
{ {
@ -864,7 +865,7 @@ const toTest: Array<{
p: "طمع لرل", p: "طمع لرل",
f: "tama larul", f: "tama larul",
}, },
out: "طَمعَ لَر" + zwarakey + "ل", out: "طَمعَ لَر" + zwarakay + "ل",
}, },
// Ua ؤ // Ua ؤ
{ {
@ -885,7 +886,7 @@ const toTest: Array<{
{ {
in: { in: {
p: "شئ", p: "شئ",
f: "sheyy", f: "shey",
}, },
out: "شئ", out: "شئ",
}, },
@ -900,7 +901,7 @@ const toTest: Array<{
{ {
in: { in: {
p: "سړی و", p: "سړی و",
f: "saRey wo", f: "saRay wo",
}, },
out: "سَړی و", out: "سَړی و",
}, },
@ -938,7 +939,7 @@ const toTest: Array<{
p: "توجه کول", p: "توجه کول",
f: "tawajU kawul", f: "tawajU kawul",
}, },
out: "تَوَجُه کَو" + zwarakey + "ل", out: "تَوَجُه کَو" + zwarakay + "ل",
}, },
// With Arabic definite article -Ul- ال // With Arabic definite article -Ul- ال
{ {
@ -975,7 +976,7 @@ const toTest: Array<{
p: "راجع کېدل", p: "راجع کېدل",
f: "raaji kedul", f: "raaji kedul",
}, },
out: "راجعِ کېد" + zwarakey + "ل", out: "راجعِ کېد" + zwarakay + "ل",
}, },
{ {
in: { in: {
@ -987,7 +988,7 @@ const toTest: Array<{
{ {
in: { in: {
p: "سختسری", p: "سختسری",
f: "sakht sărey", f: "sakht săray",
}, },
out: "سَخْتْسَری", out: "سَخْتْسَری",
}, },
@ -1042,7 +1043,7 @@ const toTest: Array<{
p: "مربع جذر", p: "مربع جذر",
f: "mUraba' jazúr", f: "mUraba' jazúr",
}, },
out: "مُرَبَع جَذ" + zwarakey + "ر", out: "مُرَبَع جَذ" + zwarakay + "ر",
}, },
{ {
in: { in: {
@ -1101,14 +1102,14 @@ const toTest: Array<{
p: "وځم", p: "وځم",
f: "oodzum", f: "oodzum",
}, },
out: "وځ" + zwarakey + "م", out: "وځ" + zwarakay + "م",
}, },
{ {
in: { in: {
p: "وځم", p: "وځم",
f: "wUdzum", f: "wUdzum",
}, },
out: "وُځ" + zwarakey + "م", out: "وُځ" + zwarakay + "م",
}, },
]; ];

View File

@ -7,7 +7,7 @@
*/ */
const zwar = "َ"; const zwar = "َ";
const zwarakey = "ٙ"; const zwarakay = "ٙ";
const zer = "ِ"; const zer = "ِ";
const pesh = "ُ"; const pesh = "ُ";
const sukun = "ْ"; const sukun = "ْ";
@ -19,8 +19,25 @@ const fathahan = "ً";
// TODO: THESE OTHER TRIGRAPHS?? // TODO: THESE OTHER TRIGRAPHS??
const quadrigraphs = ["-Ul-"]; const quadrigraphs = ["-Ul-"];
const trigraphs = ["eyy", "éyy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"]; const trigraphs = ["ey", "éy", "-i-", "-U-"]; // , "aay", "áay", "ooy", "óoy"];
const digraphs = ["ắ", "aa", "áa", "ee", "ée", "ey", "éy", "oo", "óo", "kh", "gh", "ts", "dz", "jz", "ch", "sh"]; const digraphs = [
"ắ",
"aa",
"áa",
"ee",
"ée",
"ay",
"áy",
"oo",
"óo",
"kh",
"gh",
"ts",
"dz",
"jz",
"ch",
"sh",
];
const endingDigraphs = ["uy", "úy"]; const endingDigraphs = ["uy", "úy"];
const willIgnore = ["?", " ", "`", ".", "…"]; const willIgnore = ["?", " ", "`", ".", "…"];
@ -28,7 +45,7 @@ export function splitFIntoPhonemes(f: string): string[] {
const result: string[] = []; const result: string[] = [];
let index = 0; let index = 0;
while (index < f.length) { while (index < f.length) {
const isLastTwoLetters = (index === f.length - 2 || f[index + 2] === " "); const isLastTwoLetters = index === f.length - 2 || f[index + 2] === " ";
const threeLetterChunk = f.slice(index, index + 3); const threeLetterChunk = f.slice(index, index + 3);
const fourLetterChunk = f.slice(index, index + 4); const fourLetterChunk = f.slice(index, index + 4);
if (quadrigraphs.includes(fourLetterChunk)) { if (quadrigraphs.includes(fourLetterChunk)) {
@ -89,7 +106,12 @@ const phonemeTable = [
{ phoneme: "m", possibilities: ["م"], consonant: true }, { phoneme: "m", possibilities: ["م"], consonant: true },
{ phoneme: "n", possibilities: ["ن"], consonant: true }, { phoneme: "n", possibilities: ["ن"], consonant: true },
{ phoneme: "N", possibilities: ["ڼ"], consonant: true }, { phoneme: "N", possibilities: ["ڼ"], consonant: true },
{ phoneme: "h", possibilities: ["ه", "ح"], consonant: true, takesSukunOnEnding: true }, {
phoneme: "h",
possibilities: ["ه", "ح"],
consonant: true,
takesSukunOnEnding: true,
},
{ phoneme: "w", possibilities: ["و"], consonant: true }, { phoneme: "w", possibilities: ["و"], consonant: true },
{ phoneme: "y", possibilities: ["ی"], consonant: true }, { phoneme: "y", possibilities: ["ی"], consonant: true },
@ -99,33 +121,130 @@ const phonemeTable = [
{ phoneme: "-Ul-", possibilities: ["ال"] }, { phoneme: "-Ul-", possibilities: ["ال"] },
// vowels // vowels
{ phoneme: "aa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true }, {
{ phoneme: "áa", possibilities: ["ا"], beginning: ["آ", "ا"], endingPossibilities: ["ا", "یٰ"], isLongA: true, canStartWithAynBefore: true }, phoneme: "aa",
{ phoneme: "ee", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true }, possibilities: ["ا"],
{ phoneme: "ée", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ي"], diacritic: zer, canStartWithAynBefore: true }, beginning: ["آ", "ا"],
endingPossibilities: ["ا", "یٰ"],
isLongA: true,
canStartWithAynBefore: true,
},
{
phoneme: "áa",
possibilities: ["ا"],
beginning: ["آ", "ا"],
endingPossibilities: ["ا", "یٰ"],
isLongA: true,
canStartWithAynBefore: true,
},
{
phoneme: "ee",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ي"],
diacritic: zer,
canStartWithAynBefore: true,
},
{
phoneme: "ée",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ي"],
diacritic: zer,
canStartWithAynBefore: true,
},
{ phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true }, { phoneme: "e", possibilities: ["ې"], addAlefOnBeginning: true },
{ phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true }, { phoneme: "é", possibilities: ["ې"], addAlefOnBeginning: true },
{ phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true }, { phoneme: "o", possibilities: ["و"], addAlefOnBeginning: true },
{ phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true }, { phoneme: "ó", possibilities: ["و"], addAlefOnBeginning: true },
{ phoneme: "oo", possibilities: ["و"], addAlefOnBeginning: true, alsoCanBePrefix: true, diacritic: pesh }, {
{ phoneme: "óo", possibilities: ["و"], addAlefOnBeginning: true, diacritic: pesh }, phoneme: "oo",
{ phoneme: "ey", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]}, possibilities: ["و"],
{ phoneme: "éy", possibilities: ["ی"], addAlefOnBeginning: true, endingPossibilities: ["ی"]}, addAlefOnBeginning: true,
alsoCanBePrefix: true,
diacritic: pesh,
},
{
phoneme: "óo",
possibilities: ["و"],
addAlefOnBeginning: true,
diacritic: pesh,
},
{
phoneme: "ay",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ی"],
},
{
phoneme: "áy",
possibilities: ["ی"],
addAlefOnBeginning: true,
endingPossibilities: ["ی"],
},
{ phoneme: "uy", possibilities: ["ۍ"], endingOnly: true }, { phoneme: "uy", possibilities: ["ۍ"], endingOnly: true },
{ phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS { phoneme: "úy", possibilities: ["ۍ"], endingOnly: true }, // THIS CAN ONLY COME AT THE END DEAL WITH THIS
{ phoneme: "eyy", possibilities: ["ئ"], endingOnly: true }, { phoneme: "ey", possibilities: ["ئ"], endingOnly: true },
{ phoneme: "éyy", possibilities: ["ئ"], endingOnly: true }, { phoneme: "éy", possibilities: ["ئ"], endingOnly: true },
{ phoneme: "a", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true }, {
{ phoneme: "á", diacritic: zwar, endingPossibilities: ["ه"], canComeAfterHeyEnding: true, canBeFirstPartOfFathahanEnding: true }, phoneme: "a",
diacritic: zwar,
endingPossibilities: ["ه"],
canComeAfterHayEnding: true,
canBeFirstPartOfFathahanEnding: true,
},
{
phoneme: "á",
diacritic: zwar,
endingPossibilities: ["ه"],
canComeAfterHayEnding: true,
canBeFirstPartOfFathahanEnding: true,
},
{ phoneme: "ă", diacritic: zwar }, { phoneme: "ă", diacritic: zwar },
{ phoneme: "ắ", diacritic: zwar }, { phoneme: "ắ", diacritic: zwar },
{ phoneme: "u", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true }, {
{ phoneme: "ú", diacritic: zwarakey, endingPossibilities: ["ه"], hamzaOnEnd: true }, phoneme: "u",
{ phoneme: "i", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] }, diacritic: zwarakay,
{ phoneme: "í", diacritic: zer, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, canBeWasla: true, beginning: ["ا", "ع"] }, endingPossibilities: ["ه"],
{ phoneme: "U", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] }, hamzaOnEnd: true,
{ phoneme: "Ú", diacritic: pesh, endingPossibilities: ["ه"], takesDiacriticBeforeGurdaHeyEnding: true, beginning: ["ا", "ع"] }, },
{
phoneme: "ú",
diacritic: zwarakay,
endingPossibilities: ["ه"],
hamzaOnEnd: true,
},
{
phoneme: "i",
diacritic: zer,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
canBeWasla: true,
beginning: ["ا", "ع"],
},
{
phoneme: "í",
diacritic: zer,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
canBeWasla: true,
beginning: ["ا", "ع"],
},
{
phoneme: "U",
diacritic: pesh,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
beginning: ["ا", "ع"],
},
{
phoneme: "Ú",
diacritic: pesh,
endingPossibilities: ["ه"],
takesDiacriticBeforeGurdaHayEnding: true,
beginning: ["ا", "ع"],
},
]; ];
function isSpace(s: string): boolean { function isSpace(s: string): boolean {
@ -142,7 +261,11 @@ interface IDiacriticsErrorMessage {
i: number; i: number;
} }
function possibilityMatches(p: string, pIndex: number, possibilities: string[] | undefined): boolean { function possibilityMatches(
p: string,
pIndex: number,
possibilities: string[] | undefined
): boolean {
/* istanbul ignore next */ /* istanbul ignore next */
if (!possibilities) { if (!possibilities) {
return false; return false;
@ -155,10 +278,15 @@ function possibilityMatches(p: string, pIndex: number, possibilities: string[] |
return false; return false;
} }
function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean { function isPrefixedByDirectionalPronoun(
i: number,
phonemes: string[]
): boolean {
const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join(""); const potentialPronounFourCharSlice = phonemes.slice(i - 4, i).join("");
const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join(""); const potentialPronounThreeCharSlice = phonemes.slice(i - 3, i).join("");
if (["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)) { if (
["wăr-", "war-", "dăr-", "dar-"].includes(potentialPronounFourCharSlice)
) {
return true; return true;
} }
if (potentialPronounThreeCharSlice === "raa-") { if (potentialPronounThreeCharSlice === "raa-") {
@ -167,7 +295,11 @@ function isPrefixedByDirectionalPronoun(i: number, phonemes: string[]): boolean
return false; return false;
} }
export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes: boolean = false): string | undefined { export function phoneticsToDiacritics(
ps: string,
ph: string,
forbidOoPrefixes: boolean = false
): string | undefined {
const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]); const phonemes = splitFIntoPhonemes(ph.trim().split(",")[0]);
const p = ps.trim(); const p = ps.trim();
let result = ""; let result = "";
@ -179,58 +311,72 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
if (phoneme === "-") { if (phoneme === "-") {
return; return;
} }
const phonemeInfo = phonemeTable.find((element) => element.phoneme === phoneme); const phonemeInfo = phonemeTable.find(
(element) => element.phoneme === phoneme
);
if (!phonemeInfo) { if (!phonemeInfo) {
errored.push({ error: "phoneme info not found", phoneme, i }); errored.push({ error: "phoneme info not found", phoneme, i });
return; return;
} }
const isDoubleConsonant = ( const isDoubleConsonant =
phonemeInfo.consonant && phonemeInfo.consonant &&
phoneme === phonemes[i - 1] && phoneme === phonemes[i - 1] &&
// TODO: is this thorough enough to allow double consonants on the ending of the previous word? // TODO: is this thorough enough to allow double consonants on the ending of the previous word?
!(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek !(isSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // avoid false double consonant ie ازل لیک azalleek
) ? true : false; ? true
const isBeginning = !isDoubleConsonant && ((i === 0) || isSpace(p[pIndex - 1]) || (phonemes[i - 1] === "-Ul-") || isPrefixedByDirectionalPronoun(i, phonemes)); : false;
const upcomingAEndingAfterHey = (p[pIndex] === "ح" && isSpace(p[pIndex + 1]) && ["a", "á"].includes(phonemes[i + 1])); const isBeginning =
!isDoubleConsonant &&
(i === 0 ||
isSpace(p[pIndex - 1]) ||
phonemes[i - 1] === "-Ul-" ||
isPrefixedByDirectionalPronoun(i, phonemes));
const upcomingAEndingAfterHay =
p[pIndex] === "ح" &&
isSpace(p[pIndex + 1]) &&
["a", "á"].includes(phonemes[i + 1]);
// TODO: break this into a separate function -- why can it sometimes be set to undefined? // TODO: break this into a separate function -- why can it sometimes be set to undefined?
const isEnding = (i === phonemes.length - 1) || (( const isEnding =
(phonemeInfo.possibilities && isSpace(p[pIndex + 1])) || i === phonemes.length - 1 ||
(((phonemeInfo.possibilities && isSpace(p[pIndex + 1])) ||
(!phonemeInfo.possibilities && isSpace(p[pIndex])) || (!phonemeInfo.possibilities && isSpace(p[pIndex])) ||
( (!phonemeInfo.possibilities &&
(!phonemeInfo.possibilities && isSpace(p[pIndex + 1])) && isSpace(p[pIndex + 1]) &&
(possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) || (p[pIndex] === "ع" && phonemes[i + 1] !== "'")) (possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) ||
) (p[pIndex] === "ع" && phonemes[i + 1] !== "'")))) &&
) && !upcomingAEndingAfterHey !upcomingAEndingAfterHay && // makes sure the next letter isn't a double consonant like haqq <-
&& // makes sure the next letter isn't a double consonant like haqq <-
!( !(
phonemeInfo.consonant && phoneme === phonemes[i + 1] // && (phonemeInfo.consonant && phoneme === phonemes[i + 1]) // &&
// !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex])) // !(isSpace(p[pIndex + 1]) && phonemeInfo.possibilities.includes(p[pIndex]))
) )) || // can be the trailing double consanant on the end of a word
) || // can be the trailing double consanant on the end of a word (phonemeInfo.consonant &&
( phoneme === phonemes[i - 1] &&
phonemeInfo.consonant && phoneme === phonemes[i - 1] && !(
!(isEndSpace(p[pIndex - 1]) && phonemeInfo.possibilities.includes(p[pIndex])) isEndSpace(p[pIndex - 1]) &&
) || // can be یٰ ending phonemeInfo.possibilities.includes(p[pIndex])
( )) || // can be یٰ ending
isEndSpace(p[pIndex + 2]) && (p.slice(pIndex, pIndex + 2) === "یٰ") (isEndSpace(p[pIndex + 2]) && p.slice(pIndex, pIndex + 2) === "یٰ");
);
const isUofDu = phoneme === "u" && ( const isUofDu =
p.slice(pIndex - 2, pIndex) === "د " || // د as previous word phoneme === "u" &&
(p.slice(pIndex - 2, pIndex) === "د " || // د as previous word
(p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing (p[pIndex] === undefined && p[pIndex - 1] === "د") || // د as the whole thing
p.slice(pIndex - 6, pIndex) === "د ... " // ... د is as the previous word p.slice(pIndex - 6, pIndex) === "د ... "); // ... د is as the previous word
);
// TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance // TODO: Should p[pIndex - 1] also be in there ??? It messed up قطعه for instance
const isEndingAynVowel = isEnding && phonemeInfo.diacritic && [p[pIndex], p[pIndex - 1]].includes("ع") && p[pIndex] !== "ه"; const isEndingAynVowel =
isEnding &&
phonemeInfo.diacritic &&
[p[pIndex], p[pIndex - 1]].includes("ع") &&
p[pIndex] !== "ه";
const isMiddle = !isBeginning && !isEnding; const isMiddle = !isBeginning && !isEnding;
const isSilentWaw = ( const isSilentWaw =
p[pIndex] === "و" && p[pIndex] === "و" &&
p[pIndex - 1] === "خ" && p[pIndex - 1] === "خ" &&
p[pIndex + 1] === "ا" && p[pIndex + 1] === "ا" &&
["áa", "aa"].includes(phoneme) ["áa", "aa"].includes(phoneme);
); const isAnAEndingAfterHay =
const isAnAEndingAfterHey = isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHeyEnding; isEnding && p[pIndex - 1] === "ح" && phonemeInfo.canComeAfterHayEnding;
if (isDoubleConsonant) { if (isDoubleConsonant) {
pIndex--; pIndex--;
if (isSpace(p[pIndex])) { if (isSpace(p[pIndex])) {
@ -247,14 +393,22 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++; pIndex++;
} }
// special check for Arabic wasla // special check for Arabic wasla
if (p.slice(0, 3) === "بال" && phonemes[i - 1] === "b" && phonemeInfo.canBeWasla && phonemes[i + 1] === "l") { if (
p.slice(0, 3) === "بال" &&
phonemes[i - 1] === "b" &&
phonemeInfo.canBeWasla &&
phonemes[i + 1] === "l"
) {
result += phonemeInfo.diacritic + wasla; result += phonemeInfo.diacritic + wasla;
pIndex++; pIndex++;
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
// special check for fathahan ending // special check for fathahan ending
if (phonemeInfo.canBeFirstPartOfFathahanEnding && p.slice(pIndex, pIndex + 2) === "اً") { if (
phonemeInfo.canBeFirstPartOfFathahanEnding &&
p.slice(pIndex, pIndex + 2) === "اً"
) {
result += "ا"; result += "ا";
pIndex++; pIndex++;
return; return;
@ -265,7 +419,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// special check for words starting with عا or عی // special check for words starting with عا or عی
if (isBeginning && phonemeInfo.canStartWithAynBefore && p[pIndex] === "ع" && phonemeInfo.possibilities.includes(p[pIndex + 1])) { if (
isBeginning &&
phonemeInfo.canStartWithAynBefore &&
p[pIndex] === "ع" &&
phonemeInfo.possibilities.includes(p[pIndex + 1])
) {
result += "ع"; result += "ع";
result += phonemeInfo.diacritic ? phonemeInfo.diacritic : ""; result += phonemeInfo.diacritic ? phonemeInfo.diacritic : "";
result += p[pIndex + 1]; result += p[pIndex + 1];
@ -273,23 +432,45 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// special check for ؤ Ua // special check for ؤ Ua
if (phoneme === "U" && phonemes[i + 1] === "a" && phonemes[i + 2] !== "a" && p[pIndex] === "و") { if (
phoneme === "U" &&
phonemes[i + 1] === "a" &&
phonemes[i + 2] !== "a" &&
p[pIndex] === "و"
) {
result += "ؤ"; result += "ؤ";
pIndex++; pIndex++;
return; return;
} }
if (phoneme === "a" && phonemes[i - 1] === "U" && phonemes[i + 1] !== "a" && result.slice(-2) === "ؤ") { if (
phoneme === "a" &&
phonemes[i - 1] === "U" &&
phonemes[i + 1] !== "a" &&
result.slice(-2) === "ؤ"
) {
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
// special check for و wo // special check for و wo
if (isBeginning && phoneme === "w" && phonemes[i + 1] === "o" && p[pIndex] === "و" && isEndSpace(p[pIndex + 1])) { if (
isBeginning &&
phoneme === "w" &&
phonemes[i + 1] === "o" &&
p[pIndex] === "و" &&
isEndSpace(p[pIndex + 1])
) {
result += "و"; result += "و";
pIndex++; pIndex++;
return; return;
} }
// TODO: isEndSpace here is redundant?? // TODO: isEndSpace here is redundant??
if (isEnding && phoneme === "o" && phonemes[i - 1] === "w" && p[pIndex - 1] === "و" && isEndSpace(p[pIndex])) { if (
isEnding &&
phoneme === "o" &&
phonemes[i - 1] === "w" &&
p[pIndex - 1] === "و" &&
isEndSpace(p[pIndex])
) {
pIndex++; pIndex++;
return; return;
} }
@ -300,38 +481,67 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// special check for أ in the middle of the word // special check for أ in the middle of the word
if (!isBeginning && p[pIndex] === "أ" && phoneme === "a" && phonemes[i + 1] === "'" && phonemes[i + 2] === "a") { if (
!isBeginning &&
p[pIndex] === "أ" &&
phoneme === "a" &&
phonemes[i + 1] === "'" &&
phonemes[i + 2] === "a"
) {
result += "أ"; result += "أ";
pIndex++; pIndex++;
return; return;
} }
if (p[pIndex - 1] === "أ" && phonemes[i - 1] === "a" && phoneme === "'" && phonemes[i + 1] === "a") { if (
p[pIndex - 1] === "أ" &&
phonemes[i - 1] === "a" &&
phoneme === "'" &&
phonemes[i + 1] === "a"
) {
return; return;
} }
if (p[pIndex - 1] === "أ" && phonemes[i - 2] === "a" && phonemes[i - 1] === "'" && phoneme === "a") { if (
p[pIndex - 1] === "أ" &&
phonemes[i - 2] === "a" &&
phonemes[i - 1] === "'" &&
phoneme === "a"
) {
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
// special check for وو 'oo // special check for وو 'oo
if (!isBeginning && p[pIndex] === "و" && p[pIndex + 1] === "و" && phoneme === "'" && phonemes[i + 1] === "oo") { if (
!isBeginning &&
p[pIndex] === "و" &&
p[pIndex + 1] === "و" &&
phoneme === "'" &&
phonemes[i + 1] === "oo"
) {
result += "وُو"; result += "وُو";
pIndex += 2; pIndex += 2;
return; return;
} }
if (p[pIndex - 2] === "و" && p[pIndex - 1] === "و" && phonemes[i - 1] === "'" && phoneme === "oo") { if (
p[pIndex - 2] === "و" &&
p[pIndex - 1] === "و" &&
phonemes[i - 1] === "'" &&
phoneme === "oo"
) {
previousPhonemeWasAConsonant = false; previousPhonemeWasAConsonant = false;
return; return;
} }
const prevLetterWasBeginningAyn = ( const prevLetterWasBeginningAyn =
p[pIndex - 1] === "ع" && p[pIndex - 1] === "ع" &&
// isEndSpace(p[pIndex]) && // This breaks it // isEndSpace(p[pIndex]) && // This breaks it
phoneme === "'" phoneme === "'";
);
// check if the phoneme lines up in the Pashto word // check if the phoneme lines up in the Pashto word
if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) { if (isBeginning && !isUofDu && phonemeInfo.addAlefOnBeginning) {
// TODO: Maybe a little bad because it doesn't loop through possibilities // TODO: Maybe a little bad because it doesn't loop through possibilities
if ((!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) && p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]) { if (
(!phonemeInfo.alsoCanBePrefix || forbidOoPrefixes) &&
p.slice(pIndex, pIndex + 2) !== "ا" + phonemeInfo.possibilities[0]
) {
errored.push({ error: "didn't start with an aleph", phoneme, i }); errored.push({ error: "didn't start with an aleph", phoneme, i });
return; return;
} }
@ -348,18 +558,18 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++; pIndex++;
return; return;
} else if ( } else if (
(isEnding && phonemeInfo.endingPossibilities) && isEnding &&
phonemeInfo.endingPossibilities &&
!isUofDu && !isUofDu &&
(
!possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) && !possibilityMatches(p, pIndex, phonemeInfo.endingPossibilities) &&
!isEndingAynVowel && // allowing short vowels on the end of words ending with ع !isEndingAynVowel && // allowing short vowels on the end of words ending with ع
!isAnAEndingAfterHey !isAnAEndingAfterHay
)
) { ) {
errored.push({ error: "bad ending", phoneme, i }); errored.push({ error: "bad ending", phoneme, i });
return; return;
} else if ( } else if (
(isEnding && !phonemeInfo.endingPossibilities) && isEnding &&
!phonemeInfo.endingPossibilities &&
phonemeInfo.possibilities && phonemeInfo.possibilities &&
!phonemeInfo.possibilities.includes(p[pIndex]) !phonemeInfo.possibilities.includes(p[pIndex])
) { ) {
@ -367,14 +577,17 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
errored.push({ error: "bad ending 2", phoneme, i }); errored.push({ error: "bad ending 2", phoneme, i });
return; return;
} else if ( } else if (
(phonemeInfo.possibilities && !isEnding) && phonemeInfo.possibilities &&
( !isEnding &&
!(phonemeInfo.possibilities.includes(p[pIndex])) && !phonemeInfo.possibilities.includes(p[pIndex]) &&
!(p[pIndex] === "ن" && (p[pIndex + 1] === "ب" && phoneme === "m")) && // && // exception case with نب === mb !(p[pIndex] === "ن" && p[pIndex + 1] === "ب" && phoneme === "m") && // && // exception case with نب === mb
!prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat !prevLetterWasBeginningAyn // exception case with words starting with ع like i'zzat
)
) { ) {
errored.push({ error: "improper coressponding letter in middle of word", phoneme, i }); errored.push({
error: "improper coressponding letter in middle of word",
phoneme,
i,
});
return; return;
} }
// console.log(phoneme, pIndex, p[pIndex], isEnding); // console.log(phoneme, pIndex, p[pIndex], isEnding);
@ -382,7 +595,12 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
// OK, it lines up with the Pashto word, we're good // OK, it lines up with the Pashto word, we're good
// Now continue building the result string // Now continue building the result string
// deal with starting with short vowels and alef // deal with starting with short vowels and alef
if (!isUofDu && isBeginning && !phonemeInfo.possibilities && !phonemeInfo.isIzafe) { if (
!isUofDu &&
isBeginning &&
!phonemeInfo.possibilities &&
!phonemeInfo.isIzafe
) {
// TODO: WHY IS THIS HERE // TODO: WHY IS THIS HERE
if (!["ا", "ع"].includes(p[pIndex])) { if (!["ا", "ع"].includes(p[pIndex])) {
errored.push({ error: "bad beginning 2", phoneme, i }); errored.push({ error: "bad beginning 2", phoneme, i });
@ -392,22 +610,30 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
pIndex++; pIndex++;
} }
// if the phoneme carries a diacritic insert it (before the letter if it's coming) // if the phoneme carries a diacritic insert it (before the letter if it's coming)
const isOoPrefix = (phonemeInfo.alsoCanBePrefix && isBeginning && (p[pIndex - 1] !== "ا")); const isOoPrefix =
phonemeInfo.alsoCanBePrefix && isBeginning && p[pIndex - 1] !== "ا";
if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) { if (phonemeInfo.diacritic && !isEnding && !isOoPrefix) {
// using this hack to remove the space and put it after the zwarakey we're going to add after د // using this hack to remove the space and put it after the zwarakay we're going to add after د
if (isUofDu && result.slice(-5) === " ... ") { if (isUofDu && result.slice(-5) === " ... ") {
result = result.slice(0, -5) + zwarakey + " ... "; result = result.slice(0, -5) + zwarakay + " ... ";
} else if (isUofDu && result.slice(-1) === " ") { } else if (isUofDu && result.slice(-1) === " ") {
result = result.slice(0, -1) + zwarakey + " "; result = result.slice(0, -1) + zwarakay + " ";
} else { } else {
result += phonemeInfo.diacritic; result += phonemeInfo.diacritic;
} }
} }
// TODO: The middle stuff might be unnecessary/unhelpful // TODO: The middle stuff might be unnecessary/unhelpful
const isACommaWithoutAyn = (phoneme === "'" && (p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ"))); const isACommaWithoutAyn =
phoneme === "'" && p[pIndex] !== "ع" && !(isMiddle && p[pIndex] === "ئ");
// if the previous phoneme was a consonant insert a sukun // if the previous phoneme was a consonant insert a sukun
// console.log("Will I go into the adding thing?"); // console.log("Will I go into the adding thing?");
if (!isBeginning && previousPhonemeWasAConsonant && phonemeInfo.consonant && phonemes[i - 1] !== "'" && p[pIndex] !== "ع") { if (
!isBeginning &&
previousPhonemeWasAConsonant &&
phonemeInfo.consonant &&
phonemes[i - 1] !== "'" &&
p[pIndex] !== "ع"
) {
result += isDoubleConsonant ? tashdeed : sukun; result += isDoubleConsonant ? tashdeed : sukun;
} }
if (isEnding && isDoubleConsonant) { if (isEnding && isDoubleConsonant) {
@ -417,30 +643,38 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
} }
} }
// if there's a pashto letter for the phoneme, insert it // if there's a pashto letter for the phoneme, insert it
if (!isEndingAynVowel && !isACommaWithoutAyn && (phonemeInfo.possibilities || isEnding)) { if (
!isEndingAynVowel &&
!isACommaWithoutAyn &&
(phonemeInfo.possibilities || isEnding)
) {
// need the isSpace check to prevent weird behaviour with izafe // need the isSpace check to prevent weird behaviour with izafe
if (!isUofDu) { if (!isUofDu) {
if (isAnAEndingAfterHey) { if (isAnAEndingAfterHay) {
result += zwar; result += zwar;
if (p[pIndex] === " ") { if (p[pIndex] === " ") {
result += " "; result += " ";
} }
} else { } else {
result += (isDoubleConsonant || isSpace(p[pIndex])) ? "" : p[pIndex]; result += isDoubleConsonant || isSpace(p[pIndex]) ? "" : p[pIndex];
} }
} }
pIndex++; pIndex++;
} }
if (isEnding) { if (isEnding) {
if (isUofDu) { if (isUofDu) {
result += zwarakey; result += zwarakay;
} else if (phonemeInfo.hamzaOnEnd) { } else if (phonemeInfo.hamzaOnEnd) {
result += hamzaAbove; result += hamzaAbove;
} else if (phonemeInfo.takesSukunOnEnding) { } else if (phonemeInfo.takesSukunOnEnding) {
result += sukun; result += sukun;
} else if (p[pIndex] === daggerAlif) { } else if (p[pIndex] === daggerAlif) {
result += daggerAlif; result += daggerAlif;
} else if (isEndSpace(p[pIndex]) && p[pIndex - 1] === "ه" && phonemeInfo.takesDiacriticBeforeGurdaHeyEnding) { } else if (
isEndSpace(p[pIndex]) &&
p[pIndex - 1] === "ه" &&
phonemeInfo.takesDiacriticBeforeGurdaHayEnding
) {
result = result.slice(0, -1) + phonemeInfo.diacritic + "ه"; result = result.slice(0, -1) + phonemeInfo.diacritic + "ه";
} }
} }
@ -456,13 +690,20 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
} }
return; return;
} }
previousPhonemeWasAConsonant = (!isEnding && phonemeInfo.consonant) ? true : false; previousPhonemeWasAConsonant =
!isEnding && phonemeInfo.consonant ? true : false;
// ignore the ع or ئ if there's not a ' in the phonetics // ignore the ع or ئ if there's not a ' in the phonetics
const nextPhonemeInfo = phonemeTable.find((element) => phonemes[i + 1] === element.phoneme); const nextPhonemeInfo = phonemeTable.find(
(element) => phonemes[i + 1] === element.phoneme
);
if ( if (
["ع", "ئ"].includes(p[pIndex]) && ["ع", "ئ"].includes(p[pIndex]) &&
![phonemes[i + 1], phonemes[i + 2]].includes("'") && ![phonemes[i + 1], phonemes[i + 2]].includes("'") &&
!(nextPhonemeInfo && nextPhonemeInfo.diacritic && isEndSpace(p[pIndex + 1])) && // don't skip the ع on the end if there's another short letter coming after it !(
nextPhonemeInfo &&
nextPhonemeInfo.diacritic &&
isEndSpace(p[pIndex + 1])
) && // don't skip the ع on the end if there's another short letter coming after it
!(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end !(p[pIndex] === "ئ" && isEndSpace(p[pIndex + 1])) && // don't skip ئ on the end
!phonemeInfo.isIzafe !phonemeInfo.isIzafe
) { ) {
@ -476,7 +717,11 @@ export function phoneticsToDiacritics(ps: string, ph: string, forbidOoPrefixes:
return; return;
} }
// if we've arrived at a space in the Pashto, move along before the next iteration // if we've arrived at a space in the Pashto, move along before the next iteration
if (isSpace(p[pIndex]) && phonemes[i + 1] !== "-i-" && !upcomingAEndingAfterHey) { if (
isSpace(p[pIndex]) &&
phonemes[i + 1] !== "-i-" &&
!upcomingAEndingAfterHay
) {
result += " "; result += " ";
pIndex++; pIndex++;
} }

View File

@ -107,7 +107,7 @@ export const sandwiches: T.Sandwich[] = [
{ {
type: "sandwich", type: "sandwich",
before: { p: "د", f: "du" }, before: { p: "د", f: "du" },
after: { p: "په حیث", f: "pu heys" }, after: { p: "په حیث", f: "pu hays" },
e: "as", e: "as",
}, },
{ {

View File

@ -105,14 +105,14 @@ export const replacerInfo: IReplacerInfoItem[] = [
ipa: "ɪ́", ipa: "ɪ́",
}, },
{ {
char: "ey", char: "ay",
alalc: "ay", alalc: "ay",
ipa: "ai", ipa: "ay",
}, },
{ {
char: "éy", char: "áy",
alalc: "áy", alalc: "áy",
ipa: "ái", ipa: "áj",
}, },
{ {
char: "ee", char: "ee",
@ -140,9 +140,9 @@ export const replacerInfo: IReplacerInfoItem[] = [
ipa: "u:j", ipa: "u:j",
}, },
{ {
char: "eyy", char: "ey",
alalc: "y", alalc: "ey",
ipa: "ɛ̝j", ipa: "ej",
}, },
{ {
char: "e", char: "e",
@ -351,4 +351,5 @@ export const replacerInfo: IReplacerInfoItem[] = [
]; ];
// tslint:disable-next-line // tslint:disable-next-line
export const replacerRegex = /aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ey|éy|e{1,2}|ée|é|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g; export const replacerRegex =
/aay|áay|aa|áa|a|á|U|Ú|u|ú|ooy|o{1,2}|óo|ó|ay|áy|e{1,2}|ée|é|ey|éy|uy|úy|i|í|w|y|q|g|ts|sh|s|dz|z|t|T|d|D|r|R|n|N|f|b|p|x|kh|q|k|gh|g|G|j|ch|l|l|m|h/g;

View File

@ -6,9 +6,7 @@
* *
*/ */
import { import { translatePhonetics } from "./translate-phonetics";
translatePhonetics,
} from "./translate-phonetics";
const dialects = ["southern", "standard", "peshawer"]; const dialects = ["southern", "standard", "peshawer"];
const systems = ["ipa", "alalc"]; const systems = ["ipa", "alalc"];
@ -54,11 +52,11 @@ const translations = [
}, },
}, },
{ {
original: "saRey", original: "saRay",
ipa: { ipa: {
southern: "saɻai", southern: "saɻaj",
standard: "saɻai", standard: "saɻaj",
peshawer: "saɻai", peshawer: "saɻaj",
}, },
alalc: { alalc: {
southern: "saṛay", southern: "saṛay",
@ -72,10 +70,8 @@ translations.forEach((t) => {
systems.forEach((system) => { systems.forEach((system) => {
// check each dialect with given system // check each dialect with given system
dialects.forEach((dialect) => { dialects.forEach((dialect) => {
test( test(// @ts-ignore
// @ts-ignore `${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`, () => {
`${t.original} should be translated to ${t.ipa[dialect]} using ${system} with ${dialect} dialect`,
() => {
const translated = translatePhonetics(t.original, { const translated = translatePhonetics(t.original, {
// @ts-ignore // @ts-ignore
system, system,
@ -84,8 +80,7 @@ translations.forEach((t) => {
}); });
// @ts-ignore // @ts-ignore
expect(translated).toBe(t[system][dialect]); expect(translated).toBe(t[system][dialect]);
}, });
);
}); });
}); });
}); });

View File

@ -8,16 +8,21 @@
import { standardizeEntry, validateEntry } from "./validate-entry"; import { standardizeEntry, validateEntry } from "./validate-entry";
import * as T from "../../types"; import * as T from "../../types";
import { standardizePhonetics } from "./standardize-pashto";
const toTest: { const toTest: {
input: any, input: any;
output: T.DictionaryEntryError | { ok: true } | { checkComplement: true }, output: T.DictionaryEntryError | { ok: true } | { checkComplement: true };
}[] = [ }[] = [
{ {
input: { ts: undefined }, input: { ts: undefined },
output: { output: {
errors: ["missing ts", "missing i", "missing p", "missing f", "missing e"], errors: [
"missing ts",
"missing i",
"missing p",
"missing f",
"missing e",
],
p: "", p: "",
f: "", f: "",
e: "", e: "",
@ -37,7 +42,14 @@ const toTest: {
}, },
}, },
{ {
input: {"i":293,"ts":1527821299,"p":"اخطار","f":"ixtáar","e":"warning, reprimand, admonishment","c":"n. m."}, input: {
i: 293,
ts: 1527821299,
p: "اخطار",
f: "ixtáar",
e: "warning, reprimand, admonishment",
c: "n. m.",
},
output: { output: {
errors: ["script and phonetics do not match for p and f"], errors: ["script and phonetics do not match for p and f"],
p: "اخطار", p: "اخطار",
@ -48,7 +60,17 @@ const toTest: {
}, },
}, },
{ {
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbf":"puxtan"}, input: {
i: 2433,
ts: 1527815197,
p: "پښتون",
f: "puxtoon",
e: "Pashtun",
c: "n. m. unisex / adj. irreg.",
infap: "پښتانه",
infaf: "puxtaanu",
infbf: "puxtan",
},
output: { output: {
errors: ["missing infbp"], errors: ["missing infbp"],
p: "پښتون", p: "پښتون",
@ -59,7 +81,17 @@ const toTest: {
}, },
}, },
{ {
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puxtaanu","infbp":"پښتن"}, input: {
i: 2433,
ts: 1527815197,
p: "پښتون",
f: "puxtoon",
e: "Pashtun",
c: "n. m. unisex / adj. irreg.",
infap: "پښتانه",
infaf: "puxtaanu",
infbp: "پښتن",
},
output: { output: {
errors: ["missing infbf"], errors: ["missing infbf"],
p: "پښتون", p: "پښتون",
@ -70,9 +102,22 @@ const toTest: {
}, },
}, },
{ {
input: {"i":2433,"ts":1527815197,"p":"پښتون","f":"puxtoon","e":"Pashtun","c":"n. m. unisex / adj. irreg.","infap":"پښتانه","infaf":"puktaanu","infbp":"پښتن"}, input: {
i: 2433,
ts: 1527815197,
p: "پښتون",
f: "puxtoon",
e: "Pashtun",
c: "n. m. unisex / adj. irreg.",
infap: "پښتانه",
infaf: "puktaanu",
infbp: "پښتن",
},
output: { output: {
errors: ["script and phonetics do not match for infap and infaf", "missing infbf"], errors: [
"script and phonetics do not match for infap and infaf",
"missing infbf",
],
p: "پښتون", p: "پښتون",
f: "puxtoon", f: "puxtoon",
e: "Pashtun", e: "Pashtun",
@ -81,7 +126,19 @@ const toTest: {
}, },
}, },
{ {
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true}, input: {
i: 5000,
ts: 1527819674,
p: "څملاستل",
f: "tsumlaastúl",
e: "to lie down",
l: 1596485996977,
separationAtP: 2,
c: "v. intrans. seperable",
psp: "څمل",
psf: "tsaml",
noOo: true,
},
output: { output: {
errors: ["missing separationAtF"], errors: ["missing separationAtF"],
p: "څملاستل", p: "څملاستل",
@ -92,9 +149,24 @@ const toTest: {
}, },
}, },
{ {
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"sumlaastúl","e":"to lie down","l":1596485996977,"separationAtP":2,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true}, input: {
i: 5000,
ts: 1527819674,
p: "څملاستل",
f: "sumlaastúl",
e: "to lie down",
l: 1596485996977,
separationAtP: 2,
c: "v. intrans. seperable",
psp: "څمل",
psf: "tsaml",
noOo: true,
},
output: { output: {
errors: ["script and phonetics do not match for p and f", "missing separationAtF"], errors: [
"script and phonetics do not match for p and f",
"missing separationAtF",
],
p: "څملاستل", p: "څملاستل",
f: "sumlaastúl", f: "sumlaastúl",
e: "to lie down", e: "to lie down",
@ -103,7 +175,19 @@ const toTest: {
}, },
}, },
{ {
input: {"i":5000,"ts":1527819674,"p":"څملاستل","f":"tsumlaastúl","e":"to lie down","l":1596485996977,"separationAtF":4,"c":"v. intrans. seperable","psp":"څمل","psf":"tsaml","noOo":true}, input: {
i: 5000,
ts: 1527819674,
p: "څملاستل",
f: "tsumlaastúl",
e: "to lie down",
l: 1596485996977,
separationAtF: 4,
c: "v. intrans. seperable",
psp: "څمل",
psf: "tsaml",
noOo: true,
},
output: { output: {
errors: ["missing separationAtP"], errors: ["missing separationAtP"],
p: "څملاستل", p: "څملاستل",
@ -114,7 +198,14 @@ const toTest: {
}, },
}, },
{ {
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","c":"v. stat. comp. trans."}, input: {
i: 2222,
ts: 1571859113828,
p: "پخول",
f: "pakhawul",
e: "to cook, prepare, to cause to ripen, mature",
c: "v. stat. comp. trans.",
},
output: { output: {
errors: ["missing complement for compound verb"], errors: ["missing complement for compound verb"],
p: "پخول", p: "پخول",
@ -125,21 +216,50 @@ const toTest: {
}, },
}, },
{ {
input: {"i":2222,"ts":1571859113828,"p":"پخول","f":"pakhawul","e":"to cook, prepare, to cause to ripen, mature","l":1574867531681,"c":"v. stat. comp. trans."}, input: {
i: 2222,
ts: 1571859113828,
p: "پخول",
f: "pakhawul",
e: "to cook, prepare, to cause to ripen, mature",
l: 1574867531681,
c: "v. stat. comp. trans.",
},
output: { output: {
checkComplement: true, checkComplement: true,
}, },
}, },
{ {
input: {"i":2231,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."}, input: {
i: 2231,
ts: 1527812013,
p: "پراخ",
f: "praakh, paráakh",
e: "wide, broad, spacious, vast",
c: "adj.",
},
output: { ok: true }, output: { ok: true },
}, },
{ {
input: {"i":0,"ts":1527812013,"p":"پراخ","f":"praakh, paráakh","e":"wide, broad, spacious, vast","c":"adj."}, input: {
i: 0,
ts: 1527812013,
p: "پراخ",
f: "praakh, paráakh",
e: "wide, broad, spacious, vast",
c: "adj.",
},
output: { ok: true }, output: { ok: true },
}, },
{ {
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj."}, input: {
i: 12,
ts: 1575058859661,
p: "آبدار",
f: "aawdáar",
e: "watery, damp, humid, juicy",
c: "adj.",
},
output: { output: {
errors: ["script and phonetics do not match for p and f"], errors: ["script and phonetics do not match for p and f"],
p: "آبدار", p: "آبدار",
@ -150,37 +270,84 @@ const toTest: {
}, },
}, },
{ {
input: {"ts":1591033069786,"i":7717,"p":"ستړی کول","f":"stuRey kawul","g":"stuReykedul","e":"to get tired, fatigued","c":"v. stat. comp. intrans.","l":1527815306,"ec":"get","ep":"tired"}, input: {
ts: 1591033069786,
i: 7717,
p: "ستړی کول",
f: "stuRay kawul",
g: "stuRaykedul",
e: "to get tired, fatigued",
c: "v. stat. comp. intrans.",
l: 1527815306,
ec: "get",
ep: "tired",
},
output: { output: {
errors: ["wrong ending for intrans. stat. comp"], errors: ["wrong ending for intrans. stat. comp"],
p: "ستړی کول", p: "ستړی کول",
f: "stuRey kawul", f: "stuRay kawul",
e: "to get tired, fatigued", e: "to get tired, fatigued",
ts: 1591033069786, ts: 1591033069786,
erroneousFields: ["p", "f"], erroneousFields: ["p", "f"],
}, },
}, },
{ {
input: {"ts":1591033078746,"i":7716,"p":"ستړی کېدل","f":"stuRey kedul","g":"stuReykawul","e":"to make tired, wear out","c":"v. stat. comp. trans.","l":1527815306,"ec":"make","ep":"tired"}, input: {
ts: 1591033078746,
i: 7716,
p: "ستړی کېدل",
f: "stuRay kedul",
g: "stuRaykawul",
e: "to make tired, wear out",
c: "v. stat. comp. trans.",
l: 1527815306,
ec: "make",
ep: "tired",
},
output: { output: {
errors: ["wrong ending for trans. stat. comp"], errors: ["wrong ending for trans. stat. comp"],
p: "ستړی کېدل", p: "ستړی کېدل",
f: "stuRey kedul", f: "stuRay kedul",
e: "to make tired, wear out", e: "to make tired, wear out",
ts: 1591033078746, ts: 1591033078746,
erroneousFields: ["p", "f"], erroneousFields: ["p", "f"],
}, },
}, },
{ {
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true}, input: {
i: 12,
ts: 1575058859661,
p: "آبدار",
f: "aawdáar",
e: "watery, damp, humid, juicy",
c: "adj.",
diacExcept: true,
},
output: { ok: true }, output: { ok: true },
}, },
{ {
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true}, input: {
i: 12,
ts: 1575058859661,
p: "آبدار",
f: "aawdáar",
e: "watery, damp, humid, juicy",
c: "adj.",
diacExcept: true,
},
output: { ok: true }, output: { ok: true },
}, },
{ {
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, input: {
ts: 1527812488,
i: 1934,
p: "بې چاره",
f: "bechaara",
g: "bechaara",
e: "poor thing, pitiful",
r: 3,
c: "adj.",
},
output: { output: {
errors: ["spacing discrepency between p and f"], errors: ["spacing discrepency between p and f"],
p: "بې چاره", p: "بې چاره",
@ -191,7 +358,16 @@ const toTest: {
}, },
}, },
{ {
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, input: {
ts: 1527812488,
i: 1934,
p: "بېچاره",
f: "be chaara",
g: "bechaara",
e: "poor thing, pitiful",
r: 3,
c: "adj.",
},
output: { output: {
errors: ["spacing discrepency between p and f"], errors: ["spacing discrepency between p and f"],
p: "بېچاره", p: "بېچاره",
@ -202,11 +378,31 @@ const toTest: {
}, },
}, },
{ {
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, input: {
output: { ok: true } ts: 1527812488,
i: 1934,
p: "بې چاره",
f: "be chaara",
g: "bechaara",
e: "poor thing, pitiful",
r: 3,
c: "adj.",
},
output: { ok: true },
}, },
{ {
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"}, input: {
ts: 1527814265,
i: 12969,
p: "مکتب",
f: "maktab",
g: "maktab",
e: "school",
r: 4,
c: "n. m.",
app: "مکاتب",
apf: "ma kaatib",
},
output: { output: {
errors: ["spacing discrepency between app and apf"], errors: ["spacing discrepency between app and apf"],
p: "مکتب", p: "مکتب",
@ -217,9 +413,23 @@ const toTest: {
}, },
}, },
{ {
input: {"ts":1527815870,"i":183,"p":"اثر","f":"asar","g":"asar","e":"influence, impression, tracks, affect","r":4,"c":"n. m.","app":"اثرات, آثار","apf":"asráat"}, input: {
ts: 1527815870,
i: 183,
p: "اثر",
f: "asar",
g: "asar",
e: "influence, impression, tracks, affect",
r: 4,
c: "n. m.",
app: "اثرات, آثار",
apf: "asráat",
},
output: { output: {
errors: ["difference in variation length between app and apf", "script and phonetics do not match for app and apf"], errors: [
"difference in variation length between app and apf",
"script and phonetics do not match for app and apf",
],
p: "اثر", p: "اثر",
f: "asar", f: "asar",
e: "influence, impression, tracks, affect", e: "influence, impression, tracks, affect",
@ -236,6 +446,23 @@ test("validateEntry should work", () => {
}); });
test("standardizeEntry", () => { test("standardizeEntry", () => {
expect(standardizeEntry({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaaee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."})) expect(
.toEqual({"i":195,"ts":1527822036,"p":"اجتماعي","f":"ijtimaa'ee, ijtimaayee","g":"ijtimaaee,ijtimaayee","e":"public, social, societal","c":"adj."}); standardizeEntry({
i: 195,
ts: 1527822036,
p: "اجتماعي",
f: "ijtimaaee, ijtimaayee",
g: "ijtimaaee,ijtimaayee",
e: "public, social, societal",
c: "adj.",
})
).toEqual({
i: 195,
ts: 1527822036,
p: "اجتماعي",
f: "ijtimaa'ee, ijtimaayee",
g: "ijtimaaee,ijtimaayee",
e: "public, social, societal",
c: "adj.",
});
}); });