just trying out parsing very, very basic VPs
This commit is contained in:
parent
4cc81c8b10
commit
e910de719f
|
@ -146,7 +146,7 @@ function VBBlock({
|
|||
script: "p" | "f";
|
||||
block:
|
||||
| T.VBBasic
|
||||
| T.VBGenNum
|
||||
| (T.VBBasic & (T.VBPartInfo | T.VBAbilityInfo))
|
||||
| (T.VBBasic & {
|
||||
person: T.Person;
|
||||
});
|
||||
|
@ -167,8 +167,8 @@ function VBBlock({
|
|||
);
|
||||
}
|
||||
const infInfo =
|
||||
"gender" in block
|
||||
? getEnglishGenNumInfo(block.gender, block.number)
|
||||
"info" in block && block.info.type === "ppart"
|
||||
? getEnglishGenNumInfo(block.info.genNum.gender, block.info.genNum.number)
|
||||
: "person" in block
|
||||
? getEnglishPersonInfo(block.person, "short")
|
||||
: "";
|
||||
|
|
|
@ -132,7 +132,7 @@ function grabLength(
|
|||
if (vb.type === "welded") {
|
||||
return {
|
||||
...vb,
|
||||
right: grabVBLength(vb.right) as T.VBBasic | T.VBGenNum,
|
||||
right: grabVBLength(vb.right) as T.VBBasic | T.VBP,
|
||||
};
|
||||
}
|
||||
if (!(length in vb.ps)) {
|
||||
|
|
|
@ -3,12 +3,17 @@ import * as T from "../types";
|
|||
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||
import { lookup } from "../lib/src/parsing/lookup";
|
||||
import { tokenizer } from "../lib/src/parsing/tokenizer";
|
||||
import { NPDisplay } from "../components/library";
|
||||
import {
|
||||
CompiledPTextDisplay,
|
||||
NPDisplay,
|
||||
compileVP,
|
||||
renderVP,
|
||||
} from "../components/library";
|
||||
|
||||
function ParserDemo({ opts }: { opts: T.TextOptions }) {
|
||||
const [text, setText] = useState<string>("");
|
||||
const [result, setResult] = useState<
|
||||
{ inflected: boolean; selection: T.NPSelection }[]
|
||||
ReturnType<typeof parsePhrase>["success"]
|
||||
>([]);
|
||||
const [errors, setErrors] = useState<string[]>([]);
|
||||
function handleChange(e: React.ChangeEvent<HTMLInputElement>) {
|
||||
|
@ -26,7 +31,7 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
|
|||
}
|
||||
return (
|
||||
<div className="mt-3" style={{ marginBottom: "1000px" }}>
|
||||
<p>Type an adjective or noun (w or without adjs) to parse it</p>
|
||||
<p>Type a NP</p>
|
||||
<div className="form-group mb-2">
|
||||
<input
|
||||
dir="rtl"
|
||||
|
@ -45,17 +50,49 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
|
|||
{errors.length > 0 && (
|
||||
<>
|
||||
<div className="alert alert-danger" role="alert">
|
||||
{errors.length > 0 ? (
|
||||
<>
|
||||
<div>possible errors:</div>
|
||||
<ul>
|
||||
{errors.map((e) => (
|
||||
<div>{e}</div>
|
||||
<li>{e}</li>
|
||||
))}
|
||||
</ul>
|
||||
</>
|
||||
) : (
|
||||
<div>{errors[0]}</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="text-center">Did you mean:</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{result.map((np) => (
|
||||
<NPDisplay NP={np.selection} inflected={np.inflected} opts={opts} />
|
||||
{result.map((res) =>
|
||||
"inflected" in res ? (
|
||||
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
||||
) : "verb" in res ? (
|
||||
(() => {
|
||||
const rendered = renderVP(res);
|
||||
const compiled = compileVP(rendered, res.form);
|
||||
return (
|
||||
<div>
|
||||
<CompiledPTextDisplay compiled={compiled} opts={opts} />
|
||||
{compiled.e && (
|
||||
<div className={`text-muted mt-2 text-center`}>
|
||||
{compiled.e.map((e, i) => (
|
||||
<div key={i}>{e}</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})()
|
||||
) : (
|
||||
<samp>
|
||||
<pre>{JSON.stringify(res, null, " ")}</pre>
|
||||
</samp>
|
||||
)
|
||||
)}
|
||||
<details>
|
||||
<summary>AST</summary>
|
||||
<samp>
|
||||
|
|
|
@ -92,7 +92,7 @@ export function mapVerbRenderedOutput(
|
|||
f: (a: T.PsString) => T.PsString,
|
||||
[a, b]: T.VerbRenderedOutput
|
||||
): T.VerbRenderedOutput {
|
||||
return [fmapVHead(a), fmapV(b)];
|
||||
return [fmapVHead(a), fmapVE(b)];
|
||||
function fmapVHead([v]: [T.VHead] | []): [T.VHead] | [] {
|
||||
if (v === undefined) {
|
||||
return [];
|
||||
|
@ -118,10 +118,10 @@ export function mapVerbRenderedOutput(
|
|||
ps: f(comp.ps),
|
||||
};
|
||||
}
|
||||
function fmapV(v: [T.VB, T.VBE] | [T.VBE]): [T.VB, T.VBE] | [T.VBE] {
|
||||
return v.map(fmapVB) as [T.VB, T.VBE] | [T.VBE];
|
||||
function fmapVE(v: [T.VBP, T.VBE] | [T.VBE]): [T.VBP, T.VBE] | [T.VBE] {
|
||||
return v.map(fmapVB) as [T.VBP, T.VBE] | [T.VBE];
|
||||
}
|
||||
function fmapVB<V extends T.VB | T.VBE>(v: V): V {
|
||||
function fmapVB<V extends T.VB | T.VBE | T.VBP>(v: V): V {
|
||||
if (v.type === "welded") {
|
||||
return {
|
||||
...v,
|
||||
|
|
|
@ -1031,11 +1031,11 @@ export const persons = [
|
|||
person: 9,
|
||||
},
|
||||
{
|
||||
label: { subject: "thay (m. pl.)", object: "them (m. pl.)" },
|
||||
label: { subject: "they (m. pl.)", object: "them (m. pl.)" },
|
||||
person: 10,
|
||||
},
|
||||
{
|
||||
label: { subject: "thay (f. pl.)", object: "them (f. pl.)" },
|
||||
label: { subject: "they (f. pl.)", object: "them (f. pl.)" },
|
||||
person: 11,
|
||||
},
|
||||
];
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -26,6 +26,7 @@ import { getPastParticiple, getRootStem } from "./roots-and-stems";
|
|||
import {
|
||||
isKedul,
|
||||
perfectTenseToEquative,
|
||||
vEntry,
|
||||
verbEndingConcat,
|
||||
} from "./rs-helpers";
|
||||
import {
|
||||
|
@ -33,7 +34,24 @@ import {
|
|||
accentPsSyllable,
|
||||
removeAccents,
|
||||
} from "../accent-helpers";
|
||||
|
||||
const kedulStat = vEntry({
|
||||
ts: 1581086654898,
|
||||
i: 11100,
|
||||
p: "کېدل",
|
||||
f: "kedul",
|
||||
g: "kedul",
|
||||
e: "to become _____",
|
||||
r: 2,
|
||||
c: "v. intrans.",
|
||||
ssp: "ش",
|
||||
ssf: "sh",
|
||||
prp: "شول",
|
||||
prf: "shwul",
|
||||
pprtp: "شوی",
|
||||
pprtf: "shúway",
|
||||
noOo: true,
|
||||
ec: "become",
|
||||
});
|
||||
const formulas: Record<
|
||||
T.VerbTense | T.ImperativeTense,
|
||||
{
|
||||
|
@ -123,11 +141,12 @@ export function renderVerb({
|
|||
const type = isAbilityTense(tense) ? "ability" : "basic";
|
||||
const transitive = object !== undefined;
|
||||
const king = transitive && isPast ? object : subject;
|
||||
const base = isPast ? "root" : "stem";
|
||||
|
||||
// #1 get the appropriate root / stem
|
||||
const [vHead, rest] = getRootStem({
|
||||
verb,
|
||||
rs: isPast ? "root" : "stem",
|
||||
rs: base,
|
||||
aspect: negative && isImperativeTense(tense) ? "imperfective" : aspect,
|
||||
voice,
|
||||
type,
|
||||
|
@ -148,6 +167,8 @@ export function renderVerb({
|
|||
pastThird: isPast && king === T.Person.ThirdSingMale,
|
||||
aspect,
|
||||
basicForm: type === "basic" && voice === "active",
|
||||
base,
|
||||
ability: type === "ability",
|
||||
}),
|
||||
],
|
||||
};
|
||||
|
@ -165,7 +186,7 @@ function renderPerfectVerb({
|
|||
voice: T.Voice;
|
||||
}): {
|
||||
hasBa: boolean;
|
||||
vbs: [[], [T.VB, T.VBE]];
|
||||
vbs: [[], [T.VBP, T.VBE]];
|
||||
objComp: T.Rendered<T.NPSelection> | undefined;
|
||||
} {
|
||||
const hasBa = perfectTenseHasBa(tense);
|
||||
|
@ -178,6 +199,10 @@ function renderPerfectVerb({
|
|||
type: "VB",
|
||||
person,
|
||||
ps: fmapSingleOrLengthOpts((x) => x[row][col], equative),
|
||||
info: {
|
||||
type: "equative",
|
||||
tense: perfectTenseToEquative(tense),
|
||||
},
|
||||
};
|
||||
return {
|
||||
hasBa,
|
||||
|
@ -194,32 +219,46 @@ function addEnding({
|
|||
pastThird,
|
||||
aspect,
|
||||
basicForm,
|
||||
base,
|
||||
ability,
|
||||
}: {
|
||||
rs: [T.VB, T.VBA] | [T.VBA];
|
||||
rs: [T.VBP, T.VB] | [T.VB];
|
||||
ending: T.SingleOrLengthOpts<T.PsString[]>;
|
||||
person: T.Person;
|
||||
verb: T.VerbEntry;
|
||||
pastThird: boolean;
|
||||
aspect: T.Aspect;
|
||||
basicForm: boolean;
|
||||
}): [T.VB, T.VBE] | [T.VBE] {
|
||||
base: "stem" | "root";
|
||||
ability: boolean;
|
||||
}): [T.VBP, T.VBE] | [T.VBE] {
|
||||
return rs.length === 2
|
||||
? [rs[0], addEnd(rs[1], ending)]
|
||||
: [addEnd(rs[0], ending)];
|
||||
function addEnd(
|
||||
vba: T.VBA,
|
||||
ending: T.SingleOrLengthOpts<T.PsString[]>
|
||||
): T.VBE {
|
||||
if (vba.type === "welded") {
|
||||
function addEnd(vb: T.VB, ending: T.SingleOrLengthOpts<T.PsString[]>): T.VBE {
|
||||
const info = {
|
||||
type: "verb" as const,
|
||||
aspect: ability ? "perfective" : aspect,
|
||||
base,
|
||||
verb: ability ? kedulStat : verb,
|
||||
...(ability
|
||||
? {
|
||||
abilityAux: true,
|
||||
}
|
||||
: {}),
|
||||
};
|
||||
if (vb.type === "welded") {
|
||||
return {
|
||||
...vba,
|
||||
right: addToVBBasicEnd(vba.right, ending),
|
||||
...vb,
|
||||
right: addToVBBasicEnd(vb.right, ending),
|
||||
person,
|
||||
info,
|
||||
};
|
||||
}
|
||||
return {
|
||||
...addToVBBasicEnd(vba, ending),
|
||||
...addToVBBasicEnd(vb, ending),
|
||||
person,
|
||||
info,
|
||||
};
|
||||
}
|
||||
function addToVBBasicEnd(
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -15,7 +15,7 @@ import {
|
|||
countSyllables,
|
||||
removeAccents,
|
||||
} from "../accent-helpers";
|
||||
import { isKawulVerb, isTlulVerb } from "../type-predicates";
|
||||
import { isKawulVerb } from "../type-predicates";
|
||||
import {
|
||||
vEntry,
|
||||
addAbilityEnding,
|
||||
|
@ -123,42 +123,60 @@ function getAbilityRs(
|
|||
rs: "root" | "stem",
|
||||
voice: T.Voice,
|
||||
genderNum: T.GenderNumber
|
||||
): [[] | [T.VHead], [T.VB, T.VBA]] {
|
||||
): [[] | [T.VHead], [T.VBP, T.VB]] {
|
||||
// https://grammar.lingdocs.com/verbs/ability/#exceptions
|
||||
const losesAspect =
|
||||
isTlulVerb(verb) ||
|
||||
(verb.entry.prp && verb.entry.p !== "کول") ||
|
||||
(isStatComp(verb) && vTransitivity(verb) === "intransitive");
|
||||
const asp = losesAspect ? "imperfective" : aspect;
|
||||
const [vhead, [basicroot]] =
|
||||
voice === "passive"
|
||||
? getPassiveRs(verb, "imperfective", "root", genderNum)
|
||||
: getRoot(verb, genderNum, losesAspect ? "imperfective" : aspect);
|
||||
return [vhead, [addAbilityEnding(basicroot), rs === "root" ? shwulVB : shVB]];
|
||||
: getRoot(verb, genderNum, asp);
|
||||
return [
|
||||
vhead,
|
||||
[addAbilityEnding(basicroot, verb, asp), rs === "root" ? shwulVB : shVB],
|
||||
];
|
||||
}
|
||||
|
||||
export function getPastParticiple(
|
||||
verb: T.VerbEntry,
|
||||
voice: T.Voice,
|
||||
{ gender, number }: { gender: T.Gender; number: T.NounNumber }
|
||||
): T.VBGenNum | T.WeldedGN {
|
||||
): T.VBP {
|
||||
const v = removeFVarientsFromVerb(verb);
|
||||
if (voice === "passive") {
|
||||
return getPassivePp(v, { gender, number });
|
||||
}
|
||||
if (isStatComp(v) && v.complement) {
|
||||
return weld(
|
||||
return {
|
||||
...weld(
|
||||
makeComplement(v.complement, { gender, number }),
|
||||
getPastParticiple(statVerb[vTransitivity(verb)], voice, {
|
||||
gender,
|
||||
number,
|
||||
}) as T.VBGenNum
|
||||
);
|
||||
})
|
||||
),
|
||||
info: {
|
||||
type: "ppart",
|
||||
genNum: { gender, number },
|
||||
verb,
|
||||
},
|
||||
};
|
||||
}
|
||||
if (verb.entry.pprtp && verb.entry.pprtf) {
|
||||
const base = makePsString(verb.entry.pprtp, verb.entry.pprtf);
|
||||
return {
|
||||
type: "VB",
|
||||
ps: inflectPattern3(base, { gender, number }),
|
||||
info: {
|
||||
type: "ppart",
|
||||
verb,
|
||||
genNum: {
|
||||
gender,
|
||||
number,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
const basicRoot = getRoot(
|
||||
|
@ -166,7 +184,7 @@ export function getPastParticiple(
|
|||
{ gender, number },
|
||||
"imperfective"
|
||||
)[1][0];
|
||||
const longRoot = getLongVB(basicRoot);
|
||||
const longRoot = getLongVB(basicRoot) as T.VBNoLenghts<T.VB>;
|
||||
const rootWLengths = possiblePPartLengths(longRoot);
|
||||
/* istanbul ignore next */
|
||||
if ("right" in rootWLengths) {
|
||||
|
@ -175,8 +193,14 @@ export function getPastParticiple(
|
|||
return {
|
||||
...rootWLengths,
|
||||
ps: addTail(rootWLengths.ps),
|
||||
info: {
|
||||
type: "ppart",
|
||||
verb,
|
||||
genNum: {
|
||||
gender,
|
||||
number,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
function addTail(
|
||||
|
@ -192,12 +216,19 @@ export function getPastParticiple(
|
|||
function getPassivePp(
|
||||
verb: T.VerbEntryNoFVars,
|
||||
genderNumber: T.GenderNumber
|
||||
): T.WeldedGN {
|
||||
): T.VBP {
|
||||
if (isStatComp(verb) && verb.complement) {
|
||||
return weld(
|
||||
return {
|
||||
...weld(
|
||||
makeComplement(verb.complement, genderNumber),
|
||||
getPassivePp(statVerb.transitive, genderNumber)
|
||||
);
|
||||
),
|
||||
info: {
|
||||
type: "ppart",
|
||||
verb,
|
||||
genNum: genderNumber,
|
||||
},
|
||||
};
|
||||
}
|
||||
const basicRoot = getRoot(
|
||||
verb,
|
||||
|
@ -205,38 +236,26 @@ function getPassivePp(
|
|||
isKawulVerb(verb) ? "perfective" : "imperfective"
|
||||
)[1][0];
|
||||
const longRoot = getLongVB(basicRoot);
|
||||
const kedulVb: T.VBGenNum = getPastParticiple(
|
||||
const kedulVb = getPastParticiple(
|
||||
statVerb.intransitive,
|
||||
"active",
|
||||
genderNumber
|
||||
) as T.VBGenNum;
|
||||
return weld(longRoot, kedulVb);
|
||||
}
|
||||
|
||||
function getPassiveRs(
|
||||
verb: T.VerbEntryNoFVars,
|
||||
aspect: T.Aspect,
|
||||
rs: "root" | "stem",
|
||||
genderNumber: T.GenderNumber
|
||||
): [[] | [T.VHead], [T.VBA]] {
|
||||
const [vHead, [basicRoot]] = getRoot(verb, genderNumber, aspect);
|
||||
const longRoot = getLongVB(basicRoot);
|
||||
const kedulVba = getRootStem({
|
||||
verb: statVerb.intransitive,
|
||||
aspect,
|
||||
rs,
|
||||
type: "basic",
|
||||
voice: "active",
|
||||
genderNumber: { gender: "masc", number: "singular" },
|
||||
})[1][0] as T.VBBasic;
|
||||
return [vHead, [weld(longRoot, kedulVba)]];
|
||||
);
|
||||
return {
|
||||
...weld(longRoot, kedulVb),
|
||||
info: {
|
||||
type: "ppart",
|
||||
verb,
|
||||
genNum: genderNumber,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function getRoot(
|
||||
verb: T.VerbEntryNoFVars,
|
||||
genderNum: T.GenderNumber,
|
||||
aspect: T.Aspect
|
||||
): [[T.VHead] | [], [T.VBA]] {
|
||||
): [[T.VHead] | [], [T.VB]] {
|
||||
if (
|
||||
verb.complement &&
|
||||
isStatComp(verb) &&
|
||||
|
@ -430,6 +449,25 @@ function getStem(
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the passive root/stem for a verb by welding the long form of the
 * verb's own root onto the root/stem of the intransitive stative verb.
 *
 * @param verb - verb entry (without f-variants) to put in the passive
 * @param aspect - aspect used for both the verb's root and the auxiliary
 * @param rs - whether the auxiliary part is taken from its root or its stem
 * @param genderNumber - gender/number handed to getRoot for the main verb
 * @returns the head returned by getRoot (possibly empty) and one welded VB
 */
function getPassiveRs(
  verb: T.VerbEntryNoFVars,
  aspect: T.Aspect,
  rs: "root" | "stem",
  genderNumber: T.GenderNumber
): [[] | [T.VHead], [T.VB]] {
  const [head, roots] = getRoot(verb, genderNumber, aspect);
  const left = getLongVB(roots[0]);
  // the auxiliary is always requested as masc. singular, regardless of the
  // genderNumber that was used for the main verb's root
  const auxGenderNumber: T.GenderNumber = {
    gender: "masc",
    number: "singular",
  };
  const right = getRootStem({
    verb: statVerb.intransitive,
    aspect,
    rs,
    type: "basic",
    voice: "active",
    genderNumber: auxGenderNumber,
  })[1][0] as T.VBBasic;
  return [head, [weld(left, right)]];
}
|
||||
|
||||
// TODO: This is a nasty and messy way to do it with the length options included
|
||||
function getPerfectiveHead(
|
||||
base: T.PsString,
|
||||
|
|
|
@ -123,19 +123,10 @@ export function verbEndingConcat(
|
|||
);
|
||||
}
|
||||
|
||||
// TODO: THIS IS UGGGGLY NEED TO THINK THROUGH THE TYPING ON THE WELDING
|
||||
export function weld(
|
||||
left: T.Welded["left"],
|
||||
right: T.VBGenNum | T.WeldedGN
|
||||
): T.WeldedGN;
|
||||
export function weld(
|
||||
left: T.Welded["left"],
|
||||
right: T.VBBasic | T.NComp | T.Welded
|
||||
): T.Welded;
|
||||
export function weld(
|
||||
left: T.Welded["left"],
|
||||
right: T.VBBasic | T.VBGenNum | T.Welded | T.NComp | T.WeldedGN
|
||||
): T.Welded | T.WeldedGN {
|
||||
right: T.VB | T.VBP | T.NComp
|
||||
): T.Welded {
|
||||
if (right.type === "welded") {
|
||||
return weld(weld(left, right.left), right.right);
|
||||
}
|
||||
|
@ -218,7 +209,11 @@ export function tlulPerfectiveStem(person: {
|
|||
];
|
||||
}
|
||||
|
||||
export function addAbilityEnding(vb: T.VBA): T.VBA {
|
||||
export function addAbilityEnding(
|
||||
vb: T.VB,
|
||||
verb: T.VerbEntry,
|
||||
aspect: T.Aspect
|
||||
): T.VBP {
|
||||
const abilityEnding: T.PsString[] = [
|
||||
{ p: "ی", f: "ay" },
|
||||
{ p: "ای", f: "aay" },
|
||||
|
@ -227,9 +222,21 @@ export function addAbilityEnding(vb: T.VBA): T.VBA {
|
|||
return {
|
||||
...vb,
|
||||
right: addToEnd(vb.right, abilityEnding),
|
||||
info: {
|
||||
type: "ability",
|
||||
verb,
|
||||
aspect,
|
||||
},
|
||||
};
|
||||
}
|
||||
return addToEnd(vb, abilityEnding);
|
||||
return {
|
||||
...addToEnd(vb, abilityEnding),
|
||||
info: {
|
||||
type: "ability",
|
||||
verb,
|
||||
aspect,
|
||||
},
|
||||
};
|
||||
function addToEnd(vb: T.VBBasic, end: T.PsString[]): T.VBBasic {
|
||||
/* istanbul ignore next */
|
||||
if (!("long" in vb.ps)) {
|
||||
|
@ -248,8 +255,8 @@ export function addAbilityEnding(vb: T.VBA): T.VBA {
|
|||
}
|
||||
|
||||
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBBasic>): T.VBBasic;
|
||||
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA;
|
||||
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA {
|
||||
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VB>): T.VB;
|
||||
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VB>): T.VB {
|
||||
const shortenableEndings = ["ښتل", "ستل", "وتل"];
|
||||
const wrul = ["وړل", "راوړل", "وروړل", "دروړل"];
|
||||
// can't find a case where this is used - type safety
|
||||
|
@ -294,12 +301,11 @@ export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA {
|
|||
return vba;
|
||||
}
|
||||
|
||||
export function getLongVB(vb: T.VBBasic): T.VBNoLenghts<T.VBBasic>;
|
||||
export function getLongVB(vb: T.VBA): T.VBNoLenghts<T.VBA>;
|
||||
export function getLongVB(vb: T.VBA): T.VBNoLenghts<T.VBA> {
|
||||
export function getLongVB(vb: T.VB): T.VBNoLenghts<T.VB> {
|
||||
if (vb.type === "welded") {
|
||||
return {
|
||||
...vb,
|
||||
// @ts-ignore
|
||||
right: getLongVB(vb.right),
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import * as T from "../../../types";
|
||||
import { endsInConsonant } from "../p-text-helpers";
|
||||
import {
|
||||
isPattern1Entry,
|
||||
isPattern2Entry,
|
||||
|
@ -50,6 +51,7 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
if (noun) {
|
||||
// TODO: could merge these queries for more efficiency ??
|
||||
queries.push({
|
||||
search: { ppp: s },
|
||||
details: {
|
||||
|
@ -59,7 +61,17 @@ export function getInflectionQueries(
|
|||
predicate: isNounEntry,
|
||||
},
|
||||
});
|
||||
if (s.endsWith("و")) {
|
||||
queries.push({
|
||||
search: { app: s },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["masc", "fem"],
|
||||
plural: true,
|
||||
predicate: isNounEntry,
|
||||
},
|
||||
});
|
||||
// TODO: what about short vowel ending nouns with وو etc
|
||||
if (s.endsWith("و") && !["ا", "و"].includes(s.charAt(s.length - 2))) {
|
||||
queries.push({
|
||||
search: { ppp: s.slice(0, -1) },
|
||||
details: {
|
||||
|
@ -69,6 +81,15 @@ export function getInflectionQueries(
|
|||
predicate: isMascNounEntry,
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { app: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
plural: true,
|
||||
predicate: isMascNounEntry,
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { ppp: s.slice(0, -1) + "ې" },
|
||||
details: {
|
||||
|
@ -218,6 +239,15 @@ export function getInflectionQueries(
|
|||
!isPattern4Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { app: s.slice(0, -2) },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
plural: true,
|
||||
predicate: (e) => isNounEntry(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
if (
|
||||
s.endsWith("ګانو") &&
|
||||
|
@ -364,6 +394,18 @@ export function getInflectionQueries(
|
|||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
if (noun) {
|
||||
// bundled plural
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [0],
|
||||
plural: true,
|
||||
gender: ["masc"],
|
||||
predicate: (e) => !isPattern5Entry(e) && endsInConsonant(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
queries.push({
|
||||
search: { infbp: s.slice(0, -1) },
|
||||
details: {
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
import nounsAdjs from "../../../nouns-adjs";
|
||||
import verbs from "../../../verbs";
|
||||
import * as T from "../../../types";
|
||||
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
|
||||
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
|
||||
|
||||
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
|
||||
const [key, value] = Object.entries(s)[0];
|
||||
// TODO: could make this more efficient - merging ppp and app queries?
|
||||
if (key === "ppp") {
|
||||
return nounsAdjs.filter(
|
||||
(e) =>
|
||||
|
@ -14,16 +17,42 @@ export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
|
|||
.includes(value as string)
|
||||
);
|
||||
}
|
||||
// `app` can hold several comma-separated forms, so compare the query against
// each trimmed form instead of the raw field. This branch must test for
// "app": a "ppp" query has already been answered by the branch above, so
// testing "ppp" again here would leave this code unreachable and send `app`
// queries to the strict-equality fallback below.
if (key === "app") {
  return nounsAdjs.filter(
    (e) =>
      e.app &&
      e.app
        .split(",")
        .map((w) => w.trim())
        .includes(value as string)
  );
}
|
||||
// @ts-ignore
|
||||
return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
|
||||
}
|
||||
|
||||
/**
 * Returns every verb in the imported `verbs` list whose dictionary entry
 * satisfies the given predicate.
 *
 * @param s - predicate applied to each verb's `entry`
 * @returns the verbs (entry plus whatever else the list item carries) that matched
 */
export function verbLookup(
  s: (e: T.VerbDictionaryEntry) => boolean
): T.VerbEntry[] {
  return verbs.filter((verb) => s(verb.entry));
}
|
||||
|
||||
export function wordQuery(word: string, type: "adj"): T.AdjectiveEntry;
|
||||
export function wordQuery(word: string, type: "noun"): T.NounEntry;
|
||||
export function wordQuery(word: string, type: "verb"): T.VerbEntryNoFVars;
|
||||
export function wordQuery(
|
||||
word: string,
|
||||
type: "noun" | "adj"
|
||||
): T.NounEntry | T.AdjectiveEntry {
|
||||
type: "noun" | "adj" | "verb"
|
||||
): T.NounEntry | T.AdjectiveEntry | T.VerbEntryNoFVars {
|
||||
if (type === "verb") {
|
||||
const verb = verbs.find(
|
||||
(x) => x.entry.p === word || x.entry.f === word || x.entry.g === word
|
||||
);
|
||||
if (!verb) {
|
||||
throw new Error(`missing ${word} in word query`);
|
||||
}
|
||||
return removeFVarientsFromVerb(verb);
|
||||
}
|
||||
const entry = nounsAdjs.find(
|
||||
(x) => x.p === word || x.f === word || x.g === word
|
||||
);
|
||||
|
|
|
@ -1,67 +1,14 @@
|
|||
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
|
||||
import * as T from "../../../types";
|
||||
import { lookup } from "./lookup";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { parseAdjective } from "./parse-adjective";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
|
||||
const ghut = {
|
||||
ts: 1527812625,
|
||||
i: 9561,
|
||||
p: "غټ",
|
||||
f: "ghuT, ghaT",
|
||||
g: "ghuT,ghaT",
|
||||
e: "big, fat",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
} as T.AdjectiveEntry;
|
||||
const sturey = {
|
||||
ts: 1527815306,
|
||||
i: 7933,
|
||||
p: "ستړی",
|
||||
f: "stúRay",
|
||||
g: "stuRay",
|
||||
e: "tired",
|
||||
r: 4,
|
||||
c: "adj. / adv.",
|
||||
} as T.AdjectiveEntry;
|
||||
const narey = {
|
||||
ts: 1527819320,
|
||||
i: 14027,
|
||||
p: "نری",
|
||||
f: "naráy",
|
||||
g: "naray",
|
||||
e: "thin; mild; high (pitch)",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
} as T.AdjectiveEntry;
|
||||
const zor = {
|
||||
ts: 1527815451,
|
||||
i: 7570,
|
||||
p: "زوړ",
|
||||
f: "zoR",
|
||||
g: "zoR",
|
||||
e: "old",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
infap: "زاړه",
|
||||
infaf: "zaaRu",
|
||||
infbp: "زړ",
|
||||
infbf: "zaR",
|
||||
} as T.AdjectiveEntry;
|
||||
const sheen = {
|
||||
ts: 1527815265,
|
||||
i: 8979,
|
||||
p: "شین",
|
||||
f: "sheen",
|
||||
g: "sheen",
|
||||
e: "green, blue; unripe, immature; bright, sunny",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
infap: "شنه",
|
||||
infaf: "shnu",
|
||||
infbp: "شن",
|
||||
infbf: "shn",
|
||||
} as T.AdjectiveEntry;
|
||||
const ghut = wordQuery("غټ", "adj");
|
||||
const sturey = wordQuery("ستړی", "adj");
|
||||
const narey = wordQuery("نری", "adj");
|
||||
const zor = wordQuery("زوړ", "adj");
|
||||
const sheen = wordQuery("شین", "adj");
|
||||
|
||||
const tests: {
|
||||
category: string;
|
||||
|
@ -312,7 +259,7 @@ describe("parsing adjectives", () => {
|
|||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const possibilities = parseAdjective(tokens, lookup).map((x) => x[1]);
|
||||
const possibilities = parseAdjective(tokens, lookup).map((x) => x.body);
|
||||
expect(
|
||||
possibilities.map((x) => {
|
||||
const { given, ...rest } = x;
|
||||
|
|
|
@ -6,6 +6,7 @@ import * as T from "../../../types";
|
|||
import { lookup, wordQuery } from "./lookup";
|
||||
import { parseNoun } from "./parse-noun";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { isCompleteResult } from "./utils";
|
||||
|
||||
const sor = wordQuery("سوړ", "adj");
|
||||
const zor = wordQuery("زوړ", "adj");
|
||||
|
@ -36,6 +37,12 @@ const maamaa = wordQuery("ماما", "noun");
|
|||
const peesho = wordQuery("پیشو", "noun");
|
||||
const duaa = wordQuery("دعا", "noun");
|
||||
const zooy = wordQuery("زوی", "noun");
|
||||
const nabee = wordQuery("نبي", "noun");
|
||||
const lafz = wordQuery("لفظ", "noun");
|
||||
|
||||
// TODO: test for adjective errors etc
|
||||
|
||||
// bundled plural
|
||||
|
||||
const tests: {
|
||||
category: string;
|
||||
|
@ -123,6 +130,13 @@ const tests: {
|
|||
gender: "fem",
|
||||
},
|
||||
},
|
||||
{
|
||||
inflected: false,
|
||||
selection: {
|
||||
...makeNounSelection(daktar, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -1290,18 +1304,98 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
category: "arabic plurals",
|
||||
cases: [
|
||||
{
|
||||
input: "الفاظ",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: {
|
||||
...makeNounSelection(lafz, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "الفاظو",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeNounSelection(lafz, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "نبي",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: makeNounSelection(nabee, undefined),
|
||||
},
|
||||
{
|
||||
inflected: true,
|
||||
selection: makeNounSelection(nabee, undefined),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "انبیا",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: {
|
||||
...makeNounSelection(nabee, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "انبیاوو",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeNounSelection(nabee, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
category: "bundled plurals",
|
||||
cases: [
|
||||
{
|
||||
input: "کوره",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: {
|
||||
...makeNounSelection(kor, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
// PROBLEM WITH غټې وریژې
|
||||
// ];
|
||||
|
||||
describe("parsing nouns", () => {
|
||||
tests.forEach(({ category, cases }) => {
|
||||
// eslint-disable-next-line jest/valid-title
|
||||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
|
||||
const res = parseNoun(tokens, lookup).map(({ body }) => body);
|
||||
expect(res).toEqual(output);
|
||||
});
|
||||
});
|
||||
|
@ -1407,10 +1501,8 @@ const adjsTests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
// TODO: testing issue with the parser returning multiple options needs
|
||||
// to be worked out to test double adjectives
|
||||
{
|
||||
input: "غټو کورونو",
|
||||
input: "غټو زړو کورونو",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
|
@ -1419,7 +1511,7 @@ const adjsTests: {
|
|||
number: "plural",
|
||||
adjectives: [
|
||||
makeAdjectiveSelection(ghut),
|
||||
// makeAdjectiveSelection(zor),
|
||||
makeAdjectiveSelection(zor),
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -1429,15 +1521,17 @@ const adjsTests: {
|
|||
},
|
||||
];
|
||||
|
||||
// describe("parsing nouns with adjectives", () => {
|
||||
// adjsTests.forEach(({ category, cases }) => {
|
||||
// // eslint-disable-next-line jest/valid-title
|
||||
// test(category, () => {
|
||||
// cases.forEach(({ input, output }) => {
|
||||
// const tokens = tokenizer(input);
|
||||
// const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
|
||||
// expect(res).toEqual(output);
|
||||
// });
|
||||
// });
|
||||
// });
|
||||
// });
|
||||
// Runs every case in `adjsTests` through the tokenizer and noun parser, keeps
// only the results accepted by `isCompleteResult`, and compares their bodies
// with the expected output.
describe("parsing nouns with adjectives", () => {
  for (const { category, cases } of adjsTests) {
    // eslint-disable-next-line jest/valid-title
    test(category, () => {
      for (const { input, output } of cases) {
        const parsed = parseNoun(tokenizer(input), lookup);
        const bodies = parsed.filter(isCompleteResult).map((r) => r.body);
        expect(bodies).toEqual(output);
      }
    });
  }
});
|
||||
|
|
|
@ -9,102 +9,31 @@ import {
|
|||
} from "../type-predicates";
|
||||
import { getInflectionQueries } from "./inflection-query";
|
||||
import { parseAdjective } from "./parse-adjective";
|
||||
import { groupWith, equals } from "rambda";
|
||||
import { parsePossesor } from "./parse-possesor";
|
||||
import { bindParseResult } from "./utils";
|
||||
|
||||
// TODO:
|
||||
// - cleanup the workflow and make sure all nouns are covered and test
|
||||
// - add possessive parsing
|
||||
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
||||
|
||||
export function parseNoun(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||
prevPossesor: { inflected: boolean; selection: T.NounSelection } | undefined
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
|
||||
): T.ParseResult<NounResult>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const possesor =
|
||||
first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
|
||||
if (possesor) {
|
||||
const runsAfterPossesor: T.ParseResult<NounResult | undefined>[] = possesor
|
||||
? possesor
|
||||
: [{ tokens, body: undefined, errors: [] }];
|
||||
// could be a case for a monad ??
|
||||
return removeUnneccesaryFailing(
|
||||
runsAfterPossesor.flatMap(
|
||||
({ tokens, body: possesor, errors }) =>
|
||||
parseNoun(
|
||||
tokens,
|
||||
lookup,
|
||||
possesor
|
||||
? {
|
||||
inflected: possesor.inflected,
|
||||
selection: {
|
||||
...possesor.selection,
|
||||
possesor: prevPossesor
|
||||
? {
|
||||
shrunken: false,
|
||||
np: {
|
||||
type: "NP",
|
||||
selection: prevPossesor.selection,
|
||||
},
|
||||
}
|
||||
: undefined,
|
||||
},
|
||||
}
|
||||
: undefined
|
||||
)
|
||||
// .map<T.ParseResult<NounResult>>(([t, r, errs]) => [
|
||||
// t,
|
||||
// r,
|
||||
// // TODO: should the errors from the runsAfterPossesor be thrown out?
|
||||
// // or ...errors should be kept?
|
||||
// // to show an error like د غتو ماشومان نومونه
|
||||
// // adj error غټ should be first inflection (seems confusing)
|
||||
// [...errs, ...errors],
|
||||
// ])
|
||||
)
|
||||
);
|
||||
} else {
|
||||
return removeUnneccesaryFailing(
|
||||
parseNounAfterPossesor(tokens, lookup, prevPossesor, [])
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function removeUnneccesaryFailing(
|
||||
results: T.ParseResult<NounResult>[]
|
||||
): T.ParseResult<NounResult>[] {
|
||||
// group by identical results
|
||||
const groups = groupWith(
|
||||
(a, b) => equals(a.body.selection, b.body.selection),
|
||||
results
|
||||
);
|
||||
// if there's a group of identical results with some success in it
|
||||
// remove any erroneous results
|
||||
const stage1 = groups.flatMap((group) => {
|
||||
if (group.find((x) => x.errors.length === 0)) {
|
||||
return group.filter((x) => x.errors.length === 0);
|
||||
}
|
||||
return group;
|
||||
const possesor = parsePossesor(tokens, lookup, undefined);
|
||||
if (possesor.length) {
|
||||
return bindParseResult(possesor, (tokens, p) => {
|
||||
return parseNounAfterPossesor(tokens, lookup, p, []);
|
||||
});
|
||||
// finally, if there's any success anywhere, remove any of the errors
|
||||
if (stage1.find((x) => x.errors.length === 0)) {
|
||||
return stage1.filter((x) => x.errors.length === 0);
|
||||
} else {
|
||||
return stage1;
|
||||
}
|
||||
return parseNounAfterPossesor(tokens, lookup, undefined, []);
|
||||
}
|
||||
|
||||
// create NP parsing function for that
|
||||
// TODO with possesor, parse an NP not a noun
|
||||
|
||||
function parseNounAfterPossesor(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||
possesor: { inflected: boolean; selection: T.NounSelection } | undefined,
|
||||
possesor: T.PossesorSelection | undefined,
|
||||
adjectives: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
|
@ -117,14 +46,13 @@ function parseNounAfterPossesor(
|
|||
}
|
||||
// TODO: add recognition of او between adjectives
|
||||
const adjRes = parseAdjective(tokens, lookup);
|
||||
const withAdj = adjRes.flatMap(({ tokens: tkns, body: adj }) =>
|
||||
const withAdj = bindParseResult(adjRes, (tkns, adj) =>
|
||||
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
|
||||
);
|
||||
const [first, ...rest] = tokens;
|
||||
const w: ReturnType<typeof parseNoun> = [];
|
||||
|
||||
const searches = getInflectionQueries(first.s, true);
|
||||
|
||||
const w: ReturnType<typeof parseNoun> = [];
|
||||
searches.forEach(({ search, details }) => {
|
||||
const nounEntries = lookup(search).filter(isNounEntry);
|
||||
details.forEach((deets) => {
|
||||
|
@ -147,6 +75,11 @@ function parseNounAfterPossesor(
|
|||
convertInflection(inf, entry, gender, deets.plural).forEach(
|
||||
({ inflected, number }) => {
|
||||
const selection = makeNounSelection(entry, undefined);
|
||||
const errors = [
|
||||
...adjErrors.map((message) => ({
|
||||
message,
|
||||
})),
|
||||
];
|
||||
w.push({
|
||||
tokens: rest,
|
||||
body: {
|
||||
|
@ -162,25 +95,10 @@ function parseNounAfterPossesor(
|
|||
adjectives: adjectives.map((a) => a.selection),
|
||||
// TODO: could be nicer to validate that the possesor is inflected before
|
||||
// and just pass in the selection
|
||||
possesor: possesor
|
||||
? {
|
||||
shrunken: false,
|
||||
np: {
|
||||
type: "NP",
|
||||
selection: possesor.selection,
|
||||
},
|
||||
}
|
||||
: undefined,
|
||||
possesor,
|
||||
},
|
||||
},
|
||||
errors: [
|
||||
...(possesor?.inflected === false
|
||||
? [{ message: "possesor should be inflected" }]
|
||||
: []),
|
||||
...adjErrors.map((message) => ({
|
||||
message,
|
||||
})),
|
||||
],
|
||||
errors,
|
||||
});
|
||||
}
|
||||
);
|
||||
|
|
|
@ -4,9 +4,13 @@ import { parseNoun } from "./parse-noun";
|
|||
import { fmapParseResult } from "../fp-ps";
|
||||
|
||||
export function parseNP(
|
||||
s: T.Token[],
|
||||
s: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
|
||||
): T.ParseResult<{ inflected: boolean; selection: T.NPSelection }>[] {
|
||||
if (s.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
function makeNPSl(
|
||||
a:
|
||||
| {
|
||||
|
@ -33,6 +37,6 @@ export function parseNP(
|
|||
// @ts-ignore grrr webpack is having trouble with this
|
||||
return fmapParseResult(makeNPSl, [
|
||||
...parsePronoun(s),
|
||||
...parseNoun(s, lookup, undefined),
|
||||
...parseNoun(s, lookup),
|
||||
]);
|
||||
}
|
||||
|
|
|
@ -1,20 +1,34 @@
|
|||
import * as T from "../../../types";
|
||||
import { verbLookup } from "./lookup";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { parseVerb } from "./parse-verb";
|
||||
import { parseVP } from "./parse-vp";
|
||||
|
||||
/**
 * Tries to parse a whole tokenized phrase as a noun phrase, a bare verb,
 * or a verb phrase, and gathers every candidate reading.
 *
 * @param s - the tokens of the phrase
 * @param lookup - dictionary lookup used for nouns / adjectives
 * @returns `success`: the bodies of all candidate parses;
 * `errors`: the de-duplicated error messages attached to those parses
 */
export function parsePhrase(
  s: T.Token[],
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): {
  success: (
    | {
        inflected: boolean;
        selection: T.NPSelection;
      }
    | Omit<T.VBE, "ps">
    | T.VPSelectionComplete
  )[];
  errors: string[];
} {
  const res = [
    // NP readings only count when they consume every token
    ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
    // NOTE(review): verb and VP readings are not filtered for leftover
    // tokens here - confirm whether that is intended
    ...parseVerb(s, verbLookup),
    ...parseVP(s, lookup, verbLookup),
  ];

  const success = res.map((x) => x.body);
  return {
    success,
    errors: [
      // Set removes repeated messages while keeping first-seen order
      ...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
    ],
  };
}
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
/* eslint-disable jest/no-conditional-expect */
|
||||
import * as T from "../../../types";
|
||||
import {
|
||||
makeAdjectiveSelection,
|
||||
makeNounSelection,
|
||||
makePronounSelection,
|
||||
} from "../phrase-building/make-selections";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { parsePossesor } from "./parse-possesor";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { isCompleteResult } from "./utils";
|
||||
|
||||
// dictionary entries used to build the expected selections
const sturey = wordQuery("ستړی", "adj");
const sarey = wordQuery("سړی", "noun");
const maashoom = wordQuery("ماشوم", "noun");
const malguray = wordQuery("ملګری", "noun");
const plaar = wordQuery("پلار", "noun");

/** One parsePossesor case: the raw input and either the expected selections or "error" */
type PossesorTestCase = {
  input: string;
  output: T.NPSelection["selection"][] | "error";
};

/** Expected pronoun selections (default distance) for each of the given persons */
function pronounsFor(...people: T.Person[]): T.NPSelection["selection"][] {
  return people.map((person) => makePronounSelection(person));
}

const tests: PossesorTestCase[] = [
  // simple possesors
  {
    input: "د سړي",
    output: [makeNounSelection(sarey, undefined)],
  },
  {
    input: "د ماشومې",
    output: [
      {
        ...makeNounSelection(maashoom, undefined),
        gender: "fem",
      },
    ],
  },
  // chained possesors, the first one carrying an adjective
  {
    input: "د ستړي پلار د ملګري",
    output: [
      {
        ...makeNounSelection(malguray, undefined),
        possesor: {
          shrunken: false,
          np: {
            type: "NP",
            selection: {
              ...makeNounSelection(plaar, undefined),
              adjectives: [makeAdjectiveSelection(sturey)],
            },
          },
        },
      },
    ],
  },
  // inputs expected to produce at least one error
  {
    input: "د سړی نوم",
    output: "error",
  },
  {
    input: "د ښځې د ماشومه",
    output: "error",
  },
  // contracted pronoun possesors
  {
    input: "زما",
    output: pronounsFor(T.Person.FirstSingMale, T.Person.FirstSingFemale),
  },
  {
    input: "ستا",
    output: pronounsFor(T.Person.SecondSingMale, T.Person.SecondSingFemale),
  },
  {
    input: "زمونږ",
    output: pronounsFor(T.Person.FirstPlurMale, T.Person.FirstPlurFemale),
  },
  {
    input: "زموږ",
    output: pronounsFor(T.Person.FirstPlurMale, T.Person.FirstPlurFemale),
  },
  {
    input: "ستاسو",
    output: pronounsFor(T.Person.SecondPlurMale, T.Person.SecondPlurFemale),
  },
  {
    input: "ستاسې",
    output: pronounsFor(T.Person.SecondPlurMale, T.Person.SecondPlurFemale),
  },
  {
    input: "د پلار ستا",
    output: "error",
  },
];
|
||||
|
||||
// Runs every case in `tests` through parsePossesor: "error" cases must yield
// at least one result carrying errors; all other cases must yield exactly the
// expected selections among the complete (all tokens used, no errors) results.
test("parse possesor", () => {
  tests.forEach(({ input, output }) => {
    // parse once and reuse the result for whichever assertion applies
    const parsed = parsePossesor(tokenizer(input), lookup, undefined);
    if (output === "error") {
      expect(parsed.some((x) => x.errors.length)).toBe(true);
    } else {
      expect(
        parsed.filter(isCompleteResult).map((x) => x.body.np.selection)
      ).toEqual(output);
    }
  });
});
|
|
@ -0,0 +1,136 @@
|
|||
import * as T from "../../../types";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { bindParseResult } from "./utils";
|
||||
// Contracted possesive pronoun spellings, each paired with the persons
// that the contraction can stand for.
// TODO: maybe contractions should just be male to cut down on the
// alternative sentences
const contractions: Array<[string[], T.Person[]]> = [
  [["زما"], [T.Person.FirstSingMale, T.Person.FirstSingFemale]],
  [["ستا"], [T.Person.SecondSingMale, T.Person.SecondSingFemale]],
  [["زمونږ", "زموږ"], [T.Person.FirstPlurMale, T.Person.FirstPlurFemale]],
  [["ستاسو", "ستاسې"], [T.Person.SecondPlurMale, T.Person.SecondPlurFemale]],
];
|
||||
|
||||
/**
 * Parses a (possibly chained) possesor from the front of `tokens`.
 *
 * A possesor is either a contracted pronoun (looked up with
 * parseContractions) or the particle "د" followed by an NP. After one
 * possesor is read, parsing recurses on the remaining tokens so that chains
 * nest: the possesor read earlier becomes the possesor of the one read next.
 *
 * @param tokens - the tokens still to be consumed
 * @param lookup - dictionary lookup passed on to parseNP
 * @param prevPossesor - the possesor already built by an earlier step, if any
 * @returns every possible parse; when no further possesor starts at `tokens`
 * the result is `prevPossesor` with the tokens untouched, or nothing at all
 * if there was no previous possesor
 */
export function parsePossesor(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  prevPossesor: T.PossesorSelection | undefined
): T.ParseResult<T.PossesorSelection>[] {
  // what to return when no (further) possesor can be read here
  const finished: T.ParseResult<T.PossesorSelection>[] = prevPossesor
    ? [{ tokens, body: prevPossesor, errors: [] }]
    : [];
  if (tokens.length === 0) {
    return finished;
  }
  const [first, ...rest] = tokens;
  const contracted = parseContractions(first);
  if (contracted.length) {
    // a contracted pronoun replaces whatever came before it, so an earlier
    // possesor cannot be attached to it
    const errors: T.ParseError[] = prevPossesor
      ? [{ message: "a pronoun cannot have a possesor" }]
      : [];
    return contracted
      .flatMap((p) => parsePossesor(rest, lookup, p))
      .map((x) => ({
        ...x,
        errors: [...errors, ...x.errors],
      }));
  }
  if (first.s === "د") {
    return bindParseResult(parseNP(rest, lookup), (tkns, body) => {
      const possesor: T.PossesorSelection = {
        shrunken: false,
        np: body.selection,
      };
      return {
        errors: !body.inflected
          ? // TODO: get ps to say which possesor
            // TODO: track the position coming from the parseNP etc for highlighting
            [{ message: `possesor should be inflected` }]
          : [],
        // add and check error - can't add possesor to pronoun
        next: parsePossesor(tkns, lookup, addPoss(prevPossesor, possesor)),
      };
    });
  }
  return finished;
}
|
||||
|
||||
/**
 * Returns a copy of `possesorOf` that is owned by `possesor`.
 *
 * Pronouns do not take a possesor, so a pronoun `possesorOf` is only
 * shallow-copied; for anything else `possesor` is written into the
 * `possesor` field of the inner selection (even when it is undefined).
 */
function addPoss(
  possesor: T.PossesorSelection | undefined,
  possesorOf: T.PossesorSelection
): T.PossesorSelection {
  if (possesorOf.np.selection.type === "pronoun") {
    return { ...possesorOf };
  }
  const selection = {
    ...possesorOf.np.selection,
    possesor,
  };
  return {
    ...possesorOf,
    np: {
      ...possesorOf.np,
      selection,
    },
  };
}
|
||||
|
||||
/**
 * Looks the token up in the `contractions` table and returns one unshrunken
 * pronoun possesor (distance "far") for every person the contraction can
 * stand for. Returns an empty array when the token is not a contraction.
 */
function parseContractions({ s }: T.Token): T.PossesorSelection[] {
  const match = contractions.find(([forms]) => forms.includes(s));
  const people = match ? match[1] : [];
  return people.map((person) => ({
    shrunken: false,
    np: {
      type: "NP",
      selection: {
        type: "pronoun",
        distance: "far",
        person,
      },
    },
  }));
}
|
|
@ -2,7 +2,7 @@ import * as T from "../../../types";
|
|||
|
||||
type Result = ReturnType<typeof parsePronoun>[number];
|
||||
|
||||
// TODO: map for doubling true, false, and masc fem
|
||||
// TODO: add chaa
|
||||
export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
|
||||
inflected: boolean;
|
||||
selection: T.PronounSelection;
|
||||
|
@ -21,6 +21,19 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
|
|||
},
|
||||
errors: [],
|
||||
}));
|
||||
} else if (s === "ما") {
|
||||
return [0, 1].map((person) => ({
|
||||
tokens: rest,
|
||||
body: {
|
||||
inflected: true,
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
errors: [],
|
||||
}));
|
||||
} else if (s === "ته") {
|
||||
return [2, 3].map((person) => ({
|
||||
tokens: rest,
|
||||
|
@ -34,6 +47,19 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
|
|||
},
|
||||
errors: [],
|
||||
}));
|
||||
} else if (s === "تا") {
|
||||
return [2, 3].map((person) => ({
|
||||
tokens: rest,
|
||||
body: {
|
||||
inflected: true,
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
errors: [],
|
||||
}));
|
||||
} else if (s === "هغه") {
|
||||
return [
|
||||
...[false, true].map<Result>((inflected) => ({
|
||||
|
@ -42,7 +68,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
|
|||
inflected,
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 5,
|
||||
person: 4,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
|
@ -54,7 +80,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
|
|||
inflected: false,
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 5,
|
||||
person: 4,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
import * as T from "../../../types";
|
||||
|
||||
/**
 * Tries to read the first token as an imperfective-stem verb form.
 *
 * The final letter decides which persons are possible (getVerbEnding), and
 * the token minus that letter is looked up as a stem (findByStem). One
 * result is produced for every person / verb combination.
 *
 * @param tokens - the tokens to parse; only the first is consumed
 * @param verbLookup - finds verb entries satisfying a predicate
 * @returns the possible verb blocks, each with the remaining tokens
 */
export function parseVerb(
  tokens: Readonly<T.Token[]>,
  verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.ParseResult<Omit<T.VBE, "ps">>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const people = getVerbEnding(first.s);
  if (people.length === 0) {
    return [];
  }
  const verbs = findByStem(first.s.slice(0, -1), verbLookup);

  const results: T.ParseResult<Omit<T.VBE, "ps">>[] = [];
  for (const person of people) {
    for (const verb of verbs) {
      results.push({
        tokens: rest,
        body: {
          type: "VB",
          person,
          info: {
            type: "verb",
            aspect: "imperfective",
            base: "stem",
            verb,
          },
        },
        errors: [],
      });
    }
  }
  return results;
}
|
||||
|
||||
/**
 * Maps the ending of a verb form to the persons it can agree with.
 * Returns an empty array when the form has none of the known endings.
 */
function getVerbEnding(p: string): T.Person[] {
  // checked in order; built per call so callers always get a fresh array
  const endings: [string, T.Person[]][] = [
    ["م", [T.Person.FirstSingMale, T.Person.FirstSingFemale]],
    ["ې", [T.Person.SecondSingMale, T.Person.SecondSingFemale]],
    [
      "ي",
      [
        T.Person.ThirdSingMale,
        T.Person.ThirdSingFemale,
        T.Person.ThirdPlurMale,
        T.Person.ThirdPlurFemale,
      ],
    ],
    ["و", [T.Person.FirstPlurMale, T.Person.FirstPlurFemale]],
    ["ئ", [T.Person.SecondPlurMale, T.Person.SecondPlurFemale]],
  ];
  const found = endings.find(([ending]) => p.endsWith(ending));
  return found ? found[1] : [];
}
|
||||
|
||||
/**
 * Finds the verbs whose stem is `stem`.
 *
 * An entry matches when its `psp` field equals the stem, or, for entries
 * without `psp` whose `c` does not contain "comp", when `p` minus its last
 * character equals the stem.
 */
function findByStem(
  stem: string,
  verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.VerbEntry[] {
  const hasStem = (e: T.VerbDictionaryEntry): boolean => {
    if (e.psp === stem) {
      return true;
    }
    if (e.psp) {
      // an explicit, different stem rules the entry out
      return false;
    }
    return !e.c.includes("comp") && e.p.slice(0, -1) === stem;
  };
  return verbLookup(hasStem);
}
|
|
@ -0,0 +1,117 @@
|
|||
import * as T from "../../../types";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { bindParseResult } from "./utils";
|
||||
import { parseVerb } from "./parse-verb";
|
||||
import {
|
||||
makeObjectSelectionComplete,
|
||||
makeSubjectSelectionComplete,
|
||||
} from "../phrase-building/blocks-utils";
|
||||
import { vEntry } from "../new-verb-engine/rs-helpers";
|
||||
import { getPersonFromNP, isThirdPerson } from "../phrase-building/vp-tools";
|
||||
// to hide equatives type-doubling issue
// (used as the verb entry when the parsed verb block does not carry one)
const kedulStat = vEntry({
  ts: 1581086654898,
  i: 11100,
  p: "کېدل",
  f: "kedul",
  g: "kedul",
  e: "to become _____",
  r: 2,
  c: "v. intrans.",
  ssp: "ش",
  ssf: "sh",
  prp: "شول",
  prf: "shwul",
  pprtp: "شوی",
  pprtf: "shúway",
  noOo: true,
  ec: "become",
});

/**
 * Very basic verb phrase parser: expects NP, NP, verb.
 *
 * Every pairing of a first-NP reading with a second-NP reading is tried in
 * both subject/object orders, and each pairing yields a present tense,
 * active, transitive VP selection. Problems with inflection or subject/verb
 * agreement are reported as errors on the result rather than dropping it.
 *
 * @param tokens - the tokens to parse
 * @param lookup - dictionary lookup passed on to parseNP
 * @param verbLookup - verb lookup passed on to parseVerb
 * @returns the possible VP selections with the tokens left after the verb
 */
export function parseVP(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.ParseResult<T.VPSelectionComplete>[] {
  if (tokens.length === 0) {
    return [];
  }
  // how to make this into a nice pipeline... 🤔
  const NP1 = parseNP(tokens, lookup);
  const NP2 = bindParseResult(NP1, (tokens) => parseNP(tokens, lookup), true);
  const vb = bindParseResult(
    NP2,
    (tokens) => parseVerb(tokens, verbLookup),
    true
  );
  // TODO: be able to bind mulitple vals
  return bindParseResult<Omit<T.VBE, "ps">, T.VPSelectionComplete>(
    vb,
    (tokens, v) => {
      const w: T.ParseResult<T.VPSelectionComplete>[] = [];
      NP1.forEach(({ body: np1 }) => {
        NP2.forEach(({ body: np2 }) => {
          // either NP may be the subject, so try both orders
          [
            [np1, np2],
            [np2, np1],
          ].forEach(([s, o]) => {
            const errors: T.ParseError[] = [];
            const subjPerson = getPersonFromNP(s.selection);
            if (s.inflected) {
              errors.push({ message: "subject should not be inflected" });
            }
            if (o.selection.selection.type === "pronoun") {
              // the rule concerns the person of the OBJECT pronoun itself
              if (
                !isThirdPerson(getPersonFromNP(o.selection)) &&
                !o.inflected
              ) {
                errors.push({
                  message:
                    "1st or 2nd person object pronoun should be inflected",
                });
              }
            } else if (o.inflected) {
              errors.push({ message: "object should not be inflected" });
            }
            if (subjPerson !== v.person) {
              errors.push({ message: "verb does not match subject" });
            }
            const blocks: T.VPSBlockComplete[] = [
              {
                key: 1,
                block: makeSubjectSelectionComplete(s.selection),
              },
              {
                key: 2,
                block: makeObjectSelectionComplete(o.selection),
              },
            ];
            const verb: T.VerbSelectionComplete = {
              type: "verb",
              verb: v.info.type === "verb" ? v.info.verb : kedulStat,
              transitivity: "transitive",
              canChangeTransitivity: false,
              canChangeStatDyn: false,
              negative: false,
              tense: "presentVerb",
              canChangeVoice: true,
              isCompound: false,
              voice: "active",
            };
            w.push({
              tokens,
              body: {
                blocks,
                verb,
                externalComplement: undefined,
                form: {
                  removeKing: false,
                  shrinkServant: false,
                },
              },
              errors,
            });
          });
        });
      });
      return w;
    }
  );
}
|
|
@ -1,7 +1,8 @@
|
|||
import { Token } from "../../../types";
|
||||
import { standardizePashto } from "../standardize-pashto";
|
||||
|
||||
export function tokenizer(s: string): Token[] {
|
||||
const words = s.trim().split(/ +/);
|
||||
const words = standardizePashto(s).trim().split(/ +/);
|
||||
const indexed: { i: number; s: string }[] = [];
|
||||
for (let i = 0; i < words.length; i++) {
|
||||
indexed.push({ i, s: words[i] });
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
import * as T from "../../../types";
|
||||
|
||||
/**
 * Monadic bind for ParseResult[].
 *
 * Feeds the remaining tokens and the body of every previous result into `f`,
 * collects everything `f` produces into one flat list, and hands that list
 * to cleanOutFails for pruning.
 *
 * The errors on each new result are, in order: the errors `f` passed along,
 * the errors of the new result itself, then the errors of the previous
 * result it was derived from.
 *
 * @param previous - the set of results (monad) to start with
 * @param f - takes the remaining tokens and one previous body and returns
 * the next possible results, either as a plain array or as an object that
 * also carries errors to attach to each of them
 * @param ignorePrevious - meant for callers that don't need the previous
 * body to compute the next result; it is accepted but currently not acted
 * on (an earlier de-duplication by remaining token count is disabled)
 * @returns the pruned next set of results
 */
export function bindParseResult<C extends object, D extends object>(
  previous: T.ParseResult<C>[],
  f: (
    tokens: Readonly<T.Token[]>,
    r: C
  ) =>
    | T.ParseResult<D>[]
    | {
        errors: T.ParseError[];
        next: T.ParseResult<D>[];
      },
  ignorePrevious?: boolean
): T.ParseResult<D>[] {
  const nextPossibilities: T.ParseResult<D>[] = [];
  for (const { tokens, body, errors } of previous) {
    const res = f(tokens, body);
    // normalise the two allowed return shapes of f
    const errsPassed: T.ParseError[] = Array.isArray(res) ? [] : res.errors;
    const next = Array.isArray(res) ? res : res.next;
    for (const x of next) {
      nextPossibilities.push({
        tokens: x.tokens,
        body: x.body,
        errors: [...errsPassed, ...x.errors, ...errors],
      });
    }
  }
  return cleanOutFails(nextPossibilities);
}
|
||||
|
||||
/**
 * Prunes a list of parse results.
 *
 * If at least one result is error-free, every result carrying errors is
 * dropped. The remaining results are then de-duplicated by their JSON
 * serialisation, keeping first-seen order. Because of that round trip the
 * returned results are fresh copies, reduced to what JSON can represent.
 *
 * @param results - the parse results to prune
 * @returns the pruned, de-duplicated results
 */
export function cleanOutFails<C extends object>(
  results: T.ParseResult<C>[]
): T.ParseResult<C>[] {
  // if there's any success anywhere, remove any of the errors
  const successes = results.filter((x) => x.errors.length === 0);
  const kept = successes.length > 0 ? successes : results;
  // explicit callbacks keep map's index / array arguments away from
  // JSON.stringify and JSON.parse, so no ts-ignore is needed
  const serialized = new Set(kept.map((r) => JSON.stringify(r)));
  return Array.from(
    serialized,
    (s) => JSON.parse(s) as T.ParseResult<C>
  );
}
|
||||
|
||||
/**
 * Tells whether a parse result is complete: no tokens are left over
 * and no errors are attached to it.
 */
export function isCompleteResult<C extends object>(
  r: T.ParseResult<C>
): boolean {
  const usedAllTokens = r.tokens.length === 0;
  const errorFree = r.errors.length === 0;
  return usedAllTokens && errorFree;
}
|
|
@ -157,6 +157,15 @@ export function makeSubjectSelection(
|
|||
};
|
||||
}
|
||||
|
||||
/** Wraps a complete NP selection as the subject block of a phrase. */
export function makeSubjectSelectionComplete(
  selection: T.NPSelection
): T.SubjectSelectionComplete {
  const subject: T.SubjectSelectionComplete = {
    type: "subjectSelection",
    selection,
  };
  return subject;
}
|
||||
|
||||
export function makeObjectSelection(
|
||||
selection:
|
||||
| T.ObjectSelection
|
||||
|
@ -195,6 +204,15 @@ export function makeObjectSelection(
|
|||
};
|
||||
}
|
||||
|
||||
/** Wraps a complete NP selection as the object block of a phrase. */
export function makeObjectSelectionComplete(
  selection: T.NPSelection
): T.ObjectSelectionComplete {
  const object: T.ObjectSelectionComplete = {
    type: "objectSelection",
    selection,
  };
  return object;
}
|
||||
|
||||
export function EPSBlocksAreComplete(
|
||||
blocks: T.EPSBlock[]
|
||||
): blocks is T.EPSBlockComplete[] {
|
||||
|
|
|
@ -344,9 +344,7 @@ function getPsFromPiece(
|
|||
}
|
||||
|
||||
function getPsFromWelded(v: T.Welded): T.PsString[] {
|
||||
function getPsFromSide(
|
||||
v: T.VBBasic | T.Welded | T.NComp | T.VBGenNum
|
||||
): T.PsString[] {
|
||||
function getPsFromSide(v: T.VB | T.NComp): T.PsString[] {
|
||||
if (v.type === "VB") {
|
||||
return flattenLengths(v.ps);
|
||||
}
|
||||
|
|
|
@ -31,6 +31,29 @@ export function makeAdjectiveSelection(
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Builds an unshrunken possesor out of the inner selection of an NP,
 * wrapping that selection in an NP of its own.
 */
export function makePossesorSelection(
  np: T.NPSelection["selection"]
): T.PossesorSelection {
  const owner: T.NPSelection = {
    type: "NP",
    selection: np,
  };
  return {
    shrunken: false,
    np: owner,
  };
}
|
||||
|
||||
/**
 * Builds a pronoun selection for the given person.
 *
 * @param person - the person of the pronoun
 * @param distance - "near" or "far"; "far" is used when omitted
 */
export function makePronounSelection(
  person: T.Person,
  distance?: "near" | "far"
): T.PronounSelection {
  const chosenDistance = distance ? distance : "far";
  return {
    type: "pronoun",
    distance: chosenDistance,
    person,
  };
}
|
||||
|
||||
export function makeParticipleSelection(
|
||||
verb: T.VerbEntry
|
||||
): T.ParticipleSelection {
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
import {
|
||||
isFirstPerson,
|
||||
isSecondPerson,
|
||||
} from "../misc-helpers";
|
||||
import { isFirstPerson, isSecondPerson } from "../misc-helpers";
|
||||
import * as T from "../../../types";
|
||||
import { concatPsString } from "../p-text-helpers";
|
||||
|
||||
function getBaseAndAdjectives({ selection }: T.Rendered<T.NPSelection | T.ComplementSelection | T.APSelection>): T.PsString[] {
|
||||
function getBaseAndAdjectives({
|
||||
selection,
|
||||
}: T.Rendered<
|
||||
T.NPSelection | T.ComplementSelection | T.APSelection
|
||||
>): T.PsString[] {
|
||||
if (selection.type === "sandwich") {
|
||||
return getSandwichPsBaseAndAdjectives(selection);
|
||||
}
|
||||
|
@ -13,37 +14,52 @@ function getBaseAndAdjectives({ selection }: T.Rendered<T.NPSelection | T.Comple
|
|||
if (!adjs) {
|
||||
return selection.ps;
|
||||
}
|
||||
return selection.ps.map(p => (
|
||||
return selection.ps.map((p) =>
|
||||
concatPsString(
|
||||
adjs.reduce((accum, curr) => (
|
||||
adjs.reduce(
|
||||
(accum, curr) =>
|
||||
// TODO: with variations of adjs?
|
||||
concatPsString(accum, (accum.p === "" && accum.f === "") ? "" : " ", curr.ps[0])
|
||||
), { p: "", f: "" }),
|
||||
concatPsString(
|
||||
accum,
|
||||
accum.p === "" && accum.f === "" ? "" : " ",
|
||||
curr.ps[0]
|
||||
),
|
||||
{ p: "", f: "" }
|
||||
),
|
||||
" ",
|
||||
p,
|
||||
p
|
||||
)
|
||||
));
|
||||
);
|
||||
}
|
||||
|
||||
function getSandwichPsBaseAndAdjectives(s: T.Rendered<T.SandwichSelection<T.Sandwich>>): T.PsString[] {
|
||||
function getSandwichPsBaseAndAdjectives(
|
||||
s: T.Rendered<T.SandwichSelection<T.Sandwich>>
|
||||
): T.PsString[] {
|
||||
const insideBase = getBaseAndAdjectives(s.inside);
|
||||
const willContractWithPronoun = s.before && s.before.p === "د" && s.inside.selection.type === "pronoun"
|
||||
&& (isFirstPerson(s.inside.selection.person) || isSecondPerson(s.inside.selection.person))
|
||||
const contracted = (willContractWithPronoun && s.inside.selection.type === "pronoun")
|
||||
const willContractWithPronoun =
|
||||
s.before &&
|
||||
s.before.p === "د" &&
|
||||
s.inside.selection.type === "pronoun" &&
|
||||
(isFirstPerson(s.inside.selection.person) ||
|
||||
isSecondPerson(s.inside.selection.person));
|
||||
const contracted =
|
||||
willContractWithPronoun && s.inside.selection.type === "pronoun"
|
||||
? contractPronoun(s.inside.selection)
|
||||
: undefined
|
||||
return insideBase.map((inside) => (
|
||||
: undefined;
|
||||
return insideBase.map((inside) =>
|
||||
concatPsString(
|
||||
(s.before && !willContractWithPronoun) ? s.before : "",
|
||||
s.before && !willContractWithPronoun ? s.before : "",
|
||||
s.before ? " " : "",
|
||||
contracted ? contracted : inside,
|
||||
s.after ? " " : "",
|
||||
s.after ? s.after : "",
|
||||
s.after ? s.after : ""
|
||||
)
|
||||
));
|
||||
);
|
||||
}
|
||||
|
||||
function contractPronoun(n: T.Rendered<T.PronounSelection>): T.PsString | undefined {
|
||||
function contractPronoun(
|
||||
n: T.Rendered<T.PronounSelection>
|
||||
): T.PsString | undefined {
|
||||
return isFirstPerson(n.person)
|
||||
? concatPsString({ p: "ز", f: "z" }, n.ps[0])
|
||||
: isSecondPerson(n.person)
|
||||
|
@ -51,7 +67,9 @@ function contractPronoun(n: T.Rendered<T.PronounSelection>): T.PsString | undefi
|
|||
: undefined;
|
||||
}
|
||||
|
||||
function trimOffShrunkenPossesive(p: T.Rendered<T.NPSelection>): T.Rendered<T.NPSelection> {
|
||||
function trimOffShrunkenPossesive(
|
||||
p: T.Rendered<T.NPSelection>
|
||||
): T.Rendered<T.NPSelection> {
|
||||
if (!("possesor" in p.selection)) {
|
||||
return p;
|
||||
}
|
||||
|
@ -79,33 +97,47 @@ function trimOffShrunkenPossesive(p: T.Rendered<T.NPSelection>): T.Rendered<T.NP
|
|||
};
|
||||
}
|
||||
|
||||
export function getPashtoFromRendered(b: T.Rendered<T.NPSelection> | T.Rendered<T.ComplementSelection> | T.Rendered<T.APSelection>, subjectsPerson: false | T.Person): T.PsString[] {
|
||||
export function getPashtoFromRendered(
|
||||
b:
|
||||
| T.Rendered<T.NPSelection>
|
||||
| T.Rendered<T.ComplementSelection>
|
||||
| T.Rendered<T.APSelection>,
|
||||
subjectsPerson: false | T.Person
|
||||
): T.PsString[] {
|
||||
const base = getBaseAndAdjectives(b);
|
||||
if (b.selection.type === "loc. adv." || b.selection.type === "adverb") {
|
||||
return base;
|
||||
}
|
||||
if (b.selection.type === "adjective") {
|
||||
if (!b.selection.sandwich) {
|
||||
return base
|
||||
return base;
|
||||
}
|
||||
// TODO: Kinda cheating
|
||||
const sandwichPs = getPashtoFromRendered({ type: "AP", selection: b.selection.sandwich }, false);
|
||||
return base.flatMap(p => (
|
||||
sandwichPs.flatMap(s => (
|
||||
concatPsString(s, " ", p)
|
||||
))
|
||||
));
|
||||
const sandwichPs = getPashtoFromRendered(
|
||||
{ type: "AP", selection: b.selection.sandwich },
|
||||
false
|
||||
);
|
||||
return base.flatMap((p) =>
|
||||
sandwichPs.flatMap((s) => concatPsString(s, " ", p))
|
||||
);
|
||||
}
|
||||
const trimmed = b.selection.type === "sandwich" ? {
|
||||
const trimmed =
|
||||
b.selection.type === "sandwich"
|
||||
? {
|
||||
type: b.type,
|
||||
selection: {
|
||||
...b.selection,
|
||||
inside: trimOffShrunkenPossesive(b.selection.inside),
|
||||
},
|
||||
} : trimOffShrunkenPossesive({ type: "NP", selection: b.selection });
|
||||
}
|
||||
: trimOffShrunkenPossesive({ type: "NP", selection: b.selection });
|
||||
if (trimmed.selection.type === "sandwich") {
|
||||
return trimmed.selection.inside.selection.possesor
|
||||
? addPossesor(trimmed.selection.inside.selection.possesor.np, base, subjectsPerson)
|
||||
? addPossesor(
|
||||
trimmed.selection.inside.selection.possesor.np,
|
||||
base,
|
||||
subjectsPerson
|
||||
)
|
||||
: base;
|
||||
}
|
||||
if (trimmed.selection.possesor) {
|
||||
|
@ -114,53 +146,69 @@ export function getPashtoFromRendered(b: T.Rendered<T.NPSelection> | T.Rendered<
|
|||
return base;
|
||||
}
|
||||
|
||||
/**
 * Puts a possessor in front of every phrase in `existing`, then continues up
 * the chain of possessors (the owner's own possessor, and so on) until an
 * owner without a possessor is reached.
 *
 * For each owner, one output is produced per combination of an existing phrase
 * and a form returned by `getBaseAndAdjectives(owner)`:
 *  - pronoun owner that is reflexive with the subject -> "خپل" + phrase
 *  - first person pronoun owner  -> "ز" fused to the owner form + phrase
 *  - second person pronoun owner -> "س" fused to the owner form + phrase
 *  - anything else               -> "د" + owner form + phrase
 *
 * @param owner - the rendered NP that possesses the phrase
 * @param existing - the phrase variants built so far
 * @param subjectsPerson - person of the subject, or `false` when there is none
 *   to compare against (no reflexive form is produced in that case)
 * @returns all phrase variants with the full possessor chain prepended
 */
function addPossesor(
  owner: T.Rendered<T.NPSelection>,
  existing: T.PsString[],
  subjectsPerson: false | T.Person
): T.PsString[] {
  // person-number pairs for which the possessive pronoun is rendered reflexively
  function isReflexivePair(subj: T.Person, obj: T.Person): boolean {
    if ([0, 1].includes(subj) && [0, 1].includes(obj)) {
      return true;
    }
    return [2, 3].includes(subj) && [8, 9].includes(obj);
  }

  let holder: T.Rendered<T.NPSelection> = owner;
  let phrases: T.PsString[] = existing;
  for (;;) {
    const current = holder;
    phrases = phrases.flatMap((ps) =>
      getBaseAndAdjectives(current).map((v) => {
        if (current.selection.type === "pronoun") {
          if (
            subjectsPerson !== false &&
            isReflexivePair(subjectsPerson, current.selection.person)
          ) {
            return concatPsString({ p: "خپل", f: "khpul" }, " ", ps);
          }
          if (isFirstPerson(current.selection.person)) {
            return concatPsString({ p: "ز", f: "z" }, v, " ", ps);
          }
          if (isSecondPerson(current.selection.person)) {
            return concatPsString({ p: "س", f: "s" }, v, " ", ps);
          }
        }
        return concatPsString({ p: "د", f: "du" }, " ", v, " ", ps);
      })
    );
    const next = current.selection.possesor;
    if (!next) {
      return phrases;
    }
    // the possessor has a possessor of its own: wrap again, one level up
    holder = next.np;
  }
}
|
||||
|
||||
/**
 * Builds the English gloss for a rendered noun: leading article marker,
 * then the adjectives, then the noun itself, then an optional gender tag.
 *
 * The article marker is everything up to and including the FIRST "the)" in
 * `np.e` (e.g. "(the)" or "(a/the)"). Only the first occurrence is treated as
 * the article, so a gloss that contains another "the)" further on keeps the
 * rest of its text.
 *
 * @param np - the rendered noun selection
 * @returns the assembled English string, or `undefined` when the noun or any
 *   of its adjectives has no English
 */
function addArticlesAndAdjs(
  np: T.Rendered<T.NounSelection>
): string | undefined {
  if (!np.e) return undefined;
  // split out the articles so adjectives can be placed in between them and the word
  const marker = "the)";
  const markerAt = np.e.indexOf(marker);
  const articles = markerAt === -1 ? "" : np.e.slice(0, markerAt) + "the) ";
  const word = markerAt === -1 ? np.e : np.e.slice(markerAt + marker.length);
  let adjs = "";
  if (np.adjectives) {
    for (const adjective of np.adjectives) {
      // an adjective without English makes the whole gloss unavailable
      if (!adjective.e) return undefined;
      adjs += adjective.e + " ";
    }
  }
  const genderTag = np.genderCanChange
    ? np.gender === "fem"
      ? " (f.)"
      : " (m.)"
    : "";
  return `${articles}${adjs}${word}${genderTag}`;
}
|
||||
|
||||
function addPossesors(possesor: T.Rendered<T.NPSelection> | undefined, base: string | undefined, type: "noun" | "participle"): string | undefined {
|
||||
function addPossesors(
|
||||
possesor: T.Rendered<T.NPSelection> | undefined,
|
||||
base: string | undefined,
|
||||
type: "noun" | "participle"
|
||||
): string | undefined {
|
||||
function removeArticles(s: string): string {
|
||||
return s.replace("(the) ", "").replace("(a/the) ", "");
|
||||
}
|
||||
|
@ -169,7 +217,9 @@ function addPossesors(possesor: T.Rendered<T.NPSelection> | undefined, base: str
|
|||
if (possesor.selection.type === "pronoun") {
|
||||
return type === "noun"
|
||||
? `${pronounPossEng(possesor.selection.person)} ${removeArticles(base)}`
|
||||
: `(${pronounPossEng(possesor.selection.person)}) ${removeArticles(base)} (${possesor.selection.e})`
|
||||
: `(${pronounPossEng(possesor.selection.person)}) ${removeArticles(
|
||||
base
|
||||
)} (${possesor.selection.e})`;
|
||||
}
|
||||
const possesorE = getEnglishFromRendered(possesor);
|
||||
if (!possesorE) return undefined;
|
||||
|
@ -180,17 +230,20 @@ function addPossesors(possesor: T.Rendered<T.NPSelection> | undefined, base: str
|
|||
}
|
||||
|
||||
/**
 * English possessive pronoun for a person, tagged with the person's gender
 * where English does not show it (and with "pl." for second person plural).
 *
 * Third person singular returns "his/its" / "her/its" with no tag, since the
 * English word already carries the gender.
 *
 * @param p - the person to get the possessive pronoun for
 * @returns e.g. "my (m.)", "your (f. pl.)", "his/its", "their (f.)"
 */
function pronounPossEng(p: T.Person): string {
  // even persons are tagged "m.", odd persons "f."
  function gend(x: T.Person): string {
    return `${x % 2 === 0 ? "m." : "f."}`;
  }
  if (p === T.Person.FirstSingMale || p === T.Person.FirstSingFemale) {
    return `my (${gend(p)})`;
  }
  if (p === T.Person.FirstPlurMale || p === T.Person.FirstPlurFemale) {
    return `our (${gend(p)})`;
  }
  if (p === T.Person.SecondSingMale || p === T.Person.SecondSingFemale) {
    return `your (${gend(p)})`;
  }
  if (p === T.Person.SecondPlurMale || p === T.Person.SecondPlurFemale) {
    return `your (${gend(p)} pl.)`;
  }
  if (p === T.Person.ThirdSingMale) {
    return "his/its";
  }
  if (p === T.Person.ThirdSingFemale) {
    return "her/its";
  }
  // third person plural: parenthesized like every other gender tag
  return `their (${gend(p)})`;
}
|
||||
|
||||
export function getEnglishFromRendered(r: T.Rendered<T.NPSelection | T.ComplementSelection | T.APSelection | T.SandwichSelection<T.Sandwich>>): string | undefined {
|
||||
export function getEnglishFromRendered(
|
||||
r: T.Rendered<
|
||||
| T.NPSelection
|
||||
| T.ComplementSelection
|
||||
| T.APSelection
|
||||
| T.SandwichSelection<T.Sandwich>
|
||||
>
|
||||
): string | undefined {
|
||||
if (r.type === "sandwich") {
|
||||
return getEnglishFromRenderedSandwich(r);
|
||||
}
|
||||
|
@ -219,18 +279,30 @@ export function getEnglishFromRendered(r: T.Rendered<T.NPSelection | T.Complemen
|
|||
return r.selection.e;
|
||||
}
|
||||
if (r.selection.type === "participle") {
|
||||
return addPossesors(r.selection.possesor?.np, r.selection.e, r.selection.type);
|
||||
return addPossesors(
|
||||
r.selection.possesor?.np,
|
||||
r.selection.e,
|
||||
r.selection.type
|
||||
);
|
||||
}
|
||||
return addPossesors(r.selection.possesor?.np, addArticlesAndAdjs(r.selection), r.selection.type);
|
||||
return addPossesors(
|
||||
r.selection.possesor?.np,
|
||||
addArticlesAndAdjs(r.selection),
|
||||
r.selection.type
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * English for a rendered sandwich: the sandwich's own English (`r.e`)
 * followed by the English of the phrase inside it.
 *
 * @param r - the rendered sandwich selection
 * @returns the combined English, or `undefined` when the inside phrase has none
 */
function getEnglishFromRenderedSandwich(
  r: T.Rendered<T.SandwichSelection<T.Sandwich>>
): string | undefined {
  const inner = getEnglishFromRendered(r.inside);
  return inner ? `${r.e} ${inner}` : undefined;
}
|
||||
|
||||
function getEnglishFromRenderedAdjective(a: T.Rendered<T.AdjectiveSelection>): string | undefined {
|
||||
function getEnglishFromRenderedAdjective(
|
||||
a: T.Rendered<T.AdjectiveSelection>
|
||||
): string | undefined {
|
||||
if (!a.sandwich) {
|
||||
return a.e;
|
||||
}
|
||||
|
|
64
src/types.ts
64
src/types.ts
|
@ -721,6 +721,14 @@ export type EquativeTense =
|
|||
| "wouldBe"
|
||||
| "pastSubjunctive"
|
||||
| "wouldHaveBeen";
|
||||
/**
 * Tense tag recorded on an equative VBE block (the `tense` field of the VBE
 * `info` when its `type` is "equative").
 * NOTE(review): presumably the equative tenses as seen with the "ba" particle
 * set aside - confirm against EquativeTense and the parser that fills this in.
 */
export type EquativeTenseWithoutBa =
|
||||
| "present"
|
||||
| "subjunctive"
|
||||
| "habitual"
|
||||
| "past"
|
||||
| "wouldBe"
|
||||
| "pastSubjunctive"
|
||||
| "wouldHaveBeen";
|
||||
export type PerfectTense = `${EquativeTense}Perfect`;
|
||||
export type AbilityTense = `${VerbTense}Modal`;
|
||||
export type ImperativeTense = `${Aspect}Imperative`;
|
||||
|
@ -1201,16 +1209,50 @@ export type RenderVerbOutput = {
|
|||
hasBa: boolean;
|
||||
vbs: VerbRenderedOutput;
|
||||
};
|
||||
export type VerbRenderedOutput = [[VHead] | [], [VB, VBE] | [VBE]];
|
||||
export type RootsStemsOutput = [[VHead] | [], [VB, VBA] | [VBA]]; // or perfect / equative
|
||||
export type VerbRenderedOutput = [[VHead] | [], [VBP, VBE] | [VBE]];
|
||||
export type RootsStemsOutput = [[VHead] | [], [VBP, VB] | [VB]]; // or perfect / equative
|
||||
|
||||
export type VB = VBBasic | VBGenNum | Welded | WeldedGN;
|
||||
/** A VB block that can have endings attached to it */
|
||||
export type VBA = Exclude<VB, VBGenNum | WeldedGN>;
|
||||
export type VB = VBBasic | Welded;
|
||||
/** A VB block that has had a person verb ending attached */
|
||||
export type VBE = (VBBasic | Welded) & {
|
||||
export type VBE = VB & {
|
||||
person: Person;
|
||||
}; // or equative
|
||||
info:
|
||||
| {
|
||||
type: "equative";
|
||||
tense: EquativeTenseWithoutBa;
|
||||
}
|
||||
| {
|
||||
type: "verb";
|
||||
aspect: Aspect;
|
||||
base: "stem" | "root";
|
||||
verb: VerbEntry;
|
||||
abilityAux?: boolean;
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * A VB block used for ability verbs or for the perfect (past participle).
 * It optionally gets swapped in order with the VBE when used with a negative.
 */
|
||||
export type VBP = VB & (VBPartInfo | VBAbilityInfo);
|
||||
|
||||
/**
 * `info` tag marking a block as a past participle ("ppart").
 * Carries the gender/number of the participle and a verb entry
 * (presumably the entry the participle was built from - confirm).
 */
export type VBPartInfo = {
|
||||
info: {
|
||||
type: "ppart";
|
||||
genNum: GenderNumber;
|
||||
verb: VerbEntry;
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * `info` tag marking a block as the ability-verb form ("ability").
 * Carries a verb entry and an aspect
 * (presumably those of the verb the block was built from - confirm).
 */
export type VBAbilityInfo = {
|
||||
info: {
|
||||
type: "ability";
|
||||
verb: VerbEntry;
|
||||
aspect: Aspect;
|
||||
};
|
||||
};
|
||||
|
||||
// in VB OR VBE - add root / stem and entry for parsing info
|
||||
// but how would that work with perfect and ability verbs ...
|
||||
|
||||
export type VBNoLenghts<V extends VB> = V extends VBBasic
|
||||
? Omit<VBBasic, "ps"> & { ps: PsString[] }
|
||||
|
@ -1221,10 +1263,6 @@ export type VBBasic = {
|
|||
ps: SingleOrLengthOpts<PsString[]>;
|
||||
};
|
||||
|
||||
// TODO: might be a better design decision to keep the GenderNumber stuff
|
||||
// in the RIGHT side of the weld
|
||||
export type VBGenNum = VBBasic & GenderNumber;
|
||||
|
||||
export type GenderNumber = {
|
||||
gender: Gender;
|
||||
number: NounNumber;
|
||||
|
@ -1233,11 +1271,9 @@ export type GenderNumber = {
|
|||
export type Welded = {
|
||||
type: "welded";
|
||||
left: NComp | VBBasic | Welded;
|
||||
right: VBBasic;
|
||||
right: VBBasic | (VBBasic & (VBPartInfo | VBAbilityInfo));
|
||||
};
|
||||
|
||||
export type WeldedGN = Omit<Welded, "right"> & { right: VBGenNum };
|
||||
|
||||
export type VHead = PH | NComp;
|
||||
|
||||
/** perfective head block */
|
||||
|
|
|
@ -27,4 +27,12 @@ module.exports = [
|
|||
ts: 1527815450,
|
||||
e: "son", // زوی
|
||||
},
|
||||
{
|
||||
ts: 1527823093,
|
||||
e: "prophet", // نبي
|
||||
},
|
||||
{
|
||||
ts: 1527822456,
|
||||
e: "word", // لفظ
|
||||
},
|
||||
];
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
*/
|
||||
|
||||
module.exports = [
|
||||
1527815139, // osedul
|
||||
1585228579997, // ورتلل
|
||||
1527815216, // راتلل - to come
|
||||
1527813473, // الوتل - to fly
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
*/
|
||||
|
||||
module.exports = [
|
||||
1527817457, // درکول
|
||||
1659037345120, // بیانېدل
|
||||
1608137130992, // چیغه کول
|
||||
1658537998960, // لېونی کول
|
||||
1527812403, // بچ کول - to save, protect, guard, spare, rescue, economize
|
||||
|
|
Loading…
Reference in New Issue