just trying out parsing very, very basic VPs

This commit is contained in:
adueck 2023-08-05 20:35:15 +04:00
parent 4cc81c8b10
commit e910de719f
32 changed files with 3106 additions and 2328 deletions

View File

@ -146,7 +146,7 @@ function VBBlock({
script: "p" | "f";
block:
| T.VBBasic
| T.VBGenNum
| (T.VBBasic & (T.VBPartInfo | T.VBAbilityInfo))
| (T.VBBasic & {
person: T.Person;
});
@ -167,8 +167,8 @@ function VBBlock({
);
}
const infInfo =
"gender" in block
? getEnglishGenNumInfo(block.gender, block.number)
"info" in block && block.info.type === "ppart"
? getEnglishGenNumInfo(block.info.genNum.gender, block.info.genNum.number)
: "person" in block
? getEnglishPersonInfo(block.person, "short")
: "";

View File

@ -132,7 +132,7 @@ function grabLength(
if (vb.type === "welded") {
return {
...vb,
right: grabVBLength(vb.right) as T.VBBasic | T.VBGenNum,
right: grabVBLength(vb.right) as T.VBBasic | T.VBP,
};
}
if (!(length in vb.ps)) {

View File

@ -3,12 +3,17 @@ import * as T from "../types";
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { lookup } from "../lib/src/parsing/lookup";
import { tokenizer } from "../lib/src/parsing/tokenizer";
import { NPDisplay } from "../components/library";
import {
CompiledPTextDisplay,
NPDisplay,
compileVP,
renderVP,
} from "../components/library";
function ParserDemo({ opts }: { opts: T.TextOptions }) {
const [text, setText] = useState<string>("");
const [result, setResult] = useState<
{ inflected: boolean; selection: T.NPSelection }[]
ReturnType<typeof parsePhrase>["success"]
>([]);
const [errors, setErrors] = useState<string[]>([]);
function handleChange(e: React.ChangeEvent<HTMLInputElement>) {
@ -26,7 +31,7 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
}
return (
<div className="mt-3" style={{ marginBottom: "1000px" }}>
<p>Type an adjective or noun (w or without adjs) to parse it</p>
<p>Type a NP</p>
<div className="form-group mb-2">
<input
dir="rtl"
@ -45,17 +50,49 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
{errors.length > 0 && (
<>
<div className="alert alert-danger" role="alert">
{errors.length > 0 ? (
<>
<div>possible errors:</div>
<ul>
{errors.map((e) => (
<div>{e}</div>
<li>{e}</li>
))}
</ul>
</>
) : (
<div>{errors[0]}</div>
)}
</div>
<div className="text-center">Did you mean:</div>
</>
)}
{result.map((np) => (
<NPDisplay NP={np.selection} inflected={np.inflected} opts={opts} />
{result.map((res) =>
"inflected" in res ? (
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
) : "verb" in res ? (
(() => {
const rendered = renderVP(res);
const compiled = compileVP(rendered, res.form);
return (
<div>
<CompiledPTextDisplay compiled={compiled} opts={opts} />
{compiled.e && (
<div className={`text-muted mt-2 text-center`}>
{compiled.e.map((e, i) => (
<div key={i}>{e}</div>
))}
</div>
)}
</div>
);
})()
) : (
<samp>
<pre>{JSON.stringify(res, null, " ")}</pre>
</samp>
)
)}
<details>
<summary>AST</summary>
<samp>

View File

@ -92,7 +92,7 @@ export function mapVerbRenderedOutput(
f: (a: T.PsString) => T.PsString,
[a, b]: T.VerbRenderedOutput
): T.VerbRenderedOutput {
return [fmapVHead(a), fmapV(b)];
return [fmapVHead(a), fmapVE(b)];
function fmapVHead([v]: [T.VHead] | []): [T.VHead] | [] {
if (v === undefined) {
return [];
@ -118,10 +118,10 @@ export function mapVerbRenderedOutput(
ps: f(comp.ps),
};
}
function fmapV(v: [T.VB, T.VBE] | [T.VBE]): [T.VB, T.VBE] | [T.VBE] {
return v.map(fmapVB) as [T.VB, T.VBE] | [T.VBE];
function fmapVE(v: [T.VBP, T.VBE] | [T.VBE]): [T.VBP, T.VBE] | [T.VBE] {
return v.map(fmapVB) as [T.VBP, T.VBE] | [T.VBE];
}
function fmapVB<V extends T.VB | T.VBE>(v: V): V {
function fmapVB<V extends T.VB | T.VBE | T.VBP>(v: V): V {
if (v.type === "welded") {
return {
...v,

View File

@ -1031,11 +1031,11 @@ export const persons = [
person: 9,
},
{
label: { subject: "thay (m. pl.)", object: "them (m. pl.)" },
label: { subject: "they (m. pl.)", object: "them (m. pl.)" },
person: 10,
},
{
label: { subject: "thay (f. pl.)", object: "them (f. pl.)" },
label: { subject: "they (f. pl.)", object: "them (f. pl.)" },
person: 11,
},
];

File diff suppressed because it is too large Load Diff

View File

@ -26,6 +26,7 @@ import { getPastParticiple, getRootStem } from "./roots-and-stems";
import {
isKedul,
perfectTenseToEquative,
vEntry,
verbEndingConcat,
} from "./rs-helpers";
import {
@ -33,7 +34,24 @@ import {
accentPsSyllable,
removeAccents,
} from "../accent-helpers";
// Verb entry for کېدل (kedul, "to become _____"), built with vEntry.
// It is the `verb` recorded in the `info` of a verb block's ending when the
// block is rendered for an ability tense (see `addEnd`, where
// `verb: ability ? kedulStat : verb`).
const kedulStat = vEntry({
ts: 1581086654898,
i: 11100,
p: "کېدل",
f: "kedul",
g: "kedul",
e: "to become _____",
r: 2,
c: "v. intrans.",
ssp: "ش",
ssf: "sh",
prp: "شول",
prf: "shwul",
pprtp: "شوی",
pprtf: "shúway",
noOo: true,
ec: "become",
});
const formulas: Record<
T.VerbTense | T.ImperativeTense,
{
@ -123,11 +141,12 @@ export function renderVerb({
const type = isAbilityTense(tense) ? "ability" : "basic";
const transitive = object !== undefined;
const king = transitive && isPast ? object : subject;
const base = isPast ? "root" : "stem";
// #1 get the appropriate root / stem
const [vHead, rest] = getRootStem({
verb,
rs: isPast ? "root" : "stem",
rs: base,
aspect: negative && isImperativeTense(tense) ? "imperfective" : aspect,
voice,
type,
@ -148,6 +167,8 @@ export function renderVerb({
pastThird: isPast && king === T.Person.ThirdSingMale,
aspect,
basicForm: type === "basic" && voice === "active",
base,
ability: type === "ability",
}),
],
};
@ -165,7 +186,7 @@ function renderPerfectVerb({
voice: T.Voice;
}): {
hasBa: boolean;
vbs: [[], [T.VB, T.VBE]];
vbs: [[], [T.VBP, T.VBE]];
objComp: T.Rendered<T.NPSelection> | undefined;
} {
const hasBa = perfectTenseHasBa(tense);
@ -178,6 +199,10 @@ function renderPerfectVerb({
type: "VB",
person,
ps: fmapSingleOrLengthOpts((x) => x[row][col], equative),
info: {
type: "equative",
tense: perfectTenseToEquative(tense),
},
};
return {
hasBa,
@ -194,32 +219,46 @@ function addEnding({
pastThird,
aspect,
basicForm,
base,
ability,
}: {
rs: [T.VB, T.VBA] | [T.VBA];
rs: [T.VBP, T.VB] | [T.VB];
ending: T.SingleOrLengthOpts<T.PsString[]>;
person: T.Person;
verb: T.VerbEntry;
pastThird: boolean;
aspect: T.Aspect;
basicForm: boolean;
}): [T.VB, T.VBE] | [T.VBE] {
base: "stem" | "root";
ability: boolean;
}): [T.VBP, T.VBE] | [T.VBE] {
return rs.length === 2
? [rs[0], addEnd(rs[1], ending)]
: [addEnd(rs[0], ending)];
function addEnd(
vba: T.VBA,
ending: T.SingleOrLengthOpts<T.PsString[]>
): T.VBE {
if (vba.type === "welded") {
function addEnd(vb: T.VB, ending: T.SingleOrLengthOpts<T.PsString[]>): T.VBE {
const info = {
type: "verb" as const,
aspect: ability ? "perfective" : aspect,
base,
verb: ability ? kedulStat : verb,
...(ability
? {
abilityAux: true,
}
: {}),
};
if (vb.type === "welded") {
return {
...vba,
right: addToVBBasicEnd(vba.right, ending),
...vb,
right: addToVBBasicEnd(vb.right, ending),
person,
info,
};
}
return {
...addToVBBasicEnd(vba, ending),
...addToVBBasicEnd(vb, ending),
person,
info,
};
}
function addToVBBasicEnd(

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,7 @@ import {
countSyllables,
removeAccents,
} from "../accent-helpers";
import { isKawulVerb, isTlulVerb } from "../type-predicates";
import { isKawulVerb } from "../type-predicates";
import {
vEntry,
addAbilityEnding,
@ -123,42 +123,60 @@ function getAbilityRs(
rs: "root" | "stem",
voice: T.Voice,
genderNum: T.GenderNumber
): [[] | [T.VHead], [T.VB, T.VBA]] {
): [[] | [T.VHead], [T.VBP, T.VB]] {
// https://grammar.lingdocs.com/verbs/ability/#exceptions
const losesAspect =
isTlulVerb(verb) ||
(verb.entry.prp && verb.entry.p !== "کول") ||
(isStatComp(verb) && vTransitivity(verb) === "intransitive");
const asp = losesAspect ? "imperfective" : aspect;
const [vhead, [basicroot]] =
voice === "passive"
? getPassiveRs(verb, "imperfective", "root", genderNum)
: getRoot(verb, genderNum, losesAspect ? "imperfective" : aspect);
return [vhead, [addAbilityEnding(basicroot), rs === "root" ? shwulVB : shVB]];
: getRoot(verb, genderNum, asp);
return [
vhead,
[addAbilityEnding(basicroot, verb, asp), rs === "root" ? shwulVB : shVB],
];
}
export function getPastParticiple(
verb: T.VerbEntry,
voice: T.Voice,
{ gender, number }: { gender: T.Gender; number: T.NounNumber }
): T.VBGenNum | T.WeldedGN {
): T.VBP {
const v = removeFVarientsFromVerb(verb);
if (voice === "passive") {
return getPassivePp(v, { gender, number });
}
if (isStatComp(v) && v.complement) {
return weld(
return {
...weld(
makeComplement(v.complement, { gender, number }),
getPastParticiple(statVerb[vTransitivity(verb)], voice, {
gender,
number,
}) as T.VBGenNum
);
})
),
info: {
type: "ppart",
genNum: { gender, number },
verb,
},
};
}
if (verb.entry.pprtp && verb.entry.pprtf) {
const base = makePsString(verb.entry.pprtp, verb.entry.pprtf);
return {
type: "VB",
ps: inflectPattern3(base, { gender, number }),
info: {
type: "ppart",
verb,
genNum: {
gender,
number,
},
},
};
}
const basicRoot = getRoot(
@ -166,7 +184,7 @@ export function getPastParticiple(
{ gender, number },
"imperfective"
)[1][0];
const longRoot = getLongVB(basicRoot);
const longRoot = getLongVB(basicRoot) as T.VBNoLenghts<T.VB>;
const rootWLengths = possiblePPartLengths(longRoot);
/* istanbul ignore next */
if ("right" in rootWLengths) {
@ -175,8 +193,14 @@ export function getPastParticiple(
return {
...rootWLengths,
ps: addTail(rootWLengths.ps),
info: {
type: "ppart",
verb,
genNum: {
gender,
number,
},
},
};
function addTail(
@ -192,12 +216,19 @@ export function getPastParticiple(
function getPassivePp(
verb: T.VerbEntryNoFVars,
genderNumber: T.GenderNumber
): T.WeldedGN {
): T.VBP {
if (isStatComp(verb) && verb.complement) {
return weld(
return {
...weld(
makeComplement(verb.complement, genderNumber),
getPassivePp(statVerb.transitive, genderNumber)
);
),
info: {
type: "ppart",
verb,
genNum: genderNumber,
},
};
}
const basicRoot = getRoot(
verb,
@ -205,38 +236,26 @@ function getPassivePp(
isKawulVerb(verb) ? "perfective" : "imperfective"
)[1][0];
const longRoot = getLongVB(basicRoot);
const kedulVb: T.VBGenNum = getPastParticiple(
const kedulVb = getPastParticiple(
statVerb.intransitive,
"active",
genderNumber
) as T.VBGenNum;
return weld(longRoot, kedulVb);
}
function getPassiveRs(
verb: T.VerbEntryNoFVars,
aspect: T.Aspect,
rs: "root" | "stem",
genderNumber: T.GenderNumber
): [[] | [T.VHead], [T.VBA]] {
const [vHead, [basicRoot]] = getRoot(verb, genderNumber, aspect);
const longRoot = getLongVB(basicRoot);
const kedulVba = getRootStem({
verb: statVerb.intransitive,
aspect,
rs,
type: "basic",
voice: "active",
genderNumber: { gender: "masc", number: "singular" },
})[1][0] as T.VBBasic;
return [vHead, [weld(longRoot, kedulVba)]];
);
return {
...weld(longRoot, kedulVb),
info: {
type: "ppart",
verb,
genNum: genderNumber,
},
};
}
function getRoot(
verb: T.VerbEntryNoFVars,
genderNum: T.GenderNumber,
aspect: T.Aspect
): [[T.VHead] | [], [T.VBA]] {
): [[T.VHead] | [], [T.VB]] {
if (
verb.complement &&
isStatComp(verb) &&
@ -430,6 +449,25 @@ function getStem(
}
}
/**
 * Builds the passive root/stem block for a verb.
 *
 * The verb's own root (from `getRoot`) is put in its long form and welded
 * onto the requested root/stem of `statVerb.intransitive`. The auxiliary is
 * always requested as masculine singular, whatever `genderNumber` is.
 *
 * @param verb the verb entry (without f-variants)
 * @param aspect aspect used for both the main root and the auxiliary
 * @param rs whether the auxiliary should be its root or its stem
 * @param genderNumber gender/number passed on to `getRoot`
 * @returns the verb head (if any) from `getRoot`, and the single welded block
 */
function getPassiveRs(
  verb: T.VerbEntryNoFVars,
  aspect: T.Aspect,
  rs: "root" | "stem",
  genderNumber: T.GenderNumber
): [[] | [T.VHead], [T.VB]] {
  const rootResult = getRoot(verb, genderNumber, aspect);
  const head = rootResult[0];
  const leftSide = getLongVB(rootResult[1][0]);
  const auxResult = getRootStem({
    verb: statVerb.intransitive,
    aspect,
    rs,
    type: "basic",
    voice: "active",
    genderNumber: { gender: "masc", number: "singular" },
  });
  const rightSide = auxResult[1][0] as T.VBBasic;
  const welded = weld(leftSide, rightSide);
  return [head, [welded]];
}
// TODO: This is a nasty and messy way to do it with the length options included
function getPerfectiveHead(
base: T.PsString,

View File

@ -123,19 +123,10 @@ export function verbEndingConcat(
);
}
// TODO: THIS IS UGGGGLY NEED TO THINK THROUGH THE TYPING ON THE WELDING
export function weld(
left: T.Welded["left"],
right: T.VBGenNum | T.WeldedGN
): T.WeldedGN;
export function weld(
left: T.Welded["left"],
right: T.VBBasic | T.NComp | T.Welded
): T.Welded;
export function weld(
left: T.Welded["left"],
right: T.VBBasic | T.VBGenNum | T.Welded | T.NComp | T.WeldedGN
): T.Welded | T.WeldedGN {
right: T.VB | T.VBP | T.NComp
): T.Welded {
if (right.type === "welded") {
return weld(weld(left, right.left), right.right);
}
@ -218,7 +209,11 @@ export function tlulPerfectiveStem(person: {
];
}
export function addAbilityEnding(vb: T.VBA): T.VBA {
export function addAbilityEnding(
vb: T.VB,
verb: T.VerbEntry,
aspect: T.Aspect
): T.VBP {
const abilityEnding: T.PsString[] = [
{ p: "ی", f: "ay" },
{ p: "ای", f: "aay" },
@ -227,9 +222,21 @@ export function addAbilityEnding(vb: T.VBA): T.VBA {
return {
...vb,
right: addToEnd(vb.right, abilityEnding),
info: {
type: "ability",
verb,
aspect,
},
};
}
return addToEnd(vb, abilityEnding);
return {
...addToEnd(vb, abilityEnding),
info: {
type: "ability",
verb,
aspect,
},
};
function addToEnd(vb: T.VBBasic, end: T.PsString[]): T.VBBasic {
/* istanbul ignore next */
if (!("long" in vb.ps)) {
@ -248,8 +255,8 @@ export function addAbilityEnding(vb: T.VBA): T.VBA {
}
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBBasic>): T.VBBasic;
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA;
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA {
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VB>): T.VB;
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VB>): T.VB {
const shortenableEndings = ["ښتل", "ستل", "وتل"];
const wrul = ["وړل", "راوړل", "وروړل", "دروړل"];
// can't find a case where this is used - type safety
@ -294,12 +301,11 @@ export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA {
return vba;
}
export function getLongVB(vb: T.VBBasic): T.VBNoLenghts<T.VBBasic>;
export function getLongVB(vb: T.VBA): T.VBNoLenghts<T.VBA>;
export function getLongVB(vb: T.VBA): T.VBNoLenghts<T.VBA> {
export function getLongVB(vb: T.VB): T.VBNoLenghts<T.VB> {
if (vb.type === "welded") {
return {
...vb,
// @ts-ignore
right: getLongVB(vb.right),
};
}

View File

@ -1,4 +1,5 @@
import * as T from "../../../types";
import { endsInConsonant } from "../p-text-helpers";
import {
isPattern1Entry,
isPattern2Entry,
@ -50,6 +51,7 @@ export function getInflectionQueries(
},
});
if (noun) {
// TODO: could merge these queries for more efficiency ??
queries.push({
search: { ppp: s },
details: {
@ -59,7 +61,17 @@ export function getInflectionQueries(
predicate: isNounEntry,
},
});
if (s.endsWith("و")) {
queries.push({
search: { app: s },
details: {
inflection: [0],
gender: ["masc", "fem"],
plural: true,
predicate: isNounEntry,
},
});
// TODO: what about short vowel ending nouns with وو etc
if (s.endsWith("و") && !["ا", "و"].includes(s.charAt(s.length - 2))) {
queries.push({
search: { ppp: s.slice(0, -1) },
details: {
@ -69,6 +81,15 @@ export function getInflectionQueries(
predicate: isMascNounEntry,
},
});
queries.push({
search: { app: s.slice(0, -1) },
details: {
inflection: [1],
gender: ["masc"],
plural: true,
predicate: isMascNounEntry,
},
});
queries.push({
search: { ppp: s.slice(0, -1) + "ې" },
details: {
@ -218,6 +239,15 @@ export function getInflectionQueries(
!isPattern4Entry(e),
},
});
queries.push({
search: { app: s.slice(0, -2) },
details: {
inflection: [1],
gender: ["masc"],
plural: true,
predicate: (e) => isNounEntry(e),
},
});
}
if (
s.endsWith("ګانو") &&
@ -364,6 +394,18 @@ export function getInflectionQueries(
predicate: isPattern1Entry,
},
});
if (noun) {
// bundled plural
queries.push({
search: { p: s.slice(0, -1) },
details: {
inflection: [0],
plural: true,
gender: ["masc"],
predicate: (e) => !isPattern5Entry(e) && endsInConsonant(e),
},
});
}
queries.push({
search: { infbp: s.slice(0, -1) },
details: {

View File

@ -1,9 +1,12 @@
import nounsAdjs from "../../../nouns-adjs";
import verbs from "../../../verbs";
import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
const [key, value] = Object.entries(s)[0];
// TODO: could make this more efficient - merging ppp and app queries?
if (key === "ppp") {
return nounsAdjs.filter(
(e) =>
@ -14,16 +17,42 @@ export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
.includes(value as string)
);
}
// Arabic plurals (`app`) can hold several comma-separated forms, so match
// against each trimmed form rather than the raw field. (This branch used to
// re-test "ppp", which the branch above already handles, so it never ran.)
if (key === "app") {
  return nounsAdjs.filter(
    (e) =>
      e.app &&
      e.app
        .split(",")
        .map((w) => w.trim())
        .includes(value as string)
  );
}
// @ts-ignore
return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
}
/**
 * Returns every verb in the `verbs` list whose dictionary entry satisfies
 * the given predicate.
 *
 * @param s predicate run against each verb's `entry`
 * @returns the matching verb entries, in list order
 */
export function verbLookup(
  s: (e: T.VerbDictionaryEntry) => boolean
): T.VerbEntry[] {
  const matches = verbs.filter((verb) => {
    const { entry } = verb;
    return s(entry);
  });
  return matches;
}
export function wordQuery(word: string, type: "adj"): T.AdjectiveEntry;
export function wordQuery(word: string, type: "noun"): T.NounEntry;
export function wordQuery(word: string, type: "verb"): T.VerbEntryNoFVars;
export function wordQuery(
word: string,
type: "noun" | "adj"
): T.NounEntry | T.AdjectiveEntry {
type: "noun" | "adj" | "verb"
): T.NounEntry | T.AdjectiveEntry | T.VerbEntryNoFVars {
if (type === "verb") {
const verb = verbs.find(
(x) => x.entry.p === word || x.entry.f === word || x.entry.g === word
);
if (!verb) {
throw new Error(`missing ${word} in word query`);
}
return removeFVarientsFromVerb(verb);
}
const entry = nounsAdjs.find(
(x) => x.p === word || x.f === word || x.g === word
);

View File

@ -1,67 +1,14 @@
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup } from "./lookup";
import { lookup, wordQuery } from "./lookup";
import { parseAdjective } from "./parse-adjective";
import { tokenizer } from "./tokenizer";
const ghut = {
ts: 1527812625,
i: 9561,
p: "غټ",
f: "ghuT, ghaT",
g: "ghuT,ghaT",
e: "big, fat",
r: 4,
c: "adj.",
} as T.AdjectiveEntry;
const sturey = {
ts: 1527815306,
i: 7933,
p: "ستړی",
f: "stúRay",
g: "stuRay",
e: "tired",
r: 4,
c: "adj. / adv.",
} as T.AdjectiveEntry;
const narey = {
ts: 1527819320,
i: 14027,
p: "نری",
f: "naráy",
g: "naray",
e: "thin; mild; high (pitch)",
r: 4,
c: "adj.",
} as T.AdjectiveEntry;
const zor = {
ts: 1527815451,
i: 7570,
p: "زوړ",
f: "zoR",
g: "zoR",
e: "old",
r: 4,
c: "adj.",
infap: "زاړه",
infaf: "zaaRu",
infbp: "زړ",
infbf: "zaR",
} as T.AdjectiveEntry;
const sheen = {
ts: 1527815265,
i: 8979,
p: "شین",
f: "sheen",
g: "sheen",
e: "green, blue; unripe, immature; bright, sunny",
r: 4,
c: "adj.",
infap: "شنه",
infaf: "shnu",
infbp: "شن",
infbf: "shn",
} as T.AdjectiveEntry;
const ghut = wordQuery("غټ", "adj");
const sturey = wordQuery("ستړی", "adj");
const narey = wordQuery("نری", "adj");
const zor = wordQuery("زوړ", "adj");
const sheen = wordQuery("شین", "adj");
const tests: {
category: string;
@ -312,7 +259,7 @@ describe("parsing adjectives", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const possibilities = parseAdjective(tokens, lookup).map((x) => x[1]);
const possibilities = parseAdjective(tokens, lookup).map((x) => x.body);
expect(
possibilities.map((x) => {
const { given, ...rest } = x;

View File

@ -6,6 +6,7 @@ import * as T from "../../../types";
import { lookup, wordQuery } from "./lookup";
import { parseNoun } from "./parse-noun";
import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils";
const sor = wordQuery("سوړ", "adj");
const zor = wordQuery("زوړ", "adj");
@ -36,6 +37,12 @@ const maamaa = wordQuery("ماما", "noun");
const peesho = wordQuery("پیشو", "noun");
const duaa = wordQuery("دعا", "noun");
const zooy = wordQuery("زوی", "noun");
const nabee = wordQuery("نبي", "noun");
const lafz = wordQuery("لفظ", "noun");
// TODO: test for adjective errors etc
// bundled plural
const tests: {
category: string;
@ -123,6 +130,13 @@ const tests: {
gender: "fem",
},
},
{
inflected: false,
selection: {
...makeNounSelection(daktar, undefined),
number: "plural",
},
},
],
},
{
@ -1290,18 +1304,98 @@ const tests: {
},
],
},
{
category: "arabic plurals",
cases: [
{
input: "الفاظ",
output: [
{
inflected: false,
selection: {
...makeNounSelection(lafz, undefined),
number: "plural",
},
},
],
},
{
input: "الفاظو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(lafz, undefined),
number: "plural",
},
},
],
},
{
input: "نبي",
output: [
{
inflected: false,
selection: makeNounSelection(nabee, undefined),
},
{
inflected: true,
selection: makeNounSelection(nabee, undefined),
},
],
},
{
input: "انبیا",
output: [
{
inflected: false,
selection: {
...makeNounSelection(nabee, undefined),
number: "plural",
},
},
],
},
{
input: "انبیاوو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(nabee, undefined),
number: "plural",
},
},
],
},
],
},
{
category: "bundled plurals",
cases: [
{
input: "کوره",
output: [
{
inflected: false,
selection: {
...makeNounSelection(kor, undefined),
number: "plural",
},
},
],
},
],
},
];
// PROBLEM WITH غټې وریژې
// ];
describe("parsing nouns", () => {
tests.forEach(({ category, cases }) => {
// eslint-disable-next-line jest/valid-title
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
const res = parseNoun(tokens, lookup).map(({ body }) => body);
expect(res).toEqual(output);
});
});
@ -1407,10 +1501,8 @@ const adjsTests: {
},
],
},
// TODO: testing issue with the parser returning multiple options needs
// to be worked out to test double adjectives
{
input: "غټو کورونو",
input: "غټو زړو کورونو",
output: [
{
inflected: true,
@ -1419,7 +1511,7 @@ const adjsTests: {
number: "plural",
adjectives: [
makeAdjectiveSelection(ghut),
// makeAdjectiveSelection(zor),
makeAdjectiveSelection(zor),
],
},
},
@ -1429,15 +1521,17 @@ const adjsTests: {
},
];
// describe("parsing nouns with adjectives", () => {
// adjsTests.forEach(({ category, cases }) => {
// // eslint-disable-next-line jest/valid-title
// test(category, () => {
// cases.forEach(({ input, output }) => {
// const tokens = tokenizer(input);
// const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
// expect(res).toEqual(output);
// });
// });
// });
// });
describe("parsing nouns with adjectives", () => {
adjsTests.forEach(({ category, cases }) => {
// eslint-disable-next-line jest/valid-title
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup)
.filter(isCompleteResult)
.map(({ body }) => body);
expect(res).toEqual(output);
});
});
});
});

View File

@ -9,102 +9,31 @@ import {
} from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { parseAdjective } from "./parse-adjective";
import { groupWith, equals } from "rambda";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
// TODO:
// - cleanup the workflow and make sure all nouns are covered and test
// - add possesive parsing
type NounResult = { inflected: boolean; selection: T.NounSelection };
export function parseNoun(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
prevPossesor: { inflected: boolean; selection: T.NounSelection } | undefined
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return [];
}
const [first, ...rest] = tokens;
const possesor =
first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
if (possesor) {
const runsAfterPossesor: T.ParseResult<NounResult | undefined>[] = possesor
? possesor
: [{ tokens, body: undefined, errors: [] }];
// could be a case for a monad ??
return removeUnneccesaryFailing(
runsAfterPossesor.flatMap(
({ tokens, body: possesor, errors }) =>
parseNoun(
tokens,
lookup,
possesor
? {
inflected: possesor.inflected,
selection: {
...possesor.selection,
possesor: prevPossesor
? {
shrunken: false,
np: {
type: "NP",
selection: prevPossesor.selection,
},
}
: undefined,
},
}
: undefined
)
// .map<T.ParseResult<NounResult>>(([t, r, errs]) => [
// t,
// r,
// // TODO: should the errors from the runsAfterPossesor be thrown out?
// // or ...errors should be kept?
// // to show an error like د غتو ماشومان نومونه
// // adj error غټ should be first inflection (seems confusing)
// [...errs, ...errors],
// ])
)
);
} else {
return removeUnneccesaryFailing(
parseNounAfterPossesor(tokens, lookup, prevPossesor, [])
);
}
}
function removeUnneccesaryFailing(
results: T.ParseResult<NounResult>[]
): T.ParseResult<NounResult>[] {
// group by identical results
const groups = groupWith(
(a, b) => equals(a.body.selection, b.body.selection),
results
);
// if there's a group of identical results with some success in it
// remove any erroneous results
const stage1 = groups.flatMap((group) => {
if (group.find((x) => x.errors.length === 0)) {
return group.filter((x) => x.errors.length === 0);
}
return group;
const possesor = parsePossesor(tokens, lookup, undefined);
if (possesor.length) {
return bindParseResult(possesor, (tokens, p) => {
return parseNounAfterPossesor(tokens, lookup, p, []);
});
// finally, if there's any success anywhere, remove any of the errors
if (stage1.find((x) => x.errors.length === 0)) {
return stage1.filter((x) => x.errors.length === 0);
} else {
return stage1;
}
return parseNounAfterPossesor(tokens, lookup, undefined, []);
}
// create NP parsing function for that
// TODO with possesor, parse an NP not a noun
function parseNounAfterPossesor(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
possesor: { inflected: boolean; selection: T.NounSelection } | undefined,
possesor: T.PossesorSelection | undefined,
adjectives: {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
@ -117,14 +46,13 @@ function parseNounAfterPossesor(
}
// TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup);
const withAdj = adjRes.flatMap(({ tokens: tkns, body: adj }) =>
const withAdj = bindParseResult(adjRes, (tkns, adj) =>
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
);
const [first, ...rest] = tokens;
const w: ReturnType<typeof parseNoun> = [];
const searches = getInflectionQueries(first.s, true);
const w: ReturnType<typeof parseNoun> = [];
searches.forEach(({ search, details }) => {
const nounEntries = lookup(search).filter(isNounEntry);
details.forEach((deets) => {
@ -147,6 +75,11 @@ function parseNounAfterPossesor(
convertInflection(inf, entry, gender, deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
const errors = [
...adjErrors.map((message) => ({
message,
})),
];
w.push({
tokens: rest,
body: {
@ -162,25 +95,10 @@ function parseNounAfterPossesor(
adjectives: adjectives.map((a) => a.selection),
// TODO: could be nicer to validate that the possesor is inflected before
// and just pass in the selection
possesor: possesor
? {
shrunken: false,
np: {
type: "NP",
selection: possesor.selection,
},
}
: undefined,
possesor,
},
},
errors: [
...(possesor?.inflected === false
? [{ message: "possesor should be inflected" }]
: []),
...adjErrors.map((message) => ({
message,
})),
],
errors,
});
}
);

View File

@ -4,9 +4,13 @@ import { parseNoun } from "./parse-noun";
import { fmapParseResult } from "../fp-ps";
export function parseNP(
s: T.Token[],
s: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): T.ParseResult<{ inflected: boolean; selection: T.NPSelection }>[] {
if (s.length === 0) {
return [];
}
function makeNPSl(
a:
| {
@ -33,6 +37,6 @@ export function parseNP(
// @ts-ignore grrr webpack is having trouble with this
return fmapParseResult(makeNPSl, [
...parsePronoun(s),
...parseNoun(s, lookup, undefined),
...parseNoun(s, lookup),
]);
}

View File

@ -1,20 +1,34 @@
import * as T from "../../../types";
import { verbLookup } from "./lookup";
import { parseNP } from "./parse-np";
import { parseVerb } from "./parse-verb";
import { parseVP } from "./parse-vp";
export function parsePhrase(
s: T.Token[],
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): {
success: { inflected: boolean; selection: T.NPSelection }[];
success: (
| {
inflected: boolean;
selection: T.NPSelection;
}
| Omit<T.VBE, "ps">
| T.VPSelectionComplete
)[];
errors: string[];
} {
const nps = parseNP(s, lookup).filter(({ tokens }) => !tokens.length);
const res = [
...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
...parseVerb(s, verbLookup),
...parseVP(s, lookup, verbLookup),
];
const success = nps.map((x) => x.body);
const success = res.map((x) => x.body);
return {
success,
errors: [
...new Set(nps.flatMap(({ errors }) => errors.map((e) => e.message))),
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
],
};
}

View File

@ -0,0 +1,124 @@
/* eslint-disable jest/no-conditional-expect */
import * as T from "../../../types";
import {
makeAdjectiveSelection,
makeNounSelection,
makePronounSelection,
} from "../phrase-building/make-selections";
import { lookup, wordQuery } from "./lookup";
import { parsePossesor } from "./parse-possesor";
import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils";
const sturey = wordQuery("ستړی", "adj");
const sarey = wordQuery("سړی", "noun");
const maashoom = wordQuery("ماشوم", "noun");
const malguray = wordQuery("ملګری", "noun");
const plaar = wordQuery("پلار", "noun");
// Table of cases for the "parse possesor" test below.
// - `input` is tokenized and handed to parsePossesor.
// - `output` is either the list of expected `np.selection` values of the
//   results that pass `isCompleteResult`, or the string "error", meaning at
//   least one parse result must carry errors.
const tests: {
input: string;
output: T.NPSelection["selection"][] | "error";
}[] = [
{
input: "د سړي",
output: [makeNounSelection(sarey, undefined)],
},
{
input: "د ماشومې",
output: [
{
...makeNounSelection(maashoom, undefined),
gender: "fem",
},
],
},
// chained possessors: the first one ends up nested as the possesor of the second
{
input: "د ستړي پلار د ملګري",
output: [
{
...makeNounSelection(malguray, undefined),
possesor: {
shrunken: false,
np: {
type: "NP",
selection: {
...makeNounSelection(plaar, undefined),
adjectives: [makeAdjectiveSelection(sturey)],
},
},
},
},
],
},
// inputs expected to produce at least one result with errors
{
input: "د سړی نوم",
output: "error",
},
{
input: "د ښځې د ماشومه",
output: "error",
},
// contracted pronoun forms: a masculine and a feminine person are expected for each
{
input: "زما",
output: [
makePronounSelection(T.Person.FirstSingMale),
makePronounSelection(T.Person.FirstSingFemale),
],
},
{
input: "ستا",
output: [
makePronounSelection(T.Person.SecondSingMale),
makePronounSelection(T.Person.SecondSingFemale),
],
},
{
input: "زمونږ",
output: [
makePronounSelection(T.Person.FirstPlurMale),
makePronounSelection(T.Person.FirstPlurFemale),
],
},
{
input: "زموږ",
output: [
makePronounSelection(T.Person.FirstPlurMale),
makePronounSelection(T.Person.FirstPlurFemale),
],
},
{
input: "ستاسو",
output: [
makePronounSelection(T.Person.SecondPlurMale),
makePronounSelection(T.Person.SecondPlurFemale),
],
},
{
input: "ستاسې",
output: [
makePronounSelection(T.Person.SecondPlurMale),
makePronounSelection(T.Person.SecondPlurFemale),
],
},
// a contracted pronoun following another possessor is expected to error
{
input: "د پلار ستا",
output: "error",
},
];
// Runs every row of `tests` through the tokenizer and parsePossesor.
// "error" rows only require that some result carries errors; all other rows
// compare the selections of the complete results against the expected list.
test("parse possesor", () => {
  tests.forEach(({ input, output }) => {
    const tokens = tokenizer(input);
    // parse once and reuse the result for whichever expectation applies
    const parsed = parsePossesor(tokens, lookup, undefined);
    if (output === "error") {
      expect(parsed.some((x) => x.errors.length)).toBe(true);
    } else {
      const selections = parsed
        .filter(isCompleteResult)
        .map((x) => x.body.np.selection);
      expect(selections).toEqual(output);
    }
  });
});

View File

@ -0,0 +1,136 @@
import * as T from "../../../types";
import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils";
// Table of contracted possessive pronoun forms.
// Each row pairs the accepted spellings of a contraction with the persons
// it can stand for; parseContractions looks a token up by spelling and
// produces one possessor selection per listed person.
// TODO: maybe contractions should just be male to cut down on the
// alternative sentences
const contractions: [string[], T.Person[]][] = [
[["زما"], [T.Person.FirstSingMale, T.Person.FirstSingFemale]],
[["ستا"], [T.Person.SecondSingMale, T.Person.SecondSingFemale]],
[
["زمونږ", "زموږ"],
[T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
],
[
["ستاسو", "ستاسې"],
[T.Person.SecondPlurMale, T.Person.SecondPlurFemale],
],
];
/**
 * Parses a chain of possessors from the front of the token list.
 *
 * Two kinds of possessor are recognised:
 *  - a contracted possessive pronoun (see the `contractions` table), and
 *  - the particle "د" followed by a noun phrase.
 *
 * After each possessor the function recurses on the remaining tokens so that
 * further possessors can be stacked. When no further possessor can be read,
 * the possessor accumulated so far (if any) is returned together with the
 * untouched remaining tokens.
 *
 * @param tokens - the tokens still to be parsed
 * @param lookup - dictionary lookup handed on to parseNP
 * @param prevPossesor - the possessor parsed so far, or undefined at the start
 * @returns every possible reading; an empty array when there is no possessor
 */
export function parsePossesor(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  prevPossesor: T.PossesorSelection | undefined
): T.ParseResult<T.PossesorSelection>[] {
  if (tokens.length === 0) {
    if (prevPossesor) {
      return [
        {
          tokens,
          body: prevPossesor,
          errors: [],
        },
      ];
    }
    return [];
  }
  const [first, ...rest] = tokens;
  // Contracted pronouns are tried first. The local name is deliberately
  // different from the module-level `contractions` table to avoid shadowing.
  const pronounPossesors = parseContractions(first);
  if (pronounPossesors.length) {
    // a contraction that follows an earlier possessor is flagged as an error
    const errors = prevPossesor
      ? [{ message: "a pronoun cannot have a possesor" }]
      : [];
    return pronounPossesors
      .flatMap((p) => parsePossesor(rest, lookup, p))
      .map((x) => ({
        ...x,
        errors: [...errors, ...x.errors],
      }));
  }
  if (first.s === "د") {
    const np = parseNP(rest, lookup);
    return bindParseResult(np, (tokens, body) => {
      const possesor: T.PossesorSelection = {
        shrunken: false,
        np: body.selection,
      };
      return {
        errors: !body.inflected
          ? // TODO: get ps to say which possesor
            // TODO: track the position coming from the parseNP etc for highlighting
            [{ message: `possesor should be inflected` }]
          : [],
        // TODO: add and check error - can't add possesor to pronoun
        // (addPoss currently leaves a pronoun without the earlier possessor)
        next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
      };
    });
  }
  // No further possessor starts here: hand back whatever was collected.
  if (prevPossesor) {
    return [
      {
        tokens,
        body: prevPossesor,
        errors: [],
      },
    ];
  }
  return [];
}
/**
 * Returns a copy of `possesorOf` with `possesor` attached as the possessor of
 * its inner selection. When the inner selection is a pronoun nothing is
 * attached and a plain shallow copy is returned.
 */
function addPoss(
  possesor: T.PossesorSelection | undefined,
  possesorOf: T.PossesorSelection
): T.PossesorSelection {
  const inner = possesorOf.np.selection;
  if (inner.type === "pronoun") {
    return { ...possesorOf };
  }
  return {
    ...possesorOf,
    np: {
      ...possesorOf.np,
      selection: {
        ...inner,
        possesor,
      },
    },
  };
}
/**
 * Looks the token up in the `contractions` table and returns one pronoun
 * possessor per person listed for the first matching row, or an empty array
 * when the token is not a known contraction.
 */
function parseContractions({ s }: T.Token): T.PossesorSelection[] {
  for (const [forms, people] of contractions) {
    if (!forms.includes(s)) {
      continue;
    }
    return people.map(
      (person): T.PossesorSelection => ({
        shrunken: false,
        np: {
          type: "NP",
          selection: {
            type: "pronoun",
            distance: "far",
            person,
          },
        },
      })
    );
  }
  return [];
}

View File

@ -2,7 +2,7 @@ import * as T from "../../../types";
type Result = ReturnType<typeof parsePronoun>[number];
// TODO: map for doubling true, false, and masc fem
// TODO: add chaa
export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
inflected: boolean;
selection: T.PronounSelection;
@ -21,6 +21,19 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
},
errors: [],
}));
} else if (s === "ما") {
return [0, 1].map((person) => ({
tokens: rest,
body: {
inflected: true,
selection: {
type: "pronoun",
person,
distance: "far",
},
},
errors: [],
}));
} else if (s === "ته") {
return [2, 3].map((person) => ({
tokens: rest,
@ -34,6 +47,19 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
},
errors: [],
}));
} else if (s === "تا") {
return [2, 3].map((person) => ({
tokens: rest,
body: {
inflected: true,
selection: {
type: "pronoun",
person,
distance: "far",
},
},
errors: [],
}));
} else if (s === "هغه") {
return [
...[false, true].map<Result>((inflected) => ({
@ -42,7 +68,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
inflected,
selection: {
type: "pronoun",
person: 5,
person: 4,
distance: "far",
},
},
@ -54,7 +80,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
inflected: false,
selection: {
type: "pronoun",
person: 5,
person: 4,
distance: "far",
},
},

View File

@ -0,0 +1,64 @@
import * as T from "../../../types";
/**
 * Tries to read the first token as a verb with a person ending.
 *
 * The ending decides which persons are possible (getVerbEnding) and the token
 * minus its last character is looked up as a stem (findByStem). One result is
 * produced for every person/verb combination, each tagged as an imperfective
 * stem form.
 *
 * @param tokens - the tokens to parse
 * @param verbLookup - returns the verb entries whose dictionary entry passes the given predicate
 * @returns the possible verb blocks (without `ps`) and the remaining tokens
 */
export function parseVerb(
  tokens: Readonly<T.Token[]>,
  verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.ParseResult<Omit<T.VBE, "ps">>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const word = first.s;
  const people = getVerbEnding(word);
  if (people.length === 0) {
    return [];
  }
  const candidates = findByStem(word.slice(0, -1), verbLookup);
  const results: T.ParseResult<Omit<T.VBE, "ps">>[] = [];
  for (const person of people) {
    for (const verb of candidates) {
      results.push({
        tokens: rest,
        body: {
          type: "VB",
          person,
          info: {
            type: "verb",
            aspect: "imperfective",
            base: "stem",
            verb,
          },
        },
        errors: [],
      });
    }
  }
  return results;
}
/**
 * Maps the final letter of a word to the persons that verb ending can mark.
 * Returns an empty array when the word has none of the known endings.
 */
function getVerbEnding(p: string): T.Person[] {
  const endings: [string, T.Person[]][] = [
    ["م", [T.Person.FirstSingMale, T.Person.FirstSingFemale]],
    ["ې", [T.Person.SecondSingMale, T.Person.SecondSingFemale]],
    [
      "ي",
      [
        T.Person.ThirdSingMale,
        T.Person.ThirdSingFemale,
        T.Person.ThirdPlurMale,
        T.Person.ThirdPlurFemale,
      ],
    ],
    ["و", [T.Person.FirstPlurMale, T.Person.FirstPlurFemale]],
    ["ئ", [T.Person.SecondPlurMale, T.Person.SecondPlurFemale]],
  ];
  const match = endings.find(([ending]) => p.endsWith(ending));
  return match ? match[1] : [];
}
/**
 * Finds the verbs whose stem equals `stem`.
 *
 * An entry matches when its `psp` field equals the stem. Entries without a
 * `psp` match when their `c` field does not contain "comp" and their `p`
 * field minus its last character equals the stem.
 */
function findByStem(
  stem: string,
  verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.VerbEntry[] {
  const matchesStem = (e: T.VerbDictionaryEntry): boolean => {
    if (e.psp === stem) {
      return true;
    }
    if (e.psp || e.c.includes("comp")) {
      return false;
    }
    return e.p.slice(0, -1) === stem;
  };
  return verbLookup(matchesStem);
}

View File

@ -0,0 +1,117 @@
import * as T from "../../../types";
import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils";
import { parseVerb } from "./parse-verb";
import {
makeObjectSelectionComplete,
makeSubjectSelectionComplete,
} from "../phrase-building/blocks-utils";
import { vEntry } from "../new-verb-engine/rs-helpers";
import { getPersonFromNP, isThirdPerson } from "../phrase-building/vp-tools";
// Verb entry for کېدل (kedul, "to become _____"), built with vEntry.
// parseVP uses it as the fallback verb when the parsed verb block's info is
// not of type "verb". Original note:
// to hide equatives type-doubling issue
const kedulStat = vEntry({
ts: 1581086654898,
i: 11100,
p: "کېدل",
f: "kedul",
g: "kedul",
e: "to become _____",
r: 2,
c: "v. intrans.",
ssp: "ش",
ssf: "sh",
prp: "شول",
prf: "shwul",
pprtp: "شوی",
pprtf: "shúway",
noOo: true,
ec: "become",
});
/**
 * Parses a very basic verb phrase made of two noun phrases followed by a verb.
 *
 * Every reading is built as a transitive, active, non-negative "presentVerb"
 * selection. Either noun phrase may be the subject, so both orderings are
 * produced for each parse. Problems with a reading are reported as errors on
 * that reading:
 *  - the subject is inflected,
 *  - a 1st or 2nd person object pronoun is not inflected,
 *  - a non-pronoun object is inflected,
 *  - the verb's person does not match the subject's person.
 *
 * Each stage parses from the tokens left over by the stage before it and
 * receives that stage's body, so a subject/object/verb combination always
 * comes from one consistent reading of the input.
 *
 * @param tokens - the tokens to parse
 * @param lookup - dictionary lookup handed on to parseNP
 * @param verbLookup - verb lookup handed on to parseVerb
 * @returns the possible VP selections with their remaining tokens and errors
 */
export function parseVP(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.ParseResult<T.VPSelectionComplete>[] {
  if (tokens.length === 0) {
    return [];
  }
  return bindParseResult(parseNP(tokens, lookup), (afterNP1, np1) =>
    bindParseResult(parseNP(afterNP1, lookup), (afterNP2, np2) =>
      bindParseResult<Omit<T.VBE, "ps">, T.VPSelectionComplete>(
        parseVerb(afterNP2, verbLookup),
        (remaining, v) =>
          // either noun phrase can be the subject, so try both orderings
          [
            [np1, np2],
            [np2, np1],
          ].map<T.ParseResult<T.VPSelectionComplete>>(([s, o]) => {
            const errors: T.ParseError[] = [];
            const subjPerson = getPersonFromNP(s.selection);
            if (s.inflected) {
              errors.push({ message: "subject should not be inflected" });
            }
            const objSelection = o.selection.selection;
            if (objSelection.type === "pronoun") {
              // the rule concerns the person of the object pronoun itself
              if (!isThirdPerson(objSelection.person) && !o.inflected) {
                errors.push({
                  message:
                    "1st or 2nd person object pronoun should be inflected",
                });
              }
            } else if (o.inflected) {
              errors.push({ message: "object should not be inflected" });
            }
            if (subjPerson !== v.person) {
              errors.push({ message: "verb does not match subject" });
            }
            const blocks: T.VPSBlockComplete[] = [
              {
                key: 1,
                block: makeSubjectSelectionComplete(s.selection),
              },
              {
                key: 2,
                block: makeObjectSelectionComplete(o.selection),
              },
            ];
            const verb: T.VerbSelectionComplete = {
              type: "verb",
              // fall back to kedulStat when the block carries no verb entry
              verb: v.info.type === "verb" ? v.info.verb : kedulStat,
              transitivity: "transitive",
              canChangeTransitivity: false,
              canChangeStatDyn: false,
              negative: false,
              tense: "presentVerb",
              canChangeVoice: true,
              isCompound: false,
              voice: "active",
            };
            return {
              tokens: remaining,
              body: {
                blocks,
                verb,
                externalComplement: undefined,
                form: {
                  removeKing: false,
                  shrinkServant: false,
                },
              },
              errors,
            };
          })
      )
    )
  );
}

View File

@ -1,7 +1,8 @@
import { Token } from "../../../types";
import { standardizePashto } from "../standardize-pashto";
export function tokenizer(s: string): Token[] {
const words = s.trim().split(/ +/);
const words = standardizePashto(s).trim().split(/ +/);
const indexed: { i: number; s: string }[] = [];
for (let i = 0; i < words.length; i++) {
indexed.push({ i, s: words[i] });

View File

@ -0,0 +1,81 @@
import * as T from "../../../types";
/**
 * Monadic binding for ParseResult[]
 *
 * Takes a given array of parse results and a function that receives the
 * remaining tokens and the body of each of those results and produces the
 * next set of possible results.
 *
 * All the results are flat-mapped into a new ParseResult[]. The errors of each
 * new result are collected in this order: errors returned alongside `next` by
 * `f`, then the new result's own errors, then the errors of the previous
 * result it was derived from. The combined list is finally passed through
 * cleanOutFails.
 *
 * @param previous - the set of results (monad) to start with
 * @param f - a function that takes a remaining list of tokens and one body of the previous result
 * and returns the next set of possible results, either as a plain array or as an object
 * holding `next` together with extra `errors` to attach to every one of them
 * @param ignorePrevious - currently has no effect. It is accepted so callers can state that `f`
 * does not need the previous body, but the de-duplication of previous results by remaining
 * token count that it was meant to enable is not active
 * @returns the combined next results, cleaned by cleanOutFails
 */
export function bindParseResult<C extends object, D extends object>(
  previous: T.ParseResult<C>[],
  f: (
    tokens: Readonly<T.Token[]>,
    r: C
  ) =>
    | T.ParseResult<D>[]
    | {
        errors: T.ParseError[];
        next: T.ParseResult<D>[];
      },
  ignorePrevious?: boolean
): T.ParseResult<D>[] {
  const nextPossibilities = previous.flatMap(({ tokens, body, errors }) => {
    const res = f(tokens, body);
    // normalise both return shapes of f to { errors, next }
    const { errors: errsPassed, next } = Array.isArray(res)
      ? { errors: [], next: res }
      : res;
    return next.map((x) => ({
      tokens: x.tokens,
      body: x.body,
      errors: [...errsPassed, ...x.errors, ...errors],
    }));
  });
  return cleanOutFails(nextPossibilities);
}
/**
 * Prunes and de-duplicates a set of parse results.
 *
 * If at least one result has no errors, only the error-free results are kept;
 * otherwise all results are kept. Duplicates are then removed by comparing the
 * JSON serialisation of each result, and the survivors are rebuilt from that
 * JSON (so the returned objects are copies, and anything JSON cannot represent,
 * such as properties holding `undefined`, is not carried over).
 *
 * @param results - the results to clean
 * @returns the cleaned results, in order of first appearance
 */
export function cleanOutFails<C extends object>(
  results: T.ParseResult<C>[]
): T.ParseResult<C>[] {
  // if there's any success anywhere, remove any of the errors
  const errorFree = results.filter((x) => x.errors.length === 0);
  const kept = errorFree.length > 0 ? errorFree : results;
  // explicit callbacks so map's index/array arguments are never handed to
  // JSON.stringify / JSON.parse as replacer, space or reviver
  const serialized = new Set(kept.map((x) => JSON.stringify(x)));
  return Array.from(serialized, (s) => JSON.parse(s) as T.ParseResult<C>);
}
/**
 * Tells whether a parse result is complete: it has no tokens left over and
 * carries no errors.
 */
export function isCompleteResult<C extends object>(
  r: T.ParseResult<C>
): boolean {
  const { tokens, errors } = r;
  return tokens.length === 0 && errors.length === 0;
}

View File

@ -157,6 +157,15 @@ export function makeSubjectSelection(
};
}
/** Wraps an NP selection in a complete "subjectSelection" block. */
export function makeSubjectSelectionComplete(
  selection: T.NPSelection
): T.SubjectSelectionComplete {
  const subject: T.SubjectSelectionComplete = {
    type: "subjectSelection",
    selection,
  };
  return subject;
}
export function makeObjectSelection(
selection:
| T.ObjectSelection
@ -195,6 +204,15 @@ export function makeObjectSelection(
};
}
/** Wraps an NP selection in a complete "objectSelection" block. */
export function makeObjectSelectionComplete(
  selection: T.NPSelection
): T.ObjectSelectionComplete {
  const object: T.ObjectSelectionComplete = {
    type: "objectSelection",
    selection,
  };
  return object;
}
export function EPSBlocksAreComplete(
blocks: T.EPSBlock[]
): blocks is T.EPSBlockComplete[] {

View File

@ -344,9 +344,7 @@ function getPsFromPiece(
}
function getPsFromWelded(v: T.Welded): T.PsString[] {
function getPsFromSide(
v: T.VBBasic | T.Welded | T.NComp | T.VBGenNum
): T.PsString[] {
function getPsFromSide(v: T.VB | T.NComp): T.PsString[] {
if (v.type === "VB") {
return flattenLengths(v.ps);
}

View File

@ -31,6 +31,29 @@ export function makeAdjectiveSelection(
};
}
/**
 * Builds a non-shrunken possessor selection around the given inner NP
 * selection by wrapping it in an "NP" node.
 */
export function makePossesorSelection(
  np: T.NPSelection["selection"]
): T.PossesorSelection {
  const wrapped: T.PossesorSelection["np"] = {
    type: "NP",
    selection: np,
  };
  return {
    shrunken: false,
    np: wrapped,
  };
}
/**
 * Builds a pronoun selection for the given person.
 *
 * @param person - the person of the pronoun
 * @param distance - "near" or "far"; "far" is used when omitted
 */
export function makePronounSelection(
  person: T.Person,
  distance?: "near" | "far"
): T.PronounSelection {
  const resolvedDistance = distance ? distance : "far";
  return {
    type: "pronoun",
    distance: resolvedDistance,
    person,
  };
}
export function makeParticipleSelection(
verb: T.VerbEntry
): T.ParticipleSelection {

View File

@ -1,11 +1,12 @@
import {
isFirstPerson,
isSecondPerson,
} from "../misc-helpers";
import { isFirstPerson, isSecondPerson } from "../misc-helpers";
import * as T from "../../../types";
import { concatPsString } from "../p-text-helpers";
function getBaseAndAdjectives({ selection }: T.Rendered<T.NPSelection | T.ComplementSelection | T.APSelection>): T.PsString[] {
function getBaseAndAdjectives({
selection,
}: T.Rendered<
T.NPSelection | T.ComplementSelection | T.APSelection
>): T.PsString[] {
if (selection.type === "sandwich") {
return getSandwichPsBaseAndAdjectives(selection);
}
@ -13,37 +14,52 @@ function getBaseAndAdjectives({ selection }: T.Rendered<T.NPSelection | T.Comple
if (!adjs) {
return selection.ps;
}
return selection.ps.map(p => (
return selection.ps.map((p) =>
concatPsString(
adjs.reduce((accum, curr) => (
adjs.reduce(
(accum, curr) =>
// TODO: with variations of adjs?
concatPsString(accum, (accum.p === "" && accum.f === "") ? "" : " ", curr.ps[0])
), { p: "", f: "" }),
concatPsString(
accum,
accum.p === "" && accum.f === "" ? "" : " ",
curr.ps[0]
),
{ p: "", f: "" }
),
" ",
p,
p
)
));
);
}
function getSandwichPsBaseAndAdjectives(s: T.Rendered<T.SandwichSelection<T.Sandwich>>): T.PsString[] {
function getSandwichPsBaseAndAdjectives(
s: T.Rendered<T.SandwichSelection<T.Sandwich>>
): T.PsString[] {
const insideBase = getBaseAndAdjectives(s.inside);
const willContractWithPronoun = s.before && s.before.p === "د" && s.inside.selection.type === "pronoun"
&& (isFirstPerson(s.inside.selection.person) || isSecondPerson(s.inside.selection.person))
const contracted = (willContractWithPronoun && s.inside.selection.type === "pronoun")
const willContractWithPronoun =
s.before &&
s.before.p === "د" &&
s.inside.selection.type === "pronoun" &&
(isFirstPerson(s.inside.selection.person) ||
isSecondPerson(s.inside.selection.person));
const contracted =
willContractWithPronoun && s.inside.selection.type === "pronoun"
? contractPronoun(s.inside.selection)
: undefined
return insideBase.map((inside) => (
: undefined;
return insideBase.map((inside) =>
concatPsString(
(s.before && !willContractWithPronoun) ? s.before : "",
s.before && !willContractWithPronoun ? s.before : "",
s.before ? " " : "",
contracted ? contracted : inside,
s.after ? " " : "",
s.after ? s.after : "",
s.after ? s.after : ""
)
));
);
}
function contractPronoun(n: T.Rendered<T.PronounSelection>): T.PsString | undefined {
function contractPronoun(
n: T.Rendered<T.PronounSelection>
): T.PsString | undefined {
return isFirstPerson(n.person)
? concatPsString({ p: "ز", f: "z" }, n.ps[0])
: isSecondPerson(n.person)
@ -51,7 +67,9 @@ function contractPronoun(n: T.Rendered<T.PronounSelection>): T.PsString | undefi
: undefined;
}
function trimOffShrunkenPossesive(p: T.Rendered<T.NPSelection>): T.Rendered<T.NPSelection> {
function trimOffShrunkenPossesive(
p: T.Rendered<T.NPSelection>
): T.Rendered<T.NPSelection> {
if (!("possesor" in p.selection)) {
return p;
}
@ -79,33 +97,47 @@ function trimOffShrunkenPossesive(p: T.Rendered<T.NPSelection>): T.Rendered<T.NP
};
}
export function getPashtoFromRendered(b: T.Rendered<T.NPSelection> | T.Rendered<T.ComplementSelection> | T.Rendered<T.APSelection>, subjectsPerson: false | T.Person): T.PsString[] {
export function getPashtoFromRendered(
b:
| T.Rendered<T.NPSelection>
| T.Rendered<T.ComplementSelection>
| T.Rendered<T.APSelection>,
subjectsPerson: false | T.Person
): T.PsString[] {
const base = getBaseAndAdjectives(b);
if (b.selection.type === "loc. adv." || b.selection.type === "adverb") {
return base;
}
if (b.selection.type === "adjective") {
if (!b.selection.sandwich) {
return base
return base;
}
// TODO: Kinda cheating
const sandwichPs = getPashtoFromRendered({ type: "AP", selection: b.selection.sandwich }, false);
return base.flatMap(p => (
sandwichPs.flatMap(s => (
concatPsString(s, " ", p)
))
));
const sandwichPs = getPashtoFromRendered(
{ type: "AP", selection: b.selection.sandwich },
false
);
return base.flatMap((p) =>
sandwichPs.flatMap((s) => concatPsString(s, " ", p))
);
}
const trimmed = b.selection.type === "sandwich" ? {
const trimmed =
b.selection.type === "sandwich"
? {
type: b.type,
selection: {
...b.selection,
inside: trimOffShrunkenPossesive(b.selection.inside),
},
} : trimOffShrunkenPossesive({ type: "NP", selection: b.selection });
}
: trimOffShrunkenPossesive({ type: "NP", selection: b.selection });
if (trimmed.selection.type === "sandwich") {
return trimmed.selection.inside.selection.possesor
? addPossesor(trimmed.selection.inside.selection.possesor.np, base, subjectsPerson)
? addPossesor(
trimmed.selection.inside.selection.possesor.np,
base,
subjectsPerson
)
: base;
}
if (trimmed.selection.possesor) {
@ -114,53 +146,69 @@ export function getPashtoFromRendered(b: T.Rendered<T.NPSelection> | T.Rendered<
return base;
}
function addPossesor(owner: T.Rendered<T.NPSelection>, existing: T.PsString[], subjectsPerson: false | T.Person): T.PsString[] {
function addPossesor(
owner: T.Rendered<T.NPSelection>,
existing: T.PsString[],
subjectsPerson: false | T.Person
): T.PsString[] {
function willBeReflexive(subj: T.Person, obj: T.Person): boolean {
return (
([0, 1].includes(subj) && [0, 1].includes(obj))
||
([0, 1].includes(subj) && [0, 1].includes(obj)) ||
([2, 3].includes(subj) && [8, 9].includes(obj))
);
}
const wPossesor = existing.flatMap(ps => (
getBaseAndAdjectives(owner).map(v => (
(owner.selection.type === "pronoun" && subjectsPerson !== false && willBeReflexive(subjectsPerson, owner.selection.person))
const wPossesor = existing.flatMap((ps) =>
getBaseAndAdjectives(owner).map((v) =>
owner.selection.type === "pronoun" &&
subjectsPerson !== false &&
willBeReflexive(subjectsPerson, owner.selection.person)
? concatPsString({ p: "خپل", f: "khpul" }, " ", ps)
: (owner.selection.type === "pronoun" && isFirstPerson(owner.selection.person))
: owner.selection.type === "pronoun" &&
isFirstPerson(owner.selection.person)
? concatPsString({ p: "ز", f: "z" }, v, " ", ps)
: (owner.selection.type === "pronoun" && isSecondPerson(owner.selection.person))
: owner.selection.type === "pronoun" &&
isSecondPerson(owner.selection.person)
? concatPsString({ p: "س", f: "s" }, v, " ", ps)
: concatPsString({ p: "د", f: "du" }, " ", v, " ", ps)
))
));
)
);
if (!owner.selection.possesor) {
return wPossesor;
}
return addPossesor(owner.selection.possesor.np, wPossesor, subjectsPerson);
}
function addArticlesAndAdjs(np: T.Rendered<T.NounSelection>): string | undefined {
function addArticlesAndAdjs(
np: T.Rendered<T.NounSelection>
): string | undefined {
if (!np.e) return undefined;
try {
// split out the atricles so adjectives can be stuck inbetween them and the word
const chunks = np.e.split("the)");
const [articles, word] = chunks.length === 1
? ["", np.e]
: [chunks[0] + "the) ", chunks[1]];
const [articles, word] =
chunks.length === 1 ? ["", np.e] : [chunks[0] + "the) ", chunks[1]];
const adjs = !np.adjectives
? ""
: np.adjectives.reduce((accum, curr): string => {
if (!curr.e) throw new Error("no english for adjective");
return accum + curr.e + " ";
}, "");
const genderTag = np.genderCanChange ? (np.gender === "fem" ? " (f.)" : " (m.)") : "";
const genderTag = np.genderCanChange
? np.gender === "fem"
? " (f.)"
: " (m.)"
: "";
return `${articles}${adjs}${word}${genderTag}`;
} catch (e) {
return undefined;
}
}
function addPossesors(possesor: T.Rendered<T.NPSelection> | undefined, base: string | undefined, type: "noun" | "participle"): string | undefined {
function addPossesors(
possesor: T.Rendered<T.NPSelection> | undefined,
base: string | undefined,
type: "noun" | "participle"
): string | undefined {
function removeArticles(s: string): string {
return s.replace("(the) ", "").replace("(a/the) ", "");
}
@ -169,7 +217,9 @@ function addPossesors(possesor: T.Rendered<T.NPSelection> | undefined, base: str
if (possesor.selection.type === "pronoun") {
return type === "noun"
? `${pronounPossEng(possesor.selection.person)} ${removeArticles(base)}`
: `(${pronounPossEng(possesor.selection.person)}) ${removeArticles(base)} (${possesor.selection.e})`
: `(${pronounPossEng(possesor.selection.person)}) ${removeArticles(
base
)} (${possesor.selection.e})`;
}
const possesorE = getEnglishFromRendered(possesor);
if (!possesorE) return undefined;
@ -180,17 +230,20 @@ function addPossesors(possesor: T.Rendered<T.NPSelection> | undefined, base: str
}
function pronounPossEng(p: T.Person): string {
function gend(x: T.Person): string {
return `${x % 2 === 0 ? "m." : "f."}`;
}
if (p === T.Person.FirstSingMale || p === T.Person.FirstSingFemale) {
return "my";
return `my (${gend(p)})`;
}
if (p === T.Person.FirstPlurMale || p === T.Person.FirstPlurFemale) {
return "our";
return `our (${gend(p)})`;
}
if (p === T.Person.SecondSingMale || p === T.Person.SecondSingFemale) {
return "your";
return `your (${gend(p)})`;
}
if (p === T.Person.SecondPlurMale || p === T.Person.SecondPlurFemale) {
return "your (pl.)";
return `your (${gend(p)} pl.)`;
}
if (p === T.Person.ThirdSingMale) {
return "his/its";
@ -198,10 +251,17 @@ function pronounPossEng(p: T.Person): string {
if (p === T.Person.ThirdSingFemale) {
return "her/its";
}
return "their";
return `their ${gend(p)}`;
}
export function getEnglishFromRendered(r: T.Rendered<T.NPSelection | T.ComplementSelection | T.APSelection | T.SandwichSelection<T.Sandwich>>): string | undefined {
export function getEnglishFromRendered(
r: T.Rendered<
| T.NPSelection
| T.ComplementSelection
| T.APSelection
| T.SandwichSelection<T.Sandwich>
>
): string | undefined {
if (r.type === "sandwich") {
return getEnglishFromRenderedSandwich(r);
}
@ -219,18 +279,30 @@ export function getEnglishFromRendered(r: T.Rendered<T.NPSelection | T.Complemen
return r.selection.e;
}
if (r.selection.type === "participle") {
return addPossesors(r.selection.possesor?.np, r.selection.e, r.selection.type);
return addPossesors(
r.selection.possesor?.np,
r.selection.e,
r.selection.type
);
}
return addPossesors(r.selection.possesor?.np, addArticlesAndAdjs(r.selection), r.selection.type);
return addPossesors(
r.selection.possesor?.np,
addArticlesAndAdjs(r.selection),
r.selection.type
);
}
function getEnglishFromRenderedSandwich(r: T.Rendered<T.SandwichSelection<T.Sandwich>>): string | undefined {
function getEnglishFromRenderedSandwich(
r: T.Rendered<T.SandwichSelection<T.Sandwich>>
): string | undefined {
const insideE = getEnglishFromRendered(r.inside);
if (!insideE) return undefined;
return `${r.e} ${insideE}`;
}
function getEnglishFromRenderedAdjective(a: T.Rendered<T.AdjectiveSelection>): string | undefined {
function getEnglishFromRenderedAdjective(
a: T.Rendered<T.AdjectiveSelection>
): string | undefined {
if (!a.sandwich) {
return a.e;
}

View File

@ -721,6 +721,14 @@ export type EquativeTense =
| "wouldBe"
| "pastSubjunctive"
| "wouldHaveBeen";
/**
 * Tense labels used for the `info` of an equative VBE block.
 * NOTE(review): presumably the equative tenses as they can be told apart
 * without looking at the ba particle - confirm against EquativeTense.
 */
export type EquativeTenseWithoutBa =
| "present"
| "subjunctive"
| "habitual"
| "past"
| "wouldBe"
| "pastSubjunctive"
| "wouldHaveBeen";
export type PerfectTense = `${EquativeTense}Perfect`;
export type AbilityTense = `${VerbTense}Modal`;
export type ImperativeTense = `${Aspect}Imperative`;
@ -1201,16 +1209,50 @@ export type RenderVerbOutput = {
hasBa: boolean;
vbs: VerbRenderedOutput;
};
export type VerbRenderedOutput = [[VHead] | [], [VB, VBE] | [VBE]];
export type RootsStemsOutput = [[VHead] | [], [VB, VBA] | [VBA]]; // or perfect / equative
export type VerbRenderedOutput = [[VHead] | [], [VBP, VBE] | [VBE]];
export type RootsStemsOutput = [[VHead] | [], [VBP, VB] | [VB]]; // or perfect / equative
export type VB = VBBasic | VBGenNum | Welded | WeldedGN;
/** A VB block that can have endings attached to it */
export type VBA = Exclude<VB, VBGenNum | WeldedGN>;
export type VB = VBBasic | Welded;
/** A VB block that has had a person verb ending attached */
export type VBE = (VBBasic | Welded) & {
export type VBE = VB & {
person: Person;
}; // or equative
info:
| {
type: "equative";
tense: EquativeTenseWithoutBa;
}
| {
type: "verb";
aspect: Aspect;
base: "stem" | "root";
verb: VerbEntry;
abilityAux?: boolean;
};
};
/**
 * A VB block used for ability verbs or for the perfect (past participle).
 * It carries either participle info or ability info, and may optionally be
 * swapped in order with the VBE when used with a negative.
 */
export type VBP = VB & (VBPartInfo | VBAbilityInfo);
/**
 * Info attached to a past participle ("ppart") VB block: its gender/number
 * and the verb entry it belongs to.
 */
export type VBPartInfo = {
info: {
type: "ppart";
genNum: GenderNumber;
verb: VerbEntry;
};
};
/**
 * Info attached to an "ability" VB block: the verb entry it belongs to and
 * its aspect.
 */
export type VBAbilityInfo = {
info: {
type: "ability";
verb: VerbEntry;
aspect: Aspect;
};
};
// in VB OR VBE - add root / stem and entry for parsing info
// but how would that work with perfect and ability verbs ...
export type VBNoLenghts<V extends VB> = V extends VBBasic
? Omit<VBBasic, "ps"> & { ps: PsString[] }
@ -1221,10 +1263,6 @@ export type VBBasic = {
ps: SingleOrLengthOpts<PsString[]>;
};
// TODO: might be a better design decision to keep the GenderNuber stuff
// in the RIGHT side of the weld
export type VBGenNum = VBBasic & GenderNumber;
export type GenderNumber = {
gender: Gender;
number: NounNumber;
@ -1233,11 +1271,9 @@ export type GenderNumber = {
export type Welded = {
type: "welded";
left: NComp | VBBasic | Welded;
right: VBBasic;
right: VBBasic | (VBBasic & (VBPartInfo | VBAbilityInfo));
};
export type WeldedGN = Omit<Welded, "right"> & { right: VBGenNum };
export type VHead = PH | NComp;
/** perfective head block */

View File

@ -27,4 +27,12 @@ module.exports = [
ts: 1527815450,
e: "son", // زوی
},
{
ts: 1527823093,
e: "prophet", // نبي
},
{
ts: 1527822456,
e: "word", // لفظ
},
];

View File

@ -7,6 +7,7 @@
*/
module.exports = [
1527815139, // osedul
1585228579997, // ورتلل
1527815216, // راتلل - to come
1527813473, // الوتل - to fly

View File

@ -7,6 +7,8 @@
*/
module.exports = [
1527817457, // درکول
1659037345120, // بیانېدل
1608137130992, // چیغه کول
1658537998960, // لېونی کول
1527812403, // بچ کول - to save, protect, guard, spare, rescue, economize