From 288718f69aed102627a905619ffda4fe3f9f3fd9 Mon Sep 17 00:00:00 2001 From: adueck Date: Thu, 17 Aug 2023 18:12:09 +0400 Subject: [PATCH] more on parser --- src/lib/src/parsing/parse-ba.ts | 21 - src/lib/src/parsing/parse-block.ts | 61 ++ src/lib/src/parsing/parse-blocks.ts | 96 ++ src/lib/src/parsing/parse-kid.ts | 27 + .../src/parsing/parse-kids-section.test.ts | 83 ++ src/lib/src/parsing/parse-kids-section.ts | 50 + src/lib/src/parsing/parse-pronoun.ts | 2 +- src/lib/src/parsing/parse-verb.ts | 5 + src/lib/src/parsing/parse-vp.ts | 958 +++++++++++++----- src/lib/src/parsing/utils.ts | 40 +- src/lib/src/phrase-building/render-vp.ts | 2 +- src/types.ts | 2 + 12 files changed, 1064 insertions(+), 283 deletions(-) delete mode 100644 src/lib/src/parsing/parse-ba.ts create mode 100644 src/lib/src/parsing/parse-block.ts create mode 100644 src/lib/src/parsing/parse-blocks.ts create mode 100644 src/lib/src/parsing/parse-kid.ts create mode 100644 src/lib/src/parsing/parse-kids-section.test.ts create mode 100644 src/lib/src/parsing/parse-kids-section.ts diff --git a/src/lib/src/parsing/parse-ba.ts b/src/lib/src/parsing/parse-ba.ts deleted file mode 100644 index 538f519..0000000 --- a/src/lib/src/parsing/parse-ba.ts +++ /dev/null @@ -1,21 +0,0 @@ -import * as T from "../../../types"; - -export function parseBa( - tokens: Readonly -): T.ParseResult<{ type: "ba" }>[] { - if (!tokens.length) { - return []; - } - const [first, ...rest] = tokens; - if (first.s === "به") { - return [ - { - body: { - type: "ba", - }, - errors: [], - tokens: rest, - }, - ]; - } else return []; -} diff --git a/src/lib/src/parsing/parse-block.ts b/src/lib/src/parsing/parse-block.ts new file mode 100644 index 0000000..8a67ec0 --- /dev/null +++ b/src/lib/src/parsing/parse-block.ts @@ -0,0 +1,61 @@ +import * as T from "../../../types"; +import { fmapParseResult } from "../fp-ps"; +import { parseNP } from "./parse-np"; +import { parseVerb } from "./parse-verb"; + +export function parseBlock( + tokens: Readonly, + lookup: (s: Partial) => T.DictionaryEntry[], + verbLookup: (s: string) => T.VerbEntry[] +): T.ParseResult< + | [ + { + inflected: boolean; + selection: T.NPSelection; + } + ] + | [ + ( + | { + type: "PH"; + s: string; + } + | undefined + ), + Omit + ] + | [] +>[] { + if (tokens.length === 0) { + return [ + { + tokens: [], + body: [], + errors: [], + }, + ]; + } + + return [ + ...(fmapParseResult((x) => [x], parseNP(tokens, lookup)) as T.ParseResult< + [ + { + inflected: boolean; + selection: T.NPSelection; + } + ] + >[]), + ...(parseVerb(tokens, verbLookup) as T.ParseResult< + [ + ( + | { + type: "PH"; + s: string; + } + | undefined + ), + Omit + ] + >[]), + ]; +} diff --git a/src/lib/src/parsing/parse-blocks.ts b/src/lib/src/parsing/parse-blocks.ts new file mode 100644 index 0000000..1ffebf3 --- /dev/null +++ b/src/lib/src/parsing/parse-blocks.ts @@ -0,0 +1,96 @@ +import * as T from "../../../types"; +import { parseBlock } from "./parse-block"; +import { parseKidsSection } from "./parse-kids-section"; +import { bindParseResult, returnParseResult } from "./utils"; + +export function parseBlocks( + tokens: Readonly, + lookup: (s: Partial) => T.DictionaryEntry[], + verbLookup: (s: string) => T.VerbEntry[], + prevBlocks: ( + | { + inflected: boolean; + selection: T.NPSelection; + } + | { + type: "PH"; + s: string; + } + | Omit + )[], + kids: T.ParsedKid[] +): T.ParseResult<{ + kids: T.ParsedKid[]; + blocks: ( + | { + inflected: boolean; + selection: T.NPSelection; + } + | { + type: "PH"; + s: string; + } + | Omit + )[]; +}>[] { + if (tokens.length === 0) { + // console.log("at end", { prevBlocks, kids }); + return returnParseResult(tokens, { blocks: prevBlocks, kids }); + } + + const block = parseBlock(tokens, lookup, verbLookup); + const kidsR = parseKidsSection(tokens, []); + const allResults = [...block, ...kidsR] as T.ParseResult< + | [ + { + inflected: boolean; + selection: T.NPSelection; + } + ] + | [ + ( + | { + type: "PH"; + s: string; + } + | undefined + ), + Omit + ] + | [] + | { kids: T.ParsedKid[] } + >[]; + if (!allResults.length) { + return [ + { + tokens: [], + body: { blocks: prevBlocks, kids }, + errors: [], + }, + ]; + } + return bindParseResult(allResults, (tokens, r) => { + if ("kids" in r) { + return { + next: parseBlocks(tokens, lookup, verbLookup, prevBlocks, [ + ...kids, + ...r.kids, + ]), + errors: + prevBlocks.length !== 1 + ? [{ message: "kids' section out of place" }] + : [], + }; + } + // filter out the empty PH pieces + // for some reason ts won't let me do filter here + const newBlocks = r.flatMap((x) => (x ? [x] : [])); + return parseBlocks( + tokens, + lookup, + verbLookup, + [...prevBlocks, ...newBlocks], + kids + ); + }); +} diff --git a/src/lib/src/parsing/parse-kid.ts b/src/lib/src/parsing/parse-kid.ts new file mode 100644 index 0000000..c8fdb2d --- /dev/null +++ b/src/lib/src/parsing/parse-kid.ts @@ -0,0 +1,27 @@ +import * as T from "../../../types"; +import { returnParseResult } from "./utils"; + +export function parseKid( + tokens: Readonly +): T.ParseResult[] { + if (tokens.length === 0) { + return []; + } + const [{ s }, ...rest] = tokens; + if (s === "به") { + return returnParseResult(rest, "ba"); + } + if (s === "یې") { + return returnParseResult(rest, "ye"); + } + if (s === "مې") { + return returnParseResult(rest, "me"); + } + if (s === "دې") { + return returnParseResult(rest, "de"); + } + if (s === "مو") { + return returnParseResult(rest, "mU"); + } + return []; +} diff --git a/src/lib/src/parsing/parse-kids-section.test.ts b/src/lib/src/parsing/parse-kids-section.test.ts new file mode 100644 index 0000000..409b291 --- /dev/null +++ b/src/lib/src/parsing/parse-kids-section.test.ts @@ -0,0 +1,83 @@ +/* eslint-disable jest/no-conditional-expect */ +/* eslint-disable jest/valid-title */ +import * as T from "../../../types"; +import { parseKidsSection } from "./parse-kids-section"; +import { tokenizer } from "./tokenizer"; + +const tests: { + label: string; + cases: { + input: string; + output: T.ParsedKid[]; + error?: boolean; + }[]; +}[] = [ + { + label: "basic kids section", + cases: [ + { + input: "به", + output: ["ba"], + }, + { + input: "به دې", + output: ["ba", "de"], + }, + { + input: "", + output: [], + }, + { + input: "مې دې یې", + output: ["me", "de", "ye"], + }, + { + input: "دې به مې", + output: ["de", "ba", "me"], + error: true, + }, + { + input: "مې یې", + output: ["me", "ye"], + }, + { + input: "دې مې", + output: ["de", "me"], + error: true, + }, + ], + }, + { + label: "can parse kids section when tokens come after", + cases: [ + { + input: "به سړی", + output: ["ba"], + }, + { + input: "مې دې واخیسته", + output: ["me", "de"], + }, + ], + }, +]; + +tests.forEach(({ label, cases }) => { + test(label, () => { + cases.forEach(({ input, output, error }) => { + const tokens = tokenizer(input); + const parsed = parseKidsSection(tokens, []); + if (output.length) { + expect(parsed.length).toBe(1); + expect(parsed.map((x) => x.body.kids)).toEqual( + output.length ? [output] : [] + ); + if (error) { + expect(parsed[0].errors.length).toBeTruthy(); + } else { + expect(parsed[0].errors.length).toBe(0); + } + } + }); + }); +}); diff --git a/src/lib/src/parsing/parse-kids-section.ts b/src/lib/src/parsing/parse-kids-section.ts new file mode 100644 index 0000000..286845a --- /dev/null +++ b/src/lib/src/parsing/parse-kids-section.ts @@ -0,0 +1,50 @@ +import * as T from "../../../types"; +import { parseKid } from "./parse-kid"; +import { bindParseResult, returnParseResult } from "./utils"; + +export function parseKidsSection( + tokens: Readonly, + prevKids: T.ParsedKid[] +): T.ParseResult<{ kids: T.ParsedKid[] }>[] { + if (tokens.length === 0) { + return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : []; + } + const parsedKid = parseKid(tokens); + // TODO: is this even necessary ?? + if (!parsedKid.length) { + return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : []; + } + return bindParseResult(parsedKid, (tokens, r) => { + // return parseKidsSection(tokens, [...prevKids, r]); + return { + errors: kidDoubled(r, prevKids) + ? [{ message: `double '${r}' in kids section` }] + : !kidComesBehind(r, prevKids.at(-1)) + ? [{ message: "kids section out of order" }] + : [], + next: parseKidsSection(tokens, [...prevKids, r]), + }; + }); +} + +function kidDoubled(k: T.ParsedKid, prev: T.ParsedKid[]): boolean { + return !!prev.find((x) => x === k); +} + +const kidsOrder: T.ParsedKid[] = ["ba", "me", "de", "ye"]; +function getKidRank(k: T.ParsedKid): number { + if (k === "mU") { + return 1; + } + return kidsOrder.indexOf(k); +} + +function kidComesBehind( + k: T.ParsedKid, + prev: T.ParsedKid | undefined +): boolean { + if (!prev) { + return true; + } + return getKidRank(k) >= getKidRank(prev); +} diff --git a/src/lib/src/parsing/parse-pronoun.ts b/src/lib/src/parsing/parse-pronoun.ts index 82c0116..1f3c43a 100644 --- a/src/lib/src/parsing/parse-pronoun.ts +++ b/src/lib/src/parsing/parse-pronoun.ts @@ -80,7 +80,7 @@ export function parsePronoun(tokens: Readonly): T.ParseResult<{ inflected: false, selection: { type: "pronoun", - person: 4, + person: 5, distance: "far", }, }, diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 7b73d8c..37d56a3 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -12,6 +12,11 @@ import { // big problem ما سړی یوړ crashes it !! +// TODO: کول verbs! +// check that aawu stuff is working +// check oo`azmooy - +// check څاته + export function parseVerb( tokens: Readonly, verbLookup: (s: string) => T.VerbEntry[] diff --git a/src/lib/src/parsing/parse-vp.ts b/src/lib/src/parsing/parse-vp.ts index 43c6d91..78655e1 100644 --- a/src/lib/src/parsing/parse-vp.ts +++ b/src/lib/src/parsing/parse-vp.ts @@ -1,35 +1,23 @@ import * as T from "../../../types"; -import { parseNP } from "./parse-np"; -import { bindParseResult } from "./utils"; -import { parseVerb } from "./parse-verb"; +import { bindParseResult, returnParseResult } from "./utils"; import { makeObjectSelectionComplete, makeSubjectSelectionComplete, } from "../phrase-building/blocks-utils"; -import { vEntry } from "../new-verb-engine/rs-helpers"; -import { getPersonFromNP, isThirdPerson } from "../phrase-building/vp-tools"; -import { parseBa } from "./parse-ba"; +import { getPersonFromNP, isPastTense } from "../phrase-building/vp-tools"; +import { parseBlocks } from "./parse-blocks"; +import { makePronounSelection } from "../phrase-building/make-selections"; +import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp"; // to hide equatives type-doubling issue -const kedulStat = vEntry({ - ts: 1581086654898, - i: 11100, - p: "کېدل", - f: "kedul", - g: "kedul", - e: "to become _____", - r: 2, - c: "v. intrans.", - ssp: "ش", - ssf: "sh", - prp: "شول", - prf: "shwul", - pprtp: "شوی", - pprtf: "shúway", - noOo: true, - ec: "become", -}); -// cool examples: زه خوږې ماشومې وهم +// cool examples: +// زه خوږې ماشومې وهم +// ماشومان سړي ولیدل +// ماشومانو سړي ولیدل + +// make impossible subjects like I saw me, error + +// PROBLEM! ته وینې doesn't work cause it just takes ته as a verb phrase ? export function parseVP( tokens: Readonly, @@ -39,265 +27,731 @@ export function parseVP( if (tokens.length === 0) { return []; } - // how to make this into a nice pipeline... 🤔 - const NP1 = parseNP(tokens, lookup).filter(({ errors }) => !errors.length); - const ba = bindParseResult(NP1, (tokens, np1) => { - const b = parseBa(tokens); - if (!b.length) { - return [ - { - tokens, - body: { - np1, - ba: false, - }, - errors: [], - }, - ]; - } else { - return b.map(({ tokens, errors }) => ({ - body: { - np1, - ba: true, - }, - errors, - tokens, - })); + const blocks = parseBlocks(tokens, lookup, verbLookup, [], []); + return bindParseResult(blocks, (tokens, { blocks, kids }) => { + const ph = blocks.find((x) => "type" in x && x.type === "PH") as + | { + type: "PH"; + s: string; + } + | undefined; + const verb = blocks.find((x) => "type" in x && x.type === "VB") as + | Omit + | undefined; + const ba = !!kids.find((k) => k === "ba"); + if (!verb || verb.type !== "VB" || verb.info.type !== "verb") { + return []; } - }); - const NP2 = bindParseResult< - { - np1: { - inflected: boolean; - selection: T.NPSelection; - }; - ba: boolean; - }, - { - np1: { - inflected: boolean; - selection: T.NPSelection; - }; - ba: boolean; - np2: - | { - inflected: boolean; - selection: T.NPSelection; - } - | undefined; + if (verb.info.aspect === "perfective") { + // TODO: check that the perfective head is in the right place and actually matches + if (!ph) { + return []; + } } - >(ba, (tokens, { np1, ba }) => { - const np2s = parseNP(tokens, lookup); - if (!np2s.length) { - const r: T.ParseResult<{ - np1: { - inflected: boolean; - selection: T.NPSelection; + const tense = getTenseFromRootsStems(ba, verb.info.base, verb.info.aspect); + const isPast = isPastTense(tense); + + const nps = blocks.filter( + (x): x is { inflected: boolean; selection: T.NPSelection } => + "inflected" in x + ); + // TODO: check that verb and PH match + if (verb.info.verb.entry.c.includes("intrans")) { + if (nps.length > 1) { + return []; + } + if (nps.length === 0) { + const v: T.VerbSelectionComplete = { + type: "verb", + verb: verb.info.verb, + transitivity: "intransitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense, + canChangeVoice: true, + isCompound: false, + voice: "active", }; - ba: boolean; - np2: undefined; - }>[] = [ - { - tokens, - body: { - np1, - np2: undefined, - ba, + const blocks: T.VPSBlockComplete[] = [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(verb.person), + }), }, - errors: [], - }, - ]; - return r; - } - return np2s.map((p) => ({ - tokens: p.tokens, - body: { - np1, - np2: p.body, - ba, - }, - errors: p.errors, - })); - }).filter(({ errors }) => !errors.length); - const vb = bindParseResult(NP2, (tokens, nps) => { - const vb = parseVerb(tokens, verbLookup); - // TODO make a nice functor that just maps or adds in the body - return vb.map((p) => ({ - tokens: p.tokens, - body: { - np2: nps.np2, - v: p.body, - np1: nps.np1, - ba: nps.ba, - }, - errors: p.errors, - })); - }).filter(({ errors }) => !errors.length); - // TODO: be able to bind mulitple vals - return bindParseResult(vb, (tokens, { np1, np2, v: [ph, v], ba }) => { - const w: T.ParseResult[] = []; - if (v.info.type === "equative") { - throw new Error("not yet implemented"); - } - const isPast = v.info.base === "root"; - const intransitive = - v.info.type === "verb" && v.info.verb.entry.c.includes("intrans."); - if (intransitive) { - if (np2) return []; - const s = np1; - const errors: T.ParseError[] = []; - if (s.inflected) { - errors.push({ - message: "subject of intransitive verb should not be inflected", - }); - } - if (getPersonFromNP(s.selection) !== v.person) { - errors.push({ - message: "subject should agree with intransitive verb", - }); - } - const blocks: T.VPSBlockComplete[] = [ - { - key: 1, - block: makeSubjectSelectionComplete(s.selection), - }, - { - key: 2, - block: { - type: "objectSelection", - selection: "none", + { + key: 2, + block: { + type: "objectSelection", + selection: "none", + }, }, - }, - ]; - const verb: T.VerbSelectionComplete = { - type: "verb", - verb: v.info.type === "verb" ? v.info.verb : kedulStat, - transitivity: "intransitive", - canChangeTransitivity: false, - canChangeStatDyn: false, - negative: false, - tense: getTenseFromRootsStems(ba, v.info.base, v.info.aspect), - canChangeVoice: true, - isCompound: false, - voice: "active", - }; - w.push({ - tokens, - body: { + ]; + return returnParseResult(tokens, { blocks, - verb, + verb: v, externalComplement: undefined, form: { - removeKing: false, + removeKing: true, shrinkServant: false, }, - }, - errors, - }); + } as T.VPSelectionComplete); + } + if (nps.length === 1) { + const errors: T.ParseError[] = []; + if (getPersonFromNP(nps[0].selection) !== verb.person) { + errors.push({ message: "subject must agree with intransitive verb" }); + } + if (nps[0].inflected) { + errors.push({ + message: "subject of intransitive verb must not be inflected", + }); + } + const blocks: T.VPSBlockComplete[] = [ + { + key: 1, + block: makeSubjectSelectionComplete(nps[0].selection), + }, + { + key: 2, + block: { + type: "objectSelection", + selection: "none", + }, + }, + ]; + const v: T.VerbSelectionComplete = { + type: "verb", + verb: verb.info.verb, + transitivity: "intransitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense, + canChangeVoice: true, + isCompound: false, + voice: "active", + }; + return returnParseResult( + tokens, + { + blocks, + verb: v, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + } as T.VPSelectionComplete, + errors + ); + } } else { - // transitive verb - if (!(np1 && np2)) return []; - [[np1, np2, false] as const, [np2, np1, true] as const].forEach( - ([s, o, reversed]) => { - if (v.info.type === "equative") { - throw new Error("not yet implemented"); - } - if (!s || !o) return []; - // TODO: check if perfective head MATCHES verb - if (v.info.aspect === "perfective" && !ph) { - return []; - } - const subjPerson = getPersonFromNP(s.selection); + // transitive + if (nps.length > 2) { + return []; + } + if (nps.length === 0) { + return []; + } + if (nps.length === 1) { + const np = nps[0]; + return ( + [ + { + removeKing: true, + shrinkServant: false, + }, + { + removeKing: false, + shrinkServant: true, + }, + ] as const + ).flatMap((form) => { const errors: T.ParseError[] = []; - if (intransitive) { - return []; - } - - if (isPast) { - if (getPersonFromNP(o.selection) !== v.person) { - errors.push({ - message: "transitive past tense verb does not match object", - }); + if (form.removeKing) { + // king is gone + // servant is there + const king: T.NPSelection = { + type: "NP", + selection: makePronounSelection(verb.person), + }; + const servant = np.selection; + if (!isPast) { + if (isFirstOrSecondPersPronoun(np.selection)) + if (!np.inflected) { + errors.push({ + message: + "first or second pronoun object of non-past transitive verb must be inflected", + }); + } } else { - if (!s.inflected) { + if (!np.inflected) { errors.push({ - message: "transitive past tense subject should be inflected", + message: + "object of non-past transitive verb must not be inflected", }); } + } + const blocks: T.VPSBlockComplete[] = !isPast + ? [ + { + key: 1, + block: makeSubjectSelectionComplete(king), + }, + { + key: 2, + block: makeObjectSelectionComplete(servant), + }, + ] + : [ + { + key: 1, + block: makeSubjectSelectionComplete(servant), + }, + { + key: 2, + block: makeObjectSelectionComplete(king), + }, + ]; + const v: T.VerbSelectionComplete = { + type: "verb", + // @ts-ignore + verb: verb.info.verb, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense, + canChangeVoice: true, + isCompound: false, + voice: "active", + }; + return returnParseResult( + tokens, + { + blocks, + verb: v, + externalComplement: undefined, + form, + } as T.VPSelectionComplete, + errors + ); + } else { + // servant is shrunken + // king is there + const king = np.selection; + const shrunkenServantPeople = getPeopleFromKids(kids); + if (!shrunkenServantPeople.length) { + return []; + } + const servants = shrunkenServantPeople.map( + (person): T.NPSelection => ({ + type: "NP", + selection: makePronounSelection(person), + }) + ); + if (!isPast) { + if (np.inflected) { + errors.push({ + message: + "object of a past tense transitive verb should not be inflected", + }); + } + } else { + if (np.inflected) { + errors.push({ + message: + "subject of a non-past tense transitive verb should not be inflected", + }); + } + } + const blocksOps: T.VPSBlockComplete[][] = servants.map((servant) => + !isPast + ? [ + { + key: 1, + block: makeSubjectSelectionComplete(king), + }, + { + key: 2, + block: makeObjectSelectionComplete(servant), + }, + ] + : [ + { + key: 1, + block: makeSubjectSelectionComplete(servant), + }, + { + key: 2, + block: makeObjectSelectionComplete(king), + }, + ] + ); + const v: T.VerbSelectionComplete = { + type: "verb", + // @ts-ignore + verb: verb.info.verb, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense, + canChangeVoice: true, + isCompound: false, + voice: "active", + }; + return blocksOps.map((blocks) => ({ + tokens, + body: { + blocks, + verb: v, + externalComplement: undefined, + form, + } as T.VPSelectionComplete, + errors, + })); + } + }); + // possibilities + // present: + // - no king (np is servant) + // - shrunken servant (np is king) + // past: + // - no king (np is servant) + // - shrunken servant (np is king) + } else { + if (isPast) { + return ( + [ + [nps[0], nps[1], false], + [nps[1], nps[0], true], + ] as const + ).flatMap(([s, o, flip]) => { + const errors: T.ParseError[] = []; + if (!s.inflected) { + errors.push({ + message: + "subject of transitive past tense verb must be inflected", + }); + } + if (o.inflected) { + errors.push({ + message: + "object of past tense transitive verb must not be inflected", + }); + } + if (getPersonFromNP(o.selection) !== verb.person) { + errors.push({ + message: + "past tense transitive verb must agree with the object", + }); + } + let blocks: T.VPSBlockComplete[] = [ + { + key: 1, + block: makeSubjectSelectionComplete(s.selection), + }, + { + key: 2, + block: makeObjectSelectionComplete(o.selection), + }, + ]; + if (flip) { + blocks = blocks.reverse(); + } + const v: T.VerbSelectionComplete = { + type: "verb", + // @ts-ignore + verb: verb.info.verb, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense, + canChangeVoice: true, + isCompound: false, + voice: "active", + }; + return returnParseResult( + tokens, + { + blocks, + verb: v, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + } as T.VPSelectionComplete, + errors + ); + }); + } else { + return ( + [ + [nps[0], nps[1], false], + [nps[1], nps[0], true], + ] as const + ).flatMap(([s, o, flip]) => { + const errors: T.ParseError[] = []; + if (isFirstOrSecondPersPronoun(o.selection)) { + if (!o.inflected) { + errors.push({ + message: + "object of transitive non-past tense verb must be inflected when it's a first or second person pronoun", + }); + } + } else { if (o.inflected) { errors.push({ message: - "transitive past tense object should not be inflected", + "object of transitive non-past tense verb must not be inflected", }); } } - } else { - if (getPersonFromNP(s.selection) !== v.person) { + if (s.inflected) { errors.push({ - message: "verb does not match subject", + message: + "subject of transitive non-past tense verb must not be inflected", }); - } else { - if (s.inflected) { - errors.push({ message: "subject should not be inflected" }); - } - if (o.selection.selection.type === "pronoun") { - if (!isThirdPerson(subjPerson) && !o.inflected) { - errors.push({ - message: - "1st or 2nd person object pronoun should be inflected", - }); - } - } else if (o.inflected) { - errors.push({ message: "object should not be inflected" }); - } } - } - - const blocks: T.VPSBlockComplete[] = [ - { - key: 1, - block: makeSubjectSelectionComplete(s.selection), - }, - { - key: 2, - block: makeObjectSelectionComplete(o.selection), - }, - ]; - if (reversed) { - blocks.reverse(); - } - const verb: T.VerbSelectionComplete = { - type: "verb", - verb: v.info.type === "verb" ? v.info.verb : kedulStat, - transitivity: "transitive", - canChangeTransitivity: false, - canChangeStatDyn: false, - negative: false, - tense: getTenseFromRootsStems(ba, v.info.base, v.info.aspect), - canChangeVoice: true, - isCompound: false, - voice: "active", - }; - w.push({ - tokens, - body: { - blocks, - verb, - externalComplement: undefined, - form: { - removeKing: false, - shrinkServant: false, + if (getPersonFromNP(s.selection) !== verb.person) { + errors.push({ + message: + "non-past tense transitive verb must agree with the subject", + }); + } + let blocks: T.VPSBlockComplete[] = [ + { + key: 1, + block: makeSubjectSelectionComplete(s.selection), }, - }, - errors, + { + key: 2, + block: makeObjectSelectionComplete(o.selection), + }, + ]; + if (flip) { + blocks = blocks.reverse(); + } + const v: T.VerbSelectionComplete = { + type: "verb", + // @ts-ignore + verb: verb.info.verb, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense, + canChangeVoice: true, + isCompound: false, + voice: "active", + }; + return returnParseResult( + tokens, + { + blocks, + verb: v, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + } as T.VPSelectionComplete, + errors + ); }); } - ); + } } - return w; + return []; }); } +function getPeopleFromKids(kids: T.ParsedKid[]): T.Person[] { + const p: T.Person[] = []; + for (let k of kids) { + if (k === "me") { + p.push(T.Person.FirstSingMale); + p.push(T.Person.FirstSingFemale); + } else if (k === "de") { + p.push(T.Person.SecondSingMale); + p.push(T.Person.SecondSingFemale); + } else if (k === "ye") { + p.push(T.Person.ThirdSingMale); + p.push(T.Person.ThirdSingFemale); + p.push(T.Person.ThirdPlurMale); + p.push(T.Person.ThirdPlurFemale); + } else if (k === "mU") { + p.push(T.Person.FirstPlurMale); + p.push(T.Person.FirstPlurFemale); + p.push(T.Person.SecondPlurMale); + p.push(T.Person.SecondPlurFemale); + } + } + return p; +} + +// // how to make this into a nice pipeline... 🤔 +// const NP1 = parseNP(tokens, lookup).filter(({ errors }) => !errors.length); +// const ba = bindParseResult(NP1, (tokens, np1) => { +// const b = parseBa(tokens); +// if (!b.length) { +// return [ +// { +// tokens, +// body: { +// np1, +// ba: false, +// }, +// errors: [], +// }, +// ]; +// } else { +// return b.map(({ tokens, errors }) => ({ +// body: { +// np1, +// ba: true, +// }, +// errors, +// tokens, +// })); +// } +// }); +// const NP2 = bindParseResult< +// { +// np1: { +// inflected: boolean; +// selection: T.NPSelection; +// }; +// ba: boolean; +// }, +// { +// np1: { +// inflected: boolean; +// selection: T.NPSelection; +// }; +// ba: boolean; +// np2: +// | { +// inflected: boolean; +// selection: T.NPSelection; +// } +// | undefined; +// } +// >(ba, (tokens, { np1, ba }) => { +// const np2s = parseNP(tokens, lookup); +// if (!np2s.length) { +// const r: T.ParseResult<{ +// np1: { +// inflected: boolean; +// selection: T.NPSelection; +// }; +// ba: boolean; +// np2: undefined; +// }>[] = [ +// { +// tokens, +// body: { +// np1, +// np2: undefined, +// ba, +// }, +// errors: [], +// }, +// ]; +// return r; +// } +// return np2s.map((p) => ({ +// tokens: p.tokens, +// body: { +// np1, +// np2: p.body, +// ba, +// }, +// errors: p.errors, +// })); +// }).filter(({ errors }) => !errors.length); +// const vb = bindParseResult(NP2, (tokens, nps) => { +// const vb = parseVerb(tokens, verbLookup); +// // TODO make a nice functor that just maps or adds in the body +// return vb.map((p) => ({ +// tokens: p.tokens, +// body: { +// np2: nps.np2, +// v: p.body, +// np1: nps.np1, +// ba: nps.ba, +// }, +// errors: p.errors, +// })); +// }).filter(({ errors }) => !errors.length); +// // TODO: be able to bind mulitple vals +// return bindParseResult(vb, (tokens, { np1, np2, v: [ph, v], ba }) => { +// const w: T.ParseResult[] = []; +// if (v.info.type === "equative") { +// throw new Error("not yet implemented"); +// } +// const isPast = v.info.base === "root"; +// const intransitive = +// v.info.type === "verb" && v.info.verb.entry.c.includes("intrans."); +// if (intransitive) { +// if (np2) return []; +// const s = np1; +// const errors: T.ParseError[] = []; +// if (s.inflected) { +// errors.push({ +// message: "subject of intransitive verb should not be inflected", +// }); +// } +// if (getPersonFromNP(s.selection) !== v.person) { +// errors.push({ +// message: "subject should agree with intransitive verb", +// }); +// } +// const blocks: T.VPSBlockComplete[] = [ +// { +// key: 1, +// block: makeSubjectSelectionComplete(s.selection), +// }, +// { +// key: 2, +// block: { +// type: "objectSelection", +// selection: "none", +// }, +// }, +// ]; +// const verb: T.VerbSelectionComplete = { +// type: "verb", +// verb: v.info.type === "verb" ? v.info.verb : kedulStat, +// transitivity: "intransitive", +// canChangeTransitivity: false, +// canChangeStatDyn: false, +// negative: false, +// tense: getTenseFromRootsStems(ba, v.info.base, v.info.aspect), +// canChangeVoice: true, +// isCompound: false, +// voice: "active", +// }; +// w.push({ +// tokens, +// body: { +// blocks, +// verb, +// externalComplement: undefined, +// form: { +// removeKing: false, +// shrinkServant: false, +// }, +// }, +// errors, +// }); +// } else { +// // transitive verb +// if (!(np1 && np2)) return []; +// [[np1, np2, false] as const, [np2, np1, true] as const].forEach( +// ([s, o, reversed]) => { +// if (v.info.type === "equative") { +// throw new Error("not yet implemented"); +// } +// if (!s || !o) return []; +// // TODO: check if perfective head MATCHES verb +// if (v.info.aspect === "perfective" && !ph) { +// return []; +// } +// const subjPerson = getPersonFromNP(s.selection); +// const errors: T.ParseError[] = []; +// if (intransitive) { +// return []; +// } + +// if (isPast) { +// if (getPersonFromNP(o.selection) !== v.person) { +// errors.push({ +// message: "transitive past tense verb does not match object", +// }); +// } else { +// if (!s.inflected) { +// errors.push({ +// message: "transitive past tense subject should be inflected", +// }); +// } +// if (o.inflected) { +// errors.push({ +// message: +// "transitive past tense object should not be inflected", +// }); +// } +// } +// } else { +// if (getPersonFromNP(s.selection) !== v.person) { +// errors.push({ +// message: "verb does not match subject", +// }); +// } else { +// if (s.inflected) { +// errors.push({ message: "subject should not be inflected" }); +// } +// if (o.selection.selection.type === "pronoun") { +// if (!isThirdPerson(subjPerson) && !o.inflected) { +// errors.push({ +// message: +// "1st or 2nd person object pronoun should be inflected", +// }); +// } +// } else if (o.inflected) { +// errors.push({ message: "object should not be inflected" }); +// } +// } +// } + +// const blocks: T.VPSBlockComplete[] = [ +// { +// key: 1, +// block: makeSubjectSelectionComplete(s.selection), +// }, +// { +// key: 2, +// block: makeObjectSelectionComplete(o.selection), +// }, +// ]; +// if (reversed) { +// blocks.reverse(); +// } +// const verb: T.VerbSelectionComplete = { +// type: "verb", +// verb: v.info.type === "verb" ? v.info.verb : kedulStat, +// transitivity: "transitive", +// canChangeTransitivity: false, +// canChangeStatDyn: false, +// negative: false, +// tense: getTenseFromRootsStems(ba, v.info.base, v.info.aspect), +// canChangeVoice: true, +// isCompound: false, +// voice: "active", +// }; +// w.push({ +// tokens, +// body: { +// blocks, +// verb, +// externalComplement: undefined, +// form: { +// removeKing: false, +// shrinkServant: false, +// }, +// }, +// errors, +// }); +// } +// ); +// } + function getTenseFromRootsStems( hasBa: boolean, base: "root" | "stem", diff --git a/src/lib/src/parsing/utils.ts b/src/lib/src/parsing/utils.ts index 59fc301..2a23922 100644 --- a/src/lib/src/parsing/utils.ts +++ b/src/lib/src/parsing/utils.ts @@ -20,7 +20,7 @@ import * as T from "../../../types"; * from the different previous results * @returns */ -export function bindParseResult( +export function bindParseResult( previous: T.ParseResult[], f: ( tokens: Readonly, @@ -59,18 +59,42 @@ export function bindParseResult( errors: [...errsPassed, ...x.errors, ...errors], })); }); - return cleanOutFails(nextPossibilities); + return cleanOutResults(nextPossibilities); } -export function cleanOutFails( +export function returnParseResult( + tokens: Readonly, + body: D, + errors?: T.ParseError[] +): T.ParseResult[] { + return [ + { + tokens, + body, + errors: errors || [], + }, + ]; +} + +/** + * finds the most successful path(s) and culls out any other more erroneous + * or redundant paths + */ +export function cleanOutResults( results: T.ParseResult[] ): T.ParseResult[] { - // if there's any success anywhere, remove any of the errors - const errorsGone = results.find((x) => x.errors.length === 0) - ? results.filter((x) => x.errors.length === 0) - : results; + if (results.length === 0) { + return results; + } + let min = Infinity; + for (let a of results) { + if (a.errors.length < min) { + min = a.errors.length; + } + } + const errorsCulled = results.filter((x) => x.errors.length === min); // @ts-ignore - return Array.from(new Set(errorsGone.map(JSON.stringify))).map(JSON.parse); + return Array.from(new Set(errorsCulled.map(JSON.stringify))).map(JSON.parse); } export function isCompleteResult( diff --git a/src/lib/src/phrase-building/render-vp.ts b/src/lib/src/phrase-building/render-vp.ts index 095cdb6..ea85e04 100644 --- a/src/lib/src/phrase-building/render-vp.ts +++ b/src/lib/src/phrase-building/render-vp.ts @@ -352,7 +352,7 @@ export function getKingAndServant( }; } -function isFirstOrSecondPersPronoun( +export function isFirstOrSecondPersPronoun( o: "none" | T.NPSelection | T.Person.ThirdPlurMale ): boolean { if (typeof o !== "object") return false; diff --git a/src/types.ts b/src/types.ts index e821faf..4a67e17 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1197,6 +1197,8 @@ export type Kid = { kid: { type: "ba" } | MiniPronoun; }; +export type ParsedKid = "ba" | "me" | "de" | "ye" | "mU"; + export type MiniPronoun = { type: "mini-pronoun"; person: Person;