Big improvement by removing the redundant PH parsing; added tests for negatives, and also added checking for subject/object (S/O) conflicts in VP parsing

2023-08-22 19:33:53 +04:00 · 2023-08-22 19:33:53 +04:00 · a3ac5e2cb3
parent a7709c4299
commit a3ac5e2cb3
13 changed files with 604 additions and 1250 deletions
--- a/diagrams/diagram-light.png
+++ b/diagrams/diagram-light.png
--- a/src/lib/library.ts
+++ b/src/lib/library.ts
@ -15,7 +15,10 @@ import {
 } from "./src/verb-info";
 import { makeVPSelectionState } from "./src/phrase-building/verb-selection";
 import { vpsReducer } from "./src/phrase-building/vps-reducer";
-import { isPastTense } from "./src/phrase-building/vp-tools";
+import {
  isPastTense,
  isInvalidSubjObjCombo,
 } from "./src/phrase-building/vp-tools";
 import { getInflectionPattern } from "./src/inflection-pattern";
 import { makePsString, removeFVarients } from "./src/accent-and-ps-utils";
@ -45,12 +48,7 @@ import {
  standardizePhonetics,
 } from "./src/standardize-pashto";
 import { phoneticsToDiacritics } from "./src/phonetics-to-diacritics";
-import {
+import { randomPerson, randomSubjObj, getEnglishVerb } from "./src/np-tools";
  randomPerson,
  isInvalidSubjObjCombo,
  randomSubjObj,
  getEnglishVerb,
 } from "./src/np-tools";
 import {
  getEnglishFromRendered,
  getPashtoFromRendered,
--- a/src/lib/src/np-tools.ts
+++ b/src/lib/src/np-tools.ts
@ -1,80 +1,79 @@
 import * as T from "../../types";
-import { isFirstPerson, parseEc, isSecondPerson } from "./misc-helpers";
+import { parseEc } from "./misc-helpers";
 import { isInvalidSubjObjCombo } from "./phrase-building/vp-tools";
 function getRandPers(): T.Person {
-    return Math.floor(Math.random() * 12);
+  return Math.floor(Math.random() * 12);
 }
-export function randomPerson(a?: { prev?: T.Person, counterPart?: T.VerbObject | T.NPSelection }) {
+export function randomPerson(a?: {
-    // no restrictions, just get any person
+  prev?: T.Person;
-    if (!a) {
+  counterPart?: T.VerbObject | T.NPSelection;
-        return getRandPers();
+}) {
-    }
+  // no restrictions, just get any person
-    if (a.counterPart !== undefined && typeof a.counterPart === "object" && a.counterPart.selection.type === "pronoun") {
+  if (!a) {
-        // with counterpart pronoun
+    return getRandPers();
-        let newP = 0;
+  }
-        do {
+  if (
-            newP = getRandPers();
+    a.counterPart !== undefined &&
-        } while (
+    typeof a.counterPart === "object" &&
-            isInvalidSubjObjCombo(a.counterPart.selection.person, newP)
+    a.counterPart.selection.type === "pronoun"
-            ||
+  ) {
-            (newP === a.prev)
+    // with counterpart pronoun
        );
        return newP;
    }
    // without counterpart pronoun, just previous
    let newP = 0;
    do {
-        newP = getRandPers();
+      newP = getRandPers();
    } while (newP === a.prev);
    return newP;
 }
 export function isInvalidSubjObjCombo(subj: T.Person, obj: T.Person): boolean {
    return (
        (isFirstPerson(subj) && isFirstPerson(obj))
        ||
        (isSecondPerson(subj) && isSecondPerson(obj))
    );
 }
 export function randomSubjObj(old?: { subj: T.Person, obj?: T.Person }): { subj: T.Person, obj: T.Person } {
    let subj = 0;
    let obj = 0;
    do {
        subj = getRandPers();
        obj = getRandPers();
    } while (
-        (old && ((old.subj === subj) || (old.obj === obj)))
+      isInvalidSubjObjCombo(a.counterPart.selection.person, newP) ||
-        ||
+      newP === a.prev
        isInvalidSubjObjCombo(subj, obj)
    );
-    return { subj, obj };
+    return newP;
  }
  // without counterpart pronoun, just previous
  let newP = 0;
  do {
    newP = getRandPers();
  } while (newP === a.prev);
  return newP;
 }
 export function randomSubjObj(old?: { subj: T.Person; obj?: T.Person }): {
  subj: T.Person;
  obj: T.Person;
 } {
  let subj = 0;
  let obj = 0;
  do {
    subj = getRandPers();
    obj = getRandPers();
  } while (
    (old && (old.subj === subj || old.obj === obj)) ||
    isInvalidSubjObjCombo(subj, obj)
  );
  return { subj, obj };
 }
 export function getEnglishVerb(entry: T.DictionaryEntry): string {
-    if (!entry.ec) {
+  if (!entry.ec) {
-        console.error("errored verb");
+    console.error("errored verb");
-        console.error(entry);
+    console.error(entry);
-        throw new Error("no english information for verb");
+    throw new Error("no english information for verb");
-    }
+  }
-    if (entry.ep) {
+  if (entry.ep) {
-        const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec;
+    const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec;
-        return `to ${ec} ${entry.ep}`;
+    return `to ${ec} ${entry.ep}`;
-    }
+  }
-    const ec = parseEc(entry.ec);
+  const ec = parseEc(entry.ec);
-    return `to ${ec[0]}`;
+  return `to ${ec[0]}`;
 }
 export function getEnglishParticiple(entry: T.DictionaryEntry): string {
-    if (!entry.ec) {
+  if (!entry.ec) {
-        throw new Error("no english information for participle");
+    throw new Error("no english information for participle");
-    }
+  }
-    const ec = parseEc(entry.ec);
+  const ec = parseEc(entry.ec);
-    if (entry.ep && ec[0] === "am") {
+  if (entry.ep && ec[0] === "am") {
-        return `to be/being ${entry.ep}`;
+    return `to be/being ${entry.ep}`;
-    }
+  }
-    const participle = `${ec[2]} / to ${ec[0]}`;
+  const participle = `${ec[2]} / to ${ec[0]}`;
-    return (entry.ep)
+  return entry.ep ? `${participle} ${entry.ep}` : participle;
-        ? `${participle} ${entry.ep}`
+}
        : participle;
 }
--- a/src/lib/src/parsing/lookup.tsx
+++ b/src/lib/src/parsing/lookup.tsx
@ -49,7 +49,7 @@ export function verbLookup(input: string): T.VerbEntry[] {
  // IMPORTANT TODO FOR EFFECIANCY!
  // check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING
  // if theres no legit verb ending and no tpp possibilities, just return an empty array
-  const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
+  // const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
  const checkTpp = shouldCheckTpp(input);
  const fromAawu = checkTpp && undoAaXuPattern(input);
  const inputWoutOo =
@ -61,86 +61,45 @@ export function verbLookup(input: string): T.VerbEntry[] {
  // TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp)
  if (s.endsWith("ېږ")) {
    return verbs.filter(
-      sWoutOo
+      ({ entry }) =>
-        ? ({ entry }) =>
+        [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
-            [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
+        [s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
-            [
+          entry.p
-              s.slice(0, -1) + "دل",
+        ) ||
-              sWoutOo.slice(0, -1) + "دل",
+        [s, sAddedAa].includes(entry.p) ||
-              sAddedAa.slice(0, -1) + "دل",
+        [s, sAddedAa].includes(entry.psp || "") ||
-            ].includes(entry.p) ||
+        [s, sAddedAa].includes(entry.prp || "") ||
-            [s, sWoutOo, sAddedAa].includes(entry.p) ||
+        [s, sAddedAa].includes(entry.ssp || "")
            (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
            entry.prp === s ||
            entry.ssp === s
        : ({ entry }) =>
            [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
            [s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
              entry.p
            ) ||
            [s, sAddedAa].includes(entry.p) ||
            [s, sAddedAa].includes(entry.psp || "") ||
            [s, sAddedAa].includes(entry.prp || "") ||
            [s, sAddedAa].includes(entry.ssp || "")
    );
  }
  return verbs.filter(
-    sWoutOo
+    ({ entry }) =>
-      ? ({ entry }) =>
+      [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
-          [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
+      // for short intransitive forms
-          // for short intransitive forms
+      [s, sAddedAa].includes(entry.p.slice(0, -3)) ||
-          [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -3)) ||
+      [s, sAddedAa].includes(entry.p) ||
-          [s, sWoutOo, sAddedAa].includes(entry.p) ||
+      (checkTpp &&
-          (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
+        [input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) ||
-          (checkTpp &&
+      (entry.tppp &&
        arraysHaveCommon(
          [input, inputWoutOo, sAddedAa, inputAddedAa],
          splitVarients(entry.tppp)
        )) ||
      [s, sAddedAa].includes(entry.psp || "") ||
      arraysHaveCommon([entry.prp, entry.prp?.slice(0, -1)], [s, sAddedAa]) ||
      [s, sAddedAa].includes(entry.ssp || "") ||
      (entry.separationAtP &&
        // TODO this is super ugly, do check of short and long function
        (entry.p.slice(entry.separationAtP) === s ||
          entry.p.slice(entry.separationAtP, -1) === s ||
          (checkTpp && entry.p.slice(entry.separationAtP, -1) === input) ||
          entry.psp?.slice(entry.separationAtP) === s ||
          (entry.prp &&
            [
-              input.slice(1),
+              entry.prp.slice(entry.separationAtP),
-              fromAawu && fromAawu.slice(-1),
+              entry.prp.slice(entry.separationAtP).slice(0, -1),
-              inputAddedAa,
+            ].includes(s)) ||
-            ].includes(entry.p.slice(0, -1))) ||
+          (entry.ssp && entry.ssp.slice(entry.separationAtP) === s)))
          (entry.tppp &&
            arraysHaveCommon(
              [input, inputWoutOo, sAddedAa],
              splitVarients(entry.tppp)
            )) ||
          arraysHaveCommon(
            [s, sAddedAa, "و" + s],
            [entry.prp, entry.prp?.slice(0, -1)]
          ) ||
          [s, sAddedAa].includes(entry.ssp || "") ||
          (entry.separationAtP &&
            (entry.p.slice(entry.separationAtP) === s ||
              entry.psp?.slice(entry.separationAtP) === s))
      : ({ entry }) =>
          [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
          // for short intransitive forms
          [s, sAddedAa].includes(entry.p.slice(0, -3)) ||
          [s, sAddedAa].includes(entry.p) ||
          (checkTpp &&
            [input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) ||
          (entry.tppp &&
            arraysHaveCommon(
              [input, inputWoutOo, sAddedAa, inputAddedAa],
              splitVarients(entry.tppp)
            )) ||
          [s, sAddedAa].includes(entry.psp || "") ||
          arraysHaveCommon(
            [entry.prp, entry.prp?.slice(0, -1)],
            [s, sAddedAa, "و" + s]
          ) ||
          [s, sAddedAa, "و" + s].includes(entry.ssp || "") ||
          (entry.separationAtP &&
            // TODO this is super ugly, do check of short and long function
            (entry.p.slice(entry.separationAtP) === s ||
              entry.p.slice(entry.separationAtP, -1) === s ||
              (checkTpp && entry.p.slice(entry.separationAtP, -1) === input) ||
              entry.psp?.slice(entry.separationAtP) === s ||
              (entry.prp &&
                [
                  entry.prp.slice(entry.separationAtP),
                  entry.prp.slice(entry.separationAtP).slice(0, -1),
                ].includes(s)) ||
              (entry.ssp && entry.ssp.slice(entry.separationAtP) === s)))
  );
 }
--- a/src/lib/src/parsing/parse-blocks.ts
+++ b/src/lib/src/parsing/parse-blocks.ts
@ -1,5 +1,4 @@
 import * as T from "../../../types";
 import { fmapParseResult } from "../fp-ps";
 import { parseKidsSection } from "./parse-kids-section";
 import { parseNeg } from "./parse-negative";
 import { parseNP } from "./parse-np";
@ -21,22 +20,22 @@ export function parseBlocks(
    return returnParseResult(tokens, { blocks, kids });
  }
  const prevPh: T.ParsedPH | undefined = blocks.find(
-    (b): b is T.ParsedPH => "type" in b && b.type === "PH"
+    (b): b is T.ParsedPH => b.type === "PH"
  );
  const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
-  const np = prevPh ? [] : fmapParseResult((x) => [x], parseNP(tokens, lookup));
+  const np = prevPh ? [] : parseNP(tokens, lookup);
  // UHOH... This could cause double paths ... maybe don't parse the PH in the parse VB!
-  const ph =
+  const ph = vbExists || prevPh ? [] : parsePH(tokens);
-    vbExists || prevPh ? [] : fmapParseResult((x) => [x], parsePH(tokens));
+  const vb = parseVerb(tokens, verbLookup);
-  const vb = fmapParseResult(
+  const neg = parseNeg(tokens);
    ([ph, v]) => (ph ? [ph, v] : [v]),
    parseVerb(tokens, verbLookup)
  );
  const neg = fmapParseResult((x) => [x], parseNeg(tokens));
  const kidsR = parseKidsSection(tokens, []);
-  const allResults = [...np, ...ph, ...neg, ...vb, ...kidsR] as T.ParseResult<
+  const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
-    T.ParsedBlock[] | { kids: T.ParsedKid[] }
+    ...np,
-  >[];
+    ...ph,
    ...neg,
    ...vb,
    ...kidsR,
  ];
  // TODO: is this necessary?
  // if (!allResults.length) {
  //   return [
@ -47,10 +46,9 @@ export function parseBlocks(
  //     },
  //   ];
  // }
  console.log({ allResults });
  return bindParseResult(allResults, (tokens, r) => {
    const errors: T.ParseError[] = [];
-    if ("kids" in r) {
+    if (r.type === "kids") {
      return {
        next: parseBlocks(tokens, lookup, verbLookup, blocks, [
          ...kids,
@ -62,23 +60,21 @@ export function parseBlocks(
            : [],
      };
    }
-    if (prevPh && r.some((x) => "type" in x && x.type === "PH")) {
+    if (prevPh && r.type === "PH") {
      return [];
    }
-    const vb = r.find((x): x is T.ParsedVBE => "type" in x && x.type === "VB");
+    // TODO: will have to handle welded
-    if (!phMatches(prevPh, vb)) {
+    if (r.type === "VB") {
-      return [];
+      if (!phMatches(prevPh, r)) {
        return [];
      }
    }
    // don't allow two negatives
-    if (
+    if (r.type === "negative" && blocks.some((b) => b.type === "negative")) {
      "type" in r[0] &&
      r[0].type === "negative" &&
      blocks.some((b) => "type" in b && b.type === "negative")
    ) {
      return [];
    }
    return {
-      next: parseBlocks(tokens, lookup, verbLookup, [...blocks, ...r], kids),
+      next: parseBlocks(tokens, lookup, verbLookup, [...blocks, r], kids),
      errors,
    };
  });
--- a/src/lib/src/parsing/parse-kids-section.ts
+++ b/src/lib/src/parsing/parse-kids-section.ts
@ -5,14 +5,18 @@ import { bindParseResult, returnParseResult } from "./utils";
 export function parseKidsSection(
  tokens: Readonly<T.Token[]>,
  prevKids: T.ParsedKid[]
-): T.ParseResult<{ kids: T.ParsedKid[] }>[] {
+): T.ParseResult<T.ParsedKidsSection>[] {
  if (tokens.length === 0) {
-    return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
+    return prevKids.length
      ? returnParseResult(tokens, { type: "kids", kids: prevKids })
      : [];
  }
  const parsedKid = parseKid(tokens);
  // TODO: is this even necessary ??
  if (!parsedKid.length) {
-    return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
+    return prevKids.length
      ? returnParseResult(tokens, { type: "kids", kids: prevKids })
      : [];
  }
  return bindParseResult(parsedKid, (tokens, r) => {
    // return parseKidsSection(tokens, [...prevKids, r]);
--- a/src/lib/src/parsing/parse-np.ts
+++ b/src/lib/src/parsing/parse-np.ts
@ -21,11 +21,9 @@ export function parseNP(
          inflected: boolean;
          selection: T.NounSelection;
        }
-  ): {
+  ): T.ParsedNP {
    inflected: boolean;
    selection: T.NPSelection;
  } {
    return {
      type: "NP",
      inflected: a.inflected,
      selection: {
        type: "NP",
--- a/src/lib/src/parsing/parse-ph.ts
+++ b/src/lib/src/parsing/parse-ph.ts
@ -18,7 +18,7 @@ const phs = [
 export function parsePH(
  tokens: Readonly<T.Token[]>
-): T.ParseResult<{ type: "PH"; s: string }>[] {
+): T.ParseResult<T.ParsedPH>[] {
  if (tokens.length === 0) {
    return [];
  }
--- a/src/lib/src/parsing/parse-verb.test.ts
+++ b/src/lib/src/parsing/parse-verb.test.ts
--- a/src/lib/src/parsing/parse-verb.ts
+++ b/src/lib/src/parsing/parse-verb.ts
@ -22,7 +22,7 @@ import {
 export function parseVerb(
  tokens: Readonly<T.Token[]>,
  verbLookup: (s: string) => T.VerbEntry[]
-): T.ParseResult<[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]>[] {
+): T.ParseResult<T.ParsedVBE>[] {
  if (tokens.length === 0) {
    return [];
  }
@ -57,8 +57,8 @@ function matchVerbs(
    root: T.Person[];
    stem: T.Person[];
  }
-): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
+): T.ParsedVBE[] {
-  const w: ReturnType<typeof matchVerbs> = [];
+  const w: T.ParsedVBE[] = [];
  const lEnding = s.endsWith("ل");
  const base = s.endsWith("ل") ? s : s.slice(0, -1);
  const matchShortOrLong = (b: string, x: string) => {
@ -80,167 +80,52 @@ function matchVerbs(
          return e.p.slice(0, -1) === base;
        }
      }),
-      perfective: entries.reduce<
+      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        { ph: string | undefined; entry: T.VerbEntry }[]
      >((acc, entry) => {
        const e = entry.entry;
        const baseWAa = "ا" + base;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.ssp) {
-          const bRest = e.separationAtP ? e.ssp.slice(e.separationAtP) : "";
+          if (e.separationAtP) {
-          if (bRest === base) {
+            const bRest = e.ssp.slice(e.separationAtP);
-            return [
+            if (bRest === base) {
-              ...acc,
+              return [...acc, entry];
-              {
+            }
-                ph: undefined,
+          } else {
-                entry,
+            if (e.ssp === base) {
-              },
+              return [...acc, entry];
-            ];
+            }
          }
          if (e.ssp === base) {
            return [
              ...acc,
              {
                ph: e.separationAtF
                  ? e.ssp.slice(0, e.separationAtP)
                  : undefined,
                entry,
              },
            ];
          }
        } else if (e.psp) {
-          const bRest = e.separationAtP ? e.psp.slice(e.separationAtP) : "";
+          if (e.separationAtP) {
-          if (bRest === base) {
+            const bRest = e.psp.slice(e.separationAtP);
-            return [
+            if (bRest === base) {
-              ...acc,
+              return [...acc, entry];
              {
                ph: undefined,
                entry,
              },
            ];
          }
          if (e.psp === base && e.separationAtP) {
            return [
              ...acc,
              {
                ph: e.psp.slice(0, e.separationAtP),
                entry,
              },
            ];
          }
          if (!e.sepOo) {
            if (base.startsWith("وا") && base.slice(1) === e.psp) {
              return [
                ...acc,
                {
                  ph: "وا",
                  entry,
                },
              ];
            }
-            if ((base.startsWith("و") && base.slice(1)) === e.psp) {
+          } else {
-              return [
+            if (!e.sepOo) {
-                ...acc,
+              if (baseWAa === e.psp) {
-                {
+                return [...acc, entry];
-                  ph: "و",
+              }
                  entry,
                },
              ];
            }
-            if (baseWAa === e.psp) {
+            if (base === e.psp) {
-              return [
+              return [...acc, entry];
                ...acc,
                {
                  ph: undefined,
                  entry,
                },
              ];
            }
          }
          if (base === e.psp) {
            return [
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        } else if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
          if ([miniRoot, miniRootEg].includes(base)) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          } else if (!e.sepOo) {
            if (
              base.startsWith("وا") &&
              [miniRoot, miniRootEg].includes(base.slice(1))
            ) {
              return [
                ...acc,
                {
                  ph: "وا",
                  entry,
                },
              ];
            } else if (
              base.startsWith("و") &&
              [miniRoot, miniRootEg].includes(base.slice(1))
            ) {
              return [
                ...acc,
                {
                  ph: "و",
                  entry,
                },
              ];
            }
          }
        } else {
          const eb = e.p.slice(0, -1);
          if (eb === base) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          } else if (!e.sepOo) {
            if (base.startsWith("وا") && eb === base.slice(1)) {
              return [
                ...acc,
                {
                  ph: "وا",
                  entry,
                },
              ];
            }
            if (base.startsWith("و") && eb === base.slice(1)) {
              return [
                ...acc,
                {
                  ph: "و",
                  entry,
                },
              ];
            }
            if (baseWAa === base.slice(1)) {
-              return [
+              return [...acc, entry];
                ...acc,
                {
                  ph: undefined,
                  entry,
                },
              ];
            }
          }
        }
@ -250,19 +135,16 @@ function matchVerbs(
    Object.entries(stemMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.stem.forEach((person) => {
-          w.push([
+          w.push({
-            "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
+            type: "VB",
-            {
+            person,
-              type: "VB",
+            info: {
-              person,
+              type: "verb",
-              info: {
+              aspect: aspect as T.Aspect,
-                type: "verb",
+              base: "stem",
-                aspect: aspect as T.Aspect,
+              verb: removeFVarientsFromVerb(verb),
                base: "stem",
                verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
              },
            },
-          ]);
+          });
        });
      });
    });
@ -272,56 +154,21 @@ function matchVerbs(
      imperfective: entries.filter(
        ({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
      ),
-      perfective: entries.reduce<
+      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        { ph: string | undefined; entry: T.VerbEntry }[]
      >((acc, entry) => {
        const e = entry.entry;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.separationAtP) {
          const b = e.prp || e.p;
          const bHead = b.slice(0, e.separationAtP);
          const bRest = b.slice(e.separationAtP);
-          if (matchShortOrLong(base, b)) {
+          if (matchShortOrLong(base, bRest)) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: bHead,
                entry,
              },
            ];
          } else if (matchShortOrLong(base, bRest)) {
            return [
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        } else {
          const baseNoOo = base.startsWith("و") && base.slice(1);
          const p = e.prp || e.p;
-          if (baseNoOo && matchShortOrLong(baseNoOo, p)) {
+          if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
                entry,
              },
            ];
          } else if (
            matchShortOrLong(base, p) ||
            matchShortOrLong("ا" + base, p)
          ) {
            return [
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        }
        return acc;
@ -331,19 +178,16 @@ function matchVerbs(
    Object.entries(rootMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.root.forEach((person) => {
-          w.push([
+          w.push({
-            "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
+            type: "VB",
-            {
+            person,
-              type: "VB",
+            info: {
-              person,
+              type: "verb",
-              info: {
+              aspect: aspect as T.Aspect,
-                type: "verb",
+              base: "root",
-                aspect: aspect as T.Aspect,
+              verb: removeFVarientsFromVerb(verb),
                base: "root",
                verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
              },
            },
-          ]);
+          });
        });
      });
    });
@ -351,8 +195,6 @@ function matchVerbs(
  const hamzaEnd = s.at(-1) === "ه";
  const oEnd = s.at(-1) === "و";
  const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1));
  const b = hamzaEnd || oEnd ? base : s;
  const bNoOo = b.startsWith("و") && b.slice(1);
  const tppMatches = {
    imperfective: entries.filter(
      ({ entry: e }) =>
@ -363,163 +205,63 @@ function matchVerbs(
            (hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1)))
      // TODO: if check for modified aaXu thing!
    ),
-    perfective: entries.reduce<
+    perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      { ph: string | undefined; entry: T.VerbEntry }[]
    >((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      if (e.separationAtP) {
        const b = e.prp || e.p;
        const bHead = b.slice(0, e.separationAtP);
        const bRest = b.slice(e.separationAtP);
        if (bRest === "شول") {
          return acc;
        }
        if (abruptEnd) {
          if (s === b.slice(0, -1)) {
            return [
              ...acc,
              {
                ph: bHead,
                entry,
              },
            ];
          }
          if (s === bRest.slice(0, -1)) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        } else if (hamzaEnd) {
          if (base === b.slice(0, -1)) {
            return [
              ...acc,
              {
                ph: bHead,
                entry,
              },
            ];
          }
          if (base === bRest.slice(0, -1)) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        } else if (oEnd) {
          if ([b, b.slice(0, -1)].includes(base)) {
            return [
              ...acc,
              {
                ph: bHead,
                entry,
              },
            ];
          }
          if ([bRest, bRest.slice(0, -1)].includes(base)) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        }
      } else if (!e.prp) {
        if (oEnd) {
-          if (bNoOo && [e.p, e.p.slice(0, -1).includes(bNoOo)]) {
+          if ([e.p, e.p.slice(0, -1)].includes(base)) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: "و",
                entry,
              },
            ];
          } else if ([e.p, e.p.slice(0, -1)].includes(base)) {
            return [
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
          const b = hamzaEnd ? base : s;
          const p = e.p.slice(0, -1);
-          if (bNoOo && bNoOo === p) {
+          if (b === p) {
-            return [
+            return [...acc, entry];
              ...acc,
              {
                ph: "و",
                entry,
              },
            ];
          } else if (b === p) {
            return [
              ...acc,
              {
                ph: undefined,
                entry,
              },
            ];
          }
        }
      }
-      const sNoOo = s.startsWith("و") && s.slice(1);
+      if (isInVarients(e.tppp, s)) {
-      if (isInVarients(e.tppp, sNoOo)) {
+        return [...acc, entry];
        return [
          ...acc,
          {
            ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
            entry,
          },
        ];
      } else if (isInVarients(e.tppp, s)) {
        return [
          ...acc,
          {
            ph: undefined,
            entry,
          },
        ];
      } else if (isInVarients(e.tppp, "ا" + s)) {
-        return [
+        return [...acc, entry];
          ...acc,
          {
            ph: undefined,
            entry,
          },
        ];
      }
      return acc;
    }, []),
  };
  Object.entries(tppMatches).forEach(([aspect, entries]) => {
    entries.forEach((verb) => {
-      w.push([
+      w.push({
-        "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
+        type: "VB",
-        {
+        person: T.Person.ThirdSingMale,
-          type: "VB",
+        info: {
-          person: T.Person.ThirdSingMale,
+          type: "verb",
-          info: {
+          aspect: aspect as T.Aspect,
-            type: "verb",
+          base: "root",
-            aspect: aspect as T.Aspect,
+          verb: removeFVarientsFromVerb(verb),
            base: "root",
            verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
          },
        },
-      ]);
+      });
    });
  });
  return w;
@ -580,66 +322,26 @@ function getVerbEnding(p: string): {
  };
 }
 // const [ph, rest]: [T.PH | undefined, T.PsString] = v.entry.noOo
 //   ? [undefined, base]
 //   : v.entry.sepOo
 //   ? [{ type: "PH", ps: { p: "و ", f: "óo`" } }, base]
 //   : ["آ", "ا"].includes(base.p.charAt(0)) && base.f.charAt(0) === "a"
 //   ? [{ type: "PH", ps: { p: "وا", f: "wáa" } }, removeAStart(base)]
 //   : ["óo", "oo"].includes(base.f.slice(0, 2))
 //   ? [{ type: "PH", ps: { p: "و", f: "wÚ" } }, base]
 //   : ["ée", "ee"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "ای"
 //   ? [
 //       { type: "PH", ps: { p: "وي", f: "wée" } },
 //       {
 //         p: base.p.slice(2),
 //         f: base.f.slice(2),
 //       },
 //     ]
 //   : ["é", "e"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "اې"
 //   ? [
 //       { type: "PH", ps: { p: "وي", f: "wé" } },
 //       {
 //         p: base.p.slice(2),
 //         f: base.f.slice(1),
 //       },
 //     ]
 //   : ["ó", "o"].includes(base.f[0]) && base.p.slice(0, 2) === "او"
 //   ? [{ type: "PH", ps: { p: "و", f: "óo`" } }, base]
 //   : [{ type: "PH", ps: { p: "و", f: "óo" } }, base];
 // return [ph, removeAccents(rest)];
 // function removeAStart(ps: T.PsString) {
 //   return {
 //     p: ps.p.slice(1),
 //     f: ps.f.slice(ps.f[1] === "a" ? 2 : 1),
 //   };
 // }
 // TODO: could handle all sh- verbs for efficiency's sake
-function parseIrregularVerb(
+function parseIrregularVerb(s: string): T.ParsedVBE[] {
  s: string
 ): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
  if (["ته", "راته", "ورته", "درته"].includes(s)) {
    return [
-      [
+      {
-        undefined,
+        type: "VB",
-        {
+        info: {
-          type: "VB",
+          aspect: "imperfective",
-          info: {
+          base: "root",
-            aspect: "imperfective",
+          type: "verb",
-            base: "root",
+          verb: s.startsWith("را")
-            type: "verb",
+            ? raatlul
-            verb: s.startsWith("را")
+            : s.startsWith("ور")
-              ? raatlul
+            ? wartlul
-              : s.startsWith("ور")
+            : s.startsWith("در")
-              ? wartlul
+            ? dartlul
-              : s.startsWith("در")
+            : tlul,
              ? dartlul
              : tlul,
          },
          person: T.Person.ThirdSingMale,
        },
-      ],
+        person: T.Person.ThirdSingMale,
      },
    ];
  }
  if (s === "شو") {
@ -649,38 +351,28 @@ function parseIrregularVerb(
        T.Person.FirstPlurMale,
        T.Person.FirstPlurFemale,
      ].flatMap((person) =>
-        [kedulStat, kedulDyn].map<
+        [kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
-          [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
+          type: "VB",
-        >((verb) => [
+          info: {
-          undefined,
+            aspect: "perfective",
-          {
+            base: "root",
-            type: "VB",
+            type: "verb",
-            info: {
+            verb,
              aspect: "perfective",
              base: "root",
              type: "verb",
              verb,
            },
            person,
          },
-        ])
+          person,
        }))
      ),
      ...[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].flatMap((person) =>
-        [kedulStat, kedulDyn].map<
+        [kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
-          [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
+          type: "VB",
-        >((verb) => [
+          info: {
-          undefined,
+            aspect: "perfective",
-          {
+            base: "stem",
-            type: "VB",
+            type: "verb",
-            info: {
+            verb,
              aspect: "perfective",
              base: "stem",
              type: "verb",
              verb,
            },
            person,
          },
-        ])
+          person,
        }))
      ),
    ];
  }
--- a/src/lib/src/parsing/parse-vp.test.ts
+++ b/src/lib/src/parsing/parse-vp.test.ts
@ -21,6 +21,7 @@ const maashoom = wordQuery("ماشوم", "noun");
 const leedul = wordQuery("لیدل", "verb");
 const kenaastul = wordQuery("کېناستل", "verb");
 const wurul = wordQuery("وړل", "verb");
 const akheestul = wordQuery("اخیستل", "verb");
 const tests: {
  label: string;
@ -65,6 +66,20 @@ const tests: {
        output: [],
        error: true,
      },
      {
        input: "زه سړی کور",
        output: [],
      },
      {
        input: "زه دې مې وینم",
        output: [],
        error: true,
      },
      {
        input: "وامې دې خیست",
        output: [],
        error: true,
      },
    ],
  },
  {
@ -1104,6 +1119,263 @@ const tests: {
      },
    ],
  },
  {
    label: "negatives and ordering",
    cases: [
      {
        input: "سړی تا نه ویني",
        output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
          blocks: [
            {
              key: 1,
              block: makeSubjectSelectionComplete({
                type: "NP",
                selection: makeNounSelection(sarey, undefined),
              }),
            },
            {
              key: 2,
              block: makeObjectSelectionComplete({
                type: "NP",
                selection: makePronounSelection(objectPerson),
              }),
            },
          ],
          verb: {
            type: "verb",
            verb: leedul,
            transitivity: "transitive",
            canChangeTransitivity: false,
            canChangeStatDyn: false,
            negative: true,
            tense: "presentVerb",
            canChangeVoice: true,
            isCompound: false,
            voice: "active",
          },
          externalComplement: undefined,
          form: {
            removeKing: false,
            shrinkServant: false,
          },
        })),
      },
      {
        input: "سړی نه تا ویني",
        output: [],
      },
      {
        input: "سړی تا ونه ویني",
        output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
          blocks: [
            {
              key: 1,
              block: makeSubjectSelectionComplete({
                type: "NP",
                selection: makeNounSelection(sarey, undefined),
              }),
            },
            {
              key: 2,
              block: makeObjectSelectionComplete({
                type: "NP",
                selection: makePronounSelection(objectPerson),
              }),
            },
          ],
          verb: {
            type: "verb",
            verb: leedul,
            transitivity: "transitive",
            canChangeTransitivity: false,
            canChangeStatDyn: false,
            negative: true,
            tense: "subjunctiveVerb",
            canChangeVoice: true,
            isCompound: false,
            voice: "active",
          },
          externalComplement: undefined,
          form: {
            removeKing: false,
            shrinkServant: false,
          },
        })),
      },
      // with regular و or وا perfective heads, the negative needs to be behind the perfective head
      {
        input: "سړی تا نه وویني",
        output: [],
      },
      {
        input: "سړي وانه خیستله",
        output: [
          {
            blocks: [
              {
                key: 1,
                block: makeSubjectSelectionComplete({
                  type: "NP",
                  selection: makeNounSelection(sarey, undefined),
                }),
              },
              {
                key: 2,
                block: makeObjectSelectionComplete({
                  type: "NP",
                  selection: makePronounSelection(T.Person.ThirdSingFemale),
                }),
              },
            ],
            verb: {
              type: "verb",
              verb: akheestul,
              transitivity: "transitive",
              canChangeTransitivity: false,
              canChangeStatDyn: false,
              negative: true,
              tense: "perfectivePast",
              canChangeVoice: true,
              isCompound: false,
              voice: "active",
            },
            externalComplement: undefined,
            form: {
              removeKing: true,
              shrinkServant: false,
            },
          },
        ],
      },
      {
        input: "سړي نه واخیستله",
        output: [],
      },
      // but for other perfective heads, the negative can go before or after
      {
        input: "زه نه کېنم",
        output: getPeople(1, "sing").flatMap((subjectPerson) =>
          (
            ["presentVerb", "subjunctiveVerb"] as const
          ).map<T.VPSelectionComplete>((tense) => ({
            blocks: [
              {
                key: 1,
                block: makeSubjectSelectionComplete({
                  type: "NP",
                  selection: makePronounSelection(subjectPerson),
                }),
              },
              {
                key: 2,
                block: {
                  type: "objectSelection",
                  selection: "none",
                },
              },
            ],
            verb: {
              type: "verb",
              verb: kenaastul,
              transitivity: "intransitive",
              canChangeTransitivity: false,
              canChangeStatDyn: false,
              negative: true,
              tense,
              canChangeVoice: true,
              isCompound: false,
              voice: "active",
            },
            externalComplement: undefined,
            form: {
              removeKing: false,
              shrinkServant: false,
            },
          }))
        ),
      },
      {
        input: "زه کېنه نم",
        output: getPeople(1, "sing").map<T.VPSelectionComplete>(
          (subjectPerson) => ({
            blocks: [
              {
                key: 1,
                block: makeSubjectSelectionComplete({
                  type: "NP",
                  selection: makePronounSelection(subjectPerson),
                }),
              },
              {
                key: 2,
                block: {
                  type: "objectSelection",
                  selection: "none",
                },
              },
            ],
            verb: {
              type: "verb",
              verb: kenaastul,
              transitivity: "intransitive",
              canChangeTransitivity: false,
              canChangeStatDyn: false,
              negative: true,
              tense: "subjunctiveVerb",
              canChangeVoice: true,
              isCompound: false,
              voice: "active",
            },
            externalComplement: undefined,
            form: {
              removeKing: false,
              shrinkServant: false,
            },
          })
        ),
      },
    ],
  },
  {
    label: "should check for subject / object conflicts",
    cases: [
      {
        input: "زه ما وینم",
        output: [],
        error: true,
      },
      {
        input: "ما زه ولیدلم",
        output: [],
        error: true,
      },
      {
        input: "تاسو تا ولیدئ",
        output: [],
        error: true,
      },
      {
        input: "زه مې وینم",
        output: [],
        error: true,
      },
      {
        input: "زه مې ولیدم",
        output: [],
        error: true,
      },
      {
        input: "ومې لیدم",
        output: [],
        error: true,
      },
      {
        input: "وینم مې",
        output: [],
        error: true,
      },
    ],
  },
 ];
 tests.forEach(({ label, cases }) => {
--- a/src/lib/src/parsing/parse-vp.ts
+++ b/src/lib/src/parsing/parse-vp.ts
@ -4,30 +4,16 @@ import {
  makeObjectSelectionComplete,
  makeSubjectSelectionComplete,
 } from "../phrase-building/blocks-utils";
-import { getPersonFromNP, isPastTense } from "../phrase-building/vp-tools";
+import {
  getPersonFromNP,
  isInvalidSubjObjCombo,
  isPastTense,
 } from "../phrase-building/vp-tools";
 import { parseBlocks } from "./parse-blocks";
 import { makePronounSelection } from "../phrase-building/make-selections";
 import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
 // to hide equatives type-doubling issue
 // demo
 // ماشوم
 // ماشومان
 // خوږ
 // masc plur
 // past tense
 // ماشومانو ښځه ولیدله
 // ماشومانو ښځه ولیدله
 // cool examples:
 // زه ماشوم وهم
 // وهلم // خواږه
 // ومې لیدې
 // ویې وهم
 // this should also conjugate to
 //  وامې نه خیسته
 // وامې نه خیستلو
@ -35,19 +21,11 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
 // وامې نه اخیست
 // waa-me nú akheest
 // TODO: add tests for negatives and negative order
 // TODO: imperfective past should also be "was going to / would have"
 // map over transitivities, to give transitive / gramm. transitive options
 // make impossible subjects like I saw me, error
 // TODO: learn how to yank / use plugin for JSON neovim
 // learn to use jq to edit selected json in vim ?? COOOL
 // TODO: transitivity options
 // TODO: the و is really making it slow down... why?
 export function parseVP(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
@ -58,11 +36,11 @@ export function parseVP(
  }
  const blocks = parseBlocks(tokens, lookup, verbLookup, [], []);
  return bindParseResult(blocks, (tokens, { blocks, kids }) => {
-    const phIndex = blocks.findIndex((x) => "type" in x && x.type === "PH");
+    const phIndex = blocks.findIndex((x) => x.type === "PH");
-    const vbeIndex = blocks.findIndex((x) => "type" in x && x.type === "VB");
+    const vbeIndex = blocks.findIndex((x) => x.type === "VB");
    const ba = !!kids.find((k) => k === "ba");
    const negIndex = blocks.findIndex(
-      (x) => "type" in x && x.type === "negative" && !x.imperative
+      (x) => x.type === "negative" && !x.imperative
    );
    const ph = phIndex !== -1 ? (blocks[phIndex] as T.ParsedPH) : undefined;
    const verb =
@ -110,10 +88,7 @@ export function parseVP(
      voice: "active",
    };
-    const nps = blocks.filter(
+    const nps = blocks.filter((x): x is T.ParsedNP => x.type === "NP");
      (x): x is { inflected: boolean; selection: T.NPSelection } =>
        "inflected" in x
    );
    // TODO: check that verb and PH match
    if (verb.info.verb.entry.c.includes("intrans")) {
      const errors: T.ParseError[] = [];
@ -258,7 +233,9 @@ export function parseVP(
                shrinkServant: true,
              },
            } as T.VPSelectionComplete,
-            errors
+            pronounConflictInBlocks(blocks)
              ? [...errors, { message: "invalid subject/object combo" }]
              : errors
          )
        );
      }
@ -357,7 +334,9 @@ export function parseVP(
              externalComplement: undefined,
              form,
            } as T.VPSelectionComplete,
-            errors,
+            errors: pronounConflictInBlocks(blocks)
              ? [...errors, { message: "invalid subject/object combo" }]
              : errors,
          }));
        });
      } else {
@ -369,6 +348,16 @@ export function parseVP(
            ] as const
          ).flatMap(([s, o, flip]) => {
            const errors: T.ParseError[] = [];
            if (
              isInvalidSubjObjCombo(
                getPersonFromNP(s.selection),
                getPersonFromNP(o.selection)
              )
            ) {
              errors.push({
                message: "invalid subject/object combo",
              });
            }
            if (!s.inflected) {
              errors.push({
                message:
@ -422,6 +411,16 @@ export function parseVP(
            ] as const
          ).flatMap(([s, o, flip]) => {
            const errors: T.ParseError[] = [];
            if (
              isInvalidSubjObjCombo(
                getPersonFromNP(s.selection),
                getPersonFromNP(o.selection)
              )
            ) {
              errors.push({
                message: "invalid subject/object combo",
              });
            }
            if (isFirstOrSecondPersPronoun(o.selection)) {
              if (!o.inflected) {
                errors.push({
@ -563,3 +562,16 @@ function negativeInPlace({
  }
  return true;
 }
 /**
  * Reports whether the subject and object found in a list of VP blocks form
  * an invalid person combination, as judged by `isInvalidSubjObjCombo`.
  *
  * @param blocks - the completed VP blocks to inspect
  * @returns `true` only when both a subject and an object block are present,
  *   the object resolves to a person, and the pair is rejected by
  *   `isInvalidSubjObjCombo`; `false` otherwise
  */
 function pronounConflictInBlocks(blocks: T.VPSBlockComplete[]): boolean {
  // `find` may come back empty, so keep `undefined` in the asserted type
  // instead of casting it away and crashing on `.selection` below
  const subj = blocks.find((b) => b.block.type === "subjectSelection")
    ?.block as T.SubjectSelectionComplete | undefined;
  const obj = blocks.find((b) => b.block.type === "objectSelection")
    ?.block as T.ObjectSelectionComplete | undefined;
  if (subj === undefined || obj === undefined) {
    // without both sides there is nothing that could conflict
    return false;
  }
  const subjPerson = getPersonFromNP(subj.selection);
  const objPerson = getPersonFromNP(obj.selection);
  // an object with no person cannot clash with the subject
  if (objPerson === undefined) {
    return false;
  }
  return isInvalidSubjObjCombo(subjPerson, objPerson);
 }
--- a/src/types.ts
+++ b/src/types.ts
@ -1196,7 +1196,13 @@ export type Block = {
 export type ParsedBlock = ParsedNP | ParsedPH | ParsedVBE | NegativeBlock;
 /** A parsed section tagged `"kids"` that carries a list of `ParsedKid` items. */
 export type ParsedKidsSection = {
  type: "kids";
  kids: ParsedKid[];
 };
 /**
  * A parsed noun-phrase block, discriminated by `type: "NP"` so it can be
  * told apart from the other `ParsedBlock` members.
  */
 export type ParsedNP = {
  type: "NP";
  // presumably whether the NP appeared in its inflected form — confirm against the parser
  inflected: boolean;
  selection: NPSelection;
 };