big improvement by removing the redundant PH parsing, added tests for negatives, and also added checking for S/O conflict in VP parsing

2023-08-22 19:33:53 +04:00 · 2023-08-22 19:33:53 +04:00 · a3ac5e2cb3
parent a7709c4299
commit a3ac5e2cb3
13 changed files with 604 additions and 1250 deletions
--- a/diagrams/diagram-light.png
+++ b/diagrams/diagram-light.png
--- a/src/lib/library.ts
+++ b/src/lib/library.ts
@ -15,7 +15,10 @@ import {
 } from "./src/verb-info";
 import { makeVPSelectionState } from "./src/phrase-building/verb-selection";
 import { vpsReducer } from "./src/phrase-building/vps-reducer";
-import { isPastTense } from "./src/phrase-building/vp-tools";
+import {
+  isPastTense,
+  isInvalidSubjObjCombo,
+} from "./src/phrase-building/vp-tools";
 import { getInflectionPattern } from "./src/inflection-pattern";
 import { makePsString, removeFVarients } from "./src/accent-and-ps-utils";

@ -45,12 +48,7 @@ import {
  standardizePhonetics,
 } from "./src/standardize-pashto";
 import { phoneticsToDiacritics } from "./src/phonetics-to-diacritics";
-import {
-  randomPerson,
-  isInvalidSubjObjCombo,
-  randomSubjObj,
-  getEnglishVerb,
-} from "./src/np-tools";
+import { randomPerson, randomSubjObj, getEnglishVerb } from "./src/np-tools";
 import {
  getEnglishFromRendered,
  getPashtoFromRendered,
--- a/src/lib/src/np-tools.ts
+++ b/src/lib/src/np-tools.ts
@ -1,24 +1,31 @@
 import * as T from "../../types";
-import { isFirstPerson, parseEc, isSecondPerson } from "./misc-helpers";
+import { parseEc } from "./misc-helpers";
+import { isInvalidSubjObjCombo } from "./phrase-building/vp-tools";

 function getRandPers(): T.Person {
  return Math.floor(Math.random() * 12);
 }

-export function randomPerson(a?: { prev?: T.Person, counterPart?: T.VerbObject | T.NPSelection }) {
+export function randomPerson(a?: {
+  prev?: T.Person;
+  counterPart?: T.VerbObject | T.NPSelection;
+}) {
  // no restrictions, just get any person
  if (!a) {
    return getRandPers();
  }
-    if (a.counterPart !== undefined && typeof a.counterPart === "object" && a.counterPart.selection.type === "pronoun") {
+  if (
+    a.counterPart !== undefined &&
+    typeof a.counterPart === "object" &&
+    a.counterPart.selection.type === "pronoun"
+  ) {
    // with counterpart pronoun
    let newP = 0;
    do {
      newP = getRandPers();
    } while (
-            isInvalidSubjObjCombo(a.counterPart.selection.person, newP)
-            ||
-            (newP === a.prev)
+      isInvalidSubjObjCombo(a.counterPart.selection.person, newP) ||
+      newP === a.prev
    );
    return newP;
  }
@ -30,23 +37,17 @@ export function randomPerson(a?: { prev?: T.Person, counterPart?: T.VerbObject |
  return newP;
 }

-export function isInvalidSubjObjCombo(subj: T.Person, obj: T.Person): boolean {
-    return (
-        (isFirstPerson(subj) && isFirstPerson(obj))
-        ||
-        (isSecondPerson(subj) && isSecondPerson(obj))
-    );
-}
-
-export function randomSubjObj(old?: { subj: T.Person, obj?: T.Person }): { subj: T.Person, obj: T.Person } {
+export function randomSubjObj(old?: { subj: T.Person; obj?: T.Person }): {
+  subj: T.Person;
+  obj: T.Person;
+} {
  let subj = 0;
  let obj = 0;
  do {
    subj = getRandPers();
    obj = getRandPers();
  } while (
-        (old && ((old.subj === subj) || (old.obj === obj)))
-        ||
+    (old && (old.subj === subj || old.obj === obj)) ||
    isInvalidSubjObjCombo(subj, obj)
  );
  return { subj, obj };
@ -74,7 +75,5 @@ export function getEnglishParticiple(entry: T.DictionaryEntry): string {
    return `to be/being ${entry.ep}`;
  }
  const participle = `${ec[2]} / to ${ec[0]}`;
-    return (entry.ep)
-        ? `${participle} ${entry.ep}`
-        : participle;
+  return entry.ep ? `${participle} ${entry.ep}` : participle;
 }
--- a/src/lib/src/parsing/lookup.tsx
+++ b/src/lib/src/parsing/lookup.tsx
@ -49,7 +49,7 @@ export function verbLookup(input: string): T.VerbEntry[] {
  // IMPORTANT TODO FOR EFFECIANCY!
  // check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING
  // if theres no legit verb ending and no tpp possibilities, just return an empty array
-  const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
+  // const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
  const checkTpp = shouldCheckTpp(input);
  const fromAawu = checkTpp && undoAaXuPattern(input);
  const inputWoutOo =
@ -61,19 +61,7 @@ export function verbLookup(input: string): T.VerbEntry[] {
  // TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp)
  if (s.endsWith("ېږ")) {
    return verbs.filter(
-      sWoutOo
-        ? ({ entry }) =>
-            [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
-            [
-              s.slice(0, -1) + "دل",
-              sWoutOo.slice(0, -1) + "دل",
-              sAddedAa.slice(0, -1) + "دل",
-            ].includes(entry.p) ||
-            [s, sWoutOo, sAddedAa].includes(entry.p) ||
-            (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
-            entry.prp === s ||
-            entry.ssp === s
-        : ({ entry }) =>
+      ({ entry }) =>
        [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
        [s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
          entry.p
@ -85,33 +73,7 @@ export function verbLookup(input: string): T.VerbEntry[] {
    );
  }
  return verbs.filter(
-    sWoutOo
-      ? ({ entry }) =>
-          [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
-          // for short intransitive forms
-          [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -3)) ||
-          [s, sWoutOo, sAddedAa].includes(entry.p) ||
-          (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
-          (checkTpp &&
-            [
-              input.slice(1),
-              fromAawu && fromAawu.slice(-1),
-              inputAddedAa,
-            ].includes(entry.p.slice(0, -1))) ||
-          (entry.tppp &&
-            arraysHaveCommon(
-              [input, inputWoutOo, sAddedAa],
-              splitVarients(entry.tppp)
-            )) ||
-          arraysHaveCommon(
-            [s, sAddedAa, "و" + s],
-            [entry.prp, entry.prp?.slice(0, -1)]
-          ) ||
-          [s, sAddedAa].includes(entry.ssp || "") ||
-          (entry.separationAtP &&
-            (entry.p.slice(entry.separationAtP) === s ||
-              entry.psp?.slice(entry.separationAtP) === s))
-      : ({ entry }) =>
+    ({ entry }) =>
      [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
      // for short intransitive forms
      [s, sAddedAa].includes(entry.p.slice(0, -3)) ||
@ -124,11 +86,8 @@ export function verbLookup(input: string): T.VerbEntry[] {
          splitVarients(entry.tppp)
        )) ||
      [s, sAddedAa].includes(entry.psp || "") ||
-          arraysHaveCommon(
-            [entry.prp, entry.prp?.slice(0, -1)],
-            [s, sAddedAa, "و" + s]
-          ) ||
-          [s, sAddedAa, "و" + s].includes(entry.ssp || "") ||
+      arraysHaveCommon([entry.prp, entry.prp?.slice(0, -1)], [s, sAddedAa]) ||
+      [s, sAddedAa].includes(entry.ssp || "") ||
      (entry.separationAtP &&
        // TODO this is super ugly, do check of short and long function
        (entry.p.slice(entry.separationAtP) === s ||
--- a/src/lib/src/parsing/parse-blocks.ts
+++ b/src/lib/src/parsing/parse-blocks.ts
@ -1,5 +1,4 @@
 import * as T from "../../../types";
-import { fmapParseResult } from "../fp-ps";
 import { parseKidsSection } from "./parse-kids-section";
 import { parseNeg } from "./parse-negative";
 import { parseNP } from "./parse-np";
@ -21,22 +20,22 @@ export function parseBlocks(
    return returnParseResult(tokens, { blocks, kids });
  }
  const prevPh: T.ParsedPH | undefined = blocks.find(
-    (b): b is T.ParsedPH => "type" in b && b.type === "PH"
+    (b): b is T.ParsedPH => b.type === "PH"
  );
  const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
-  const np = prevPh ? [] : fmapParseResult((x) => [x], parseNP(tokens, lookup));
+  const np = prevPh ? [] : parseNP(tokens, lookup);
  // UHOH... This could cause double paths ... maybe don't parse the PH in the parse VB!
-  const ph =
-    vbExists || prevPh ? [] : fmapParseResult((x) => [x], parsePH(tokens));
-  const vb = fmapParseResult(
-    ([ph, v]) => (ph ? [ph, v] : [v]),
-    parseVerb(tokens, verbLookup)
-  );
-  const neg = fmapParseResult((x) => [x], parseNeg(tokens));
+  const ph = vbExists || prevPh ? [] : parsePH(tokens);
+  const vb = parseVerb(tokens, verbLookup);
+  const neg = parseNeg(tokens);
  const kidsR = parseKidsSection(tokens, []);
-  const allResults = [...np, ...ph, ...neg, ...vb, ...kidsR] as T.ParseResult<
-    T.ParsedBlock[] | { kids: T.ParsedKid[] }
-  >[];
+  const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
+    ...np,
+    ...ph,
+    ...neg,
+    ...vb,
+    ...kidsR,
+  ];
  // TODO: is this necessary?
  // if (!allResults.length) {
  //   return [
@ -47,10 +46,9 @@ export function parseBlocks(
  //     },
  //   ];
  // }
-  console.log({ allResults });
  return bindParseResult(allResults, (tokens, r) => {
    const errors: T.ParseError[] = [];
-    if ("kids" in r) {
+    if (r.type === "kids") {
      return {
        next: parseBlocks(tokens, lookup, verbLookup, blocks, [
          ...kids,
@ -62,23 +60,21 @@ export function parseBlocks(
            : [],
      };
    }
-    if (prevPh && r.some((x) => "type" in x && x.type === "PH")) {
+    if (prevPh && r.type === "PH") {
      return [];
    }
-    const vb = r.find((x): x is T.ParsedVBE => "type" in x && x.type === "VB");
-    if (!phMatches(prevPh, vb)) {
+    // TODO: will have to handle welded
+    if (r.type === "VB") {
+      if (!phMatches(prevPh, r)) {
        return [];
      }
+    }
    // don't allow two negatives
-    if (
-      "type" in r[0] &&
-      r[0].type === "negative" &&
-      blocks.some((b) => "type" in b && b.type === "negative")
-    ) {
+    if (r.type === "negative" && blocks.some((b) => b.type === "negative")) {
      return [];
    }
    return {
-      next: parseBlocks(tokens, lookup, verbLookup, [...blocks, ...r], kids),
+      next: parseBlocks(tokens, lookup, verbLookup, [...blocks, r], kids),
      errors,
    };
  });
--- a/src/lib/src/parsing/parse-kids-section.ts
+++ b/src/lib/src/parsing/parse-kids-section.ts
@ -5,14 +5,18 @@ import { bindParseResult, returnParseResult } from "./utils";
 export function parseKidsSection(
  tokens: Readonly<T.Token[]>,
  prevKids: T.ParsedKid[]
-): T.ParseResult<{ kids: T.ParsedKid[] }>[] {
+): T.ParseResult<T.ParsedKidsSection>[] {
  if (tokens.length === 0) {
-    return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
+    return prevKids.length
+      ? returnParseResult(tokens, { type: "kids", kids: prevKids })
+      : [];
  }
  const parsedKid = parseKid(tokens);
  // TODO: is this even necessary ??
  if (!parsedKid.length) {
-    return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
+    return prevKids.length
+      ? returnParseResult(tokens, { type: "kids", kids: prevKids })
+      : [];
  }
  return bindParseResult(parsedKid, (tokens, r) => {
    // return parseKidsSection(tokens, [...prevKids, r]);
--- a/src/lib/src/parsing/parse-np.ts
+++ b/src/lib/src/parsing/parse-np.ts
@ -21,11 +21,9 @@ export function parseNP(
          inflected: boolean;
          selection: T.NounSelection;
        }
-  ): {
-    inflected: boolean;
-    selection: T.NPSelection;
-  } {
+  ): T.ParsedNP {
    return {
+      type: "NP",
      inflected: a.inflected,
      selection: {
        type: "NP",
--- a/src/lib/src/parsing/parse-ph.ts
+++ b/src/lib/src/parsing/parse-ph.ts
@ -18,7 +18,7 @@ const phs = [

 export function parsePH(
  tokens: Readonly<T.Token[]>
-): T.ParseResult<{ type: "PH"; s: string }>[] {
+): T.ParseResult<T.ParsedPH>[] {
  if (tokens.length === 0) {
    return [];
  }
--- a/src/lib/src/parsing/parse-verb.test.ts
+++ b/src/lib/src/parsing/parse-verb.test.ts
--- a/src/lib/src/parsing/parse-verb.ts
+++ b/src/lib/src/parsing/parse-verb.ts
@ -22,7 +22,7 @@ import {
 export function parseVerb(
  tokens: Readonly<T.Token[]>,
  verbLookup: (s: string) => T.VerbEntry[]
-): T.ParseResult<[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]>[] {
+): T.ParseResult<T.ParsedVBE>[] {
  if (tokens.length === 0) {
    return [];
  }
@ -57,8 +57,8 @@ function matchVerbs(
    root: T.Person[];
    stem: T.Person[];
  }
-): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
-  const w: ReturnType<typeof matchVerbs> = [];
+): T.ParsedVBE[] {
+  const w: T.ParsedVBE[] = [];
  const lEnding = s.endsWith("ل");
  const base = s.endsWith("ل") ? s : s.slice(0, -1);
  const matchShortOrLong = (b: string, x: string) => {
@ -80,167 +80,52 @@ function matchVerbs(
          return e.p.slice(0, -1) === base;
        }
      }),
-      perfective: entries.reduce<
-        { ph: string | undefined; entry: T.VerbEntry }[]
-      >((acc, entry) => {
+      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        const baseWAa = "ا" + base;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.ssp) {
-          const bRest = e.separationAtP ? e.ssp.slice(e.separationAtP) : "";
+          if (e.separationAtP) {
+            const bRest = e.ssp.slice(e.separationAtP);
            if (bRest === base) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+              return [...acc, entry];
            }
+          } else {
            if (e.ssp === base) {
-            return [
-              ...acc,
-              {
-                ph: e.separationAtF
-                  ? e.ssp.slice(0, e.separationAtP)
-                  : undefined,
-                entry,
-              },
-            ];
+              return [...acc, entry];
+            }
          }
        } else if (e.psp) {
-          const bRest = e.separationAtP ? e.psp.slice(e.separationAtP) : "";
+          if (e.separationAtP) {
+            const bRest = e.psp.slice(e.separationAtP);
            if (bRest === base) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
-          }
-          if (e.psp === base && e.separationAtP) {
-            return [
-              ...acc,
-              {
-                ph: e.psp.slice(0, e.separationAtP),
-                entry,
-              },
-            ];
+              return [...acc, entry];
            }
+          } else {
            if (!e.sepOo) {
-            if (base.startsWith("وا") && base.slice(1) === e.psp) {
-              return [
-                ...acc,
-                {
-                  ph: "وا",
-                  entry,
-                },
-              ];
-            }
-            if ((base.startsWith("و") && base.slice(1)) === e.psp) {
-              return [
-                ...acc,
-                {
-                  ph: "و",
-                  entry,
-                },
-              ];
-            }
              if (baseWAa === e.psp) {
-              return [
-                ...acc,
-                {
-                  ph: undefined,
-                  entry,
-                },
-              ];
+                return [...acc, entry];
              }
            }
            if (base === e.psp) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+              return [...acc, entry];
+            }
          }
        } else if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
          if ([miniRoot, miniRootEg].includes(base)) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
-          } else if (!e.sepOo) {
-            if (
-              base.startsWith("وا") &&
-              [miniRoot, miniRootEg].includes(base.slice(1))
-            ) {
-              return [
-                ...acc,
-                {
-                  ph: "وا",
-                  entry,
-                },
-              ];
-            } else if (
-              base.startsWith("و") &&
-              [miniRoot, miniRootEg].includes(base.slice(1))
-            ) {
-              return [
-                ...acc,
-                {
-                  ph: "و",
-                  entry,
-                },
-              ];
-            }
+            return [...acc, entry];
          }
        } else {
          const eb = e.p.slice(0, -1);
          if (eb === base) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+            return [...acc, entry];
          } else if (!e.sepOo) {
-            if (base.startsWith("وا") && eb === base.slice(1)) {
-              return [
-                ...acc,
-                {
-                  ph: "وا",
-                  entry,
-                },
-              ];
-            }
-            if (base.startsWith("و") && eb === base.slice(1)) {
-              return [
-                ...acc,
-                {
-                  ph: "و",
-                  entry,
-                },
-              ];
-            }
            if (baseWAa === base.slice(1)) {
-              return [
-                ...acc,
-                {
-                  ph: undefined,
-                  entry,
-                },
-              ];
+              return [...acc, entry];
            }
          }
        }
@ -250,19 +135,16 @@ function matchVerbs(
    Object.entries(stemMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.stem.forEach((person) => {
-          w.push([
-            "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
-            {
+          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
-                verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
+              verb: removeFVarientsFromVerb(verb),
            },
-            },
-          ]);
+          });
        });
      });
    });
@ -272,56 +154,21 @@ function matchVerbs(
      imperfective: entries.filter(
        ({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
      ),
-      perfective: entries.reduce<
-        { ph: string | undefined; entry: T.VerbEntry }[]
-      >((acc, entry) => {
+      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.separationAtP) {
          const b = e.prp || e.p;
-          const bHead = b.slice(0, e.separationAtP);
          const bRest = b.slice(e.separationAtP);
-          if (matchShortOrLong(base, b)) {
-            return [
-              ...acc,
-              {
-                ph: bHead,
-                entry,
-              },
-            ];
-          } else if (matchShortOrLong(base, bRest)) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+          if (matchShortOrLong(base, bRest)) {
+            return [...acc, entry];
          }
        } else {
-          const baseNoOo = base.startsWith("و") && base.slice(1);
          const p = e.prp || e.p;
-          if (baseNoOo && matchShortOrLong(baseNoOo, p)) {
-            return [
-              ...acc,
-              {
-                ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
-                entry,
-              },
-            ];
-          } else if (
-            matchShortOrLong(base, p) ||
-            matchShortOrLong("ا" + base, p)
-          ) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+          if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
+            return [...acc, entry];
          }
        }
        return acc;
@ -331,19 +178,16 @@ function matchVerbs(
    Object.entries(rootMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.root.forEach((person) => {
-          w.push([
-            "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
-            {
+          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "root",
-                verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
+              verb: removeFVarientsFromVerb(verb),
            },
-            },
-          ]);
+          });
        });
      });
    });
@ -351,8 +195,6 @@ function matchVerbs(
  const hamzaEnd = s.at(-1) === "ه";
  const oEnd = s.at(-1) === "و";
  const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1));
-  const b = hamzaEnd || oEnd ? base : s;
-  const bNoOo = b.startsWith("و") && b.slice(1);
  const tppMatches = {
    imperfective: entries.filter(
      ({ entry: e }) =>
@ -363,163 +205,63 @@ function matchVerbs(
            (hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1)))
      // TODO: if check for modified aaXu thing!
    ),
-    perfective: entries.reduce<
-      { ph: string | undefined; entry: T.VerbEntry }[]
-    >((acc, entry) => {
+    perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      if (e.separationAtP) {
        const b = e.prp || e.p;
-        const bHead = b.slice(0, e.separationAtP);
        const bRest = b.slice(e.separationAtP);
        if (bRest === "شول") {
          return acc;
        }
        if (abruptEnd) {
-          if (s === b.slice(0, -1)) {
-            return [
-              ...acc,
-              {
-                ph: bHead,
-                entry,
-              },
-            ];
-          }
          if (s === bRest.slice(0, -1)) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+            return [...acc, entry];
          }
        } else if (hamzaEnd) {
-          if (base === b.slice(0, -1)) {
-            return [
-              ...acc,
-              {
-                ph: bHead,
-                entry,
-              },
-            ];
-          }
          if (base === bRest.slice(0, -1)) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+            return [...acc, entry];
          }
        } else if (oEnd) {
-          if ([b, b.slice(0, -1)].includes(base)) {
-            return [
-              ...acc,
-              {
-                ph: bHead,
-                entry,
-              },
-            ];
-          }
          if ([bRest, bRest.slice(0, -1)].includes(base)) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+            return [...acc, entry];
          }
        }
      } else if (!e.prp) {
        if (oEnd) {
-          if (bNoOo && [e.p, e.p.slice(0, -1).includes(bNoOo)]) {
-            return [
-              ...acc,
-              {
-                ph: "و",
-                entry,
-              },
-            ];
-          } else if ([e.p, e.p.slice(0, -1)].includes(base)) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+          if ([e.p, e.p.slice(0, -1)].includes(base)) {
+            return [...acc, entry];
          }
        } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
          const b = hamzaEnd ? base : s;
          const p = e.p.slice(0, -1);
-          if (bNoOo && bNoOo === p) {
-            return [
-              ...acc,
-              {
-                ph: "و",
-                entry,
-              },
-            ];
-          } else if (b === p) {
-            return [
-              ...acc,
-              {
-                ph: undefined,
-                entry,
-              },
-            ];
+          if (b === p) {
+            return [...acc, entry];
          }
        }
      }
-      const sNoOo = s.startsWith("و") && s.slice(1);
-      if (isInVarients(e.tppp, sNoOo)) {
-        return [
-          ...acc,
-          {
-            ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
-            entry,
-          },
-        ];
-      } else if (isInVarients(e.tppp, s)) {
-        return [
-          ...acc,
-          {
-            ph: undefined,
-            entry,
-          },
-        ];
+      if (isInVarients(e.tppp, s)) {
+        return [...acc, entry];
      } else if (isInVarients(e.tppp, "ا" + s)) {
-        return [
-          ...acc,
-          {
-            ph: undefined,
-            entry,
-          },
-        ];
+        return [...acc, entry];
      }
      return acc;
    }, []),
  };
  Object.entries(tppMatches).forEach(([aspect, entries]) => {
    entries.forEach((verb) => {
-      w.push([
-        "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
-        {
+      w.push({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect: aspect as T.Aspect,
          base: "root",
-            verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
+          verb: removeFVarientsFromVerb(verb),
        },
-        },
-      ]);
+      });
    });
  });
  return w;
@ -580,49 +322,10 @@ function getVerbEnding(p: string): {
  };
 }

-// const [ph, rest]: [T.PH | undefined, T.PsString] = v.entry.noOo
-//   ? [undefined, base]
-//   : v.entry.sepOo
-//   ? [{ type: "PH", ps: { p: "و ", f: "óo`" } }, base]
-//   : ["آ", "ا"].includes(base.p.charAt(0)) && base.f.charAt(0) === "a"
-//   ? [{ type: "PH", ps: { p: "وا", f: "wáa" } }, removeAStart(base)]
-//   : ["óo", "oo"].includes(base.f.slice(0, 2))
-//   ? [{ type: "PH", ps: { p: "و", f: "wÚ" } }, base]
-//   : ["ée", "ee"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "ای"
-//   ? [
-//       { type: "PH", ps: { p: "وي", f: "wée" } },
-//       {
-//         p: base.p.slice(2),
-//         f: base.f.slice(2),
-//       },
-//     ]
-//   : ["é", "e"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "اې"
-//   ? [
-//       { type: "PH", ps: { p: "وي", f: "wé" } },
-//       {
-//         p: base.p.slice(2),
-//         f: base.f.slice(1),
-//       },
-//     ]
-//   : ["ó", "o"].includes(base.f[0]) && base.p.slice(0, 2) === "او"
-//   ? [{ type: "PH", ps: { p: "و", f: "óo`" } }, base]
-//   : [{ type: "PH", ps: { p: "و", f: "óo" } }, base];
-// return [ph, removeAccents(rest)];
-// function removeAStart(ps: T.PsString) {
-//   return {
-//     p: ps.p.slice(1),
-//     f: ps.f.slice(ps.f[1] === "a" ? 2 : 1),
-//   };
-// }
-
 // TODO: could handle all sh- verbs for efficiencies sake
-function parseIrregularVerb(
-  s: string
-): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
+function parseIrregularVerb(s: string): T.ParsedVBE[] {
  if (["ته", "راته", "ورته", "درته"].includes(s)) {
    return [
-      [
-        undefined,
      {
        type: "VB",
        info: {
@ -639,7 +342,6 @@ function parseIrregularVerb(
        },
        person: T.Person.ThirdSingMale,
      },
-      ],
    ];
  }
  if (s === "شو") {
@ -649,11 +351,7 @@ function parseIrregularVerb(
        T.Person.FirstPlurMale,
        T.Person.FirstPlurFemale,
      ].flatMap((person) =>
-        [kedulStat, kedulDyn].map<
-          [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
-        >((verb) => [
-          undefined,
-          {
+        [kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
          type: "VB",
          info: {
            aspect: "perfective",
@ -662,15 +360,10 @@ function parseIrregularVerb(
            verb,
          },
          person,
-          },
-        ])
+        }))
      ),
      ...[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].flatMap((person) =>
-        [kedulStat, kedulDyn].map<
-          [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
-        >((verb) => [
-          undefined,
-          {
+        [kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
          type: "VB",
          info: {
            aspect: "perfective",
@ -679,8 +372,7 @@ function parseIrregularVerb(
            verb,
          },
          person,
-          },
-        ])
+        }))
      ),
    ];
  }
--- a/src/lib/src/parsing/parse-vp.test.ts
+++ b/src/lib/src/parsing/parse-vp.test.ts
@ -21,6 +21,7 @@ const maashoom = wordQuery("ماشوم", "noun");
 const leedul = wordQuery("لیدل", "verb");
 const kenaastul = wordQuery("کېناستل", "verb");
 const wurul = wordQuery("وړل", "verb");
+const akheestul = wordQuery("اخیستل", "verb");

 const tests: {
  label: string;
@ -65,6 +66,20 @@ const tests: {
        output: [],
        error: true,
      },
+      {
+        input: "زه سړی کور",
+        output: [],
+      },
+      {
+        input: "زه دې مې وینم",
+        output: [],
+        error: true,
+      },
+      {
+        input: "وامې دې خیست",
+        output: [],
+        error: true,
+      },
    ],
  },
  {
@ -1104,6 +1119,263 @@ const tests: {
      },
    ],
  },
+  {
+    label: "negatives and ordering",
+    cases: [
+      {
+        input: "سړی تا نه ویني",
+        output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
+          blocks: [
+            {
+              key: 1,
+              block: makeSubjectSelectionComplete({
+                type: "NP",
+                selection: makeNounSelection(sarey, undefined),
+              }),
+            },
+            {
+              key: 2,
+              block: makeObjectSelectionComplete({
+                type: "NP",
+                selection: makePronounSelection(objectPerson),
+              }),
+            },
+          ],
+          verb: {
+            type: "verb",
+            verb: leedul,
+            transitivity: "transitive",
+            canChangeTransitivity: false,
+            canChangeStatDyn: false,
+            negative: true,
+            tense: "presentVerb",
+            canChangeVoice: true,
+            isCompound: false,
+            voice: "active",
+          },
+          externalComplement: undefined,
+          form: {
+            removeKing: false,
+            shrinkServant: false,
+          },
+        })),
+      },
+      {
+        input: "سړی نه تا ویني",
+        output: [],
+      },
+      {
+        input: "سړی تا ونه ویني",
+        output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
+          blocks: [
+            {
+              key: 1,
+              block: makeSubjectSelectionComplete({
+                type: "NP",
+                selection: makeNounSelection(sarey, undefined),
+              }),
+            },
+            {
+              key: 2,
+              block: makeObjectSelectionComplete({
+                type: "NP",
+                selection: makePronounSelection(objectPerson),
+              }),
+            },
+          ],
+          verb: {
+            type: "verb",
+            verb: leedul,
+            transitivity: "transitive",
+            canChangeTransitivity: false,
+            canChangeStatDyn: false,
+            negative: true,
+            tense: "subjunctiveVerb",
+            canChangeVoice: true,
+            isCompound: false,
+            voice: "active",
+          },
+          externalComplement: undefined,
+          form: {
+            removeKing: false,
+            shrinkServant: false,
+          },
+        })),
+      },
+      // with regular و or وا perfective heads, the negative needs to be behind the perfective head
+      {
+        input: "سړی تا نه وویني",
+        output: [],
+      },
+      {
+        input: "سړي وانه خیستله",
+        output: [
+          {
+            blocks: [
+              {
+                key: 1,
+                block: makeSubjectSelectionComplete({
+                  type: "NP",
+                  selection: makeNounSelection(sarey, undefined),
+                }),
+              },
+              {
+                key: 2,
+                block: makeObjectSelectionComplete({
+                  type: "NP",
+                  selection: makePronounSelection(T.Person.ThirdSingFemale),
+                }),
+              },
+            ],
+            verb: {
+              type: "verb",
+              verb: akheestul,
+              transitivity: "transitive",
+              canChangeTransitivity: false,
+              canChangeStatDyn: false,
+              negative: true,
+              tense: "perfectivePast",
+              canChangeVoice: true,
+              isCompound: false,
+              voice: "active",
+            },
+            externalComplement: undefined,
+            form: {
+              removeKing: true,
+              shrinkServant: false,
+            },
+          },
+        ],
+      },
+      {
+        input: "سړي نه واخیستله",
+        output: [],
+      },
+      // but for other perfective heads, the negative can go before or after
+      {
+        input: "زه نه کېنم",
+        output: getPeople(1, "sing").flatMap((subjectPerson) =>
+          (
+            ["presentVerb", "subjunctiveVerb"] as const
+          ).map<T.VPSelectionComplete>((tense) => ({
+            blocks: [
+              {
+                key: 1,
+                block: makeSubjectSelectionComplete({
+                  type: "NP",
+                  selection: makePronounSelection(subjectPerson),
+                }),
+              },
+              {
+                key: 2,
+                block: {
+                  type: "objectSelection",
+                  selection: "none",
+                },
+              },
+            ],
+            verb: {
+              type: "verb",
+              verb: kenaastul,
+              transitivity: "intransitive",
+              canChangeTransitivity: false,
+              canChangeStatDyn: false,
+              negative: true,
+              tense,
+              canChangeVoice: true,
+              isCompound: false,
+              voice: "active",
+            },
+            externalComplement: undefined,
+            form: {
+              removeKing: false,
+              shrinkServant: false,
+            },
+          }))
+        ),
+      },
+      {
+        input: "زه کېنه نم",
+        output: getPeople(1, "sing").map<T.VPSelectionComplete>(
+          (subjectPerson) => ({
+            blocks: [
+              {
+                key: 1,
+                block: makeSubjectSelectionComplete({
+                  type: "NP",
+                  selection: makePronounSelection(subjectPerson),
+                }),
+              },
+              {
+                key: 2,
+                block: {
+                  type: "objectSelection",
+                  selection: "none",
+                },
+              },
+            ],
+            verb: {
+              type: "verb",
+              verb: kenaastul,
+              transitivity: "intransitive",
+              canChangeTransitivity: false,
+              canChangeStatDyn: false,
+              negative: true,
+              tense: "subjunctiveVerb",
+              canChangeVoice: true,
+              isCompound: false,
+              voice: "active",
+            },
+            externalComplement: undefined,
+            form: {
+              removeKing: false,
+              shrinkServant: false,
+            },
+          })
+        ),
+      },
+    ],
+  },
+  {
+    label: "should check for subject / object conflicts",
+    cases: [
+      {
+        input: "زه ما وینم",
+        output: [],
+        error: true,
+      },
+      {
+        input: "ما زه ولیدلم",
+        output: [],
+        error: true,
+      },
+      {
+        input: "تاسو تا ولیدئ",
+        output: [],
+        error: true,
+      },
+      {
+        input: "زه مې وینم",
+        output: [],
+        error: true,
+      },
+      {
+        input: "زه مې ولیدم",
+        output: [],
+        error: true,
+      },
+      {
+        input: "ومې لیدم",
+        output: [],
+        error: true,
+      },
+      {
+        input: "وینم مې",
+        output: [],
+        error: true,
+      },
+    ],
+  },
 ];

 tests.forEach(({ label, cases }) => {
--- a/src/lib/src/parsing/parse-vp.ts
+++ b/src/lib/src/parsing/parse-vp.ts
@ -4,30 +4,16 @@ import {
  makeObjectSelectionComplete,
  makeSubjectSelectionComplete,
 } from "../phrase-building/blocks-utils";
-import { getPersonFromNP, isPastTense } from "../phrase-building/vp-tools";
+import {
+  getPersonFromNP,
+  isInvalidSubjObjCombo,
+  isPastTense,
+} from "../phrase-building/vp-tools";
 import { parseBlocks } from "./parse-blocks";
 import { makePronounSelection } from "../phrase-building/make-selections";
 import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
 // to hide equatives type-doubling issue

-// demo
-
-// ماشوم
-// ماشومان
-// خوږ
-// masc plur
-
-// past tense
-// ماشومانو ښځه ولیدله
-// ماشومانو ښځه ولیدله
-
-// cool examples:
-// زه ماشوم وهم
-// وهلم // خواږه
-
-// ومې لیدې
-// ویې وهم
-
 // this should also conjugate to
 //  وامې نه خیسته
 // وامې نه خیستلو
@ -35,19 +21,11 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
 // وامې نه اخیست
 // waa-me nú akheest

-// TODO: add tests for negatives and negative order
-// TODO: imperfective past should also be "was going to / would have"
 // map over transitivities, to give transitive / gramm. transitive options

-// make impossible subjects like I saw me, error
-
 // TODO: learn how to yank / use plugin for JSON neovim
 // learn to use jq to edit selected json in vim ?? COOOL

-// TODO: transitivity options
-
-// TODO: the و is really making it slow down... why?
-
 export function parseVP(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
@ -58,11 +36,11 @@ export function parseVP(
  }
  const blocks = parseBlocks(tokens, lookup, verbLookup, [], []);
  return bindParseResult(blocks, (tokens, { blocks, kids }) => {
-    const phIndex = blocks.findIndex((x) => "type" in x && x.type === "PH");
-    const vbeIndex = blocks.findIndex((x) => "type" in x && x.type === "VB");
+    const phIndex = blocks.findIndex((x) => x.type === "PH");
+    const vbeIndex = blocks.findIndex((x) => x.type === "VB");
    const ba = !!kids.find((k) => k === "ba");
    const negIndex = blocks.findIndex(
-      (x) => "type" in x && x.type === "negative" && !x.imperative
+      (x) => x.type === "negative" && !x.imperative
    );
    const ph = phIndex !== -1 ? (blocks[phIndex] as T.ParsedPH) : undefined;
    const verb =
@ -110,10 +88,7 @@ export function parseVP(
      voice: "active",
    };

-    const nps = blocks.filter(
-      (x): x is { inflected: boolean; selection: T.NPSelection } =>
-        "inflected" in x
-    );
+    const nps = blocks.filter((x): x is T.ParsedNP => x.type === "NP");
    // TODO: check that verb and PH match
    if (verb.info.verb.entry.c.includes("intrans")) {
      const errors: T.ParseError[] = [];
@ -258,7 +233,9 @@ export function parseVP(
                shrinkServant: true,
              },
            } as T.VPSelectionComplete,
-            errors
+            pronounConflictInBlocks(blocks)
+              ? [...errors, { message: "invalid subject/object combo" }]
+              : errors
          )
        );
      }
@ -357,7 +334,9 @@ export function parseVP(
              externalComplement: undefined,
              form,
            } as T.VPSelectionComplete,
-            errors,
+            errors: pronounConflictInBlocks(blocks)
+              ? [...errors, { message: "invalid subject/object combo" }]
+              : errors,
          }));
        });
      } else {
@ -369,6 +348,16 @@ export function parseVP(
            ] as const
          ).flatMap(([s, o, flip]) => {
            const errors: T.ParseError[] = [];
+            if (
+              isInvalidSubjObjCombo(
+                getPersonFromNP(s.selection),
+                getPersonFromNP(o.selection)
+              )
+            ) {
+              errors.push({
+                message: "invalid subject/object combo",
+              });
+            }
            if (!s.inflected) {
              errors.push({
                message:
@ -422,6 +411,16 @@ export function parseVP(
            ] as const
          ).flatMap(([s, o, flip]) => {
            const errors: T.ParseError[] = [];
+            if (
+              isInvalidSubjObjCombo(
+                getPersonFromNP(s.selection),
+                getPersonFromNP(o.selection)
+              )
+            ) {
+              errors.push({
+                message: "invalid subject/object combo",
+              });
+            }
            if (isFirstOrSecondPersPronoun(o.selection)) {
              if (!o.inflected) {
                errors.push({
@ -563,3 +562,16 @@ function negativeInPlace({
  }
  return true;
 }
+
+/**
+ * Reports whether the subject and object found in a completed set of VP
+ * blocks form an invalid subject/object combination.
+ *
+ * @param blocks - the completed VP blocks to inspect
+ * @returns `true` when `isInvalidSubjObjCombo` rejects the pair of persons;
+ *   `false` when either block is missing or no person is found for the object
+ */
+function pronounConflictInBlocks(blocks: T.VPSBlockComplete[]): boolean {
+  // `find` may return undefined, so keep that possibility in the asserted type
+  const subj = blocks.find((b) => b.block.type === "subjectSelection")
+    ?.block as T.SubjectSelectionComplete | undefined;
+  const obj = blocks.find((b) => b.block.type === "objectSelection")
+    ?.block as T.ObjectSelectionComplete | undefined;
+  // without both a subject and an object block there is no pair to compare
+  if (subj === undefined || obj === undefined) {
+    return false;
+  }
+  const subjPerson = getPersonFromNP(subj.selection);
+  const objPerson = getPersonFromNP(obj.selection);
+  // getPersonFromNP gave no person for the object, so nothing can conflict
+  if (objPerson === undefined) {
+    return false;
+  }
+  return isInvalidSubjObjCombo(subjPerson, objPerson);
+}
--- a/src/types.ts
+++ b/src/types.ts
@ -1196,7 +1196,13 @@ export type Block = {

 export type ParsedBlock = ParsedNP | ParsedPH | ParsedVBE | NegativeBlock;

+// A parsed "kids" section: a `type: "kids"` tag wrapping a list of parsed kids.
+// NOTE(review): presumably these are the kids that parseVP inspects
+// (e.g. "ba") — confirm against ParsedKid.
+export type ParsedKidsSection = {
+  type: "kids";
+  kids: ParsedKid[];
+};
+
+// A noun phrase as produced by the parser; one member of the ParsedBlock union.
 export type ParsedNP = {
+  // discriminant tag, so a ParsedBlock can be narrowed with `x.type === "NP"`
+  type: "NP";
+  // NOTE(review): presumably whether the NP appeared in its inflected form — confirm
   inflected: boolean;
+  // the NP selection this parsed block carries
   selection: NPSelection;
 };