Compare commits
3 Commits
191abc5778
...
f17ebddaa1
Author | SHA1 | Date |
---|---|---|
adueck | f17ebddaa1 | |
adueck | 0ade410698 | |
adueck | 73eb04d7e0 |
|
@ -11,6 +11,7 @@ lerna-debug.log*
|
|||
src/verbs.ts
|
||||
src/nouns-adjs.ts
|
||||
vocab/mini-dict-entries.ts
|
||||
src/lib/src/parsing/split-verbs.ts
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
import * as T from "./src/types";
|
||||
import * as tp from "./src/lib/src/type-predicates";
|
||||
import fs from "fs";
|
||||
|
||||
import { entries as collection } from "./vocab/mini-dict-tss";
|
||||
|
||||
const res = await fetch(
|
||||
"https://storage.lingdocs.com/dictionary/dictionary.json"
|
||||
);
|
||||
const dictionary = (await res.json()) as T.Dictionary;
|
||||
|
||||
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
|
||||
collection.includes(x.ts)
|
||||
);
|
||||
|
||||
const splitEntries: T.VerbDictionaryEntry[] =
|
||||
dictionary.entries.filter<T.VerbDictionaryEntry>(
|
||||
(x): x is T.VerbDictionaryEntry =>
|
||||
tp.isVerbDictionaryEntry(x) &&
|
||||
!!x.separationAtP &&
|
||||
!["کول", "کېدل"].includes(x.p)
|
||||
);
|
||||
|
||||
const miniDictContents = `import { DictionaryEntry } from "../src/types";
|
||||
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
|
||||
export const entries: DictionaryEntry[] = [
|
||||
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
|
||||
];
|
||||
`;
|
||||
|
||||
const splitVerbContents = `import { VerbEntry, VerbDictionaryEntry } from "../../../types";
|
||||
// DO NOT MODIFY - GENERATED
|
||||
export const entries: VerbEntry[] = [
|
||||
${splitEntries
|
||||
.map((e) => `\t{ entry: ${JSON.stringify(e)} as VerbDictionaryEntry },`)
|
||||
.join("\n")}
|
||||
];
|
||||
`;
|
||||
|
||||
fs.writeFileSync("./vocab/mini-dict-entries.ts", miniDictContents);
|
||||
fs.writeFileSync("./src/lib/src/parsing/split-verbs.ts", splitVerbContents);
|
|
@ -1,22 +0,0 @@
|
|||
import * as T from "./src/types";
|
||||
import fs from "fs";
|
||||
|
||||
import { entries as collection } from "./vocab/mini-dict-tss";
|
||||
|
||||
const res = await fetch(
|
||||
"https://storage.lingdocs.com/dictionary/dictionary.json"
|
||||
);
|
||||
const dictionary = (await res.json()) as T.Dictionary;
|
||||
|
||||
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
|
||||
collection.includes(x.ts)
|
||||
);
|
||||
|
||||
const contents = `import { DictionaryEntry } from "../src/types";
|
||||
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
|
||||
export const entries: DictionaryEntry[] = [
|
||||
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
|
||||
];
|
||||
`;
|
||||
|
||||
fs.writeFileSync("./vocab/mini-dict-entries.ts", contents);
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "pashto-inflector-website",
|
||||
"version": "7.7.1",
|
||||
"version": "7.7.3",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version",
|
||||
|
@ -14,7 +14,7 @@
|
|||
"build-website": "tsc -b && vite build",
|
||||
"build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs",
|
||||
"build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib",
|
||||
"get-words": "node get-words.cjs && tsx get-mini-dict.ts",
|
||||
"get-words": "node get-words.cjs && tsx get-mini-dict-and-split-verbs.ts",
|
||||
"check-all-inflections": "tsx check-all-inflections.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
|
|
|
@ -19,7 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
|
|||
import Hider from "./components/src/Hider";
|
||||
import InflectionDemo from "./demo-components/InflectionDemo";
|
||||
import SpellingDemo from "./demo-components/SpellingDemo";
|
||||
// import ParserDemo from "./demo-components/ParserDemo";
|
||||
import ParserDemo from "./demo-components/ParserDemo";
|
||||
// import InflectionTable from "./components/src/InflectionsTable";
|
||||
|
||||
function App() {
|
||||
|
@ -163,7 +163,7 @@ function App() {
|
|||
>
|
||||
<SpellingDemo opts={textOptions} onChange={setTextOptions} />
|
||||
</Hider>
|
||||
{/* <Hider
|
||||
<Hider
|
||||
label="Parser (🚧 IN PROGRESS 🚧)"
|
||||
hLevel={3}
|
||||
showing={showing === "parser"}
|
||||
|
@ -174,7 +174,7 @@ function App() {
|
|||
entryFeeder={entryFeeder}
|
||||
dictionary={dictionary}
|
||||
/>
|
||||
</Hider> */}
|
||||
</Hider>
|
||||
</div>
|
||||
</main>
|
||||
<Modal
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@lingdocs/ps-react",
|
||||
"version": "7.7.1",
|
||||
"version": "7.7.3",
|
||||
"description": "Pashto inflector library module with React components",
|
||||
"main": "dist/components/library.js",
|
||||
"module": "dist/components/library.js",
|
||||
|
|
|
@ -3,13 +3,18 @@ import * as T from "../types";
|
|||
// import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||
import { tokenizer } from "../lib/src/parsing/tokenizer";
|
||||
// import { NPDisplay } from "../components/library";
|
||||
// import EditableVP from "../components/src/vp-explorer/EditableVP";
|
||||
// import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
|
||||
import { parseNoun } from "../lib/src/parsing/parse-noun-new";
|
||||
import EditableVP from "../components/src/vp-explorer/EditableVP";
|
||||
import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
|
||||
// import { parseNoun } from "../lib/src/parsing/parse-noun-new";
|
||||
import { JsonEditor } from "json-edit-react";
|
||||
import { renderNounSelection } from "../lib/src/phrase-building/render-np";
|
||||
import { NPBlock } from "../components/src/blocks/Block";
|
||||
import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
|
||||
// import { renderNounSelection } from "../lib/src/phrase-building/render-np";
|
||||
// import { NPBlock } from "../components/src/blocks/Block";
|
||||
// import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
|
||||
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||
//import { renderVP } from "../lib/src/phrase-building/render-vp";
|
||||
// import VPDisplay from "../components/src/vp-explorer/VPDisplay";
|
||||
import { entryFeeder } from "./entryFeeder";
|
||||
import { removeRedundantVPSs } from "../lib/src/phrase-building/remove-redundant";
|
||||
|
||||
const working = [
|
||||
"limited demo vocab",
|
||||
|
@ -59,7 +64,7 @@ function ParserDemo({
|
|||
}) {
|
||||
const [text, setText] = useState<string>("");
|
||||
const [result, setResult] = useState<
|
||||
ReturnType<typeof parseNoun>[number]["body"][]
|
||||
ReturnType<typeof parsePhrase>["success"]
|
||||
>([]);
|
||||
// ReturnType<typeof parsePhrase>["success"]
|
||||
const [errors, setErrors] = useState<string[]>([]);
|
||||
|
@ -70,16 +75,10 @@ function ParserDemo({
|
|||
setErrors([]);
|
||||
return;
|
||||
}
|
||||
const res = parseNoun(tokenizer(value), dictionary, undefined);
|
||||
const success: ReturnType<typeof parseNoun>[number]["body"][] = res
|
||||
.filter((x) => !x.tokens.length)
|
||||
.map((x) => x.body);
|
||||
const errors = [
|
||||
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
|
||||
];
|
||||
const res = parsePhrase(tokenizer(value), dictionary);
|
||||
setText(value);
|
||||
setErrors(errors);
|
||||
setResult(success);
|
||||
setErrors(res.errors);
|
||||
setResult(removeRedundantVPSs(res.success));
|
||||
}
|
||||
return (
|
||||
<div className="mt-3" style={{ marginBottom: "1000px" }}>
|
||||
|
@ -141,34 +140,8 @@ function ParserDemo({
|
|||
<div className="text-center">Did you mean:</div>
|
||||
</>
|
||||
)}
|
||||
{result.map((r) => {
|
||||
try {
|
||||
const renderedNP: T.Rendered<T.NPSelection> = {
|
||||
type: "NP",
|
||||
selection: renderNounSelection(r.selection, r.inflected, "none"),
|
||||
};
|
||||
return (
|
||||
{result.map((res) => (
|
||||
<>
|
||||
{r.inflected ? "INFLECTED" : "PLAIN"}
|
||||
<NPBlock
|
||||
opts={opts}
|
||||
script="p"
|
||||
english={getEnglishFromRendered(renderedNP)}
|
||||
>
|
||||
{renderedNP}
|
||||
</NPBlock>
|
||||
</>
|
||||
);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
return <div>ERROR RENDERING</div>;
|
||||
}
|
||||
})}
|
||||
<JsonEditor data={result} />
|
||||
{/* {result.map((res) =>
|
||||
"inflected" in res ? (
|
||||
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
||||
) : "verb" in res ? (
|
||||
<EditableVP
|
||||
opts={opts}
|
||||
entryFeeder={entryFeeder}
|
||||
|
@ -176,7 +149,30 @@ function ParserDemo({
|
|||
>
|
||||
{uncompleteVPSelection(res)}
|
||||
</EditableVP>
|
||||
) : (
|
||||
<details>
|
||||
<summary>AST</summary>
|
||||
<JsonEditor data={res} />
|
||||
</details>
|
||||
</>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default ParserDemo;
|
||||
|
||||
// {/* {result.map((res) =>
|
||||
// "inflected" in res ? (
|
||||
// <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
||||
// ) : "verb" in res ? (
|
||||
// <EditableVP
|
||||
// opts={opts}
|
||||
// entryFeeder={entryFeeder}
|
||||
// allVariations={true}
|
||||
// >
|
||||
// {uncompleteVPSelection(res)}
|
||||
// </EditableVP>
|
||||
// ) : (
|
||||
// (() => {
|
||||
// try {
|
||||
// const rendered = renderVP(res);
|
||||
|
@ -199,19 +195,30 @@ function ParserDemo({
|
|||
// return <div>ERROR</div>;
|
||||
// }
|
||||
// })()
|
||||
<samp>
|
||||
<pre>{JSON.stringify(res, null, " ")}</pre>
|
||||
</samp>
|
||||
)
|
||||
)} */}
|
||||
<details>
|
||||
<summary>AST</summary>
|
||||
<samp>
|
||||
<pre>{JSON.stringify(result, null, " ")}</pre>
|
||||
</samp>
|
||||
</details>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
// <samp>
|
||||
// <pre>{JSON.stringify(res, null, " ")}</pre>
|
||||
// </samp>
|
||||
// )
|
||||
// )} */}
|
||||
|
||||
export default ParserDemo;
|
||||
// try {
|
||||
// const renderedNP: T.Rendered<T.NPSelection> = {
|
||||
// type: "NP",
|
||||
// selection: renderNounSelection(r.selection, r.inflected, "none"),
|
||||
// };
|
||||
// return (
|
||||
// <>
|
||||
// {r.inflected ? "INFLECTED" : "PLAIN"}
|
||||
// <NPBlock
|
||||
// opts={opts}
|
||||
// script="p"
|
||||
// english={getEnglishFromRendered(renderedNP)}
|
||||
// >
|
||||
// {renderedNP}
|
||||
// </NPBlock>
|
||||
// </>
|
||||
// );
|
||||
// } catch (e) {
|
||||
// console.error(e);
|
||||
// return <div>ERROR RENDERING</div>;
|
||||
// }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@lingdocs/inflect",
|
||||
"version": "7.7.1",
|
||||
"version": "7.7.3",
|
||||
"description": "Pashto inflector library",
|
||||
"main": "dist/lib/library.cjs",
|
||||
"module": "dist/lib/library.js",
|
||||
|
|
|
@ -19,7 +19,19 @@ function queryP(p: string): T.DictionaryEntry[] {
|
|||
}
|
||||
return dictDb.collection.find({ p });
|
||||
}
|
||||
const memoizedQueryP = queryP;
|
||||
const memoizedQueryP = memoize(queryP);
|
||||
|
||||
function queryTs(ts: number): T.DictionaryEntry {
|
||||
if (!dictDb.collection) {
|
||||
throw new Error("dictionary not initialized yet");
|
||||
}
|
||||
const res = dictDb.findOneByTs(ts);
|
||||
if (!res) {
|
||||
throw new Error("complement link broken");
|
||||
}
|
||||
return res;
|
||||
}
|
||||
const memoizedQueryTs = memoize(queryTs);
|
||||
|
||||
function adjLookup(p: string): T.AdjectiveEntry[] {
|
||||
const res = memoizedQueryP(p);
|
||||
|
@ -33,26 +45,51 @@ function nounLookup(p: string): T.NounEntry[] {
|
|||
|
||||
function otherLookup(
|
||||
key: keyof T.DictionaryEntry,
|
||||
p: string
|
||||
p: string,
|
||||
regex?: boolean
|
||||
): T.DictionaryEntry[] {
|
||||
if (!dictDb.collection) {
|
||||
return [];
|
||||
}
|
||||
return dictDb.collection.find({ [key]: p });
|
||||
return dictDb.collection.find({ [key]: regex ? variationRegex(p) : p });
|
||||
}
|
||||
|
||||
function specialPluralLookup(p: string): T.NounEntry[] {
|
||||
if (!dictDb.collection) {
|
||||
return [];
|
||||
}
|
||||
const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
|
||||
const regex = variationRegex(p);
|
||||
return dictDb.collection
|
||||
.find({
|
||||
$or: [{ ppp: { $regex: regex } }, { app: { $regex: regex } }],
|
||||
$or: [{ ppp: regex }, { app: regex }],
|
||||
})
|
||||
.filter(tp.isNounEntry);
|
||||
}
|
||||
|
||||
function verbEntryLookup(p: string): T.VerbEntry[] {
|
||||
if (!dictDb.collection) {
|
||||
return [];
|
||||
}
|
||||
return memoizedQueryP(p)
|
||||
.filter(tp.isVerbDictionaryEntry)
|
||||
.map((entry) =>
|
||||
entry.l
|
||||
? {
|
||||
entry,
|
||||
complement: memoizedQueryTs(entry.l),
|
||||
}
|
||||
: { entry }
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a RegEx mongo query to search for a variation in a certain field
|
||||
* ie. to search for کاته in کوت, کاته
|
||||
*/
|
||||
function variationRegex(p: string): { $regex: RegExp } {
|
||||
return { $regex: new RegExp(`(^|\\s|,)${p}($|,)`) };
|
||||
}
|
||||
|
||||
export const dictionary: T.DictionaryAPI = {
|
||||
initialize: async () => await dictDb.initialize(),
|
||||
update: async () => await dictDb.updateDictionary(() => null),
|
||||
|
@ -61,4 +98,5 @@ export const dictionary: T.DictionaryAPI = {
|
|||
nounLookup: memoize(nounLookup),
|
||||
otherLookup: memoize(otherLookup),
|
||||
specialPluralLookup: memoize(specialPluralLookup),
|
||||
verbEntryLookup: memoize(verbEntryLookup),
|
||||
};
|
||||
|
|
|
@ -1,7 +1,15 @@
|
|||
import * as T from "../../../types";
|
||||
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
|
||||
import {
|
||||
isAdjectiveEntry,
|
||||
isNounEntry,
|
||||
isVerbDictionaryEntry,
|
||||
} from "../type-predicates";
|
||||
import { entries } from "../../../../vocab/mini-dict-entries";
|
||||
|
||||
function variationRegex(p: string): { $regex: RegExp } {
|
||||
return { $regex: new RegExp(`(^|\\s|,)${p}($|,)`) };
|
||||
}
|
||||
|
||||
const queryP = (p: string) => entries.filter((e) => e.p === p);
|
||||
function adjLookup(p: string): T.AdjectiveEntry[] {
|
||||
return queryP(p).filter(isAdjectiveEntry) as T.AdjectiveEntry[];
|
||||
|
@ -13,18 +21,37 @@ function nounLookup(p: string): T.NounEntry[] {
|
|||
|
||||
function otherLookup(
|
||||
key: keyof T.DictionaryEntry,
|
||||
p: string
|
||||
p: string,
|
||||
regex?: boolean
|
||||
): T.DictionaryEntry[] {
|
||||
if (regex) {
|
||||
const { $regex: regex } = variationRegex(p);
|
||||
return entries.filter((e) => (e[key] as string)?.match(regex));
|
||||
}
|
||||
return entries.filter((e) => e[key] === p);
|
||||
}
|
||||
|
||||
function specialPluralLookup(p: string): T.NounEntry[] {
|
||||
const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
|
||||
const { $regex: regex } = variationRegex(p);
|
||||
return entries.filter(
|
||||
(e) => (e.ppp?.match(regex) || e.app?.match(regex)) && isNounEntry(e)
|
||||
) as T.NounEntry[];
|
||||
}
|
||||
|
||||
function verbEntryLookup(p: string): T.VerbEntry[] {
|
||||
return entries
|
||||
.filter((e) => e.p === p)
|
||||
.filter(isVerbDictionaryEntry)
|
||||
.map<T.VerbEntry>((entry) =>
|
||||
entry.l
|
||||
? {
|
||||
entry,
|
||||
complement: entries.find((e) => e.ts === entry.l),
|
||||
}
|
||||
: { entry }
|
||||
);
|
||||
}
|
||||
|
||||
export const testDictionary: T.DictionaryAPI = {
|
||||
// @ts-expect-error we won't mock the initialization
|
||||
initialize: async () => 0,
|
||||
|
@ -35,4 +62,5 @@ export const testDictionary: T.DictionaryAPI = {
|
|||
nounLookup,
|
||||
otherLookup,
|
||||
specialPluralLookup,
|
||||
verbEntryLookup,
|
||||
};
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { isAdverbEntry } from "../type-predicates";
|
||||
import { returnParseResultS } from "./utils";
|
||||
|
||||
export function parseAdverb(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction
|
||||
dictionary: T.DictionaryAPI
|
||||
): T.ParseResult<T.APSelection>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const adverbs = lookup(first.s, "adverb");
|
||||
const adverbs = dictionary.queryP(first.s).filter(isAdverbEntry);
|
||||
return adverbs.map((entry) =>
|
||||
returnParseResultS(rest, {
|
||||
type: "AP",
|
||||
|
|
|
@ -1,26 +1,25 @@
|
|||
import * as T from "../../../types";
|
||||
import { fmapParseResult } from "../fp-ps";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseAdverb } from "./parse-adverb";
|
||||
import { parseSandwich } from "./parse-sandwich";
|
||||
|
||||
export function parseAP(
|
||||
s: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.APSelection>[] {
|
||||
if (s.length === 0) {
|
||||
return [];
|
||||
}
|
||||
return [
|
||||
...(!possesor ? parseAdverb(s, lookup) : []),
|
||||
...(!possesor ? parseAdverb(s, dicitonary) : []),
|
||||
...fmapParseResult(
|
||||
(selection) =>
|
||||
({
|
||||
type: "AP",
|
||||
selection,
|
||||
} as const),
|
||||
parseSandwich(s, lookup, possesor)
|
||||
parseSandwich(s, dicitonary, possesor)
|
||||
),
|
||||
];
|
||||
}
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseEquative } from "./parse-equative";
|
||||
import { parseKidsSection } from "./parse-kids-section";
|
||||
import { parseNeg } from "./parse-negative";
|
||||
import { parseNPAP } from "./parse-npap";
|
||||
import { parseVBP } from "./parse-vbp";
|
||||
import { parsePH } from "./parse-ph";
|
||||
import { parseVBE } from "./parse-vbe";
|
||||
import { parseVBE } from "./parse-vbe-new";
|
||||
import {
|
||||
bindParseResult,
|
||||
returnParseResult,
|
||||
|
@ -18,7 +17,7 @@ import { isKedulStatEntry } from "./parse-verb-helpers";
|
|||
|
||||
export function parseBlocks(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
blocks: T.ParsedBlock[],
|
||||
kids: T.ParsedKid[]
|
||||
): T.ParseResult<{
|
||||
|
@ -35,13 +34,13 @@ export function parseBlocks(
|
|||
|
||||
// TOOD: rather parse VBP / VBE
|
||||
const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
|
||||
...(!inVerbSection ? parseNPAP(tokens, lookup) : []),
|
||||
...(!inVerbSection ? parseNPAP(tokens, dicitonary) : []),
|
||||
// ensure at most one of each PH, VBE, VBP
|
||||
...(prevPh ? [] : parsePH(tokens)),
|
||||
...(blocks.some(isParsedVBE)
|
||||
? []
|
||||
: [...parseVBE(tokens, lookup), ...parseEquative(tokens)]),
|
||||
...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, lookup)),
|
||||
: [...parseVBE(tokens, dicitonary), ...parseEquative(tokens)]),
|
||||
...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, dicitonary)),
|
||||
...(blocks.some((b) => b.type === "negative") ? [] : parseNeg(tokens)),
|
||||
...parseKidsSection(tokens, []),
|
||||
];
|
||||
|
@ -50,7 +49,7 @@ export function parseBlocks(
|
|||
const errors: T.ParseError[] = [];
|
||||
if (r.type === "kids") {
|
||||
return {
|
||||
next: parseBlocks(tokens, lookup, blocks, [...kids, ...r.kids]),
|
||||
next: parseBlocks(tokens, dicitonary, blocks, [...kids, ...r.kids]),
|
||||
errors:
|
||||
blocks.length !== 1
|
||||
? [{ message: "kids' section out of place" }]
|
||||
|
@ -71,7 +70,7 @@ export function parseBlocks(
|
|||
return [];
|
||||
}
|
||||
return {
|
||||
next: parseBlocks(tokens, lookup, [...blocks, r], kids),
|
||||
next: parseBlocks(tokens, dicitonary, [...blocks, r], kids),
|
||||
errors,
|
||||
};
|
||||
});
|
||||
|
|
|
@ -1736,7 +1736,7 @@ describe("parsing nouns", () => {
|
|||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const res = parseNoun(tokens, testDictionary, undefined, []).flatMap(
|
||||
const res = parseNoun(tokens, testDictionary, undefined).flatMap(
|
||||
// only take the ones that used all the tokens
|
||||
({ body, tokens }) => (tokens.length === 0 ? [body] : [])
|
||||
);
|
||||
|
|
|
@ -3,7 +3,12 @@ import { makeNounSelection } from "../phrase-building/make-selections";
|
|||
import { parseAdjective } from "./parse-adjective-new";
|
||||
import { parseDeterminer } from "./parse-determiner";
|
||||
import { parseNounWord } from "./parse-noun-word";
|
||||
import { bindParseResult, parserCombMany, toParseError } from "./utils";
|
||||
import {
|
||||
bindParseResult,
|
||||
parserCombMany,
|
||||
parserCombSucc3,
|
||||
toParseError,
|
||||
} from "./utils";
|
||||
|
||||
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
||||
|
||||
|
@ -15,33 +20,32 @@ export function parseNoun(
|
|||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const detRes = parserCombMany(parseDeterminer)(tokens, dictionary);
|
||||
// TODO: add recognition of او between adjectives
|
||||
return bindParseResult(detRes, (t, determiners) => {
|
||||
const adjRes = parserCombMany(parseAdjective)(t, dictionary);
|
||||
return bindParseResult(adjRes, (tk, adjectives) => {
|
||||
const nounWord = parseNounWord(tk, dictionary);
|
||||
return bindParseResult(nounWord, (tkns, nr) => {
|
||||
const res = parserCombSucc3([
|
||||
parserCombMany(parseDeterminer),
|
||||
parserCombMany(parseAdjective),
|
||||
parseNounWord,
|
||||
])(tokens, dictionary);
|
||||
return bindParseResult(res, (tkns, [determiners, adjectives, nounWord]) => {
|
||||
const { error: adjErrors } = adjDetsMatch(
|
||||
adjectives,
|
||||
nr.gender,
|
||||
nr.inflected ? 1 : 0,
|
||||
nr.plural
|
||||
nounWord.gender,
|
||||
nounWord.inflected ? 1 : 0,
|
||||
nounWord.plural
|
||||
);
|
||||
const { error: detErrors } = adjDetsMatch(
|
||||
determiners,
|
||||
nr.gender,
|
||||
nr.inflected ? 1 : 0,
|
||||
nr.plural
|
||||
nounWord.gender,
|
||||
nounWord.inflected ? 1 : 0,
|
||||
nounWord.plural
|
||||
);
|
||||
const dupErrors = checkForDeterminerDuplicates(determiners);
|
||||
const s = makeNounSelection(nr.entry, undefined);
|
||||
const s = makeNounSelection(nounWord.entry, undefined);
|
||||
const body: NounResult = {
|
||||
inflected: nr.inflected,
|
||||
inflected: nounWord.inflected,
|
||||
selection: {
|
||||
...s,
|
||||
gender: nr.gender,
|
||||
number: nr.plural ? "plural" : "singular",
|
||||
gender: nounWord.gender,
|
||||
number: nounWord.plural ? "plural" : "singular",
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
determiners: determiners.length
|
||||
? {
|
||||
|
@ -65,8 +69,6 @@ export function parseNoun(
|
|||
},
|
||||
];
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function checkForDeterminerDuplicates(
|
||||
|
|
|
@ -1,191 +0,0 @@
|
|||
import * as T from "../../../types";
|
||||
import { getInflectionPattern } from "../inflection-pattern";
|
||||
import { makeNounSelection } from "../phrase-building/make-selections";
|
||||
import {
|
||||
isMascNounEntry,
|
||||
isNounEntry,
|
||||
isPluralNounEntry,
|
||||
isUnisexNounEntry,
|
||||
} from "../type-predicates";
|
||||
import { getInflectionQueries } from "./inflection-query";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseAdjective } from "./parse-adjective";
|
||||
import { bindParseResult } from "./utils";
|
||||
|
||||
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
||||
|
||||
export function parseNoun(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction,
|
||||
possesor: T.PossesorSelection | undefined,
|
||||
adjectives: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
given: string;
|
||||
selection: T.AdjectiveSelection;
|
||||
}[]
|
||||
): T.ParseResult<NounResult>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
// TODO: add recognition of او between adjectives
|
||||
const adjRes = parseAdjective(tokens, lookup);
|
||||
const withAdj = bindParseResult(adjRes, (tkns, adj) =>
|
||||
parseNoun(tkns, lookup, possesor, [...adjectives, adj])
|
||||
);
|
||||
const [first, ...rest] = tokens;
|
||||
const searches = getInflectionQueries(first.s, true);
|
||||
|
||||
const w: ReturnType<typeof parseNoun> = [];
|
||||
searches.forEach(({ search, details }) => {
|
||||
const nounEntries = lookup(search, "nounAdj").filter(isNounEntry);
|
||||
details.forEach((deets) => {
|
||||
const fittingEntries = nounEntries.filter(deets.predicate);
|
||||
fittingEntries.forEach((entry) => {
|
||||
const genders: T.Gender[] = isUnisexNounEntry(entry)
|
||||
? ["masc", "fem"]
|
||||
: isMascNounEntry(entry)
|
||||
? ["masc"]
|
||||
: ["fem"];
|
||||
deets.gender.forEach((gender) => {
|
||||
if (genders.includes(gender)) {
|
||||
deets.inflection.forEach((inf) => {
|
||||
const { error: adjErrors } = adjsMatch(
|
||||
adjectives,
|
||||
gender,
|
||||
inf,
|
||||
deets.plural
|
||||
);
|
||||
convertInflection(inf, entry, gender, deets.plural).forEach(
|
||||
({ inflected, number }) => {
|
||||
const selection = makeNounSelection(entry, undefined);
|
||||
const errors = [
|
||||
...adjErrors.map((message) => ({
|
||||
message,
|
||||
})),
|
||||
];
|
||||
w.push({
|
||||
tokens: rest,
|
||||
body: {
|
||||
inflected,
|
||||
selection: {
|
||||
...selection,
|
||||
gender: selection.genderCanChange
|
||||
? gender
|
||||
: selection.gender,
|
||||
number: selection.numberCanChange
|
||||
? number
|
||||
: selection.number,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
// TODO: could be nicer to validate that the possesor is inflected before
|
||||
// and just pass in the selection
|
||||
possesor,
|
||||
},
|
||||
},
|
||||
errors,
|
||||
});
|
||||
}
|
||||
);
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
return [...withAdj, ...w];
|
||||
}
|
||||
|
||||
function adjsMatch(
|
||||
adjectives: Parameters<typeof parseNoun>[3],
|
||||
gender: T.Gender,
|
||||
inf: 0 | 1 | 2,
|
||||
plural: boolean | undefined
|
||||
): { ok: boolean; error: string[] } {
|
||||
const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
|
||||
const unmatching = adjectives.filter(
|
||||
(adj) =>
|
||||
!adj.gender.includes(gender) ||
|
||||
!adj.inflection.some((i) => i === inflection)
|
||||
);
|
||||
if (unmatching.length) {
|
||||
return {
|
||||
ok: false,
|
||||
error: unmatching.map((x) => {
|
||||
const adjText =
|
||||
x.given === x.selection.entry.p
|
||||
? x.given
|
||||
: `${x.given} (${x.selection.entry.p})`;
|
||||
const inflectionIssue = !x.inflection.some((x) => x === inflection)
|
||||
? ` should be ${showInflection(inflection)}`
|
||||
: ``;
|
||||
return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`;
|
||||
}),
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
ok: true,
|
||||
error: [],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
function convertInflection(
|
||||
inflection: 0 | 1 | 2,
|
||||
entry: T.NounEntry | T.AdjectiveEntry,
|
||||
gender: T.Gender,
|
||||
plural: boolean | undefined
|
||||
): {
|
||||
inflected: boolean;
|
||||
number: T.NounNumber;
|
||||
}[] {
|
||||
const pattern = getInflectionPattern(entry);
|
||||
const inf = (plural && inflection < 2 ? inflection + 1 : inflection) as
|
||||
| 0
|
||||
| 1
|
||||
| 2;
|
||||
if (inf === 0) {
|
||||
return [
|
||||
{
|
||||
inflected: false,
|
||||
number: "singular",
|
||||
},
|
||||
];
|
||||
} else if (inf === 1) {
|
||||
return [
|
||||
...(!((isNounEntry(entry) && isPluralNounEntry(entry)) || plural) &&
|
||||
!(pattern === 4 && entry.p.endsWith("ه") && gender === "masc")
|
||||
? [
|
||||
{
|
||||
inflected: true,
|
||||
number: "singular" as T.NounNumber,
|
||||
},
|
||||
]
|
||||
: []),
|
||||
...(pattern > 1 ||
|
||||
(pattern > 0 && gender === "fem") ||
|
||||
(isNounEntry(entry) && isPluralNounEntry(entry)) ||
|
||||
plural
|
||||
? [
|
||||
{
|
||||
inflected: false,
|
||||
number: "plural" as T.NounNumber,
|
||||
},
|
||||
]
|
||||
: []),
|
||||
];
|
||||
}
|
||||
return [
|
||||
{
|
||||
inflected: true,
|
||||
number: "plural",
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
function showInflection(inf: 0 | 1 | 2): string {
|
||||
return inf === 0
|
||||
? "plain"
|
||||
: inf === 1
|
||||
? "first inflection"
|
||||
: "second inflection";
|
||||
}
|
|
@ -1,13 +1,12 @@
|
|||
import * as T from "../../../types";
|
||||
import { parsePronoun } from "./parse-pronoun";
|
||||
import { parseNoun } from "./parse-noun";
|
||||
import { parseNoun } from "./parse-noun-new";
|
||||
import { fmapParseResult } from "../fp-ps";
|
||||
import { parseParticiple } from "./parse-participle";
|
||||
import { LookupFunction } from "./lookup";
|
||||
|
||||
export function parseNP(
|
||||
s: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.ParsedNP>[] {
|
||||
if (s.length === 0) {
|
||||
|
@ -41,7 +40,7 @@ export function parseNP(
|
|||
|
||||
return fmapParseResult(makeNPSl, [
|
||||
...(!possesor ? parsePronoun(s) : []),
|
||||
...parseNoun(s, lookup, possesor, []),
|
||||
...parseParticiple(s, lookup, possesor),
|
||||
...parseNoun(s, dicitonary, possesor),
|
||||
...parseParticiple(s, dicitonary, possesor),
|
||||
]);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseAP } from "./parse-ap";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { parsePossesor } from "./parse-possesor";
|
||||
|
@ -7,19 +6,25 @@ import { bindParseResult } from "./utils";
|
|||
|
||||
export function parseNPAP(
|
||||
s: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction
|
||||
dictionary: T.DictionaryAPI
|
||||
): T.ParseResult<T.APSelection | T.ParsedNP>[] {
|
||||
if (s.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const possesor = parsePossesor(s, lookup, undefined);
|
||||
const possesor = parsePossesor(s, dictionary, undefined);
|
||||
if (!possesor.length) {
|
||||
return [...parseNP(s, lookup, undefined), ...parseAP(s, lookup, undefined)];
|
||||
return [
|
||||
...parseNP(s, dictionary, undefined),
|
||||
...parseAP(s, dictionary, undefined),
|
||||
];
|
||||
}
|
||||
return bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>(
|
||||
possesor,
|
||||
(tokens, p) => {
|
||||
return [...parseNP(tokens, lookup, p), ...parseAP(tokens, lookup, p)];
|
||||
return [
|
||||
...parseNP(tokens, dictionary, p),
|
||||
...parseAP(tokens, dictionary, p),
|
||||
];
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
|
@ -4,14 +4,16 @@ import {
|
|||
makePossesorSelection,
|
||||
} from "../phrase-building/make-selections";
|
||||
import * as T from "../../../types";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { testDictionary } from "./mini-test-dictionary";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { parseNPAP } from "./parse-npap";
|
||||
|
||||
const leedul = wordQuery("لیدل", "verb");
|
||||
const akheestul = wordQuery("اخیستل", "verb");
|
||||
const wahul = wordQuery("وهل", "verb");
|
||||
const saray = wordQuery("سړی", "noun");
|
||||
const leedul = testDictionary.verbEntryLookup("لیدل")[0];
|
||||
const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
|
||||
const wahul = testDictionary.verbEntryLookup("وهل")[0];
|
||||
const saray = testDictionary.nounLookup("سړی")[0];
|
||||
|
||||
// TODO: uncomment and get parsing of short participles working
|
||||
|
||||
const tests: {
|
||||
label: string;
|
||||
|
@ -113,20 +115,20 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "د سړي لیدو",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeParticipleSelection(leedul),
|
||||
possesor: makePossesorSelection(
|
||||
makeNounSelection(saray, undefined)
|
||||
),
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// {
|
||||
// input: "د سړي لیدو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeParticipleSelection(leedul),
|
||||
// possesor: makePossesorSelection(
|
||||
// makeNounSelection(saray, undefined)
|
||||
// ),
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
@ -136,7 +138,7 @@ describe("parsing participles", () => {
|
|||
test(label, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const res = parseNPAP(tokens, lookup).map(({ body }) => body);
|
||||
const res = parseNPAP(tokens, testDictionary).map(({ body }) => body);
|
||||
expect(res).toEqual(
|
||||
output.map(
|
||||
(x): T.ParsedNP => ({
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { shortVerbEndConsonant } from "./misc";
|
||||
|
||||
type ParticipleResult = {
|
||||
inflected: boolean;
|
||||
|
@ -7,9 +7,10 @@ type ParticipleResult = {
|
|||
};
|
||||
|
||||
// TODO: should have adverbs with participle
|
||||
// TODO: NOTE this does not work with compound verbs yet
|
||||
export function parseParticiple(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<ParticipleResult>[] {
|
||||
if (tokens.length === 0) {
|
||||
|
@ -20,8 +21,13 @@ export function parseParticiple(
|
|||
return [];
|
||||
}
|
||||
const inflected = first.s.endsWith("و");
|
||||
const matches = lookup(first.s, "participle");
|
||||
return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({
|
||||
|
||||
return [
|
||||
...dicitonary.verbEntryLookup(inflected ? first.s.slice(0, -1) : first.s),
|
||||
...(inflected && shortVerbEndConsonant.includes(first.s.at(-2) || "")
|
||||
? dicitonary.verbEntryLookup(first.s.slice(0, -1) + "ل")
|
||||
: []),
|
||||
].map<T.ParseResult<ParticipleResult>>((verb) => ({
|
||||
tokens: rest,
|
||||
body: {
|
||||
inflected,
|
||||
|
|
|
@ -1,24 +1,24 @@
|
|||
import * as T from "../../../types";
|
||||
import { lookup } from "./lookup";
|
||||
import { parseVP } from "./parse-vp";
|
||||
|
||||
// شو should not be sheyaano !!
|
||||
|
||||
export function parsePhrase(s: T.Token[]): {
|
||||
success: (
|
||||
| {
|
||||
inflected: boolean;
|
||||
selection: T.NPSelection;
|
||||
}
|
||||
| Omit<T.VBE, "ps">
|
||||
| T.VPSelectionComplete
|
||||
)[];
|
||||
export function parsePhrase(
|
||||
s: T.Token[],
|
||||
dicitonary: T.DictionaryAPI
|
||||
): {
|
||||
success: // | {
|
||||
// inflected: boolean;
|
||||
// selection: T.NPSelection;
|
||||
// }
|
||||
// | Omit<T.VBE, "ps">
|
||||
T.VPSelectionComplete[];
|
||||
errors: string[];
|
||||
} {
|
||||
const res = [
|
||||
// ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
|
||||
// ...parseVerb(s, verbLookup),
|
||||
...parseVP(s, lookup),
|
||||
...parseVP(s, dicitonary),
|
||||
];
|
||||
|
||||
const success = res.filter((x) => !x.tokens.length).map((x) => x.body);
|
||||
|
|
|
@ -4,16 +4,16 @@ import {
|
|||
makeNounSelection,
|
||||
makePronounSelection,
|
||||
} from "../phrase-building/make-selections";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { parsePossesor } from "./parse-possesor";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { isCompleteResult } from "./utils";
|
||||
import { testDictionary as dictionary } from "./mini-test-dictionary";
|
||||
|
||||
const sturey = wordQuery("ستړی", "adj");
|
||||
const sarey = wordQuery("سړی", "noun");
|
||||
const maashoom = wordQuery("ماشوم", "noun");
|
||||
const malguray = wordQuery("ملګری", "noun");
|
||||
const plaar = wordQuery("پلار", "noun");
|
||||
const sturey = dictionary.adjLookup("ستړی")[0];
|
||||
const sarey = dictionary.nounLookup("سړی")[0];
|
||||
const maashoom = dictionary.nounLookup("ماشوم")[0];
|
||||
const malguray = dictionary.nounLookup("ملګری")[0];
|
||||
const plaar = dictionary.nounLookup("پلار")[0];
|
||||
|
||||
const tests: {
|
||||
input: string;
|
||||
|
@ -109,12 +109,12 @@ const tests: {
|
|||
test("parse possesor", () => {
|
||||
tests.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const parsed = parsePossesor(tokens, lookup, undefined);
|
||||
const parsed = parsePossesor(tokens, dictionary, undefined);
|
||||
if (output === "error") {
|
||||
expect(parsed.some((x) => x.errors.length)).toBe(true);
|
||||
} else {
|
||||
expect(
|
||||
parsePossesor(tokens, lookup, undefined)
|
||||
parsePossesor(tokens, dictionary, undefined)
|
||||
.filter(isCompleteResult)
|
||||
.map((x) => x.body.np.selection)
|
||||
).toEqual(output);
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { bindParseResult } from "./utils";
|
||||
// TODO: maybe contractions should just be male to cut down on the
|
||||
|
@ -19,7 +18,7 @@ const contractions: [string[], T.Person[]][] = [
|
|||
|
||||
export function parsePossesor(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction,
|
||||
dictionary: T.DictionaryAPI,
|
||||
prevPossesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.PossesorSelection>[] {
|
||||
if (tokens.length === 0) {
|
||||
|
@ -43,14 +42,14 @@ export function parsePossesor(
|
|||
? [{ message: "a pronoun cannot have a possesor" }]
|
||||
: [];
|
||||
return contractions
|
||||
.flatMap((p) => parsePossesor(rest, lookup, p))
|
||||
.flatMap((p) => parsePossesor(rest, dictionary, p))
|
||||
.map((x) => ({
|
||||
...x,
|
||||
errors: [...errors, ...x.errors],
|
||||
}));
|
||||
}
|
||||
if (first.s === "د") {
|
||||
const np = parseNP(rest, lookup, undefined);
|
||||
const np = parseNP(rest, dictionary, undefined);
|
||||
return bindParseResult(np, (tokens, body) => {
|
||||
const possesor: T.PossesorSelection = {
|
||||
shrunken: false,
|
||||
|
@ -63,7 +62,11 @@ export function parsePossesor(
|
|||
[{ message: `possesor should be inflected` }]
|
||||
: [],
|
||||
// add and check error - can't add possesor to pronoun
|
||||
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
|
||||
next: parsePossesor(
|
||||
tokens,
|
||||
dictionary,
|
||||
addPoss(prevPossesor, possesor)
|
||||
),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { sandwiches } from "../sandwiches";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { bindParseResult } from "./utils";
|
||||
|
@ -14,7 +13,7 @@ import { bindParseResult } from "./utils";
|
|||
|
||||
export function parseSandwich(
|
||||
s: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction,
|
||||
dictionary: T.DictionaryAPI,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
|
||||
if (s.length === 0) {
|
||||
|
@ -27,7 +26,7 @@ export function parseSandwich(
|
|||
(x) => x.before && x.before.p === first.s
|
||||
);
|
||||
// TODO: this could be be really repetitive...
|
||||
const nps = parseNP(startMatches.length ? rest : s, lookup, possesor);
|
||||
const nps = parseNP(startMatches.length ? rest : s, dictionary, possesor);
|
||||
return bindParseResult(nps, (tokens, np) => {
|
||||
if (!tokens.length) {
|
||||
return [];
|
||||
|
|
|
@ -7,28 +7,35 @@ import {
|
|||
wartlul,
|
||||
raatlul,
|
||||
} from "./irreg-verbs";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { parseVBE } from "./parse-vbe";
|
||||
import { parseVBE } from "./parse-vbe-new";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { getPeople, removeKeys } from "./utils";
|
||||
import { testDictionary } from "./mini-test-dictionary";
|
||||
|
||||
const wahul = wordQuery("وهل", "verb");
|
||||
const leekul = wordQuery("لیکل", "verb");
|
||||
const manul = wordQuery("منل", "verb");
|
||||
// const gaalul = wordQuery("ګالل", "verb");
|
||||
const rasedul = wordQuery("رسېدل", "verb");
|
||||
const leedul = wordQuery("لیدل", "verb");
|
||||
const khorul = wordQuery("خوړل", "verb");
|
||||
const kenaastul = wordQuery("کېناستل", "verb");
|
||||
const prexodul = wordQuery("پرېښودل", "verb");
|
||||
const xodul = wordQuery("ښودل", "verb");
|
||||
const kexodul = wordQuery("کېښودل", "verb");
|
||||
const katul = wordQuery("کتل", "verb");
|
||||
const watul = wordQuery("وتل", "verb");
|
||||
const wurul = wordQuery("وړل", "verb");
|
||||
const akheestul = wordQuery("اخیستل", "verb");
|
||||
const alwatul = wordQuery("الوتل", "verb");
|
||||
// const dartlul = wordQuery("درتلل", "verb")
|
||||
const wahul = testDictionary.verbEntryLookup("وهل")[0];
|
||||
const leekul = testDictionary.verbEntryLookup("لیکل")[0];
|
||||
const manul = testDictionary.verbEntryLookup("منل")[0];
|
||||
const gaalul = testDictionary.verbEntryLookup("ګالل")[0];
|
||||
const rasedul = testDictionary.verbEntryLookup("رسېدل")[0];
|
||||
const leedul = testDictionary.verbEntryLookup("لیدل")[0];
|
||||
const awuxtul = testDictionary.verbEntryLookup("اوښتل")[0];
|
||||
const khorul = testDictionary.verbEntryLookup("خوړل")[0];
|
||||
const kenaastul = testDictionary.verbEntryLookup("کېناستل")[0];
|
||||
const kxenaastul = testDictionary.verbEntryLookup("کښېناستل")[0];
|
||||
const prexodul = testDictionary.verbEntryLookup("پرېښودل")[0];
|
||||
const prexowul = testDictionary.verbEntryLookup("پرېښوول")[0];
|
||||
const prexawul = testDictionary.verbEntryLookup("پرېښول")[0];
|
||||
const xodul = testDictionary.verbEntryLookup("ښودل")[0];
|
||||
const kexodul = testDictionary.verbEntryLookup("کېښودل")[0];
|
||||
const kxexodul = testDictionary.verbEntryLookup("کښېښودل")[0];
|
||||
const katul = testDictionary.verbEntryLookup("کتل")[0];
|
||||
const watul = testDictionary.verbEntryLookup("وتل")[0];
|
||||
const wurul = testDictionary.verbEntryLookup("وړل")[0];
|
||||
const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
|
||||
const alwatul = testDictionary.verbEntryLookup("الوتل")[0];
|
||||
const dartlul = testDictionary.verbEntryLookup("درتلل")[0];
|
||||
|
||||
// TODO: Prefix searching on split verbs for perfective head parsing
|
||||
|
||||
// TODO: azmoyul etc
|
||||
// TODO: cleaner and more thorough handling of ا seperating verbs ee - wee etc
|
||||
|
@ -311,19 +318,6 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "وینم",
|
||||
output: [
|
||||
{
|
||||
stem: {
|
||||
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: leedul,
|
||||
},
|
||||
],
|
||||
},
|
||||
// TODO!! THESE COULD ALSO BE MALE
|
||||
{
|
||||
input: "لیده",
|
||||
output: [
|
||||
|
@ -364,42 +358,6 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "خوړ",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: khorul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کوت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کاته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "خلم",
|
||||
output: [
|
||||
|
@ -436,6 +394,11 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
label: "verbs with seperating perfective heads",
|
||||
cases: [
|
||||
{
|
||||
input: "الوځې",
|
||||
output: [
|
||||
|
@ -460,6 +423,18 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "لوتلم",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: alwatul,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -492,6 +467,13 @@ const tests: {
|
|||
},
|
||||
verb: kenaastul,
|
||||
},
|
||||
{
|
||||
stem: {
|
||||
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: kxenaastul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -507,46 +489,64 @@ const tests: {
|
|||
],
|
||||
},
|
||||
{
|
||||
input: "ناست",
|
||||
input: "کېناسته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective"],
|
||||
},
|
||||
verb: kenaastul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "پرېږدو",
|
||||
output: [
|
||||
input: "ناست",
|
||||
output: [kenaastul, kxenaastul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
},
|
||||
{
|
||||
input: "ناسته",
|
||||
output: [kenaastul, kxenaastul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
},
|
||||
{
|
||||
input: "پرېږدو",
|
||||
output: [prexodul, prexowul, prexawul].map((verb) => ({
|
||||
stem: {
|
||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
aspects: ["imperfective"],
|
||||
},
|
||||
verb: prexodul,
|
||||
},
|
||||
],
|
||||
verb,
|
||||
})),
|
||||
},
|
||||
{
|
||||
input: "ږدو",
|
||||
output: [
|
||||
{
|
||||
...[prexodul, prexawul, prexowul, kexodul, kxexodul].map((verb) => ({
|
||||
stem: {
|
||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
aspects: ["perfective"],
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
},
|
||||
verb: prexodul,
|
||||
},
|
||||
{
|
||||
verb,
|
||||
})),
|
||||
...[kexodul, kxexodul].map((verb) => ({
|
||||
stem: {
|
||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: kexodul,
|
||||
aspects: ["imperfective"] satisfies T.Aspect[],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -571,20 +571,13 @@ const tests: {
|
|||
},
|
||||
verb: xodul,
|
||||
},
|
||||
{
|
||||
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: prexodul,
|
||||
},
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: kexodul,
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -661,43 +654,9 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ړلم",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: getPeople(1, "sing"),
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: wurul,
|
||||
},
|
||||
{
|
||||
root: {
|
||||
persons: getPeople(1, "sing"),
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: tlul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ړ",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: wurul,
|
||||
},
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: tlul,
|
||||
},
|
||||
],
|
||||
output: [],
|
||||
},
|
||||
// should not match with the prefix for perfective
|
||||
{
|
||||
|
@ -713,6 +672,78 @@ const tests: {
|
|||
{
|
||||
label: "verbs with different 3rd pers sing past endings",
|
||||
cases: [
|
||||
{
|
||||
input: "خوړ",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: khorul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "خوړه",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: khorul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کوت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کاته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "واته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: watul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ووت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: watul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "رسېد",
|
||||
output: [
|
||||
|
@ -725,6 +756,18 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "رسېده",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: rasedul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کېناسته",
|
||||
output: [
|
||||
|
@ -766,27 +809,69 @@ const tests: {
|
|||
],
|
||||
},
|
||||
{
|
||||
input: "واته",
|
||||
input: "اوښت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
aspects: ["imperfective"],
|
||||
},
|
||||
verb: watul,
|
||||
verb: awuxtul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ووت",
|
||||
input: "ښت",
|
||||
output: [],
|
||||
},
|
||||
{
|
||||
input: "اوښته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective"],
|
||||
},
|
||||
verb: awuxtul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ښود",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: watul,
|
||||
verb: xodul,
|
||||
},
|
||||
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ښوده",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: xodul,
|
||||
},
|
||||
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
],
|
||||
|
@ -971,7 +1056,7 @@ tests.forEach(({ label, cases }) => {
|
|||
test(label, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const vbs = parseVBE(tokens, lookup).map((r) => r.body);
|
||||
const vbs = parseVBE(tokens, testDictionary).map((r) => r.body);
|
||||
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
|
||||
return [
|
||||
...acc,
|
|
@ -0,0 +1,387 @@
|
|||
import * as T from "../../../types";
|
||||
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
|
||||
import { parseKedul } from "./parse-kedul";
|
||||
import { getVerbEnding } from "./parse-verb-helpers";
|
||||
import { returnParseResults } from "./utils";
|
||||
import { entries as splitVerbEntries } from "./split-verbs";
|
||||
import * as tp from "../type-predicates";
|
||||
import memoize from "micro-memoize";
|
||||
import { pashtoConsonants } from "../pashto-consonants";
|
||||
|
||||
// TODO: و ارزول
|
||||
|
||||
// TODO: کول verbs!
|
||||
// check that aawu stuff is working
|
||||
// check oo`azmooy -
|
||||
// TODO: proper use of sepOo (hasBreakawayAleph) when checking for perfective roots/stems
|
||||
// check څاته
|
||||
// laaRa shum etc
|
||||
// TODO: proper use of perfective with sh
|
||||
// TODO: use of raa, dar, war with sh
|
||||
// TODO: هغه لاړ
|
||||
// TODO: don't have کول کېدل in split-verbs
|
||||
|
||||
type BaseInfo = Extract<T.ParsedVBE["info"], { type: "verb" }>;
|
||||
type StemInfo = Omit<BaseInfo, "base"> & {
|
||||
base: "stem";
|
||||
};
|
||||
type RootInfo = Omit<BaseInfo, "base"> & {
|
||||
base: "root";
|
||||
};
|
||||
|
||||
export function parseVBE(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI
|
||||
): T.ParseResult<T.ParsedVBE>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const irregResults = parseIrregularVerb(first.s);
|
||||
if (irregResults.length) {
|
||||
return returnParseResults(rest, irregResults);
|
||||
}
|
||||
const kedulStat = parseKedul(tokens);
|
||||
const ending = first.s.at(-1) || "";
|
||||
const base = ending === "ل" ? first.s : first.s.slice(0, -1);
|
||||
const { stem, root } = getVerbEnding(ending);
|
||||
// todo imperative for seperating
|
||||
const imperative = getImperativeVerbEnding(ending);
|
||||
const stemRes = returnParseResults(rest, [
|
||||
...[
|
||||
...findImperfectiveStem(base, dictionary),
|
||||
...findPerfectiveStem(base, dictionary),
|
||||
].flatMap<T.ParsedVBE>((info) => [
|
||||
...stem.map<T.ParsedVBE>((person) => ({
|
||||
type: "VB",
|
||||
person,
|
||||
info,
|
||||
})),
|
||||
...imperative.map<T.ParsedVBE>((person) => ({
|
||||
type: "VB",
|
||||
person,
|
||||
info: {
|
||||
...info,
|
||||
imperative: true,
|
||||
},
|
||||
})),
|
||||
]),
|
||||
]);
|
||||
const rootRes = returnParseResults(rest, [
|
||||
...[
|
||||
...findImperfectiveRoot(base, dictionary),
|
||||
...findPerfectiveRoot(base, dictionary),
|
||||
].flatMap<T.ParsedVBE>((info) => {
|
||||
const shortThird = thirdPersSingMascShortFromRoot(base, ending, info);
|
||||
return [
|
||||
...shortThird,
|
||||
...root.map<T.ParsedVBE>((person) => ({
|
||||
type: "VB",
|
||||
person,
|
||||
info,
|
||||
})),
|
||||
];
|
||||
}),
|
||||
...specialThirdPersMascSingForm(base, ending, dictionary),
|
||||
]);
|
||||
return [...kedulStat, ...stemRes, ...rootRes];
|
||||
}
|
||||
|
||||
function specialThirdPersMascSingForm(
|
||||
base: string,
|
||||
ending: string,
|
||||
dicitonary: T.DictionaryAPI
|
||||
): T.ParsedVBE[] {
|
||||
if (ending !== "ه" && !pashtoConsonants.includes(ending)) {
|
||||
return [];
|
||||
}
|
||||
// const imperfectiveWSep = [base + ending, ...(ending === "ه" ? [base] : [])]
|
||||
// .flatMap((v) =>
|
||||
// splitVerbEntries.filter((entry) => entry.entry.p.slice(0, -1) === v)
|
||||
// )
|
||||
// .map<T.ParsedVBE>((verb) => ({
|
||||
// type: "VB",
|
||||
// person: T.Person.ThirdSingMale,
|
||||
// info: {
|
||||
// type: "verb",
|
||||
// aspect: "imperfective",
|
||||
// base: "root",
|
||||
// verb,
|
||||
// },
|
||||
// }));
|
||||
|
||||
// const perfectiveWSep = [base + ending, ...(ending === "ه" ? [base] : [])]
|
||||
// .flatMap((v) => {
|
||||
// const b = splitVerbEntries.filter(({ entry }) => {
|
||||
// if (entry.tppp) {
|
||||
// return splitVarients(entry.tppp).some(
|
||||
// (varient) => varient.slice(entry.separationAtP) === v
|
||||
// );
|
||||
// } else {
|
||||
// return entry.p.slice(entry.separationAtP, -1) === v;
|
||||
// }
|
||||
// });
|
||||
// return b;
|
||||
// })
|
||||
// .map<T.ParsedVBE>((verb) => ({
|
||||
// type: "VB",
|
||||
// person: T.Person.ThirdSingMale,
|
||||
// info: {
|
||||
// type: "verb",
|
||||
// aspect: "perfective",
|
||||
// base: "root",
|
||||
// verb,
|
||||
// },
|
||||
// }));
|
||||
|
||||
const hardEnding: T.ParsedVBE[] =
|
||||
(ending === "د" && ["ې", "و"].some((x) => base.endsWith(x))) ||
|
||||
(ending === "ت" &&
|
||||
["س", "ښ"].some((x) => base.endsWith(x)) &&
|
||||
base.length > 1)
|
||||
? [
|
||||
...findPerfectiveRoot(base + ending + "ل", dicitonary),
|
||||
...findImperfectiveRoot(base + ending + "ل", dicitonary),
|
||||
].map<T.ParsedVBE>((info) => ({
|
||||
type: "VB",
|
||||
person: T.Person.ThirdSingMale,
|
||||
info,
|
||||
}))
|
||||
: [];
|
||||
|
||||
const regular: T.ParsedVBE[] = [
|
||||
base + ending,
|
||||
...(ending === "ه" ? [base] : []),
|
||||
]
|
||||
.flatMap(withAlefAdded)
|
||||
.flatMap((v) => dicitonary.otherLookup("tppp", v, true))
|
||||
.filter(
|
||||
(e): e is T.VerbDictionaryEntry =>
|
||||
tp.isVerbDictionaryEntry(e) && !e.l && !!e.tppp
|
||||
)
|
||||
.flatMap((entry) =>
|
||||
// NOT IF STARTS WITH ALEPH!
|
||||
(entry.separationAtP
|
||||
? (["imperfective"] as const)
|
||||
: startsWithAleph(entry.p) && !startsWithAleph(base)
|
||||
? (["perfective"] as const)
|
||||
: (["imperfective", "perfective"] as const)
|
||||
).map<T.ParsedVBE>((aspect) => ({
|
||||
type: "VB" as const,
|
||||
person: T.Person.ThirdSingMale,
|
||||
info: {
|
||||
type: "verb",
|
||||
aspect,
|
||||
base: "root",
|
||||
verb: { entry },
|
||||
} as const,
|
||||
}))
|
||||
);
|
||||
|
||||
return [...regular, ...hardEnding];
|
||||
|
||||
// ...imperfectiveWSep, ...perfectiveWSep];
|
||||
}
|
||||
|
||||
function thirdPersSingMascShortFromRoot(
|
||||
base: string,
|
||||
ending: string,
|
||||
info: RootInfo
|
||||
): T.ParsedVBE[] {
|
||||
if (info.verb.entry.tppp) {
|
||||
return [];
|
||||
}
|
||||
if (ending === "ه" && !base.endsWith("ل")) {
|
||||
return [
|
||||
{
|
||||
type: "VB",
|
||||
person: T.Person.ThirdSingMale,
|
||||
info,
|
||||
},
|
||||
];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
function findImperfectiveStem(
|
||||
s: string,
|
||||
dicitonary: T.DictionaryAPI
|
||||
): StemInfo[] {
|
||||
if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
|
||||
return [];
|
||||
}
|
||||
const regulars = regStemSearch(s, dicitonary);
|
||||
const irregulars = dicitonary
|
||||
.otherLookup("psp", s)
|
||||
.filter(
|
||||
(e): e is T.VerbDictionaryEntry => tp.isVerbDictionaryEntry(e) && !e.l
|
||||
)
|
||||
.map<T.VerbEntry>((entry) => ({
|
||||
entry,
|
||||
}));
|
||||
return [...regulars, ...irregulars].map((verb) => ({
|
||||
type: "verb",
|
||||
aspect: "imperfective",
|
||||
base: "stem",
|
||||
verb,
|
||||
}));
|
||||
}
|
||||
|
||||
function withAlefAdded(s: string): string[] {
|
||||
return [s, ...(startsWithAleph(s) ? [] : ["ا" + s, "آ" + s])];
|
||||
}
|
||||
|
||||
const stemSplitLookup = memoize((s: string) =>
|
||||
splitVerbEntries.filter(
|
||||
(e) =>
|
||||
(e.entry.ssp || e.entry.psp || e.entry.p).slice(
|
||||
e.entry.separationAtP || 0
|
||||
) === s
|
||||
)
|
||||
);
|
||||
|
||||
function findPerfectiveStem(
|
||||
s: string,
|
||||
dicitonary: T.DictionaryAPI
|
||||
): StemInfo[] {
|
||||
if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
|
||||
return [];
|
||||
}
|
||||
if (startsWithAleph(s)) {
|
||||
return [];
|
||||
}
|
||||
const baseQ = withAlefAdded(s);
|
||||
const regulars = baseQ
|
||||
.flatMap((q) => regStemSearch(q, dicitonary))
|
||||
.filter((e) => !e.entry.separationAtP);
|
||||
const irregularsBasedOnImperf = baseQ
|
||||
.flatMap((q) => dicitonary.otherLookup("psp", q))
|
||||
.filter(
|
||||
(e): e is T.VerbDictionaryEntry =>
|
||||
tp.isVerbDictionaryEntry(e) && !e.l && !e.ssp && !e.separationAtP
|
||||
)
|
||||
.map<T.VerbEntry>((entry) => ({
|
||||
entry,
|
||||
}));
|
||||
return [...regulars, ...irregularsBasedOnImperf, ...stemSplitLookup(s)].map(
|
||||
(verb) => ({
|
||||
type: "verb",
|
||||
aspect: "perfective",
|
||||
base: "stem",
|
||||
verb,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
function regStemSearch(s: string, dicitonary: T.DictionaryAPI): T.VerbEntry[] {
|
||||
const regTrans = dicitonary
|
||||
.verbEntryLookup(s + "ل")
|
||||
.filter(
|
||||
(e) =>
|
||||
!e.entry.c.includes("comp") &&
|
||||
!e.entry.ssp &&
|
||||
!e.entry.psp &&
|
||||
!e.entry.c.includes("intrans")
|
||||
);
|
||||
const regIntrans = dicitonary
|
||||
.verbEntryLookup((s.endsWith("ېږ") ? s.slice(0, -2) : s) + "ېدل")
|
||||
.filter(
|
||||
(e) =>
|
||||
!e.entry.c.includes("comp") &&
|
||||
!e.entry.ssp &&
|
||||
!e.entry.psp &&
|
||||
e.entry.c.includes("intrans")
|
||||
);
|
||||
return [...regTrans, ...regIntrans];
|
||||
}
|
||||
|
||||
function findImperfectiveRoot(
|
||||
s: string,
|
||||
dicitonary: T.DictionaryAPI
|
||||
): RootInfo[] {
|
||||
if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
|
||||
return [];
|
||||
}
|
||||
const reg = [s, s + "ل"]
|
||||
.flatMap(dicitonary.verbEntryLookup)
|
||||
.filter((e) => !e.entry.c.includes("comp"));
|
||||
return reg.map((verb) => ({
|
||||
type: "verb",
|
||||
aspect: "imperfective",
|
||||
base: "root",
|
||||
verb,
|
||||
}));
|
||||
}
|
||||
|
||||
const rootSplitLookup = memoize((s: string) =>
|
||||
splitVerbEntries.filter((e) =>
|
||||
[s, s + "ل"].some(
|
||||
(x) => (e.entry.prp || e.entry.p).slice(e.entry.separationAtP || 0) === x
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
function findPerfectiveRoot(
|
||||
s: string,
|
||||
dicitonary: T.DictionaryAPI
|
||||
): RootInfo[] {
|
||||
if (startsWithAleph(s) || ["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
|
||||
return [];
|
||||
}
|
||||
const reg = [s, s + "ل"]
|
||||
.flatMap(withAlefAdded)
|
||||
.flatMap(dicitonary.verbEntryLookup)
|
||||
.filter(
|
||||
(e) =>
|
||||
!e.entry.c.includes("comp") && !e.entry.prp && !e.entry.separationAtP
|
||||
);
|
||||
return [...reg, ...rootSplitLookup(s)].map((verb) => ({
|
||||
type: "verb",
|
||||
aspect: "perfective",
|
||||
base: "root",
|
||||
verb,
|
||||
}));
|
||||
}
|
||||
|
||||
function getImperativeVerbEnding(e: string): T.Person[] {
|
||||
if (e === "ه") {
|
||||
return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
|
||||
}
|
||||
if (e === "ئ") {
|
||||
return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
// TODO: could handle all sh- verbs for efficiencies sake
|
||||
function parseIrregularVerb(s: string): T.ParsedVBE[] {
|
||||
if (["ته", "راته", "ورته", "درته"].includes(s)) {
|
||||
return [
|
||||
{
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "imperfective",
|
||||
base: "root",
|
||||
type: "verb",
|
||||
verb: s.startsWith("را")
|
||||
? raatlul
|
||||
: s.startsWith("ور")
|
||||
? wartlul
|
||||
: s.startsWith("در")
|
||||
? dartlul
|
||||
: tlul,
|
||||
},
|
||||
person: T.Person.ThirdSingMale,
|
||||
},
|
||||
];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
// function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
|
||||
// return !e.sepOo && startsWithAleph(e.p);
|
||||
// }
|
||||
|
||||
function startsWithAleph(base: string): boolean {
|
||||
return ["ا", "آ"].includes(base[0]);
|
||||
}
|
|
@ -1,354 +0,0 @@
|
|||
import * as T from "../../../types";
|
||||
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
|
||||
import { isInVarients, lastVowelNotA } from "../p-text-helpers";
|
||||
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { shortVerbEndConsonant } from "./misc";
|
||||
import { parseKedul } from "./parse-kedul";
|
||||
import { getVerbEnding } from "./parse-verb-helpers";
|
||||
|
||||
// TODO: کول verbs!
|
||||
// check that aawu stuff is working
|
||||
// check oo`azmooy -
|
||||
// check څاته
|
||||
// laaRa shum etc
|
||||
// TODO: proper use of perfective with sh
|
||||
// TODO: use of raa, dar, war with sh
|
||||
// TODO: هغه لاړ
|
||||
|
||||
export function parseVBE(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction
|
||||
): T.ParseResult<T.ParsedVBE>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const irregResults = parseIrregularVerb(first.s);
|
||||
if (irregResults.length) {
|
||||
return irregResults.map((body) => ({
|
||||
tokens: rest,
|
||||
body,
|
||||
errors: [],
|
||||
}));
|
||||
}
|
||||
const kedulStat = parseKedul(tokens);
|
||||
const ending = first.s.at(-1) || "";
|
||||
const people = getVerbEnding(ending);
|
||||
const imperativePeople = getImperativeVerbEnding(ending);
|
||||
// First do rough verb lookup, grab wide pool of possible verbs (low searching complexity for fast lookup)
|
||||
// TODO: can optimize this to not have to look for possible stems/roots if none
|
||||
const verbs = lookup(first.s, "verb");
|
||||
// if (first.s === "سم") {
|
||||
// console.log({ verbs: JSON.stringify(verbs) });
|
||||
// }
|
||||
// Then find out which ones match exactly and how
|
||||
return [
|
||||
...kedulStat,
|
||||
...matchVerbs(first.s, verbs, people, imperativePeople).map((body) => ({
|
||||
tokens: rest,
|
||||
body,
|
||||
errors: [],
|
||||
})),
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Matches a verb token against a pool of candidate dictionary entries,
 * producing every possible VBE reading: stem matches (incl. imperatives),
 * root matches, and short third-person-masc-sing root forms, each in both
 * imperfective and perfective aspect.
 *
 * @param s the raw token text
 * @param entries candidate verb entries from the rough lookup
 * @param people possible persons implied by the token's ending
 * @param imperativePeople possible imperative persons implied by the ending
 */
function matchVerbs(
  s: string,
  entries: T.VerbEntry[],
  people: {
    root: T.Person[];
    stem: T.Person[];
  },
  imperativePeople: T.Person[]
): T.ParsedVBE[] {
  const w: T.ParsedVBE[] = [];
  const lEnding = s.endsWith("ل");
  // base = token without its person ending (kept whole for ل-ending forms)
  const base = s.endsWith("ل") ? s : s.slice(0, -1);
  // کېدل/شول forms are handled elsewhere (parseKedul) — bail out here
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(base)) {
    return [];
  }
  // matches b against x either exactly or against x's short (last-char-dropped)
  // form — the short form only applies when the token did not end in ل
  const matchShortOrLong = (b: string, x: string) => {
    return b === x || (!lEnding && b === x.slice(0, -1));
  };
  // --- stem matches (also used for imperatives) ---
  if (people.stem.length || imperativePeople.length) {
    const stemMatches = {
      imperfective: entries.filter(({ entry: e }) => {
        if (e.c.includes("comp")) {
          return false;
        }
        if (e.psp) {
          // entry declares an explicit present stem
          return e.psp === base;
        }
        if (e.c.includes("intrans.")) {
          // intransitive: stem is p minus ېدل, optionally with ېږ
          // NOTE(review): when e.p === "کېدل", miniRoot is the boolean
          // false and "false" + "ېږ" is compared — presumably intended to
          // exclude کېدل; confirm
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          return miniRoot + "ېږ" === base || miniRoot === base;
        } else {
          return e.p.slice(0, -1) === base;
        }
      }),
      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        // perfective stems may pick up a leading ا (alef)
        const baseWAa = "ا" + base;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.ssp) {
          // entry declares an explicit subjunctive/perfective stem
          if (e.separationAtP) {
            // separable verb: match only the part after the separation point
            const bRest = e.ssp.slice(e.separationAtP);
            if (bRest === base) {
              return [...acc, entry];
            }
          } else {
            if (e.ssp === base) {
              return [...acc, entry];
            }
          }
        } else if (e.psp) {
          if (hasBreakawayAlef(e) && startsWithAleph(base)) {
            // the leading alef breaks away in the perfective, so an
            // alef-initial token cannot be this entry's perfective stem
            return acc;
          }
          if (e.separationAtP) {
            const bRest = e.psp.slice(e.separationAtP);
            if (bRest === base) {
              return [...acc, entry];
            }
          } else {
            if (!e.sepOo) {
              if (baseWAa === e.psp) {
                return [...acc, entry];
              }
            }
            if (base === e.psp) {
              return [...acc, entry];
            }
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        } else if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
          if ([miniRoot, miniRootEg].includes(base)) {
            return [...acc, entry];
          }
        } else {
          const eb = e.p.slice(0, -1);
          if (eb === base) {
            return [...acc, entry];
          } else if (!e.sepOo) {
            // NOTE(review): comparing "ا"+base to base.slice(1) can
            // essentially never hold (lengths differ by 2) — looks like it
            // was meant to compare against e.p or eb; confirm intent
            if (baseWAa === base.slice(1)) {
              return [...acc, entry];
            }
          }
        }
        return acc;
      }, []),
    };
    // emit one VBE per (aspect, entry, person); imperative persons get the
    // same stem readings with the imperative flag set
    // NOTE(review): the inner `entries` destructured here shadows the
    // function parameter of the same name
    Object.entries(stemMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.stem.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
        imperativePeople.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
              imperative: true,
            },
          });
        });
      });
    });
  }
  // --- root matches ---
  if (people.root.length) {
    const rootMatches = {
      imperfective: entries.filter(
        ({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
      ),
      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.separationAtP) {
          // separable: match against the post-separation part of the
          // perfective root (prp) or plain p
          const b = e.prp || e.p;
          const bRest = b.slice(e.separationAtP);
          if (matchShortOrLong(base, bRest)) {
            return [...acc, entry];
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
          return acc;
        } else {
          const p = e.prp || e.p;
          // the perfective root may appear with a prefixed alef
          if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
            return [...acc, entry];
          }
        }
        return acc;
      }, []),
    };

    Object.entries(rootMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.root.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "root",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
      });
    });
  }
  // --- short third-person-masc-sing root forms ---
  const hamzaEnd = s.at(-1) === "ه";
  const oEnd = s.at(-1) === "و";
  const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
  const tppMatches = {
    imperfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      // entry declares explicit 3rd-pers-past variants (tppp)
      if (!e.prp && isInVarients(e.tppp, s)) {
        return [...acc, entry];
      }
      if (oEnd && matchShortOrLong(base, e.p)) {
        return [...acc, entry];
      }
      // "XX" is a sentinel that can never equal a real stem, effectively
      // disabling this branch unless the token has a hamza or abrupt ending
      if (
        lastVowelNotA(e.g.slice(0, -2)) &&
        (hamzaEnd ? base : abruptEnd ? s : "XX") === e.p.slice(0, -1)
      ) {
        return [...acc, entry];
      }
      // TODO: if check for modified aaXu thing!
      return acc;
    }, []),
    perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      if (e.separationAtP) {
        const b = e.prp || e.p;
        const bRest = b.slice(e.separationAtP);
        // شول forms handled elsewhere
        if (bRest === "شول") {
          return acc;
        }
        if (abruptEnd) {
          if (s === bRest.slice(0, -1)) {
            return [...acc, entry];
          }
        } else if (hamzaEnd) {
          if (base === bRest.slice(0, -1)) {
            return [...acc, entry];
          }
        } else if (oEnd) {
          if ([bRest, bRest.slice(0, -1)].includes(base)) {
            return [...acc, entry];
          }
        }
      } else if (!e.prp) {
        if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        }
        if (oEnd) {
          if ([e.p, e.p.slice(0, -1)].includes(base)) {
            return [...acc, entry];
          }
        } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
          const b = hamzaEnd ? base : s;
          const p = e.p.slice(0, -1);
          if (b === p) {
            return [...acc, entry];
          }
        }
      }
      // explicit tppp variants (possibly with a prefixed alef) also count
      // for non-separable entries
      if (!e.separationAtP) {
        if (isInVarients(e.tppp, s)) {
          return [...acc, entry];
        } else if (isInVarients(e.tppp, "ا" + s)) {
          return [...acc, entry];
        }
      }
      return acc;
    }, []),
  };
  // tpp matches are always third person masc sing, root base
  Object.entries(tppMatches).forEach(([aspect, entries]) => {
    entries.forEach((verb) => {
      w.push({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect: aspect as T.Aspect,
          base: "root",
          verb: removeFVarientsFromVerb(verb),
        },
      });
    });
  });
  return w;
}
|
||||
|
||||
function getImperativeVerbEnding(e: string): T.Person[] {
|
||||
if (e === "ه") {
|
||||
return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
|
||||
}
|
||||
if (e === "ئ") {
|
||||
return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
// TODO: could handle all sh- verbs for efficiencies sake
|
||||
function parseIrregularVerb(s: string): T.ParsedVBE[] {
|
||||
if (["ته", "راته", "ورته", "درته"].includes(s)) {
|
||||
return [
|
||||
{
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "imperfective",
|
||||
base: "root",
|
||||
type: "verb",
|
||||
verb: s.startsWith("را")
|
||||
? raatlul
|
||||
: s.startsWith("ور")
|
||||
? wartlul
|
||||
: s.startsWith("در")
|
||||
? dartlul
|
||||
: tlul,
|
||||
},
|
||||
person: T.Person.ThirdSingMale,
|
||||
},
|
||||
];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
|
||||
return !e.sepOo && ["ا", "آ"].includes(e.p[0]);
|
||||
}
|
||||
|
||||
function startsWithAleph(base: string): boolean {
|
||||
return ["ا", "آ"].includes(base[0]);
|
||||
}
|
|
@ -1,46 +1,46 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { returnParseResult } from "./utils";
|
||||
// import { returnParseResult } from "./utils";
|
||||
|
||||
export function parseVBP(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction
|
||||
dictionary: T.DictionaryAPI
|
||||
): T.ParseResult<T.ParsedVBP>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
return [
|
||||
...parsePastPart(tokens, lookup),
|
||||
// ...parseAbility(tokens),
|
||||
];
|
||||
return [];
|
||||
// return [
|
||||
// ...parsePastPart(tokens, lookup),
|
||||
// // ...parseAbility(tokens),
|
||||
// ];
|
||||
}
|
||||
|
||||
/**
 * Parses the first token as a past participle: the token must end in one of
 * the participle endings (ی / ي / ې); the rest is looked up as a participle
 * stem, and each match is expanded to every gender/number the ending allows.
 *
 * NOTE(review): destructuring `tokens` assumes a non-empty token list —
 * presumably guaranteed by the caller's length check; confirm.
 */
function parsePastPart(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.ParsedVBP>[] {
  const [{ s }, ...rest] = tokens;
  const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
  if (!ending || !["ی", "ي", "ې"].includes(ending)) {
    return [];
  }
  // TODO: ALSO HANDLE SHORT FORMS
  const wOutEnd = s.slice(0, -1);
  const matches = lookup(wOutEnd, "pPart");
  // one VBP reading per (verb, gender/number) combination
  const genNums = endingGenderNum(ending);
  return matches
    .flatMap<T.ParsedVBP>((verb) =>
      genNums.map<T.ParsedVBP>((genNum) => ({
        type: "VB",
        info: {
          type: "ppart",
          verb,
          genNum,
        },
      }))
    )
    .flatMap((m) => returnParseResult(rest, m));
}
|
||||
// function parsePastPart(
|
||||
// tokens: Readonly<T.Token[]>,
|
||||
// dicitonary: T.DictionaryAPI,
|
||||
// ): T.ParseResult<T.ParsedVBP>[] {
|
||||
// const [{ s }, ...rest] = tokens;
|
||||
// const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
|
||||
// if (!ending || !["ی", "ي", "ې"].includes(ending)) {
|
||||
// return [];
|
||||
// }
|
||||
// // TODO: ALSO HANDLE SHORT FORMS
|
||||
// const wOutEnd = s.slice(0, -1);
|
||||
// const matches = lookup(wOutEnd, "pPart");
|
||||
// const genNums = endingGenderNum(ending);
|
||||
// return matches
|
||||
// .flatMap<T.ParsedVBP>((verb) =>
|
||||
// genNums.map<T.ParsedVBP>((genNum) => ({
|
||||
// type: "VB",
|
||||
// info: {
|
||||
// type: "ppart",
|
||||
// verb,
|
||||
// genNum,
|
||||
// },
|
||||
// }))
|
||||
// )
|
||||
// .flatMap((m) => returnParseResult(rest, m));
|
||||
// }
|
||||
|
||||
// function parseAbility(
|
||||
// tokens: Readonly<T.Token[]>,
|
||||
|
@ -70,33 +70,33 @@ function parsePastPart(
|
|||
// .flatMap((m) => returnParseResult(rest, m));
|
||||
// }
|
||||
|
||||
function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
|
||||
if (ending === "ی") {
|
||||
return [
|
||||
{
|
||||
gender: "masc",
|
||||
number: "singular",
|
||||
},
|
||||
];
|
||||
}
|
||||
if (ending === "ي") {
|
||||
return [
|
||||
{
|
||||
gender: "masc",
|
||||
number: "plural",
|
||||
},
|
||||
];
|
||||
}
|
||||
// if (ending === "ې") {
|
||||
return [
|
||||
{
|
||||
gender: "fem",
|
||||
number: "singular",
|
||||
},
|
||||
{
|
||||
gender: "fem",
|
||||
number: "plural",
|
||||
},
|
||||
];
|
||||
// function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
|
||||
// if (ending === "ی") {
|
||||
// return [
|
||||
// {
|
||||
// gender: "masc",
|
||||
// number: "singular",
|
||||
// },
|
||||
// ];
|
||||
// }
|
||||
// if (ending === "ي") {
|
||||
// return [
|
||||
// {
|
||||
// gender: "masc",
|
||||
// number: "plural",
|
||||
// },
|
||||
// ];
|
||||
// }
|
||||
// // if (ending === "ې") {
|
||||
// return [
|
||||
// {
|
||||
// gender: "fem",
|
||||
// number: "singular",
|
||||
// },
|
||||
// {
|
||||
// gender: "fem",
|
||||
// number: "plural",
|
||||
// },
|
||||
// ];
|
||||
// // }
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -4,6 +4,10 @@ export function isKedulStatEntry(v: T.VerbDictionaryEntry): boolean {
|
|||
return v.p === "کېدل" && v.e === "to become _____";
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the possible people for stem and root endings
|
||||
* but DOES NOT INCLUDE short third pers masc sing
|
||||
*/
|
||||
export function getVerbEnding(e: string): {
|
||||
stem: T.Person[];
|
||||
root: T.Person[];
|
||||
|
@ -34,7 +38,11 @@ export function getVerbEnding(e: string): {
|
|||
};
|
||||
} else if (e === "و") {
|
||||
return {
|
||||
root: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
root: [
|
||||
T.Person.FirstPlurMale,
|
||||
T.Person.FirstPlurFemale,
|
||||
T.Person.ThirdSingMale,
|
||||
],
|
||||
stem: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
};
|
||||
} else if (e === "ئ") {
|
||||
|
|
|
@ -24,7 +24,6 @@ import {
|
|||
import { parseBlocks } from "./parse-blocks";
|
||||
import { makePronounSelection } from "../phrase-building/make-selections";
|
||||
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { isSecondPerson, personToGenNum } from "../misc-helpers";
|
||||
import { equals, zip } from "rambda";
|
||||
import { isImperativeTense } from "../type-predicates";
|
||||
|
@ -41,12 +40,12 @@ import { isImperativeTense } from "../type-predicates";
|
|||
|
||||
export function parseVP(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: LookupFunction
|
||||
dictionary: T.DictionaryAPI
|
||||
): T.ParseResult<T.VPSelectionComplete>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const blocks = parseBlocks(tokens, lookup, [], []);
|
||||
const blocks = parseBlocks(tokens, dictionary, [], []);
|
||||
return bindParseResult(
|
||||
createPossesivePossibilities(blocks),
|
||||
(tokens, { blocks, kids }) => {
|
||||
|
@ -892,7 +891,7 @@ function getMiniPronouns(kids: T.ParsedKid[]): T.ParsedMiniPronoun[] {
|
|||
|
||||
function getPeopleFromMiniPronouns(kids: T.ParsedKid[]): T.Person[] {
|
||||
const p: T.Person[] = [];
|
||||
for (let k of kids) {
|
||||
for (const k of kids) {
|
||||
if (k === "me") {
|
||||
p.push(T.Person.FirstSingMale);
|
||||
p.push(T.Person.FirstSingFemale);
|
||||
|
|
|
@ -163,6 +163,38 @@ export function parserCombMany<R>(parser: Parser<R>): Parser<R[]> {
|
|||
return r;
|
||||
}
|
||||
|
||||
export function parserCombSucc2<A, B>(
|
||||
parsers: [Parser<A>, Parser<B>]
|
||||
): Parser<[A, B]> {
|
||||
return function (
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI
|
||||
): T.ParseResult<[A, B]>[] {
|
||||
return bindParseResult(parsers[0](tokens, dictionary), (t, a) =>
|
||||
bindParseResult(parsers[1](t, dictionary), (tk, b) =>
|
||||
returnParseResult(tk, [a, b])
|
||||
)
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
export function parserCombSucc3<A, B, C>(
|
||||
parsers: [Parser<A>, Parser<B>, Parser<C>]
|
||||
): Parser<[A, B, C]> {
|
||||
return function (
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI
|
||||
): T.ParseResult<[A, B, C]>[] {
|
||||
return bindParseResult(parsers[0](tokens, dictionary), (t, a) =>
|
||||
bindParseResult(parsers[1](t, dictionary), (tk, b) =>
|
||||
bindParseResult(parsers[2](tk, dictionary), (tkn, c) =>
|
||||
returnParseResult(tkn, [a, b, c])
|
||||
)
|
||||
)
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
export function isCompleteResult<C extends object>(
|
||||
r: T.ParseResult<C>
|
||||
): boolean {
|
||||
|
|
|
@ -214,7 +214,7 @@ function addArticlesAndAdjs(
|
|||
? np.determiners.determiners
|
||||
// @ts-ignore - weird, ts is not recognizing this as rendered
|
||||
.map((x) => (moreThanOneDet ? `(${x.e})` : x.e))
|
||||
.join(" ")
|
||||
.join(" ") + " "
|
||||
: "";
|
||||
const detsWithoutNoun = np.determiners && !np.determiners.withNoun;
|
||||
return `${np.determiners ? "" : articles}${determiners}${
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
import * as T from "../../../types";
|
||||
import { compileVP } from "./compile";
|
||||
import { renderVP } from "./render-vp";
|
||||
|
||||
/**
 * Filters out VP selections whose compiled output duplicates an earlier
 * selection's output, keeping the first occurrence of each.
 */
export function removeRedundantVPSs(
  vs: T.VPSelectionComplete[]
): T.VPSelectionComplete[] {
  // render/compile each selection so duplicates can be compared by output
  const versions = vs.map((x) => compileVP(renderVP(x), x.form));
  const toRemove = new Set<number>();
  versions.forEach((a, i) => {
    // only look at items after i, so the first of each duplicate set survives
    // NOTE(review): the !toRemove.has(i) guard means an item already marked
    // as a duplicate does not itself mark later duplicates — this makes the
    // result depend on iteration order; confirm intended
    const duplicates = findAllIndices(
      versions.slice(i + 1),
      (b) => !toRemove.has(i) && isDuplicate(a, b)
    );
    // indices from the slice are offset back into the full array
    duplicates.forEach((d) => toRemove.add(d + i + 1));
  });
  return vs.reduce<T.VPSelectionComplete[]>((acc, v, i) => {
    if (toRemove.has(i)) {
      return acc;
    }
    return [...acc, v];
  }, []);
}
|
||||
|
||||
function isDuplicate(
|
||||
a: {
|
||||
ps: T.SingleOrLengthOpts<T.PsString[]>;
|
||||
e?: string[];
|
||||
},
|
||||
b: { ps: T.SingleOrLengthOpts<T.PsString[]>; e?: string[] }
|
||||
): boolean {
|
||||
if (!a.e || !b.e) {
|
||||
return false;
|
||||
}
|
||||
if (a.e.length !== b.e.length) {
|
||||
return false;
|
||||
}
|
||||
return a.e.every(
|
||||
(x, i) =>
|
||||
removeGenderGloss(x) === removeGenderGloss(b.e ? b.e[i] : "") &&
|
||||
JSON.stringify(a.ps) === JSON.stringify(b.ps)
|
||||
);
|
||||
}
|
||||
|
||||
function removeGenderGloss(s: string): string {
|
||||
// TODO: combine into one RegEx
|
||||
return s.replaceAll(/\((m|f)\.\)/g, "").replaceAll(/\((m|f)\. pl\.\)/g, "");
|
||||
}
|
||||
|
||||
function findAllIndices<N>(arr: N[], f: (x: N) => boolean): number[] {
|
||||
const indices: number[] = [];
|
||||
arr.forEach((x, i) => {
|
||||
if (f(x)) {
|
||||
indices.push(i);
|
||||
}
|
||||
});
|
||||
return indices;
|
||||
}
|
|
@ -178,7 +178,7 @@ function renderDeterminer({
|
|||
? number === "plural"
|
||||
? { p: "دغو", f: "dágho" }
|
||||
: gender === "masc"
|
||||
? { p: "دغه", f: "dághu" }
|
||||
? { p: "دغه", f: "dágha" }
|
||||
: { p: "دغې", f: "dághe" }
|
||||
: { p: "دغه", f: "dágha" };
|
||||
return {
|
||||
|
@ -196,7 +196,7 @@ function renderDeterminer({
|
|||
? number === "plural"
|
||||
? { p: "هغو", f: "hágho" }
|
||||
: gender === "masc"
|
||||
? { p: "هغه", f: "hághu" }
|
||||
? { p: "هغه", f: "hágha" }
|
||||
: { p: "هغې", f: "hághe" }
|
||||
: { p: "هغه", f: "hágha" };
|
||||
return {
|
||||
|
|
|
@ -1259,8 +1259,13 @@ export type DictionaryAPI = {
|
|||
queryP: (p: string) => DictionaryEntry[];
|
||||
adjLookup: (p: string) => AdjectiveEntry[];
|
||||
nounLookup: (p: string) => NounEntry[];
|
||||
otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[];
|
||||
otherLookup: (
|
||||
key: keyof DictionaryEntry,
|
||||
p: string,
|
||||
regex?: boolean
|
||||
) => DictionaryEntry[];
|
||||
specialPluralLookup: (p: string) => NounEntry[];
|
||||
verbEntryLookup: (p: string) => VerbEntry[];
|
||||
};
|
||||
|
||||
export type Parser<R> = (
|
||||
|
|
|
@ -18,5 +18,5 @@
|
|||
"noUnusedParameters": true,
|
||||
"noFallthroughCasesInSwitch": true
|
||||
},
|
||||
"include": ["vite.config.ts", "get-mini-dict.ts"]
|
||||
"include": ["vite.config.ts", "get-mini-dict-and-split-verbs.ts"]
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ export const entries: T.DictionaryEntry["ts"][] = [
|
|||
1527812908, // مېلمه
|
||||
1575924767041, // شپون
|
||||
1527815333, // نتور
|
||||
1527812881, // ماشوم
|
||||
|
||||
// fem nouns
|
||||
1527811877, // دوستي
|
||||
|
@ -50,4 +51,28 @@ export const entries: T.DictionaryEntry["ts"][] = [
|
|||
1589023873660, // فتح - fatha
|
||||
1527814342, // نفع - nafa
|
||||
1527815329, // تجربه
|
||||
|
||||
// verbs
|
||||
1527815399, // وهل
|
||||
1527817298, // اخیستل
|
||||
1527812275, // لیدل
|
||||
1527812856, // لیکل
|
||||
1527815085, // منل
|
||||
1527817661, // ګالل
|
||||
1527813573, // رسېدل
|
||||
1527812790, // خوړل
|
||||
1527812759, // کېناستل
|
||||
1527812758, // کښېناستل
|
||||
1527815190, // پرېښودل
|
||||
1527811293, // ښودل
|
||||
1527812284, // کېښودل
|
||||
1527812751, // کتل
|
||||
1527823376, // وتل
|
||||
1527816865, // وړل
|
||||
1527813473, // الوتل
|
||||
1585228551150, // درتلل
|
||||
1527817577, // کښېښودل
|
||||
1527814012, // اوښتل
|
||||
1577390597820, // پرېښوول
|
||||
1527815191, // پرېښول
|
||||
];
|
||||
|
|
Loading…
Reference in New Issue