comment out tests for parser in progress

fix up masc inflection of demonstratives
parser working - a bit slow/rough - with dictionary lookup
2024-12-06 15:10:17 +05:00 · 2024-12-06 15:06:14 +05:00 · 2024-10-14 20:22:32 -04:00
39 changed files with 1163 additions and 1002 deletions
--- a/.gitignore
+++ b/.gitignore
@ -11,6 +11,7 @@ lerna-debug.log*
 src/verbs.ts
 src/nouns-adjs.ts
 vocab/mini-dict-entries.ts
 src/lib/src/parsing/split-verbs.ts
 # testing
 /coverage
--- a/get-mini-dict-and-split-verbs.ts
+++ b/get-mini-dict-and-split-verbs.ts
@ -0,0 +1,41 @@
 import * as T from "./src/types";
 import * as tp from "./src/lib/src/type-predicates";
 import fs from "fs";
 import { entries as collection } from "./vocab/mini-dict-tss";
 // Build script: downloads the full LingDocs dictionary and generates two
 // source files from it:
 //  - vocab/mini-dict-entries.ts: the entries whose `ts` is listed in mini-dict-tss
 //  - src/lib/src/parsing/split-verbs.ts: verb entries that have `separationAtP` set
 const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
 );
 // fail loudly on an HTTP error instead of trying to parse an error body as JSON
 if (!res.ok) {
  throw new Error(
    `failed to fetch dictionary: ${res.status} ${res.statusText}`
  );
 }
 const dictionary = (await res.json()) as T.Dictionary;
 // a Set gives constant-time membership checks while scanning every entry
 const wantedTs = new Set(collection);
 const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  wantedTs.has(x.ts)
 );
 // verbs with a separation point, leaving out the two entries listed below
 const splitEntries: T.VerbDictionaryEntry[] =
  dictionary.entries.filter<T.VerbDictionaryEntry>(
    (x): x is T.VerbDictionaryEntry =>
      tp.isVerbDictionaryEntry(x) &&
      !!x.separationAtP &&
      !["کول", "کېدل"].includes(x.p)
  );
 const miniDictContents = `import { DictionaryEntry } from "../src/types";
 // DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
 export const entries: DictionaryEntry[] = [
 ${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
 ];
 `;
 const splitVerbContents = `import { VerbEntry, VerbDictionaryEntry } from "../../../types";
 // DO NOT MODIFY - GENERATED
 export const entries: VerbEntry[] = [
 ${splitEntries
  .map((e) => `\t{ entry: ${JSON.stringify(e)} as VerbDictionaryEntry },`)
  .join("\n")}
 ];
 `;
 fs.writeFileSync("./vocab/mini-dict-entries.ts", miniDictContents);
 fs.writeFileSync("./src/lib/src/parsing/split-verbs.ts", splitVerbContents);
--- a/get-mini-dict.ts
+++ b/get-mini-dict.ts
@ -1,22 +0,0 @@
 import * as T from "./src/types";
 import fs from "fs";
 import { entries as collection } from "./vocab/mini-dict-tss";
 // Build script: downloads the full LingDocs dictionary and writes the entries
 // whose `ts` is listed in mini-dict-tss to vocab/mini-dict-entries.ts
 const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
 );
 // fail loudly on an HTTP error instead of trying to parse an error body as JSON
 if (!res.ok) {
  throw new Error(
    `failed to fetch dictionary: ${res.status} ${res.statusText}`
  );
 }
 const dictionary = (await res.json()) as T.Dictionary;
 // a Set gives constant-time membership checks while scanning every entry
 const wantedTs = new Set(collection);
 const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  wantedTs.has(x.ts)
 );
 const contents = `import { DictionaryEntry } from "../src/types";
 // DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
 export const entries: DictionaryEntry[] = [
 ${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
 ];
 `;
 fs.writeFileSync("./vocab/mini-dict-entries.ts", contents);
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "pashto-inflector-website",
-  "version": "7.7.1",
+  "version": "7.7.3",
  "type": "module",
  "scripts": {
    "patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version",
@ -14,7 +14,7 @@
    "build-website": "tsc -b && vite build",
    "build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs",
    "build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib",
-    "get-words": "node get-words.cjs && tsx get-mini-dict.ts",
+    "get-words": "node get-words.cjs && tsx get-mini-dict-and-split-verbs.ts",
    "check-all-inflections": "tsx check-all-inflections.ts"
  },
  "dependencies": {
--- a/src/App.tsx
+++ b/src/App.tsx
@ -19,7 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
 import Hider from "./components/src/Hider";
 import InflectionDemo from "./demo-components/InflectionDemo";
 import SpellingDemo from "./demo-components/SpellingDemo";
-// import ParserDemo from "./demo-components/ParserDemo";
+import ParserDemo from "./demo-components/ParserDemo";
 // import InflectionTable from "./components/src/InflectionsTable";
 function App() {
@ -163,7 +163,7 @@ function App() {
          >
            <SpellingDemo opts={textOptions} onChange={setTextOptions} />
          </Hider>
-          {/* <Hider
+          <Hider
            label="Parser (🚧 IN PROGRESS 🚧)"
            hLevel={3}
            showing={showing === "parser"}
@ -174,7 +174,7 @@ function App() {
              entryFeeder={entryFeeder}
              dictionary={dictionary}
            />
-          </Hider> */}
+          </Hider>
        </div>
      </main>
      <Modal
--- a/src/components/package.json
+++ b/src/components/package.json
@ -1,6 +1,6 @@
 {
  "name": "@lingdocs/ps-react",
-  "version": "7.7.1",
+  "version": "7.7.3",
  "description": "Pashto inflector library module with React components",
  "main": "dist/components/library.js",
  "module": "dist/components/library.js",
--- a/src/demo-components/ParserDemo.tsx
+++ b/src/demo-components/ParserDemo.tsx
@ -3,13 +3,18 @@ import * as T from "../types";
 // import { parsePhrase } from "../lib/src/parsing/parse-phrase";
 import { tokenizer } from "../lib/src/parsing/tokenizer";
 // import { NPDisplay } from "../components/library";
-// import EditableVP from "../components/src/vp-explorer/EditableVP";
+import EditableVP from "../components/src/vp-explorer/EditableVP";
-// import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
+import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
-import { parseNoun } from "../lib/src/parsing/parse-noun-new";
+// import { parseNoun } from "../lib/src/parsing/parse-noun-new";
 import { JsonEditor } from "json-edit-react";
-import { renderNounSelection } from "../lib/src/phrase-building/render-np";
+// import { renderNounSelection } from "../lib/src/phrase-building/render-np";
-import { NPBlock } from "../components/src/blocks/Block";
+// import { NPBlock } from "../components/src/blocks/Block";
-import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
+// import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
 import { parsePhrase } from "../lib/src/parsing/parse-phrase";
 //import { renderVP } from "../lib/src/phrase-building/render-vp";
 // import VPDisplay from "../components/src/vp-explorer/VPDisplay";
 import { entryFeeder } from "./entryFeeder";
 import { removeRedundantVPSs } from "../lib/src/phrase-building/remove-redundant";
 const working = [
  "limited demo vocab",
@ -59,7 +64,7 @@ function ParserDemo({
 }) {
  const [text, setText] = useState<string>("");
  const [result, setResult] = useState<
-    ReturnType<typeof parseNoun>[number]["body"][]
+    ReturnType<typeof parsePhrase>["success"]
  >([]);
  // ReturnType<typeof parsePhrase>["success"]
  const [errors, setErrors] = useState<string[]>([]);
@ -70,16 +75,10 @@ function ParserDemo({
      setErrors([]);
      return;
    }
-    const res = parseNoun(tokenizer(value), dictionary, undefined);
+    const res = parsePhrase(tokenizer(value), dictionary);
    const success: ReturnType<typeof parseNoun>[number]["body"][] = res
      .filter((x) => !x.tokens.length)
      .map((x) => x.body);
    const errors = [
      ...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
    ];
    setText(value);
-    setErrors(errors);
+    setErrors(res.errors);
-    setResult(success);
+    setResult(removeRedundantVPSs(res.success));
  }
  return (
    <div className="mt-3" style={{ marginBottom: "1000px" }}>
@ -141,34 +140,8 @@ function ParserDemo({
          <div className="text-center">Did you mean:</div>
        </>
      )}
-      {result.map((r) => {
+      {result.map((res) => (
-        try {
+        <>
          const renderedNP: T.Rendered<T.NPSelection> = {
            type: "NP",
            selection: renderNounSelection(r.selection, r.inflected, "none"),
          };
          return (
            <>
              {r.inflected ? "INFLECTED" : "PLAIN"}
              <NPBlock
                opts={opts}
                script="p"
                english={getEnglishFromRendered(renderedNP)}
              >
                {renderedNP}
              </NPBlock>
            </>
          );
        } catch (e) {
          console.error(e);
          return <div>ERROR RENDERING</div>;
        }
      })}
      <JsonEditor data={result} />
      {/* {result.map((res) =>
        "inflected" in res ? (
          <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
        ) : "verb" in res ? (
          <EditableVP
            opts={opts}
            entryFeeder={entryFeeder}
@ -176,42 +149,76 @@ function ParserDemo({
          >
            {uncompleteVPSelection(res)}
          </EditableVP>
-        ) : (
+          <details>
-          // (() => {
+            <summary>AST</summary>
-          //   try {
+            <JsonEditor data={res} />
-          //     const rendered = renderVP(res);
+          </details>
-          //     const compiled = compileVP(rendered, res.form);
+        </>
-          //     return (
+      ))}
          //       <div>
          //         <CompiledPTextDisplay compiled={compiled} opts={opts} />
          //         {compiled.e && (
          //           <div className={`text-muted mt-2 text-center`}>
          //             {compiled.e.map((e, i) => (
          //               <div key={i}>{e}</div>
          //             ))}
          //           </div>
          //         )}
          //       </div>
          //     );
          //   } catch (e) {
          //     console.error(e);
          //     console.log({ res });
          //     return <div>ERROR</div>;
          //   }
          // })()
          <samp>
            <pre>{JSON.stringify(res, null, "  ")}</pre>
          </samp>
        )
      )} */}
      <details>
        <summary>AST</summary>
        <samp>
          <pre>{JSON.stringify(result, null, "  ")}</pre>
        </samp>
      </details>
    </div>
  );
 }
 export default ParserDemo;
 // {/* {result.map((res) =>
 // "inflected" in res ? (
 //   <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
 // ) : "verb" in res ? (
 //   <EditableVP
 //     opts={opts}
 //     entryFeeder={entryFeeder}
 //     allVariations={true}
 //   >
 //     {uncompleteVPSelection(res)}
 //   </EditableVP>
 // ) : (
 // (() => {
 //   try {
 //     const rendered = renderVP(res);
 //     const compiled = compileVP(rendered, res.form);
 //     return (
 //       <div>
 //         <CompiledPTextDisplay compiled={compiled} opts={opts} />
 //         {compiled.e && (
 //           <div className={`text-muted mt-2 text-center`}>
 //             {compiled.e.map((e, i) => (
 //               <div key={i}>{e}</div>
 //             ))}
 //           </div>
 //         )}
 //       </div>
 //     );
 //   } catch (e) {
 //     console.error(e);
 //     console.log({ res });
 //     return <div>ERROR</div>;
 //   }
 // })()
 //     <samp>
 //       <pre>{JSON.stringify(res, null, "  ")}</pre>
 //     </samp>
 //   )
 // )} */}
 // try {
 //   const renderedNP: T.Rendered<T.NPSelection> = {
 //     type: "NP",
 //     selection: renderNounSelection(r.selection, r.inflected, "none"),
 //   };
 //   return (
 //     <>
 //       {r.inflected ? "INFLECTED" : "PLAIN"}
 //       <NPBlock
 //         opts={opts}
 //         script="p"
 //         english={getEnglishFromRendered(renderedNP)}
 //       >
 //         {renderedNP}
 //       </NPBlock>
 //     </>
 //   );
 // } catch (e) {
 //   console.error(e);
 //   return <div>ERROR RENDERING</div>;
 // }
--- a/src/lib/package.json
+++ b/src/lib/package.json
@ -1,6 +1,6 @@
 {
  "name": "@lingdocs/inflect",
-  "version": "7.7.1",
+  "version": "7.7.3",
  "description": "Pashto inflector library",
  "main": "dist/lib/library.cjs",
  "module": "dist/lib/library.js",
--- a/src/lib/src/dictionary/dictionary.ts
+++ b/src/lib/src/dictionary/dictionary.ts
@ -19,7 +19,19 @@ function queryP(p: string): T.DictionaryEntry[] {
  }
  return dictDb.collection.find({ p });
 }
-const memoizedQueryP = queryP;
+const memoizedQueryP = memoize(queryP);
 /**
  * Fetches the single dictionary entry with the given `ts`.
  *
  * @throws if the dictionary collection is not available yet, or if no entry
  * is found for `ts`
  */
 function queryTs(ts: number): T.DictionaryEntry {
  const ready = Boolean(dictDb.collection);
  if (!ready) {
    throw new Error("dictionary not initialized yet");
  }
  const found = dictDb.findOneByTs(ts);
  if (found) {
    return found;
  }
  throw new Error("complement link broken");
 }
 const memoizedQueryTs = memoize(queryTs);
 function adjLookup(p: string): T.AdjectiveEntry[] {
  const res = memoizedQueryP(p);
@ -33,26 +45,51 @@ function nounLookup(p: string): T.NounEntry[] {
 function otherLookup(
  key: keyof T.DictionaryEntry,
-  p: string
+  p: string,
  regex?: boolean
 ): T.DictionaryEntry[] {
  if (!dictDb.collection) {
    return [];
  }
-  return dictDb.collection.find({ [key]: p });
+  return dictDb.collection.find({ [key]: regex ? variationRegex(p) : p });
 }
 function specialPluralLookup(p: string): T.NounEntry[] {
  if (!dictDb.collection) {
    return [];
  }
-  const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
+  const regex = variationRegex(p);
  return dictDb.collection
    .find({
-      $or: [{ ppp: { $regex: regex } }, { app: { $regex: regex } }],
+      $or: [{ ppp: regex }, { app: regex }],
    })
    .filter(tp.isNounEntry);
 }
 /**
  * Looks up the verb entries spelled exactly `p`. When an entry has a link
  * (`l`), the linked entry is fetched by ts and attached as `complement`.
  * Returns an empty list while the dictionary collection is not available.
  */
 function verbEntryLookup(p: string): T.VerbEntry[] {
  if (!dictDb.collection) {
    return [];
  }
  const verbs: T.VerbEntry[] = [];
  for (const entry of memoizedQueryP(p)) {
    if (!tp.isVerbDictionaryEntry(entry)) {
      continue;
    }
    if (entry.l) {
      verbs.push({ entry, complement: memoizedQueryTs(entry.l) });
    } else {
      verbs.push({ entry });
    }
  }
  return verbs;
 }
 /**
  * Creates a RegEx mongo query to search for a variation in a certain field,
  * ie. to search for کاته in کوت, کاته
  *
  * The variation must be preceded by the start of the field, whitespace or a
  * comma, and followed by a comma or the end of the field.
  *
  * @param p the variation to look for; it is matched literally
  * @returns an object of the form `{ $regex }` to use as a field query
  */
 function variationRegex(p: string): { $regex: RegExp } {
  // escape regex metacharacters so that `p` can never change the pattern
  // (or make the RegExp constructor throw)
  const escaped = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${escaped}($|,)`) };
 }
 export const dictionary: T.DictionaryAPI = {
  initialize: async () => await dictDb.initialize(),
  update: async () => await dictDb.updateDictionary(() => null),
@ -61,4 +98,5 @@ export const dictionary: T.DictionaryAPI = {
  nounLookup: memoize(nounLookup),
  otherLookup: memoize(otherLookup),
  specialPluralLookup: memoize(specialPluralLookup),
  verbEntryLookup: memoize(verbEntryLookup),
 };
--- a/src/lib/src/parsing/mini-test-dictionary.ts
+++ b/src/lib/src/parsing/mini-test-dictionary.ts
@ -1,7 +1,15 @@
 import * as T from "../../../types";
-import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
+import {
  isAdjectiveEntry,
  isNounEntry,
  isVerbDictionaryEntry,
 } from "../type-predicates";
 import { entries } from "../../../../vocab/mini-dict-entries";
 /**
  * Builds a `{ $regex }` query that finds `p` as one variation inside a
  * comma-separated field: `p` must be preceded by the start of the field,
  * whitespace or a comma, and followed by a comma or the end of the field.
  *
  * @param p the variation to look for; it is matched literally
  */
 function variationRegex(p: string): { $regex: RegExp } {
  // escape regex metacharacters so that `p` can never change the pattern
  // (or make the RegExp constructor throw)
  const escaped = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${escaped}($|,)`) };
 }
 const queryP = (p: string) => entries.filter((e) => e.p === p);
 function adjLookup(p: string): T.AdjectiveEntry[] {
  return queryP(p).filter(isAdjectiveEntry) as T.AdjectiveEntry[];
@ -13,18 +21,37 @@ function nounLookup(p: string): T.NounEntry[] {
 function otherLookup(
  key: keyof T.DictionaryEntry,
-  p: string
+  p: string,
  regex?: boolean
 ): T.DictionaryEntry[] {
  if (regex) {
    const { $regex: regex } = variationRegex(p);
    return entries.filter((e) => (e[key] as string)?.match(regex));
  }
  return entries.filter((e) => e[key] === p);
 }
 function specialPluralLookup(p: string): T.NounEntry[] {
-  const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
+  const { $regex: regex } = variationRegex(p);
  return entries.filter(
    (e) => (e.ppp?.match(regex) || e.app?.match(regex)) && isNounEntry(e)
  ) as T.NounEntry[];
 }
 /**
  * Test-dictionary version of the verb lookup: returns the verb entries
  * spelled exactly `p`. When an entry has a link (`l`), the entry with that
  * `ts` is attached as `complement`; it is left undefined if `entries` does
  * not contain the linked entry.
  */
 function verbEntryLookup(p: string): T.VerbEntry[] {
  const found: T.VerbEntry[] = [];
  for (const entry of entries) {
    if (entry.p !== p || !isVerbDictionaryEntry(entry)) {
      continue;
    }
    if (entry.l) {
      const linkedTs = entry.l;
      found.push({
        entry,
        complement: entries.find((e) => e.ts === linkedTs),
      });
    } else {
      found.push({ entry });
    }
  }
  return found;
 }
 export const testDictionary: T.DictionaryAPI = {
  // @ts-expect-error we won't mock the initialization
  initialize: async () => 0,
@ -35,4 +62,5 @@ export const testDictionary: T.DictionaryAPI = {
  nounLookup,
  otherLookup,
  specialPluralLookup,
  verbEntryLookup,
 };
--- a/src/lib/src/parsing/parse-adverb.ts
+++ b/src/lib/src/parsing/parse-adverb.ts
@ -1,16 +1,16 @@
 import * as T from "../../../types";
-import { LookupFunction } from "./lookup";
+import { isAdverbEntry } from "../type-predicates";
 import { returnParseResultS } from "./utils";
 export function parseAdverb(
  tokens: Readonly<T.Token[]>,
-  lookup: LookupFunction
+  dictionary: T.DictionaryAPI
 ): T.ParseResult<T.APSelection>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
-  const adverbs = lookup(first.s, "adverb");
+  const adverbs = dictionary.queryP(first.s).filter(isAdverbEntry);
  return adverbs.map((entry) =>
    returnParseResultS(rest, {
      type: "AP",
--- a/src/lib/src/parsing/parse-ap.ts
+++ b/src/lib/src/parsing/parse-ap.ts
@ -1,26 +1,25 @@
 import * as T from "../../../types";
 import { fmapParseResult } from "../fp-ps";
 import { LookupFunction } from "./lookup";
 import { parseAdverb } from "./parse-adverb";
 import { parseSandwich } from "./parse-sandwich";
 export function parseAP(
  s: Readonly<T.Token[]>,
-  lookup: LookupFunction,
+  dicitonary: T.DictionaryAPI,
  possesor: T.PossesorSelection | undefined
 ): T.ParseResult<T.APSelection>[] {
  if (s.length === 0) {
    return [];
  }
  return [
-    ...(!possesor ? parseAdverb(s, lookup) : []),
+    ...(!possesor ? parseAdverb(s, dicitonary) : []),
    ...fmapParseResult(
      (selection) =>
        ({
          type: "AP",
          selection,
        } as const),
-      parseSandwich(s, lookup, possesor)
+      parseSandwich(s, dicitonary, possesor)
    ),
  ];
 }
--- a/src/lib/src/parsing/parse-blocks.ts
+++ b/src/lib/src/parsing/parse-blocks.ts
@ -1,12 +1,11 @@
 import * as T from "../../../types";
 import { LookupFunction } from "./lookup";
 import { parseEquative } from "./parse-equative";
 import { parseKidsSection } from "./parse-kids-section";
 import { parseNeg } from "./parse-negative";
 import { parseNPAP } from "./parse-npap";
 import { parseVBP } from "./parse-vbp";
 import { parsePH } from "./parse-ph";
-import { parseVBE } from "./parse-vbe";
+import { parseVBE } from "./parse-vbe-new";
 import {
  bindParseResult,
  returnParseResult,
@ -18,7 +17,7 @@ import { isKedulStatEntry } from "./parse-verb-helpers";
 export function parseBlocks(
  tokens: Readonly<T.Token[]>,
-  lookup: LookupFunction,
+  dicitonary: T.DictionaryAPI,
  blocks: T.ParsedBlock[],
  kids: T.ParsedKid[]
 ): T.ParseResult<{
@ -35,13 +34,13 @@ export function parseBlocks(
  // TODO: rather parse VBP / VBE
  const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
-    ...(!inVerbSection ? parseNPAP(tokens, lookup) : []),
+    ...(!inVerbSection ? parseNPAP(tokens, dicitonary) : []),
    // ensure at most one of each PH, VBE, VBP
    ...(prevPh ? [] : parsePH(tokens)),
    ...(blocks.some(isParsedVBE)
      ? []
-      : [...parseVBE(tokens, lookup), ...parseEquative(tokens)]),
+      : [...parseVBE(tokens, dicitonary), ...parseEquative(tokens)]),
-    ...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, lookup)),
+    ...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, dicitonary)),
    ...(blocks.some((b) => b.type === "negative") ? [] : parseNeg(tokens)),
    ...parseKidsSection(tokens, []),
  ];
@ -50,7 +49,7 @@ export function parseBlocks(
    const errors: T.ParseError[] = [];
    if (r.type === "kids") {
      return {
-        next: parseBlocks(tokens, lookup, blocks, [...kids, ...r.kids]),
+        next: parseBlocks(tokens, dicitonary, blocks, [...kids, ...r.kids]),
        errors:
          blocks.length !== 1
            ? [{ message: "kids' section out of place" }]
@ -71,7 +70,7 @@ export function parseBlocks(
      return [];
    }
    return {
-      next: parseBlocks(tokens, lookup, [...blocks, r], kids),
+      next: parseBlocks(tokens, dicitonary, [...blocks, r], kids),
      errors,
    };
  });
--- a/src/lib/src/parsing/parse-noun-new.test.ts
+++ b/src/lib/src/parsing/parse-noun-new.test.ts
@ -1736,7 +1736,7 @@ describe("parsing nouns", () => {
    test(category, () => {
      cases.forEach(({ input, output }) => {
        const tokens = tokenizer(input);
-        const res = parseNoun(tokens, testDictionary, undefined, []).flatMap(
+        const res = parseNoun(tokens, testDictionary, undefined).flatMap(
          // only take the ones that used all the tokens
          ({ body, tokens }) => (tokens.length === 0 ? [body] : [])
        );
--- a/src/lib/src/parsing/parse-noun-new.ts
+++ b/src/lib/src/parsing/parse-noun-new.ts
@ -3,7 +3,12 @@ import { makeNounSelection } from "../phrase-building/make-selections";
 import { parseAdjective } from "./parse-adjective-new";
 import { parseDeterminer } from "./parse-determiner";
 import { parseNounWord } from "./parse-noun-word";
-import { bindParseResult, parserCombMany, toParseError } from "./utils";
+import {
  bindParseResult,
  parserCombMany,
  parserCombSucc3,
  toParseError,
 } from "./utils";
 type NounResult = { inflected: boolean; selection: T.NounSelection };
@ -15,57 +20,54 @@ export function parseNoun(
  if (tokens.length === 0) {
    return [];
  }
-  const detRes = parserCombMany(parseDeterminer)(tokens, dictionary);
+  const res = parserCombSucc3([
-  // TODO: add recognition of او between adjectives
+    parserCombMany(parseDeterminer),
-  return bindParseResult(detRes, (t, determiners) => {
+    parserCombMany(parseAdjective),
-    const adjRes = parserCombMany(parseAdjective)(t, dictionary);
+    parseNounWord,
-    return bindParseResult(adjRes, (tk, adjectives) => {
+  ])(tokens, dictionary);
-      const nounWord = parseNounWord(tk, dictionary);
+  return bindParseResult(res, (tkns, [determiners, adjectives, nounWord]) => {
-      return bindParseResult(nounWord, (tkns, nr) => {
+    const { error: adjErrors } = adjDetsMatch(
-        const { error: adjErrors } = adjDetsMatch(
+      adjectives,
-          adjectives,
+      nounWord.gender,
-          nr.gender,
+      nounWord.inflected ? 1 : 0,
-          nr.inflected ? 1 : 0,
+      nounWord.plural
-          nr.plural
+    );
-        );
+    const { error: detErrors } = adjDetsMatch(
-        const { error: detErrors } = adjDetsMatch(
+      determiners,
-          determiners,
+      nounWord.gender,
-          nr.gender,
+      nounWord.inflected ? 1 : 0,
-          nr.inflected ? 1 : 0,
+      nounWord.plural
-          nr.plural
+    );
-        );
+    const dupErrors = checkForDeterminerDuplicates(determiners);
-        const dupErrors = checkForDeterminerDuplicates(determiners);
+    const s = makeNounSelection(nounWord.entry, undefined);
-        const s = makeNounSelection(nr.entry, undefined);
+    const body: NounResult = {
-        const body: NounResult = {
+      inflected: nounWord.inflected,
-          inflected: nr.inflected,
+      selection: {
-          selection: {
+        ...s,
-            ...s,
+        gender: nounWord.gender,
-            gender: nr.gender,
+        number: nounWord.plural ? "plural" : "singular",
-            number: nr.plural ? "plural" : "singular",
+        adjectives: adjectives.map((a) => a.selection),
-            adjectives: adjectives.map((a) => a.selection),
+        determiners: determiners.length
-            determiners: determiners.length
+          ? {
-              ? {
+              type: "determiners",
-                  type: "determiners",
+              withNoun: true,
-                  withNoun: true,
+              determiners: determiners.map((d) => d.selection),
-                  determiners: determiners.map((d) => d.selection),
+            }
-                }
+          : undefined,
-              : undefined,
+        possesor,
-            possesor,
+      },
-          },
+    };
-        };
+    return [
-        return [
+      {
-          {
+        body,
-            body,
+        tokens: tkns,
-            tokens: tkns,
+        errors: [
-            errors: [
+          ...detErrors.map(toParseError),
-              ...detErrors.map(toParseError),
+          ...dupErrors.map(toParseError),
-              ...dupErrors.map(toParseError),
+          ...adjErrors.map(toParseError),
-              ...adjErrors.map(toParseError),
+        ],
-            ],
+      },
-          },
+    ];
        ];
      });
    });
  });
 }
--- a/src/lib/src/parsing/parse-noun.ts
+++ b/src/lib/src/parsing/parse-noun.ts
@ -1,191 +0,0 @@
 import * as T from "../../../types";
 import { getInflectionPattern } from "../inflection-pattern";
 import { makeNounSelection } from "../phrase-building/make-selections";
 import {
  isMascNounEntry,
  isNounEntry,
  isPluralNounEntry,
  isUnisexNounEntry,
 } from "../type-predicates";
 import { getInflectionQueries } from "./inflection-query";
 import { LookupFunction } from "./lookup";
 import { parseAdjective } from "./parse-adjective";
 import { bindParseResult } from "./utils";
 type NounResult = { inflected: boolean; selection: T.NounSelection };
 /**
  * Parses a noun at the start of `tokens`, optionally preceded by adjectives.
  *
  * Two kinds of results are combined:
  *  - the first token is parsed as an adjective and the function recurses on
  *    the remaining tokens with that adjective added to `adjectives`
  *  - the first token is taken as the noun itself and looked up through the
  *    inflection queries generated for it
  *
  * Adjective agreement problems do not reject a result; they are reported in
  * the `errors` of each result.
  *
  * @param tokens the tokens left to parse
  * @param lookup dictionary lookup function
  * @param possesor possesor selection to attach to the resulting noun selection
  * @param adjectives the adjectives already parsed in front of this noun
  * @returns one parse result per possible reading; empty if there are no tokens
  */
 export function parseNoun(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction,
  possesor: T.PossesorSelection | undefined,
  adjectives: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    given: string;
    selection: T.AdjectiveSelection;
  }[]
 ): T.ParseResult<NounResult>[] {
  if (tokens.length === 0) {
    return [];
  }
  // TODO: add recognition of او between adjectives
  // branch 1: treat the first token as an adjective and keep parsing after it
  const adjRes = parseAdjective(tokens, lookup);
  const withAdj = bindParseResult(adjRes, (tkns, adj) =>
    parseNoun(tkns, lookup, possesor, [...adjectives, adj])
  );
  // branch 2: treat the first token as the noun word itself
  const [first, ...rest] = tokens;
  const searches = getInflectionQueries(first.s, true);
  // accumulator for the results of branch 2
  const w: ReturnType<typeof parseNoun> = [];
  searches.forEach(({ search, details }) => {
    const nounEntries = lookup(search, "nounAdj").filter(isNounEntry);
    details.forEach((deets) => {
      const fittingEntries = nounEntries.filter(deets.predicate);
      fittingEntries.forEach((entry) => {
        // the genders this entry can take
        const genders: T.Gender[] = isUnisexNounEntry(entry)
          ? ["masc", "fem"]
          : isMascNounEntry(entry)
          ? ["masc"]
          : ["fem"];
        deets.gender.forEach((gender) => {
          // only keep genders that the query and the entry both allow
          if (genders.includes(gender)) {
            deets.inflection.forEach((inf) => {
              const { error: adjErrors } = adjsMatch(
                adjectives,
                gender,
                inf,
                deets.plural
              );
              // one inflection can stand for more than one inflected/number reading
              convertInflection(inf, entry, gender, deets.plural).forEach(
                ({ inflected, number }) => {
                  const selection = makeNounSelection(entry, undefined);
                  const errors = [
                    ...adjErrors.map((message) => ({
                      message,
                    })),
                  ];
                  w.push({
                    // exactly one token (the noun word) was consumed
                    tokens: rest,
                    body: {
                      inflected,
                      selection: {
                        ...selection,
                        // only override gender/number when the selection allows them to change
                        gender: selection.genderCanChange
                          ? gender
                          : selection.gender,
                        number: selection.numberCanChange
                          ? number
                          : selection.number,
                        adjectives: adjectives.map((a) => a.selection),
                        // TODO: could be nicer to validate that the possesor is inflected before
                        // and just pass in the selection
                        possesor,
                      },
                    },
                    errors,
                  });
                }
              );
            });
          }
        });
      });
    });
  });
  return [...withAdj, ...w];
 }
 /**
  * Checks that every adjective agrees with the noun it precedes in gender and
  * inflection.
  *
  * @param adjectives the adjectives parsed in front of the noun
  * @param gender the gender of the noun reading being considered
  * @param inf the inflection of the noun reading being considered
  * @param plural whether the noun reading is a plural form
  * @returns `ok: true` with no errors when all adjectives agree; otherwise
  * `ok: false` with one error message per disagreeing adjective
  */
 function adjsMatch(
  adjectives: Parameters<typeof parseNoun>[3],
  gender: T.Gender,
  inf: 0 | 1 | 2,
  plural: boolean | undefined
 ): { ok: boolean; error: string[] } {
  // for a plural form the inflection to compare against is one step higher (max 2)
  const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
  const unmatching = adjectives.filter(
    (adj) =>
      !adj.gender.includes(gender) || !adj.inflection.includes(inflection)
  );
  if (unmatching.length === 0) {
    return {
      ok: true,
      error: [],
    };
  }
  return {
    ok: false,
    error: unmatching.map((x) => {
      // show the dictionary form as well when the given word differs from it
      const adjText =
        x.given === x.selection.entry.p
          ? x.given
          : `${x.given} (${x.selection.entry.p})`;
      // only name the inflection when it is (part of) the problem; the
      // message template already supplies the words "should be"
      const expectedInflection = x.inflection.includes(inflection)
        ? ""
        : `${showInflection(inflection)} `;
      return `Adjective agreement error: ${adjText} should be ${expectedInflection}${gender}.`;
    }),
  };
 }
 /**
  * Lists the inflected/number readings that an inflection of `entry` can
  * stand for.
  *
  *  - plain (0): uninflected singular
  *  - first inflection (1): inflected singular and/or uninflected plural,
  *    depending on the entry's inflection pattern, its gender and whether it
  *    is a plural form / plural-only noun
  *  - second inflection (2): inflected plural
  *
  * When `plural` is set, an inflection below 2 is first raised by one.
  */
 function convertInflection(
  inflection: 0 | 1 | 2,
  entry: T.NounEntry | T.AdjectiveEntry,
  gender: T.Gender,
  plural: boolean | undefined
 ): {
  inflected: boolean;
  number: T.NounNumber;
 }[] {
  const pattern = getInflectionPattern(entry);
  const effective = (
    plural && inflection < 2 ? inflection + 1 : inflection
  ) as 0 | 1 | 2;
  if (effective === 2) {
    return [{ inflected: true, number: "plural" }];
  }
  if (effective === 0) {
    return [{ inflected: false, number: "singular" }];
  }
  // first inflection: it may be an inflected singular, a plain plural, or both
  const readings: { inflected: boolean; number: T.NounNumber }[] = [];
  const onlyPlural =
    (isNounEntry(entry) && isPluralNounEntry(entry)) || plural;
  const mascPattern4WithHe =
    pattern === 4 && entry.p.endsWith("ه") && gender === "masc";
  if (!onlyPlural && !mascPattern4WithHe) {
    readings.push({ inflected: true, number: "singular" });
  }
  if (pattern > 1 || (pattern > 0 && gender === "fem") || onlyPlural) {
    readings.push({ inflected: false, number: "plural" });
  }
  return readings;
 }
 /** Gives the human-readable name of an inflection number, for error messages. */
 function showInflection(inf: 0 | 1 | 2): string {
  switch (inf) {
    case 0:
      return "plain";
    case 1:
      return "first inflection";
    default:
      return "second inflection";
  }
 }
--- a/src/lib/src/parsing/parse-np.ts
+++ b/src/lib/src/parsing/parse-np.ts
@ -1,13 +1,12 @@
 import * as T from "../../../types";
 import { parsePronoun } from "./parse-pronoun";
-import { parseNoun } from "./parse-noun";
+import { parseNoun } from "./parse-noun-new";
 import { fmapParseResult } from "../fp-ps";
 import { parseParticiple } from "./parse-participle";
 import { LookupFunction } from "./lookup";
 export function parseNP(
  s: Readonly<T.Token[]>,
-  lookup: LookupFunction,
+  dicitonary: T.DictionaryAPI,
  possesor: T.PossesorSelection | undefined
 ): T.ParseResult<T.ParsedNP>[] {
  if (s.length === 0) {
@ -41,7 +40,7 @@ export function parseNP(
  return fmapParseResult(makeNPSl, [
    ...(!possesor ? parsePronoun(s) : []),
-    ...parseNoun(s, lookup, possesor, []),
+    ...parseNoun(s, dicitonary, possesor),
-    ...parseParticiple(s, lookup, possesor),
+    ...parseParticiple(s, dicitonary, possesor),
  ]);
 }
--- a/src/lib/src/parsing/parse-npap.ts
+++ b/src/lib/src/parsing/parse-npap.ts
@ -1,5 +1,4 @@
 import * as T from "../../../types";
 import { LookupFunction } from "./lookup";
 import { parseAP } from "./parse-ap";
 import { parseNP } from "./parse-np";
 import { parsePossesor } from "./parse-possesor";
@ -7,19 +6,25 @@ import { bindParseResult } from "./utils";
 export function parseNPAP(
  s: Readonly<T.Token[]>,
-  lookup: LookupFunction
+  dictionary: T.DictionaryAPI
 ): T.ParseResult<T.APSelection | T.ParsedNP>[] {
  if (s.length === 0) {
    return [];
  }
-  const possesor = parsePossesor(s, lookup, undefined);
+  const possesor = parsePossesor(s, dictionary, undefined);
  if (!possesor.length) {
-    return [...parseNP(s, lookup, undefined), ...parseAP(s, lookup, undefined)];
+    return [
      ...parseNP(s, dictionary, undefined),
      ...parseAP(s, dictionary, undefined),
    ];
  }
  return bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>(
    possesor,
    (tokens, p) => {
-      return [...parseNP(tokens, lookup, p), ...parseAP(tokens, lookup, p)];
+      return [
        ...parseNP(tokens, dictionary, p),
        ...parseAP(tokens, dictionary, p),
      ];
    }
  );
 }
--- a/src/lib/src/parsing/parse-participle.test.ts
+++ b/src/lib/src/parsing/parse-participle.test.ts
@ -4,14 +4,16 @@ import {
  makePossesorSelection,
 } from "../phrase-building/make-selections";
 import * as T from "../../../types";
-import { lookup, wordQuery } from "./lookup";
+import { testDictionary } from "./mini-test-dictionary";
 import { tokenizer } from "./tokenizer";
 import { parseNPAP } from "./parse-npap";
-const leedul = wordQuery("لیدل", "verb");
+const leedul = testDictionary.verbEntryLookup("لیدل")[0];
-const akheestul = wordQuery("اخیستل", "verb");
+const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
-const wahul = wordQuery("وهل", "verb");
+const wahul = testDictionary.verbEntryLookup("وهل")[0];
-const saray = wordQuery("سړی", "noun");
+const saray = testDictionary.nounLookup("سړی")[0];
 // TODO: uncomment and get parsing of short participles working
 const tests: {
  label: string;
@ -113,20 +115,20 @@ const tests: {
          },
        ],
      },
-      {
+      // {
-        input: "د سړي لیدو",
+      //   input: "د سړي لیدو",
-        output: [
+      //   output: [
-          {
+      //     {
-            inflected: true,
+      //       inflected: true,
-            selection: {
+      //       selection: {
-              ...makeParticipleSelection(leedul),
+      //         ...makeParticipleSelection(leedul),
-              possesor: makePossesorSelection(
+      //         possesor: makePossesorSelection(
-                makeNounSelection(saray, undefined)
+      //           makeNounSelection(saray, undefined)
-              ),
+      //         ),
-            },
+      //       },
-          },
+      //     },
-        ],
+      //   ],
-      },
+      // },
    ],
  },
 ];
@ -136,7 +138,7 @@ describe("parsing participles", () => {
    test(label, () => {
      cases.forEach(({ input, output }) => {
        const tokens = tokenizer(input);
-        const res = parseNPAP(tokens, lookup).map(({ body }) => body);
+        const res = parseNPAP(tokens, testDictionary).map(({ body }) => body);
        expect(res).toEqual(
          output.map(
            (x): T.ParsedNP => ({
--- a/src/lib/src/parsing/parse-participle.ts
+++ b/src/lib/src/parsing/parse-participle.ts
@ -1,5 +1,5 @@
 import * as T from "../../../types";
-import { LookupFunction } from "./lookup";
+import { shortVerbEndConsonant } from "./misc";
 type ParticipleResult = {
  inflected: boolean;
@ -7,9 +7,10 @@ type ParticipleResult = {
 };
 // TODO: should have adverbs with participle
 // TODO: NOTE this does not work with compound verbs yet
 export function parseParticiple(
  tokens: Readonly<T.Token[]>,
-  lookup: LookupFunction,
+  dicitonary: T.DictionaryAPI,
  possesor: T.PossesorSelection | undefined
 ): T.ParseResult<ParticipleResult>[] {
  if (tokens.length === 0) {
@ -20,8 +21,13 @@ export function parseParticiple(
    return [];
  }
  const inflected = first.s.endsWith("و");
-  const matches = lookup(first.s, "participle");
+
-  return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({
+  return [
    ...dicitonary.verbEntryLookup(inflected ? first.s.slice(0, -1) : first.s),
    ...(inflected && shortVerbEndConsonant.includes(first.s.at(-2) || "")
      ? dicitonary.verbEntryLookup(first.s.slice(0, -1) + "ل")
      : []),
  ].map<T.ParseResult<ParticipleResult>>((verb) => ({
    tokens: rest,
    body: {
      inflected,
--- a/src/lib/src/parsing/parse-phrase.ts
+++ b/src/lib/src/parsing/parse-phrase.ts
@ -1,24 +1,24 @@
 import * as T from "../../../types";
 import { lookup } from "./lookup";
 import { parseVP } from "./parse-vp";
 // شو should not be sheyaano !!
-export function parsePhrase(s: T.Token[]): {
+export function parsePhrase(
-  success: (
+  s: T.Token[],
-    | {
+  dicitonary: T.DictionaryAPI
-        inflected: boolean;
+): {
-        selection: T.NPSelection;
+  success: // | {
-      }
+  //     inflected: boolean;
-    | Omit<T.VBE, "ps">
+  //     selection: T.NPSelection;
-    | T.VPSelectionComplete
+  //   }
-  )[];
+  // | Omit<T.VBE, "ps">
  T.VPSelectionComplete[];
  errors: string[];
 } {
  const res = [
    // ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
    // ...parseVerb(s, verbLookup),
-    ...parseVP(s, lookup),
+    ...parseVP(s, dicitonary),
  ];
  const success = res.filter((x) => !x.tokens.length).map((x) => x.body);
--- a/src/lib/src/parsing/parse-possesor.test.ts
+++ b/src/lib/src/parsing/parse-possesor.test.ts
@ -4,16 +4,16 @@ import {
  makeNounSelection,
  makePronounSelection,
 } from "../phrase-building/make-selections";
 import { lookup, wordQuery } from "./lookup";
 import { parsePossesor } from "./parse-possesor";
 import { tokenizer } from "./tokenizer";
 import { isCompleteResult } from "./utils";
 import { testDictionary as dictionary } from "./mini-test-dictionary";
-const sturey = wordQuery("ستړی", "adj");
+const sturey = dictionary.adjLookup("ستړی")[0];
-const sarey = wordQuery("سړی", "noun");
+const sarey = dictionary.nounLookup("سړی")[0];
-const maashoom = wordQuery("ماشوم", "noun");
+const maashoom = dictionary.nounLookup("ماشوم")[0];
-const malguray = wordQuery("ملګری", "noun");
+const malguray = dictionary.nounLookup("ملګری")[0];
-const plaar = wordQuery("پلار", "noun");
+const plaar = dictionary.nounLookup("پلار")[0];
 const tests: {
  input: string;
@ -109,12 +109,12 @@ const tests: {
 test("parse possesor", () => {
  tests.forEach(({ input, output }) => {
    const tokens = tokenizer(input);
-    const parsed = parsePossesor(tokens, lookup, undefined);
+    const parsed = parsePossesor(tokens, dictionary, undefined);
    if (output === "error") {
      expect(parsed.some((x) => x.errors.length)).toBe(true);
    } else {
      expect(
-        parsePossesor(tokens, lookup, undefined)
+        parsePossesor(tokens, dictionary, undefined)
          .filter(isCompleteResult)
          .map((x) => x.body.np.selection)
      ).toEqual(output);
--- a/src/lib/src/parsing/parse-possesor.ts
+++ b/src/lib/src/parsing/parse-possesor.ts
@ -1,5 +1,4 @@
 import * as T from "../../../types";
 import { LookupFunction } from "./lookup";
 import { parseNP } from "./parse-np";
 import { bindParseResult } from "./utils";
 // TODO: maybe contractions should just be male to cut down on the
@ -19,7 +18,7 @@ const contractions: [string[], T.Person[]][] = [
 export function parsePossesor(
  tokens: Readonly<T.Token[]>,
-  lookup: LookupFunction,
+  dictionary: T.DictionaryAPI,
  prevPossesor: T.PossesorSelection | undefined
 ): T.ParseResult<T.PossesorSelection>[] {
  if (tokens.length === 0) {
@ -43,14 +42,14 @@ export function parsePossesor(
      ? [{ message: "a pronoun cannot have a possesor" }]
      : [];
    return contractions
-      .flatMap((p) => parsePossesor(rest, lookup, p))
+      .flatMap((p) => parsePossesor(rest, dictionary, p))
      .map((x) => ({
        ...x,
        errors: [...errors, ...x.errors],
      }));
  }
  if (first.s === "د") {
-    const np = parseNP(rest, lookup, undefined);
+    const np = parseNP(rest, dictionary, undefined);
    return bindParseResult(np, (tokens, body) => {
      const possesor: T.PossesorSelection = {
        shrunken: false,
@ -63,7 +62,11 @@ export function parsePossesor(
            [{ message: `possesor should be inflected` }]
          : [],
        // add and check error - can't add possesor to pronoun
-        next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
+        next: parsePossesor(
          tokens,
          dictionary,
          addPoss(prevPossesor, possesor)
        ),
      };
    });
  }
--- a/src/lib/src/parsing/parse-sandwich.ts
+++ b/src/lib/src/parsing/parse-sandwich.ts
@ -1,5 +1,4 @@
 import * as T from "../../../types";
 import { LookupFunction } from "./lookup";
 import { sandwiches } from "../sandwiches";
 import { parseNP } from "./parse-np";
 import { bindParseResult } from "./utils";
@ -14,7 +13,7 @@ import { bindParseResult } from "./utils";
 export function parseSandwich(
  s: Readonly<T.Token[]>,
-  lookup: LookupFunction,
+  dictionary: T.DictionaryAPI,
  possesor: T.PossesorSelection | undefined
 ): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
  if (s.length === 0) {
@ -27,7 +26,7 @@ export function parseSandwich(
    (x) => x.before && x.before.p === first.s
  );
  // TODO: this could be really repetitive...
-  const nps = parseNP(startMatches.length ? rest : s, lookup, possesor);
+  const nps = parseNP(startMatches.length ? rest : s, dictionary, possesor);
  return bindParseResult(nps, (tokens, np) => {
    if (!tokens.length) {
      return [];
--- a/src/lib/src/parsing/parse-vbe-new.test.txt
+++ b/src/lib/src/parsing/parse-vbe-new.test.txt
@ -7,28 +7,35 @@ import {
  wartlul,
  raatlul,
 } from "./irreg-verbs";
-import { lookup, wordQuery } from "./lookup";
+import { parseVBE } from "./parse-vbe-new";
 import { parseVBE } from "./parse-vbe";
 import { tokenizer } from "./tokenizer";
 import { getPeople, removeKeys } from "./utils";
 import { testDictionary } from "./mini-test-dictionary";
-const wahul = wordQuery("وهل", "verb");
+const wahul = testDictionary.verbEntryLookup("وهل")[0];
-const leekul = wordQuery("لیکل", "verb");
+const leekul = testDictionary.verbEntryLookup("لیکل")[0];
-const manul = wordQuery("منل", "verb");
+const manul = testDictionary.verbEntryLookup("منل")[0];
-// const gaalul = wordQuery("ګالل", "verb");
+const gaalul = testDictionary.verbEntryLookup("ګالل")[0];
-const rasedul = wordQuery("رسېدل", "verb");
+const rasedul = testDictionary.verbEntryLookup("رسېدل")[0];
-const leedul = wordQuery("لیدل", "verb");
+const leedul = testDictionary.verbEntryLookup("لیدل")[0];
-const khorul = wordQuery("خوړل", "verb");
+const awuxtul = testDictionary.verbEntryLookup("اوښتل")[0];
-const kenaastul = wordQuery("کېناستل", "verb");
+const khorul = testDictionary.verbEntryLookup("خوړل")[0];
-const prexodul = wordQuery("پرېښودل", "verb");
+const kenaastul = testDictionary.verbEntryLookup("کېناستل")[0];
-const xodul = wordQuery("ښودل", "verb");
+const kxenaastul = testDictionary.verbEntryLookup("کښېناستل")[0];
-const kexodul = wordQuery("کېښودل", "verb");
+const prexodul = testDictionary.verbEntryLookup("پرېښودل")[0];
-const katul = wordQuery("کتل", "verb");
+const prexowul = testDictionary.verbEntryLookup("پرېښوول")[0];
-const watul = wordQuery("وتل", "verb");
+const prexawul = testDictionary.verbEntryLookup("پرېښول")[0];
-const wurul = wordQuery("وړل", "verb");
+const xodul = testDictionary.verbEntryLookup("ښودل")[0];
-const akheestul = wordQuery("اخیستل", "verb");
+const kexodul = testDictionary.verbEntryLookup("کېښودل")[0];
-const alwatul = wordQuery("الوتل", "verb");
+const kxexodul = testDictionary.verbEntryLookup("کښېښودل")[0];
-// const dartlul = wordQuery("درتلل", "verb")
+const katul = testDictionary.verbEntryLookup("کتل")[0];
 const watul = testDictionary.verbEntryLookup("وتل")[0];
 const wurul = testDictionary.verbEntryLookup("وړل")[0];
 const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
 const alwatul = testDictionary.verbEntryLookup("الوتل")[0];
 const dartlul = testDictionary.verbEntryLookup("درتلل")[0];
 // TODO: Prefix searching on split verbs for perfective head parsing
 // TODO: azmoyul etc
 // TODO: cleaner and more thorough handling of ا separating verbs ee - wee etc
@ -311,19 +318,6 @@ const tests: {
          },
        ],
      },
      {
        input: "وینم",
        output: [
          {
            stem: {
              persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
              aspects: ["imperfective", "perfective"],
            },
            verb: leedul,
          },
        ],
      },
      // TODO!! THESE COULD ALSO BE MALE
      {
        input: "لیده",
        output: [
@ -364,42 +358,6 @@ const tests: {
          },
        ],
      },
      {
        input: "خوړ",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: khorul,
          },
        ],
      },
      {
        input: "کوت",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: katul,
          },
        ],
      },
      {
        input: "کاته",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: katul,
          },
        ],
      },
      {
        input: "خلم",
        output: [
@ -436,6 +394,11 @@ const tests: {
          },
        ],
      },
    ],
  },
  {
    label: "verbs with seperating perfective heads",
    cases: [
      {
        input: "الوځې",
        output: [
@ -460,6 +423,18 @@ const tests: {
          },
        ],
      },
      {
        input: "لوتلم",
        output: [
          {
            root: {
              persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
              aspects: ["perfective"],
            },
            verb: alwatul,
          },
        ],
      },
    ],
  },
  {
@ -492,6 +467,13 @@ const tests: {
            },
            verb: kenaastul,
          },
          {
            stem: {
              persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
              aspects: ["perfective"],
            },
            verb: kxenaastul,
          },
        ],
      },
      {
@ -507,46 +489,64 @@ const tests: {
        ],
      },
      {
-        input: "ناست",
+        input: "کېناسته",
        output: [
          {
            root: {
-              persons: [T.Person.ThirdSingMale],
+              persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
-              aspects: ["perfective"],
+              aspects: ["imperfective"],
            },
            verb: kenaastul,
          },
        ],
      },
      {
-        input: "پرېږدو",
+        input: "ناست",
-        output: [
+        output: [kenaastul, kxenaastul].map((verb) => ({
-          {
+          root: {
-            stem: {
+            persons: [T.Person.ThirdSingMale],
-              persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
+            aspects: ["perfective"],
              aspects: ["imperfective"],
            },
            verb: prexodul,
          },
-        ],
+          verb,
        })),
      },
      {
        input: "ناسته",
        output: [kenaastul, kxenaastul].map((verb) => ({
          root: {
            persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
            aspects: ["perfective"],
          },
          verb,
        })),
      },
      {
        input: "پرېږدو",
        output: [prexodul, prexowul, prexawul].map((verb) => ({
          stem: {
            persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
            aspects: ["imperfective"],
          },
          verb,
        })),
      },
      {
        input: "ږدو",
        output: [
-          {
+          ...[prexodul, prexawul, prexowul, kexodul, kxexodul].map((verb) => ({
            stem: {
              persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
-              aspects: ["perfective"],
+              aspects: ["perfective"] satisfies T.Aspect[],
            },
-            verb: prexodul,
+            verb,
-          },
+          })),
-          {
+          ...[kexodul, kxexodul].map((verb) => ({
            stem: {
              persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
-              aspects: ["imperfective", "perfective"],
+              aspects: ["imperfective"] satisfies T.Aspect[],
            },
-            verb: kexodul,
+            verb,
-          },
+          })),
        ],
      },
      {
@ -571,20 +571,13 @@ const tests: {
            },
            verb: xodul,
          },
-          {
+          ...[prexodul, kexodul, kxexodul].map((verb) => ({
            root: {
              persons: [T.Person.ThirdSingFemale],
-              aspects: ["perfective"],
+              aspects: ["perfective"] satisfies T.Aspect[],
            },
-            verb: prexodul,
+            verb,
-          },
+          })),
          {
            root: {
              persons: [T.Person.ThirdSingFemale],
              aspects: ["perfective"],
            },
            verb: kexodul,
          },
        ],
      },
      {
@ -661,43 +654,9 @@ const tests: {
          },
        ],
      },
      {
        input: "ړلم",
        output: [
          {
            root: {
              persons: getPeople(1, "sing"),
              aspects: ["perfective"],
            },
            verb: wurul,
          },
          {
            root: {
              persons: getPeople(1, "sing"),
              aspects: ["perfective"],
            },
            verb: tlul,
          },
        ],
      },
      {
        input: "ړ",
-        output: [
+        output: [],
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["perfective"],
            },
            verb: wurul,
          },
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["perfective"],
            },
            verb: tlul,
          },
        ],
      },
      // should not match with the prefix for perfective
      {
@ -713,6 +672,78 @@ const tests: {
  {
    label: "verbs with different 3rd pers sing past endings",
    cases: [
      {
        input: "خوړ",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: khorul,
          },
        ],
      },
      {
        input: "خوړه",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
              aspects: ["imperfective", "perfective"],
            },
            verb: khorul,
          },
        ],
      },
      {
        input: "کوت",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: katul,
          },
        ],
      },
      {
        input: "کاته",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: katul,
          },
        ],
      },
      {
        input: "واته",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: watul,
          },
        ],
      },
      {
        input: "ووت",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
            verb: watul,
          },
        ],
      },
      {
        input: "رسېد",
        output: [
@ -725,6 +756,18 @@ const tests: {
          },
        ],
      },
      {
        input: "رسېده",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
              aspects: ["imperfective", "perfective"],
            },
            verb: rasedul,
          },
        ],
      },
      {
        input: "کېناسته",
        output: [
@ -766,27 +809,69 @@ const tests: {
        ],
      },
      {
-        input: "واته",
+        input: "اوښت",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
-              aspects: ["imperfective", "perfective"],
+              aspects: ["imperfective"],
            },
-            verb: watul,
+            verb: awuxtul,
          },
        ],
      },
      {
-        input: "ووت",
+        input: "ښت",
        output: [],
      },
      {
        input: "اوښته",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
              aspects: ["imperfective"],
            },
            verb: awuxtul,
          },
        ],
      },
      {
        input: "ښود",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["imperfective", "perfective"],
            },
-            verb: watul,
+            verb: xodul,
          },
          ...[prexodul, kexodul, kxexodul].map((verb) => ({
            root: {
              persons: [T.Person.ThirdSingMale],
              aspects: ["perfective"] satisfies T.Aspect[],
            },
            verb,
          })),
        ],
      },
      {
        input: "ښوده",
        output: [
          {
            root: {
              persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
              aspects: ["imperfective", "perfective"],
            },
            verb: xodul,
          },
          ...[prexodul, kexodul, kxexodul].map((verb) => ({
            root: {
              persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
              aspects: ["perfective"] satisfies T.Aspect[],
            },
            verb,
          })),
        ],
      },
    ],
@ -971,7 +1056,7 @@ tests.forEach(({ label, cases }) => {
  test(label, () => {
    cases.forEach(({ input, output }) => {
      const tokens = tokenizer(input);
-      const vbs = parseVBE(tokens, lookup).map((r) => r.body);
+      const vbs = parseVBE(tokens, testDictionary).map((r) => r.body);
      const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
        return [
          ...acc,
--- a/src/lib/src/parsing/parse-vbe-new.ts
+++ b/src/lib/src/parsing/parse-vbe-new.ts
@ -0,0 +1,387 @@
 import * as T from "../../../types";
 import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
 import { parseKedul } from "./parse-kedul";
 import { getVerbEnding } from "./parse-verb-helpers";
 import { returnParseResults } from "./utils";
 import { entries as splitVerbEntries } from "./split-verbs";
 import * as tp from "../type-predicates";
 import memoize from "micro-memoize";
 import { pashtoConsonants } from "../pashto-consonants";
 // TODO: و ارزول
 // TODO: کول verbs!
 // check that aawu stuff is working
 // check oo`azmooy -
 //  TODO: proper use of sepOo (hasBreakawayAleph) when checking for perfective roots/stems
 // check څاته
 // laaRa shum etc
 // TODO: proper use of perfective with sh
 // TODO: use of raa, dar, war with sh
 // TODO: هغه لاړ
 // TODO: don't have کول کېدل in split-verbs
 // The "verb" member of the ParsedVBE `info` union.
 type BaseInfo = Extract<T.ParsedVBE["info"], { type: "verb" }>;
 // Verb info whose `base` is pinned to "stem" (results of the stem finders below).
 type StemInfo = Omit<BaseInfo, "base"> & {
  base: "stem";
 };
 // Verb info whose `base` is pinned to "root" (results of the root finders below).
 type RootInfo = Omit<BaseInfo, "base"> & {
  base: "root";
 };
 /**
  * Tries to read the first token as a conjugated verb.
  *
  * Forms recognised by `parseIrregularVerb` are returned on their own.
  * Otherwise the last character of the token is taken as the ending (unless
  * it is ل, in which case the whole token is kept as the base) and every
  * stem/root candidate for the remaining base is combined with the persons
  * that ending allows.
  *
  * @param tokens - remaining input; only the first token is consumed
  * @param dictionary - dictionary used for the stem/root lookups
  * @returns kedul results first, then stem-based, then root-based results
  */
 export function parseVBE(
  tokens: Readonly<T.Token[]>,
  dictionary: T.DictionaryAPI
 ): T.ParseResult<T.ParsedVBE>[] {
  if (!tokens.length) {
    return [];
  }
  const [first, ...rest] = tokens;
  const word = first.s;

  const irregulars = parseIrregularVerb(word);
  if (irregulars.length > 0) {
    return returnParseResults(rest, irregulars);
  }

  const kedulStat = parseKedul(tokens);
  const ending = word.at(-1) || "";
  const base = ending === "ل" ? word : word.slice(0, -1);
  const people = getVerbEnding(ending);
  // TODO: imperative for separating verbs
  const imperativePeople = getImperativeVerbEnding(ending);

  // stem-based forms: ordinary persons plus imperatives for each candidate
  const stemInfos = [
    ...findImperfectiveStem(base, dictionary),
    ...findPerfectiveStem(base, dictionary),
  ];
  const stemVBs: T.ParsedVBE[] = [];
  for (const info of stemInfos) {
    for (const person of people.stem) {
      stemVBs.push({ type: "VB", person, info });
    }
    for (const person of imperativePeople) {
      stemVBs.push({
        type: "VB",
        person,
        info: { ...info, imperative: true },
      });
    }
  }
  const stemRes = returnParseResults(rest, stemVBs);

  // root-based forms: the short 3rd pers. masc. sing. reading (if any) comes
  // before the ordinary persons of each candidate
  const rootInfos = [
    ...findImperfectiveRoot(base, dictionary),
    ...findPerfectiveRoot(base, dictionary),
  ];
  const rootVBs: T.ParsedVBE[] = [];
  for (const info of rootInfos) {
    rootVBs.push(...thirdPersSingMascShortFromRoot(base, ending, info));
    for (const person of people.root) {
      rootVBs.push({ type: "VB", person, info });
    }
  }
  rootVBs.push(...specialThirdPersMascSingForm(base, ending, dictionary));
  const rootRes = returnParseResults(rest, rootVBs);

  return [...kedulStat, ...stemRes, ...rootRes];
 }
 /**
  * Finds third person singular masculine readings that the ordinary
  * root + ending path in parseVBE does not produce.
  *
  * Two sources are combined (in this order in the result):
  *  - "regular": dictionary entries found through their `tppp` field
  *  - "hardEnding": roots found by putting a ل back on a word ending in د / ت
  *
  * @param base - the word without its last character (or the whole word, see parseVBE)
  * @param ending - the last character of the word
  * @param dicitonary - dictionary used for the lookups
  * @returns parsed verbs, all with person ThirdSingMale
  */
 function specialThirdPersMascSingForm(
  base: string,
  ending: string,
  dicitonary: T.DictionaryAPI
 ): T.ParsedVBE[] {
  // only words ending in ه or in a consonant are considered
  if (ending !== "ه" && !pashtoConsonants.includes(ending)) {
    return [];
  }
  // NOTE: the two commented-out sections below are disabled lookups against
  // splitVerbEntries; they are kept as-is for reference.
  // const imperfectiveWSep = [base + ending, ...(ending === "ه" ? [base] : [])]
  //   .flatMap((v) =>
  //     splitVerbEntries.filter((entry) => entry.entry.p.slice(0, -1) === v)
  //   )
  //   .map<T.ParsedVBE>((verb) => ({
  //     type: "VB",
  //     person: T.Person.ThirdSingMale,
  //     info: {
  //       type: "verb",
  //       aspect: "imperfective",
  //       base: "root",
  //       verb,
  //     },
  //   }));
  // const perfectiveWSep = [base + ending, ...(ending === "ه" ? [base] : [])]
  //   .flatMap((v) => {
  //     const b = splitVerbEntries.filter(({ entry }) => {
  //       if (entry.tppp) {
  //         return splitVarients(entry.tppp).some(
  //           (varient) => varient.slice(entry.separationAtP) === v
  //         );
  //       } else {
  //         return entry.p.slice(entry.separationAtP, -1) === v;
  //       }
  //     });
  //     return b;
  //   })
  //   .map<T.ParsedVBE>((verb) => ({
  //     type: "VB",
  //     person: T.Person.ThirdSingMale,
  //     info: {
  //       type: "verb",
  //       aspect: "perfective",
  //       base: "root",
  //       verb,
  //     },
  //   }));
  // Words ending in د (after ې / و) or in ت (after س / ښ, with a base longer
  // than one character): look up the roots of the word with ل appended.
  const hardEnding: T.ParsedVBE[] =
    (ending === "د" && ["ې", "و"].some((x) => base.endsWith(x))) ||
    (ending === "ت" &&
      ["س", "ښ"].some((x) => base.endsWith(x)) &&
      base.length > 1)
      ? [
          ...findPerfectiveRoot(base + ending + "ل", dicitonary),
          ...findImperfectiveRoot(base + ending + "ل", dicitonary),
        ].map<T.ParsedVBE>((info) => ({
          type: "VB",
          person: T.Person.ThirdSingMale,
          info,
        }))
      : [];
  // Search the `tppp` field for the whole word, and also for the word without
  // a final ه; each query is additionally tried with an alef prefixed.
  // NOTE(review): the meaning of the third argument (`true`) is defined by
  // DictionaryAPI.otherLookup and is not visible here - confirm.
  const regular: T.ParsedVBE[] = [
    base + ending,
    ...(ending === "ه" ? [base] : []),
  ]
    .flatMap(withAlefAdded)
    .flatMap((v) => dicitonary.otherLookup("tppp", v, true))
    // keep only verb entries that have a `tppp` value and no `l` value
    .filter(
      (e): e is T.VerbDictionaryEntry =>
        tp.isVerbDictionaryEntry(e) && !e.l && !!e.tppp
    )
    .flatMap((entry) =>
      // NOT IF STARTS WITH ALEPH!
      // aspects: entries with separationAtP -> imperfective only;
      // entry starts with alef but the word does not -> perfective only;
      // otherwise both aspects
      (entry.separationAtP
        ? (["imperfective"] as const)
        : startsWithAleph(entry.p) && !startsWithAleph(base)
        ? (["perfective"] as const)
        : (["imperfective", "perfective"] as const)
      ).map<T.ParsedVBE>((aspect) => ({
        type: "VB" as const,
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect,
          base: "root",
          verb: { entry },
        } as const,
      }))
    );
  return [...regular, ...hardEnding];
  //   ...imperfectiveWSep, ...perfectiveWSep];
 }
 /**
  * Gives the extra third person singular masculine reading of a root form
  * ending in ه.
  *
  * Nothing is produced when the entry carries its own `tppp` form, when the
  * ending is not ه, or when the base still ends in ل.
  *
  * @param base - the word without its ending
  * @param ending - the last character of the word
  * @param info - root info of the candidate verb
  * @returns zero or one parsed verb with person ThirdSingMale
  */
 function thirdPersSingMascShortFromRoot(
  base: string,
  ending: string,
  info: RootInfo
 ): T.ParsedVBE[] {
  const hasOwnThirdPersForm = !!info.verb.entry.tppp;
  const isShortForm = ending === "ه" && !base.endsWith("ل");
  return !hasOwnThirdPersForm && isShortForm
    ? [{ type: "VB", person: T.Person.ThirdSingMale, info }]
    : [];
 }
 /**
  * Collects the verbs for which `s` is an imperfective stem.
  *
  * Candidates come from the regular stem search plus entries whose `psp`
  * field is found by the dictionary for `s`. The listed kedul forms are
  * never matched here.
  *
  * @param s - the stem to search for
  * @param dicitonary - dictionary used for the lookups
  * @returns one imperfective stem info per matching verb
  */
 function findImperfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
 ): StemInfo[] {
  const kedulForms = ["کېږ", "کېد", "ش", "شو", "شول"];
  if (kedulForms.includes(s)) {
    return [];
  }
  // regular matches first, then the irregular (psp) matches
  const verbs: T.VerbEntry[] = [...regStemSearch(s, dicitonary)];
  for (const e of dicitonary.otherLookup("psp", s)) {
    if (tp.isVerbDictionaryEntry(e) && !e.l) {
      verbs.push({ entry: e });
    }
  }
  return verbs.map((verb) => ({
    type: "verb",
    aspect: "imperfective",
    base: "stem",
    verb,
  }));
 }
 /**
  * Returns `s` followed by its two alef-prefixed variants (ا and آ).
  * A string that already starts with an alef is returned alone.
  */
 function withAlefAdded(s: string): string[] {
  if (startsWithAleph(s)) {
    return [s];
  }
  return [s, "ا" + s, "آ" + s];
 }
 /**
  * Memoized search of the split-verb list: keeps the entries whose stem form
  * (`ssp`, else `psp`, else `p`), cut at `separationAtP`, equals `s`.
  */
 const stemSplitLookup = memoize((s: string) =>
  splitVerbEntries.filter(({ entry }) => {
    const stemForm = entry.ssp || entry.psp || entry.p;
    return stemForm.slice(entry.separationAtP || 0) === s;
  })
 );
 /**
  * Collects the verbs for which `s` is a perfective stem.
  *
  * Kedul forms and words starting with an alef give no result. Dictionary
  * searches are run for `s` and its alef-prefixed variants and skip entries
  * with `separationAtP`; those are matched through `stemSplitLookup` instead.
  *
  * @param s - the stem to search for
  * @param dicitonary - dictionary used for the lookups
  * @returns one perfective stem info per matching verb
  */
 function findPerfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
 ): StemInfo[] {
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s) || startsWithAleph(s)) {
    return [];
  }
  const queries = withAlefAdded(s);

  const regulars = queries.flatMap((q) =>
    regStemSearch(q, dicitonary).filter((v) => !v.entry.separationAtP)
  );

  // irregular verbs that reuse their imperfective stem (no ssp of their own)
  const irregularsBasedOnImperf: T.VerbEntry[] = [];
  for (const q of queries) {
    for (const e of dicitonary.otherLookup("psp", q)) {
      if (
        tp.isVerbDictionaryEntry(e) &&
        !e.l &&
        !e.ssp &&
        !e.separationAtP
      ) {
        irregularsBasedOnImperf.push({ entry: e });
      }
    }
  }

  const verbs = [
    ...regulars,
    ...irregularsBasedOnImperf,
    ...stemSplitLookup(s),
  ];
  return verbs.map((verb) => ({
    type: "verb",
    aspect: "perfective",
    base: "stem",
    verb,
  }));
 }
 /**
  * Looks up the regular verbs that `s` could be a stem of.
  *
  * Two infinitives are tried: `s` + ل, accepted only for entries not marked
  * "intrans", and `s` (minus a trailing ېږ) + ېدل, accepted only for entries
  * marked "intrans". Entries marked "comp" or having `ssp` / `psp` are
  * rejected in both cases.
  *
  * @param s - the stem to search for
  * @param dicitonary - dictionary used for the lookups
  * @returns transitive matches followed by intransitive matches
  */
 function regStemSearch(s: string, dicitonary: T.DictionaryAPI): T.VerbEntry[] {
  const isPlainRegular = ({ entry }: T.VerbEntry): boolean =>
    !entry.c.includes("comp") && !entry.ssp && !entry.psp;
  const isIntrans = ({ entry }: T.VerbEntry): boolean =>
    entry.c.includes("intrans");

  const regTrans = dicitonary
    .verbEntryLookup(s + "ل")
    .filter((v) => isPlainRegular(v) && !isIntrans(v));

  const intransBase = s.endsWith("ېږ") ? s.slice(0, -2) : s;
  const regIntrans = dicitonary
    .verbEntryLookup(intransBase + "ېدل")
    .filter((v) => isPlainRegular(v) && isIntrans(v));

  return [...regTrans, ...regIntrans];
 }
 /**
  * Collects the verbs for which `s` is an imperfective root.
  *
  * Both `s` and `s` + ل are looked up as infinitives; entries marked "comp"
  * are dropped. The listed kedul forms are never matched here.
  *
  * @param s - the root to search for (with or without the final ل)
  * @param dictionary - dictionary used for the lookups
  * @returns one imperfective root info per matching verb
  */
 function findImperfectiveRoot(
  s: string,
  dictionary: T.DictionaryAPI
 ): RootInfo[] {
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  const reg = [s, s + "ل"]
    // call through the dictionary object: passing the method itself to
    // flatMap would drop its receiver and hand it (index, array) as well
    .flatMap((q) => dictionary.verbEntryLookup(q))
    .filter((e) => !e.entry.c.includes("comp"));
  return reg.map((verb) => ({
    type: "verb",
    aspect: "imperfective",
    base: "root",
    verb,
  }));
 }
 /**
  * Memoized search of the split-verb list: keeps the entries whose root form
  * (`prp`, else `p`), cut at `separationAtP`, equals `s` or `s` + ل.
  */
 const rootSplitLookup = memoize((s: string) => {
  const candidates = [s, s + "ل"];
  return splitVerbEntries.filter(({ entry }) => {
    const rootTail = (entry.prp || entry.p).slice(entry.separationAtP || 0);
    return candidates.includes(rootTail);
  });
 });
 /**
  * Collects the verbs for which `s` is a perfective root.
  *
  * Kedul forms and words starting with an alef give no result. `s` and
  * `s` + ل are looked up as infinitives, each also with an alef prefixed;
  * entries marked "comp" or having `prp` / `separationAtP` are dropped.
  * Matches from `rootSplitLookup` are appended afterwards.
  *
  * @param s - the root to search for (with or without the final ل)
  * @param dictionary - dictionary used for the lookups
  * @returns one perfective root info per matching verb
  */
 function findPerfectiveRoot(
  s: string,
  dictionary: T.DictionaryAPI
 ): RootInfo[] {
  if (startsWithAleph(s) || ["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  const reg = [s, s + "ل"]
    .flatMap((q) => withAlefAdded(q))
    // call through the dictionary object: passing the method itself to
    // flatMap would drop its receiver and hand it (index, array) as well
    .flatMap((q) => dictionary.verbEntryLookup(q))
    .filter(
      (e) =>
        !e.entry.c.includes("comp") && !e.entry.prp && !e.entry.separationAtP
    );
  return [...reg, ...rootSplitLookup(s)].map((verb) => ({
    type: "verb",
    aspect: "perfective",
    base: "root",
    verb,
  }));
 }
 /**
  * Maps a verb ending to the persons it can express as an imperative:
  * ه gives the second person singular, ئ the second person plural, and any
  * other ending gives nothing.
  */
 function getImperativeVerbEnding(e: string): T.Person[] {
  switch (e) {
    case "ه":
      return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
    case "ئ":
      return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
    default:
      return [];
  }
 }
 // TODO: could handle all sh- verbs for efficiency's sake
 /**
  * Recognises the four irregular forms ته / راته / ورته / درته.
  *
  * Each is returned as a single third person singular masculine,
  * imperfective root form of the matching verb (chosen by its prefix);
  * any other word gives an empty list.
  */
 function parseIrregularVerb(s: string): T.ParsedVBE[] {
  if (!["ته", "راته", "ورته", "درته"].includes(s)) {
    return [];
  }
  let verb: T.VerbEntry;
  if (s.startsWith("را")) {
    verb = raatlul;
  } else if (s.startsWith("ور")) {
    verb = wartlul;
  } else if (s.startsWith("در")) {
    verb = dartlul;
  } else {
    verb = tlul;
  }
  return [
    {
      type: "VB",
      info: {
        aspect: "imperfective",
        base: "root",
        type: "verb",
        verb,
      },
      person: T.Person.ThirdSingMale,
    },
  ];
 }
 // function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
 //   return !e.sepOo && startsWithAleph(e.p);
 // }
 /** Tells whether the first character of `base` is ا or آ. */
 function startsWithAleph(base: string): boolean {
  const firstChar = base[0];
  return firstChar === "ا" || firstChar === "آ";
 }
--- a/src/lib/src/parsing/parse-vbe.ts
+++ b/src/lib/src/parsing/parse-vbe.ts
@ -1,354 +0,0 @@
 import * as T from "../../../types";
 import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
 import { isInVarients, lastVowelNotA } from "../p-text-helpers";
 import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
 import { LookupFunction } from "./lookup";
 import { shortVerbEndConsonant } from "./misc";
 import { parseKedul } from "./parse-kedul";
 import { getVerbEnding } from "./parse-verb-helpers";
 // TODO: کول verbs!
 // check that aawu stuff is working
 // check oo`azmooy -
 // check څاته
 // laaRa shum etc
 // TODO: proper use of perfective with sh
 // TODO: use of raa, dar, war with sh
 // TODO: هغه لاړ
 /**
  * Parses the first token as a verb with an ending.
  *
  * The irregular tokens handled by `parseIrregularVerb` win outright. For
  * everything else the results of `parseKedul` come first, followed by every
  * reading `matchVerbs` finds among the verbs returned by `lookup`.
  *
  * @param tokens - the remaining tokens; only the first is consumed here
  * @param lookup - lookup function queried with the token text and "verb"
  * @returns one parse result per possible reading, each carrying the tokens
  *   after the first; empty when there are no tokens
  */
 export function parseVBE(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
 ): T.ParseResult<T.ParsedVBE>[] {
  if (!tokens.length) {
    return [];
  }
  const [first, ...rest] = tokens;
  // every reading found below consumes just the first token
  const toResult = (body: T.ParsedVBE): T.ParseResult<T.ParsedVBE> => ({
    tokens: rest,
    body,
    errors: [],
  });
  const irregular = parseIrregularVerb(first.s);
  if (irregular.length > 0) {
    return irregular.map((body) => toResult(body));
  }
  const kedulResults = parseKedul(tokens);
  const lastLetter = first.s.at(-1) || "";
  const people = getVerbEnding(lastLetter);
  const imperativePeople = getImperativeVerbEnding(lastLetter);
  // Rough lookup first: grab a wide pool of possible verbs cheaply
  // TODO: can optimize this to not have to look for possible stems/roots if none
  const candidates = lookup(first.s, "verb");
  // Then work out which candidates match exactly, and how
  const matched = matchVerbs(first.s, candidates, people, imperativePeople);
  return [...kedulResults, ...matched.map((body) => toResult(body))];
 }
 /**
  * Works out every way the token `s` can be read as an inflected form of one
  * of the candidate verbs in `entries`.
  *
  * Three groups of matches are collected, each for the imperfective and the
  * perfective aspect:
  *  - stem matches, only when `people.stem` or `imperativePeople` is non-empty
  *  - root matches, only when `people.root` is non-empty
  *  - third person masc. sing. matches, always tried, reported as root
  *    readings with `T.Person.ThirdSingMale`
  *
  * Entries whose `c` includes "comp" are never matched. When the base of the
  * token is one of کېږ / کېد / ش / شو / شول nothing is returned (presumably
  * those forms are left to `parseKedul` - confirm against the caller).
  *
  * @param s - the token text
  * @param entries - candidate verb entries to test against
  * @param people - persons the token's ending allows for stem and root readings
  * @param imperativePeople - persons the ending allows as an imperative
  * @returns all readings found, with `removeFVarientsFromVerb` applied to each
  *   matched verb
  */
 function matchVerbs(
  s: string,
  entries: T.VerbEntry[],
  people: {
    root: T.Person[];
    stem: T.Person[];
  },
  imperativePeople: T.Person[]
 ): T.ParsedVBE[] {
  const w: T.ParsedVBE[] = [];
  const lEnding = s.endsWith("ل");
  // the base is the token without its one-letter ending, unless it ends in ل
  const base = s.endsWith("ل") ? s : s.slice(0, -1);
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(base)) {
    return [];
  }
  // matches x exactly, or x without its last letter when the token has no ل ending
  const matchShortOrLong = (b: string, x: string) => {
    return b === x || (!lEnding && b === x.slice(0, -1));
  };
  if (people.stem.length || imperativePeople.length) {
    const stemMatches = {
      imperfective: entries.filter(({ entry: e }) => {
        if (e.c.includes("comp")) {
          return false;
        }
        if (e.psp) {
          return e.psp === base;
        }
        if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          return miniRoot + "ېږ" === base || miniRoot === base;
        } else {
          return e.p.slice(0, -1) === base;
        }
      }),
      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        // the base with an alef put back on the front
        const baseWAa = "ا" + base;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.ssp) {
          if (e.separationAtP) {
            const bRest = e.ssp.slice(e.separationAtP);
            if (bRest === base) {
              return [...acc, entry];
            }
          } else {
            if (e.ssp === base) {
              return [...acc, entry];
            }
          }
        } else if (e.psp) {
          if (hasBreakawayAlef(e) && startsWithAleph(base)) {
            return acc;
          }
          if (e.separationAtP) {
            const bRest = e.psp.slice(e.separationAtP);
            if (bRest === base) {
              return [...acc, entry];
            }
          } else {
            if (!e.sepOo) {
              if (baseWAa === e.psp) {
                return [...acc, entry];
              }
            }
            if (base === e.psp) {
              return [...acc, entry];
            }
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        } else if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
          if ([miniRoot, miniRootEg].includes(base)) {
            return [...acc, entry];
          }
        } else {
          const eb = e.p.slice(0, -1);
          if (eb === base) {
            return [...acc, entry];
          } else if (!e.sepOo) {
            // Compare the alef-restored base with the entry's stem, mirroring
            // the psp branch above. The previous comparison against
            // base.slice(1) could never be true, so this match never fired.
            if (baseWAa === eb) {
              return [...acc, entry];
            }
          }
        }
        return acc;
      }, []),
    };
    Object.entries(stemMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.stem.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
        imperativePeople.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
              imperative: true,
            },
          });
        });
      });
    });
  }
  if (people.root.length) {
    const rootMatches = {
      imperfective: entries.filter(
        ({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
      ),
      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.separationAtP) {
          const b = e.prp || e.p;
          const bRest = b.slice(e.separationAtP);
          if (matchShortOrLong(base, bRest)) {
            return [...acc, entry];
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
          return acc;
        } else {
          const p = e.prp || e.p;
          if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
            return [...acc, entry];
          }
        }
        return acc;
      }, []),
    };
    Object.entries(rootMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.root.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "root",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
      });
    });
  }
  // third person masc. sing. forms are checked against the whole token as well
  const hamzaEnd = s.at(-1) === "ه";
  const oEnd = s.at(-1) === "و";
  const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
  const tppMatches = {
    imperfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      if (!e.prp && isInVarients(e.tppp, s)) {
        return [...acc, entry];
      }
      if (oEnd && matchShortOrLong(base, e.p)) {
        return [...acc, entry];
      }
      if (
        lastVowelNotA(e.g.slice(0, -2)) &&
        // "XX" is a placeholder that cannot equal any entry text
        (hamzaEnd ? base : abruptEnd ? s : "XX") === e.p.slice(0, -1)
      ) {
        return [...acc, entry];
      }
      // TODO: if check for modified aaXu thing!
      return acc;
    }, []),
    perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      if (e.separationAtP) {
        const b = e.prp || e.p;
        const bRest = b.slice(e.separationAtP);
        if (bRest === "شول") {
          return acc;
        }
        if (abruptEnd) {
          if (s === bRest.slice(0, -1)) {
            return [...acc, entry];
          }
        } else if (hamzaEnd) {
          if (base === bRest.slice(0, -1)) {
            return [...acc, entry];
          }
        } else if (oEnd) {
          if ([bRest, bRest.slice(0, -1)].includes(base)) {
            return [...acc, entry];
          }
        }
      } else if (!e.prp) {
        if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        }
        if (oEnd) {
          if ([e.p, e.p.slice(0, -1)].includes(base)) {
            return [...acc, entry];
          }
        } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
          const b = hamzaEnd ? base : s;
          const p = e.p.slice(0, -1);
          if (b === p) {
            return [...acc, entry];
          }
        }
      }
      if (!e.separationAtP) {
        if (isInVarients(e.tppp, s)) {
          return [...acc, entry];
        } else if (isInVarients(e.tppp, "ا" + s)) {
          return [...acc, entry];
        }
      }
      return acc;
    }, []),
  };
  Object.entries(tppMatches).forEach(([aspect, entries]) => {
    entries.forEach((verb) => {
      w.push({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect: aspect as T.Aspect,
          base: "root",
          verb: removeFVarientsFromVerb(verb),
        },
      });
    });
  });
  return w;
 }
 /**
  * Maps the last letter of a verb token to the persons it can mark as an
  * imperative ending.
  *
  * @param e - the final character of the token
  * @returns both second person singular persons for "ه", both second person
  *   plural persons for "ئ", and an empty array for anything else
  */
 function getImperativeVerbEnding(e: string): T.Person[] {
  switch (e) {
    case "ه":
      return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
    case "ئ":
      return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
    default:
      return [];
  }
 }
 // TODO: could handle all sh- verbs for efficiency's sake
 /**
  * Recognizes the four irregular tokens ته / راته / ورته / درته.
  *
  * @param s - the token text
  * @returns a single imperfective root reading in the third person singular
  *   masculine, using `raatlul`, `wartlul`, `dartlul` or `tlul` depending on
  *   the token's prefix; an empty array for any other token
  */
 function parseIrregularVerb(s: string): T.ParsedVBE[] {
  const irregularForms = ["ته", "راته", "ورته", "درته"];
  if (!irregularForms.includes(s)) {
    return [];
  }
  // the prefix on the token decides which irregular verb it belongs to
  const pickVerb = () => {
    if (s.startsWith("را")) {
      return raatlul;
    }
    if (s.startsWith("ور")) {
      return wartlul;
    }
    if (s.startsWith("در")) {
      return dartlul;
    }
    return tlul;
  };
  return [
    {
      type: "VB",
      info: {
        aspect: "imperfective",
        base: "root",
        type: "verb",
        verb: pickVerb(),
      },
      person: T.Person.ThirdSingMale,
    },
  ];
 }
 /**
  * True when the entry is not flagged `sepOo` and its `p` begins with one of
  * the alef letters "ا" or "آ".
  */
 function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
  if (e.sepOo) {
    return false;
  }
  const firstLetter = e.p[0];
  return firstLetter === "ا" || firstLetter === "آ";
 }
 /**
  * Tells whether the first letter of `base` is one of the two alef letters
  * "ا" or "آ". An empty string yields false.
  */
 function startsWithAleph(base: string): boolean {
  const firstLetter = base.charAt(0);
  return firstLetter === "ا" || firstLetter === "آ";
 }
--- a/src/lib/src/parsing/parse-vbp.test.txt
+++ b/src/lib/src/parsing/parse-vbp.test.txt
--- a/src/lib/src/parsing/parse-vbp.ts
+++ b/src/lib/src/parsing/parse-vbp.ts
@ -1,46 +1,46 @@
 import * as T from "../../../types";
-import { LookupFunction } from "./lookup";
+// import { returnParseResult } from "./utils";
 import { returnParseResult } from "./utils";
 export function parseVBP(
  tokens: Readonly<T.Token[]>,
-  lookup: LookupFunction
+  dictionary: T.DictionaryAPI
 ): T.ParseResult<T.ParsedVBP>[] {
  if (tokens.length === 0) {
    return [];
  }
-  return [
+  return [];
-    ...parsePastPart(tokens, lookup),
+  // return [
-    // ...parseAbility(tokens),
+  //   ...parsePastPart(tokens, lookup),
-  ];
+  //   // ...parseAbility(tokens),
  // ];
 }
-function parsePastPart(
+// function parsePastPart(
-  tokens: Readonly<T.Token[]>,
+//   tokens: Readonly<T.Token[]>,
-  lookup: LookupFunction
+//   dicitonary: T.DictionaryAPI,
-): T.ParseResult<T.ParsedVBP>[] {
+// ): T.ParseResult<T.ParsedVBP>[] {
-  const [{ s }, ...rest] = tokens;
+//   const [{ s }, ...rest] = tokens;
-  const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
+//   const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
-  if (!ending || !["ی", "ي", "ې"].includes(ending)) {
+//   if (!ending || !["ی", "ي", "ې"].includes(ending)) {
-    return [];
+//     return [];
-  }
+//   }
-  // TODO: ALSO HANDLE SHORT FORMS
+//   // TODO: ALSO HANDLE SHORT FORMS
-  const wOutEnd = s.slice(0, -1);
+//   const wOutEnd = s.slice(0, -1);
-  const matches = lookup(wOutEnd, "pPart");
+//   const matches = lookup(wOutEnd, "pPart");
-  const genNums = endingGenderNum(ending);
+//   const genNums = endingGenderNum(ending);
-  return matches
+//   return matches
-    .flatMap<T.ParsedVBP>((verb) =>
+//     .flatMap<T.ParsedVBP>((verb) =>
-      genNums.map<T.ParsedVBP>((genNum) => ({
+//       genNums.map<T.ParsedVBP>((genNum) => ({
-        type: "VB",
+//         type: "VB",
-        info: {
+//         info: {
-          type: "ppart",
+//           type: "ppart",
-          verb,
+//           verb,
-          genNum,
+//           genNum,
-        },
+//         },
-      }))
+//       }))
-    )
+//     )
-    .flatMap((m) => returnParseResult(rest, m));
+//     .flatMap((m) => returnParseResult(rest, m));
-}
+// }
 // function parseAbility(
 //   tokens: Readonly<T.Token[]>,
@ -70,33 +70,33 @@ function parsePastPart(
 //     .flatMap((m) => returnParseResult(rest, m));
 // }
-function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
+// function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
-  if (ending === "ی") {
+//   if (ending === "ی") {
-    return [
+//     return [
-      {
+//       {
-        gender: "masc",
+//         gender: "masc",
-        number: "singular",
+//         number: "singular",
-      },
+//       },
-    ];
+//     ];
-  }
+//   }
-  if (ending === "ي") {
+//   if (ending === "ي") {
-    return [
+//     return [
-      {
+//       {
-        gender: "masc",
+//         gender: "masc",
-        number: "plural",
+//         number: "plural",
-      },
+//       },
-    ];
+//     ];
-  }
+//   }
-  // if (ending === "ې") {
+//   // if (ending === "ې") {
-  return [
+//   return [
-    {
+//     {
-      gender: "fem",
+//       gender: "fem",
-      number: "singular",
+//       number: "singular",
-    },
+//     },
-    {
+//     {
-      gender: "fem",
+//       gender: "fem",
-      number: "plural",
+//       number: "plural",
-    },
+//     },
-  ];
+//   ];
-  // }
+//   // }
-}
+// }
--- a/src/lib/src/parsing/parse-verb-helpers.ts
+++ b/src/lib/src/parsing/parse-verb-helpers.ts
@ -4,6 +4,10 @@ export function isKedulStatEntry(v: T.VerbDictionaryEntry): boolean {
  return v.p === "کېدل" && v.e === "to become _____";
 }
 /**
 * gets the possible people for stem and root endings
 * but DOES NOT INCLUDE short third pers masc sing
 */
 export function getVerbEnding(e: string): {
  stem: T.Person[];
  root: T.Person[];
@ -34,7 +38,11 @@ export function getVerbEnding(e: string): {
    };
  } else if (e === "و") {
    return {
-      root: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
+      root: [
        T.Person.FirstPlurMale,
        T.Person.FirstPlurFemale,
        T.Person.ThirdSingMale,
      ],
      stem: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
    };
  } else if (e === "ئ") {
--- a/src/lib/src/parsing/parse-vp.test.txt
+++ b/src/lib/src/parsing/parse-vp.test.txt
--- a/src/lib/src/parsing/parse-vp.ts
+++ b/src/lib/src/parsing/parse-vp.ts
@ -24,7 +24,6 @@ import {
 import { parseBlocks } from "./parse-blocks";
 import { makePronounSelection } from "../phrase-building/make-selections";
 import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
 import { LookupFunction } from "./lookup";
 import { isSecondPerson, personToGenNum } from "../misc-helpers";
 import { equals, zip } from "rambda";
 import { isImperativeTense } from "../type-predicates";
@ -41,12 +40,12 @@ import { isImperativeTense } from "../type-predicates";
 export function parseVP(
  tokens: Readonly<T.Token[]>,
-  lookup: LookupFunction
+  dictionary: T.DictionaryAPI
 ): T.ParseResult<T.VPSelectionComplete>[] {
  if (tokens.length === 0) {
    return [];
  }
-  const blocks = parseBlocks(tokens, lookup, [], []);
+  const blocks = parseBlocks(tokens, dictionary, [], []);
  return bindParseResult(
    createPossesivePossibilities(blocks),
    (tokens, { blocks, kids }) => {
@ -892,7 +891,7 @@ function getMiniPronouns(kids: T.ParsedKid[]): T.ParsedMiniPronoun[] {
 function getPeopleFromMiniPronouns(kids: T.ParsedKid[]): T.Person[] {
  const p: T.Person[] = [];
-  for (let k of kids) {
+  for (const k of kids) {
    if (k === "me") {
      p.push(T.Person.FirstSingMale);
      p.push(T.Person.FirstSingFemale);
--- a/src/lib/src/parsing/utils.ts
+++ b/src/lib/src/parsing/utils.ts
@ -163,6 +163,38 @@ export function parserCombMany<R>(parser: Parser<R>): Parser<R[]> {
  return r;
 }
 /**
  * Combines two parsers into one that runs them in succession.
  *
  * The first parser runs on the incoming tokens; for each of its results the
  * second parser runs on the tokens handed on by `bindParseResult`
  * (presumably the tokens left over after the first parse). Each pair of
  * values is returned as a tuple via `returnParseResult`.
  *
  * @param parsers - the two parsers, in the order they should run
  * @returns a parser producing `[A, B]` tuples
  */
 export function parserCombSucc2<A, B>(
  parsers: [Parser<A>, Parser<B>]
 ): Parser<[A, B]> {
  const inSuccession = (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B]>[] => {
    const firstResults = parsers[0](tokens, dictionary);
    return bindParseResult(firstResults, (afterFirst, a) => {
      const secondResults = parsers[1](afterFirst, dictionary);
      return bindParseResult(secondResults, (afterSecond, b) =>
        returnParseResult(afterSecond, [a, b])
      );
    });
  };
  return inSuccession;
 }
 /**
  * Combines three parsers into one that runs them in succession.
  *
  * Each parser runs on the tokens handed on by `bindParseResult` from the one
  * before it (presumably the tokens left over after that parse). Each triple
  * of values is returned as a tuple via `returnParseResult`.
  *
  * @param parsers - the three parsers, in the order they should run
  * @returns a parser producing `[A, B, C]` tuples
  */
 export function parserCombSucc3<A, B, C>(
  parsers: [Parser<A>, Parser<B>, Parser<C>]
 ): Parser<[A, B, C]> {
  const inSuccession = (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B, C]>[] => {
    const firstResults = parsers[0](tokens, dictionary);
    return bindParseResult(firstResults, (afterFirst, a) => {
      const secondResults = parsers[1](afterFirst, dictionary);
      return bindParseResult(secondResults, (afterSecond, b) => {
        const thirdResults = parsers[2](afterSecond, dictionary);
        return bindParseResult(thirdResults, (afterThird, c) =>
          returnParseResult(afterThird, [a, b, c])
        );
      });
    });
  };
  return inSuccession;
 }
 export function isCompleteResult<C extends object>(
  r: T.ParseResult<C>
 ): boolean {
--- a/src/lib/src/phrase-building/np-tools.ts
+++ b/src/lib/src/phrase-building/np-tools.ts
@ -214,7 +214,7 @@ function addArticlesAndAdjs(
        ? np.determiners.determiners
            // @ts-ignore - weird, ts is not recognizing this as rendered
            .map((x) => (moreThanOneDet ? `(${x.e})` : x.e))
-            .join(" ")
+            .join(" ") + " "
        : "";
    const detsWithoutNoun = np.determiners && !np.determiners.withNoun;
    return `${np.determiners ? "" : articles}${determiners}${
--- a/src/lib/src/phrase-building/remove-redundant.ts
+++ b/src/lib/src/phrase-building/remove-redundant.ts
@ -0,0 +1,58 @@
 import * as T from "../../../types";
 import { compileVP } from "./compile";
 import { renderVP } from "./render-vp";
 /**
  * Drops verb phrase selections that compile to the same thing as an earlier
  * selection in the list.
  *
  * Every selection is rendered and compiled once. A later selection is
  * dropped when `isDuplicate` reports it equal to an earlier one that has not
  * itself been dropped. The first of each group is kept and the original
  * order is preserved.
  *
  * @param vs - the selections to de-duplicate
  * @returns a new array without the redundant selections
  */
 export function removeRedundantVPSs(
  vs: T.VPSelectionComplete[]
 ): T.VPSelectionComplete[] {
  const compiled = vs.map((v) => compileVP(renderVP(v), v.form));
  const redundant = new Set<number>();
  compiled.forEach((current, i) => {
    // something already marked redundant doesn't get to knock out others
    if (redundant.has(i)) {
      return;
    }
    compiled.forEach((other, j) => {
      if (j > i && isDuplicate(current, other)) {
        redundant.add(j);
      }
    });
  });
  return vs.filter((_, i) => !redundant.has(i));
 }
 /**
  * Decides whether two compiled phrases are duplicates of each other.
  *
  * They count as duplicates when both carry English glosses, the gloss lists
  * have the same length, the Pashto (`ps`) serializes to the same JSON, and
  * each pair of glosses is equal once gender markers are removed with
  * `removeGenderGloss`.
  *
  * @param a - first compiled phrase
  * @param b - second compiled phrase
  * @returns true only if all of the above hold; false whenever either side
  *   has no `e`
  */
 function isDuplicate(
  a: {
    ps: T.SingleOrLengthOpts<T.PsString[]>;
    e?: string[];
  },
  b: { ps: T.SingleOrLengthOpts<T.PsString[]>; e?: string[] }
 ): boolean {
  if (!a.e || !b.e) {
    return false;
  }
  if (a.e.length !== b.e.length) {
    return false;
  }
  // Compare the Pashto once, up front. Inside `every` it was re-serialized for
  // each gloss and skipped entirely when the gloss lists were empty, which
  // made phrases with different Pashto look like duplicates.
  if (JSON.stringify(a.ps) !== JSON.stringify(b.ps)) {
    return false;
  }
  const otherGlosses = b.e;
  return a.e.every(
    (x, i) => removeGenderGloss(x) === removeGenderGloss(otherGlosses[i] ?? "")
  );
 }
 /**
  * Strips the gender markers "(m.)", "(f.)", "(m. pl.)" and "(f. pl.)" from
  * an English gloss. Surrounding whitespace is left untouched.
  */
 function removeGenderGloss(s: string): string {
  // TODO: combine into one RegEx
  const withoutSingular = s.replace(/\((m|f)\.\)/g, "");
  return withoutSingular.replace(/\((m|f)\. pl\.\)/g, "");
 }
 /**
  * Collects the index of every element that satisfies the predicate.
  *
  * @param arr - the array to scan
  * @param f - predicate called with each element
  * @returns the matching indices in ascending order
  */
 function findAllIndices<N>(arr: N[], f: (x: N) => boolean): number[] {
  return arr.reduce<number[]>((found, item, idx) => {
    if (f(item)) {
      found.push(idx);
    }
    return found;
  }, []);
 }
--- a/src/lib/src/phrase-building/render-np.ts
+++ b/src/lib/src/phrase-building/render-np.ts
@ -178,7 +178,7 @@ function renderDeterminer({
      ? number === "plural"
        ? { p: "دغو", f: "dágho" }
        : gender === "masc"
-        ? { p: "دغه", f: "dághu" }
+        ? { p: "دغه", f: "dágha" }
        : { p: "دغې", f: "dághe" }
      : { p: "دغه", f: "dágha" };
    return {
@ -196,7 +196,7 @@ function renderDeterminer({
      ? number === "plural"
        ? { p: "هغو", f: "hágho" }
        : gender === "masc"
-        ? { p: "هغه", f: "hághu" }
+        ? { p: "هغه", f: "hágha" }
        : { p: "هغې", f: "hághe" }
      : { p: "هغه", f: "hágha" };
    return {
--- a/src/types.ts
+++ b/src/types.ts
@ -1259,8 +1259,13 @@ export type DictionaryAPI = {
  queryP: (p: string) => DictionaryEntry[];
  adjLookup: (p: string) => AdjectiveEntry[];
  nounLookup: (p: string) => NounEntry[];
-  otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[];
+  otherLookup: (
    key: keyof DictionaryEntry,
    p: string,
    regex?: boolean
  ) => DictionaryEntry[];
  specialPluralLookup: (p: string) => NounEntry[];
  verbEntryLookup: (p: string) => VerbEntry[];
 };
 export type Parser<R> = (
--- a/tsconfig.node.json
+++ b/tsconfig.node.json
@ -18,5 +18,5 @@
    "noUnusedParameters": true,
    "noFallthroughCasesInSwitch": true
  },
-  "include": ["vite.config.ts", "get-mini-dict.ts"]
+  "include": ["vite.config.ts", "get-mini-dict-and-split-verbs.ts"]
 }
--- a/vocab/mini-dict-tss.ts
+++ b/vocab/mini-dict-tss.ts
@ -34,6 +34,7 @@ export const entries: T.DictionaryEntry["ts"][] = [
  1527812908, // مېلمه
  1575924767041, // شپون
  1527815333, // نتور
  1527812881, // ماشوم
  // fem nouns
  1527811877, // دوستي
@ -50,4 +51,28 @@ export const entries: T.DictionaryEntry["ts"][] = [
  1589023873660, // فتح - fatha
  1527814342, // نفع - nafa
  1527815329, // تجربه
  // verbs
  1527815399, // وهل
  1527817298, // اخیستل
  1527812275, // لیدل
  1527812856, // لیکل
  1527815085, // منل
  1527817661, // ګالل
  1527813573, // رسېدل
  1527812790, // خوړل
  1527812759, // کېناستل
  1527812758, // کښېناستل
  1527815190, // پرېښودل
  1527811293, // ښودل
  1527812284, // کېښودل
  1527812751, // کتل
  1527823376, // وتل
  1527816865, // وړل
  1527813473, // الوتل
  1585228551150, // درتلل
  1527817577, // کښېښودل
  1527814012, // اوښتل
  1577390597820, // پرېښوول
  1527815191, // پرېښول
 ];
Author	SHA1	Message	Date
adueck	f17ebddaa1	comment out tests for parser in progress	2024-12-06 15:10:17 +05:00
adueck	0ade410698	fix up masc inflection of demonstratives	2024-12-06 15:06:14 +05:00
adueck	73eb04d7e0	parser working - a bit slow/rough - with dictionary lookup	2024-10-14 20:22:32 -04:00