some verb bug fixes and starting with AP parsing

2023-09-04 14:26:24 +04:00 · 2023-09-04 14:26:24 +04:00 · a9f93dc717
parent ded0030395
commit a9f93dc717
10 changed files with 416 additions and 155 deletions
--- a/src/lib/src/parsing/lookup.tsx
+++ b/src/lib/src/parsing/lookup.tsx
@ -1,11 +1,16 @@
 import nounsAdjs from "../../../nouns-adjs";
 import verbs from "../../../verbs";
 import * as T from "../../../types";
-import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
+import {
  isAdjectiveEntry,
  isAdverbEntry,
  isNounEntry,
 } from "../type-predicates";
 import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
 import { splitVarients, undoAaXuPattern } from "../p-text-helpers";
 import { arraysHaveCommon } from "../misc-helpers";
 import { shortVerbEndConsonant } from "./misc";
 import { kawulDyn, kawulStat, kedulDyn, kedulStat, tlul } from "./irreg-verbs";
 export type LookupFunction = typeof lookup;
@ -13,11 +18,13 @@ export function lookup(
  s: Partial<T.DictionaryEntry>,
  type: "nounAdj"
 ): T.DictionaryEntry[];
 export function lookup(s: string, type: "adverb"): T.AdverbEntry[];
 export function lookup(s: string, type: "pPart"): T.VerbEntry[];
 export function lookup(s: string, type: "verb" | "participle"): T.VerbEntry[];
 export function lookup(
  s: string | Partial<T.DictionaryEntry>,
-  type: "nounAdj" | "verb" | "participle"
+  type: "nounAdj" | "verb" | "participle" | "pPart" | "adverb"
-): T.DictionaryEntry[] | T.VerbEntry[] {
+): T.DictionaryEntry[] | T.VerbEntry[] | T.AdverbEntry[] {
  if (type === "nounAdj") {
    if (typeof s !== "object") {
      throw new Error("invalid query for noun / adj lookup");
@ -30,6 +37,12 @@ export function lookup(
  if (type === "verb") {
    return verbLookup(s);
  }
  if (type === "pPart") {
    return pPartLookup(s);
  }
  if (type === "adverb") {
    return adverbLookup(s);
  }
  return participleLookup(s);
 }
@ -60,6 +73,12 @@ function nounAdjLookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
  return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
 }
 // Collects every entry of the nouns-adjs word list that passes isAdverbEntry
 // and whose `p` field is exactly equal to `s`.
 function adverbLookup(s: string): T.AdverbEntry[] {
  const hits = nounsAdjs.filter((candidate) => {
    if (!isAdverbEntry(candidate)) {
      return false;
    }
    return candidate.p === s;
  });
  // the filter callback is a plain boolean test, so the narrowed element
  // type has to be asserted on the result
  return hits as T.AdverbEntry[];
 }
 export function shouldCheckTpp(s: string): boolean {
  return (
    ["د", "ړ", "ت", "ځ", "و", "ډ", "ڼ", "ن", "ه"].includes(s.slice(-1)) ||
@ -85,6 +104,27 @@ function participleLookup(input: string): T.VerbEntry[] {
  return [];
 }
 // Finds the verb entries that a past participle base could belong to.
 // `input` is the base without its final ending letter (parsePastPart strips
 // the last character of the token before calling the "pPart" lookup).
 // Returns an empty array when the base matches nothing.
 function pPartLookup(input: string): T.VerbEntry[] {
  // bases that are answered straight from the hard-coded irregular verbs
  switch (input) {
    case "کړ":
      return [kawulStat, kawulDyn];
    case "شو":
      return [kedulStat, kedulDyn];
    case "تل":
      // TODO: are ورتلل، راتلل، درتلل also like this?
      return [tlul];
  }
  const lastTwo = input.slice(-2);
  if (lastTwo === "ست" || lastTwo === "ښت") {
    // bases ending in ست / ښت are looked up with a ل appended
    // (presumably the short form that drops the final ل)
    const withLaam = input + "ل";
    return verbs.filter(({ entry }) => entry.p === withLaam);
  }
  if (input.slice(-1) !== "ل") {
    return [];
  }
  // a base that already ends in ل is compared to the entry's `p` as-is
  return verbs.filter(({ entry }) => entry.p === input);
 }
 function verbLookup(input: string): T.VerbEntry[] {
  // TODO:
  // only look up forms if there's an ending
--- a/src/lib/src/parsing/parse-ap.ts
+++ b/src/lib/src/parsing/parse-ap.ts
@ -1 +1,23 @@
-// TODO: ability to treat a doubled noun as an adverb
+import * as T from "../../../types";
 import { LookupFunction } from "./lookup";
 import { returnParseResultS } from "./utils";
 // Tries to read an AP block off the front of the token list.
 // Only the first token is examined: every entry returned by the "adverb"
 // lookup for that token's `s` yields one parse result whose remaining
 // tokens are everything after the first token.
 // Returns an empty array when there are no tokens or no lookup matches.
 export function parseAP(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
 ): T.ParseResult<T.APSelection>[] {
  if (!tokens.length) {
    return [];
  }
  const head = tokens[0];
  const remaining = tokens.slice(1);
  const results: T.ParseResult<T.APSelection>[] = [];
  for (const entry of lookup(head.s, "adverb")) {
    const selection: T.APSelection = {
      type: "AP",
      selection: {
        type: "adverb",
        entry,
      },
    };
    results.push(returnParseResultS(remaining, selection));
  }
  return results;
 }
--- a/src/lib/src/parsing/parse-blocks.ts
+++ b/src/lib/src/parsing/parse-blocks.ts
@ -1,5 +1,6 @@
 import * as T from "../../../types";
 import { LookupFunction } from "./lookup";
 import { parseAP } from "./parse-ap";
 import { parseEquative } from "./parse-equative";
 import { parseKidsSection } from "./parse-kids-section";
 import { parseNeg } from "./parse-negative";
@ -25,21 +26,16 @@ export function parseBlocks(
    (b): b is T.ParsedPH => b.type === "PH"
  );
  const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
-  const np = prevPh ? [] : parseNP(tokens, lookup);
+
  const ph = vbExists || prevPh ? [] : parsePH(tokens);
  const vb = parseVerb(tokens, lookup);
  const vbp = parsePastPart(tokens, lookup);
  const eq = parseEquative(tokens);
  const neg = parseNeg(tokens);
  const kidsR = parseKidsSection(tokens, []);
  const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
-    ...np,
+    ...(prevPh ? [] : parseAP(tokens, lookup)),
-    ...ph,
+    ...(prevPh ? [] : parseNP(tokens, lookup)),
-    ...neg,
+    ...(vbExists || prevPh ? [] : parsePH(tokens)),
-    ...vb,
+    ...parseVerb(tokens, lookup),
-    ...vbp,
+    ...parsePastPart(tokens, lookup),
-    ...eq,
+    ...parseEquative(tokens),
-    ...kidsR,
+    ...parseNeg(tokens),
    ...parseKidsSection(tokens, []),
  ];
  // TODO: is this necessary?
  // if (!allResults.length) {
--- a/src/lib/src/parsing/parse-past-part.test.ts
+++ b/src/lib/src/parsing/parse-past-part.test.ts
@ -0,0 +1,186 @@
 import * as T from "../../../types";
 import { lookup, wordQuery } from "./lookup";
 import { tokenizer } from "./tokenizer";
 import { parsePastPart } from "./parse-past-part";
 import { kawulDyn, kawulStat, kedulDyn, kedulStat } from "./irreg-verbs";
 // Verb entries fetched with wordQuery; they serve as the expected `verb`
 // values in the test cases of this file.
 const leedul = wordQuery("لیدل", "verb");
 const akheestul = wordQuery("اخیستل", "verb");
 const wahul = wordQuery("وهل", "verb");
 const awuxtul = wordQuery("اوښتل", "verb");
 // NOTE(review): this query is romanized while the others are in Pashto
 // script — confirm wordQuery is meant to match on this form
 const tlul = wordQuery("tlul", "verb");
 // Builds the block expected back from parsePastPart for one verb with one
 // gender / number combination.
 function expectedPPart(
  verb: T.VerbEntry,
  gender: "masc" | "fem",
  number: "singular" | "plural"
 ): T.ParsedVBP {
  return {
    type: "VB",
    info: {
      type: "ppart",
      genNum: { gender, number },
      verb,
    },
  };
 }
 // Table of inputs and the exact list of blocks (order matters) that
 // parsePastPart should produce for each of them, grouped by label.
 const tests: {
  label: string;
  cases: {
    input: string;
    output: T.ParsedVBP[];
  }[];
 }[] = [
  {
    label: "regular past participles",
    cases: [
      {
        input: "لیدلی",
        output: [expectedPPart(leedul, "masc", "singular")],
      },
      {
        input: "وهلي",
        output: [expectedPPart(wahul, "masc", "plural")],
      },
      {
        // this ending yields both the singular and the plural feminine reading
        input: "وهلې",
        output: (["singular", "plural"] as const).map((number) =>
          expectedPPart(wahul, "fem", number)
        ),
      },
    ],
  },
  {
    label: "past participles with short forms",
    cases: [
      {
        input: "اخیستی",
        output: [expectedPPart(akheestul, "masc", "singular")],
      },
      {
        input: "اخیستلی",
        output: [expectedPPart(akheestul, "masc", "singular")],
      },
      {
        input: "اوښتی",
        output: [expectedPPart(awuxtul, "masc", "singular")],
      },
    ],
  },
  {
    label: "irregular past participles",
    cases: [
      {
        input: "تلی",
        output: [expectedPPart(tlul, "masc", "singular")],
      },
      {
        input: "کړي",
        output: [kawulStat, kawulDyn].map((verb) =>
          expectedPPart(verb, "masc", "plural")
        ),
      },
      {
        input: "شوي",
        output: [kedulStat, kedulDyn].map((verb) =>
          expectedPPart(verb, "masc", "plural")
        ),
      },
    ],
  },
 ];
 // Registers one jest test per labelled group in `tests`; inside each test
 // every case is tokenized, parsed with parsePastPart, and the `body` of
 // each result is compared against the expected output.
 describe("parsing past participles", () => {
  for (const { label, cases } of tests) {
    // eslint-disable-next-line jest/valid-title
    test(label, () => {
      for (const { input, output } of cases) {
        const parsed = parsePastPart(tokenizer(input), lookup);
        const bodies = parsed.map((result) => result.body);
        expect(bodies).toEqual(output);
      }
    });
  }
 });
--- a/src/lib/src/parsing/parse-past-part.ts
+++ b/src/lib/src/parsing/parse-past-part.ts
@ -16,7 +16,7 @@ export function parsePastPart(
  }
  // TODO: ALSO HANDLE SHORT FORMS
  const wOutEnd = s.slice(0, -1);
-  const matches = lookup(wOutEnd, "participle");
+  const matches = lookup(wOutEnd, "pPart");
  const genNums = endingGenderNum(ending);
  return matches
    .flatMap<T.ParsedVBP>((verb) =>
--- a/src/lib/src/parsing/parse-verb.test.ts
+++ b/src/lib/src/parsing/parse-verb.test.ts
@ -31,7 +31,8 @@ const akheestul = wordQuery("اخیستل", "verb");
 const alwatul = wordQuery("الوتل", "verb");
 // const dartlul = wordQuery("درتلل", "verb")
-// todo alwatul waalwatul akhistul azmoyul etc
+// TODO: azmoyul etc
 // TODO: cleaner and more thorough handling of ا separating verbs ee - wee etc
 const tests: {
  label: string;
@ -394,7 +395,7 @@ const tests: {
          {
            root: {
              persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
-              aspects: ["imperfective", "perfective"],
+              aspects: ["imperfective"],
            },
            verb: akheestul,
          },
@ -419,7 +420,7 @@ const tests: {
          {
            stem: {
              persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
-              aspects: ["imperfective", "perfective"],
+              aspects: ["imperfective"],
            },
            verb: alwatul,
          },
--- a/src/lib/src/parsing/parse-verb.ts
+++ b/src/lib/src/parsing/parse-verb.ts
@ -12,9 +12,6 @@ import {
 import { LookupFunction } from "./lookup";
 import { shortVerbEndConsonant } from "./misc";
 // big problem ما سړی یوړ crashes it !!
 // BIG problem - issue with و being considered a VB for a lot of little verbs like بلل
 // TODO: کول verbs!
 // check that aawu stuff is working
 // check oo`azmooy -
@ -102,6 +99,9 @@ function matchVerbs(
            }
          }
        } else if (e.psp) {
          if (hasBreakawayAlef(e) && startsWithAleph(base)) {
            return acc;
          }
          if (e.separationAtP) {
            const bRest = e.psp.slice(e.separationAtP);
            if (bRest === base) {
@ -117,6 +117,8 @@ function matchVerbs(
              return [...acc, entry];
            }
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        } else if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
@ -169,6 +171,8 @@ function matchVerbs(
          if (matchShortOrLong(base, bRest)) {
            return [...acc, entry];
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
          return acc;
        } else {
          const p = e.prp || e.p;
          if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
@ -245,6 +249,9 @@ function matchVerbs(
          }
        }
      } else if (!e.prp) {
        if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        }
        if (oEnd) {
          if ([e.p, e.p.slice(0, -1)].includes(base)) {
            return [...acc, entry];
@ -395,3 +402,11 @@ function parseIrregularVerb(s: string): T.ParsedVBE[] {
  }
  return [];
 }
 // True when the entry has no truthy `sepOo` value and its `p` spelling
 // begins with ا or آ.
 // NOTE(review): presumably marks verbs whose leading alef splits off from
 // the rest of the verb — confirm against the dictionary conventions.
 function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
  if (e.sepOo) {
    return false;
  }
  const initial = e.p.charAt(0);
  return initial === "ا" || initial === "آ";
 }
 // True when the first character of `base` is ا or آ; false for anything
 // else, including the empty string.
 function startsWithAleph(base: string): boolean {
  const initial = base.charAt(0);
  return initial === "ا" || initial === "آ";
 }
--- a/src/lib/src/parsing/parse-vp.test.ts
+++ b/src/lib/src/parsing/parse-vp.test.ts
@ -86,6 +86,10 @@ const tests: {
        output: [],
        error: true,
      },
      {
        input: "ما وانه اخیست",
        output: [],
      },
    ],
  },
  {
@ -1005,129 +1009,129 @@ const tests: {
          }))
        ),
      },
-      // {
+      {
-      //   input: "ودې وینم",
+        input: "ودې وینم",
-      //   output: getPeople(2, "sing").flatMap((objectPerson) =>
+        output: getPeople(2, "sing").flatMap((objectPerson) =>
-      //     getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
+          getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
-      //       blocks: [
+            blocks: [
-      //         {
+              {
-      //           key: 1,
+                key: 1,
-      //           block: makeSubjectSelectionComplete({
+                block: makeSubjectSelectionComplete({
-      //             type: "NP",
+                  type: "NP",
-      //             selection: makePronounSelection(subjectPerson),
+                  selection: makePronounSelection(subjectPerson),
-      //           }),
+                }),
-      //         },
+              },
-      //         {
+              {
-      //           key: 2,
+                key: 2,
-      //           block: makeObjectSelectionComplete({
+                block: makeObjectSelectionComplete({
-      //             type: "NP",
+                  type: "NP",
-      //             selection: makePronounSelection(objectPerson),
+                  selection: makePronounSelection(objectPerson),
-      //           }),
+                }),
-      //         },
+              },
-      //       ],
+            ],
-      //       verb: {
+            verb: {
-      //         type: "verb",
+              type: "verb",
-      //         verb: leedul,
+              verb: leedul,
-      //         transitivity: "transitive",
+              transitivity: "transitive",
-      //         canChangeTransitivity: false,
+              canChangeTransitivity: false,
-      //         canChangeStatDyn: false,
+              canChangeStatDyn: false,
-      //         negative: false,
+              negative: false,
-      //         tense: "subjunctiveVerb",
+              tense: "subjunctiveVerb",
-      //         canChangeVoice: true,
+              canChangeVoice: true,
-      //         isCompound: false,
+              isCompound: false,
-      //         voice: "active",
+              voice: "active",
-      //       },
+            },
-      //       externalComplement: undefined,
+            externalComplement: undefined,
-      //       form: {
+            form: {
-      //         removeKing: true,
+              removeKing: true,
-      //         shrinkServant: true,
+              shrinkServant: true,
-      //       },
+            },
-      //     }))
+          }))
-      //   ),
+        ),
-      // },
+      },
-      // {
+      {
-      //   input: "وینم به دې",
+        input: "وینم به دې",
-      //   output: getPeople(2, "sing").flatMap((objectPerson) =>
+        output: getPeople(2, "sing").flatMap((objectPerson) =>
-      //     getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
+          getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
-      //       blocks: [
+            blocks: [
-      //         {
+              {
-      //           key: 1,
+                key: 1,
-      //           block: makeSubjectSelectionComplete({
+                block: makeSubjectSelectionComplete({
-      //             type: "NP",
+                  type: "NP",
-      //             selection: makePronounSelection(subjectPerson),
+                  selection: makePronounSelection(subjectPerson),
-      //           }),
+                }),
-      //         },
+              },
-      //         {
+              {
-      //           key: 2,
+                key: 2,
-      //           block: makeObjectSelectionComplete({
+                block: makeObjectSelectionComplete({
-      //             type: "NP",
+                  type: "NP",
-      //             selection: makePronounSelection(objectPerson),
+                  selection: makePronounSelection(objectPerson),
-      //           }),
+                }),
-      //         },
+              },
-      //       ],
+            ],
-      //       verb: {
+            verb: {
-      //         type: "verb",
+              type: "verb",
-      //         verb: leedul,
+              verb: leedul,
-      //         transitivity: "transitive",
+              transitivity: "transitive",
-      //         canChangeTransitivity: false,
+              canChangeTransitivity: false,
-      //         canChangeStatDyn: false,
+              canChangeStatDyn: false,
-      //         negative: false,
+              negative: false,
-      //         tense: "imperfectiveFuture",
+              tense: "imperfectiveFuture",
-      //         canChangeVoice: true,
+              canChangeVoice: true,
-      //         isCompound: false,
+              isCompound: false,
-      //         voice: "active",
+              voice: "active",
-      //       },
+            },
-      //       externalComplement: undefined,
+            externalComplement: undefined,
-      //       form: {
+            form: {
-      //         removeKing: true,
+              removeKing: true,
-      //         shrinkServant: true,
+              shrinkServant: true,
-      //       },
+            },
-      //     }))
+          }))
-      //   ),
+        ),
-      // },
+      },
-      // {
+      {
-      //   input: "یو به مې ړلې",
+        input: "یو به مې ړلې",
-      //   output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap(
+        output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap(
-      //     (objectPerson) =>
+          (objectPerson) =>
-      //       getPeople(1, "sing").map<T.VPSelectionComplete>(
+            getPeople(1, "sing").map<T.VPSelectionComplete>(
-      //         (subjectPerson) => ({
+              (subjectPerson) => ({
-      //           blocks: [
+                blocks: [
-      //             {
+                  {
-      //               key: 1,
+                    key: 1,
-      //               block: makeSubjectSelectionComplete({
+                    block: makeSubjectSelectionComplete({
-      //                 type: "NP",
+                      type: "NP",
-      //                 selection: makePronounSelection(subjectPerson),
+                      selection: makePronounSelection(subjectPerson),
-      //               }),
+                    }),
-      //             },
+                  },
-      //             {
+                  {
-      //               key: 2,
+                    key: 2,
-      //               block: makeObjectSelectionComplete({
+                    block: makeObjectSelectionComplete({
-      //                 type: "NP",
+                      type: "NP",
-      //                 selection: makePronounSelection(objectPerson),
+                      selection: makePronounSelection(objectPerson),
-      //               }),
+                    }),
-      //             },
+                  },
-      //           ],
+                ],
-      //           verb: {
+                verb: {
-      //             type: "verb",
+                  type: "verb",
-      //             verb: wurul,
+                  verb: wurul,
-      //             transitivity: "transitive",
+                  transitivity: "transitive",
-      //             canChangeTransitivity: false,
+                  canChangeTransitivity: false,
-      //             canChangeStatDyn: false,
+                  canChangeStatDyn: false,
-      //             negative: false,
+                  negative: false,
-      //             tense: "habitualPerfectivePast",
+                  tense: "habitualPerfectivePast",
-      //             canChangeVoice: true,
+                  canChangeVoice: true,
-      //             isCompound: false,
+                  isCompound: false,
-      //             voice: "active",
+                  voice: "active",
-      //           },
+                },
-      //           externalComplement: undefined,
+                externalComplement: undefined,
-      //           form: {
+                form: {
-      //             removeKing: true,
+                  removeKing: true,
-      //             shrinkServant: true,
+                  shrinkServant: true,
-      //           },
+                },
-      //         })
+              })
-      //       )
+            )
-      //   ),
+        ),
-      // },
+      },
    ],
  },
  {
--- a/src/lib/src/parsing/parse-vp.ts
+++ b/src/lib/src/parsing/parse-vp.ts
@ -17,13 +17,6 @@ import { personToGenNum } from "../misc-helpers";
 import { equals } from "rambda";
 // to hide equatives type-doubling issue
 // this should also conjugate to
 //  وامې نه خیسته
 // وامې نه خیستلو
 // waa-me nú kheestulo
 // وامې نه اخیست
 // waa-me nú akheest
 // TODO: word query for kawul/kedul/stat/dyn
 // TODO: learn how to yank / use plugin for JSON neovim
@ -37,6 +30,9 @@ import { equals } from "rambda";
 // so we don't get something like ښځو زه خوړلې یم with a hanging
 // یم not used
 // TODO: way to get an error message for past participle and equative
 // not matching up
 export function parseVP(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
--- a/src/types.ts
+++ b/src/types.ts
@ -1199,6 +1199,7 @@ export type ParsedBlock =
  | ParsedPH
  | ParsedVBE
  | ParsedVBP
  | APSelection
  | NegativeBlock;
 export type ParsedKidsSection = {