some verb bug fixes and starting with AP parsing

2023-09-04 14:26:24 +04:00 · 2023-09-04 14:26:24 +04:00 · a9f93dc717
parent ded0030395
commit a9f93dc717
10 changed files with 416 additions and 155 deletions
--- a/src/lib/src/parsing/lookup.tsx
+++ b/src/lib/src/parsing/lookup.tsx
@ -1,11 +1,16 @@
 import nounsAdjs from "../../../nouns-adjs";
 import verbs from "../../../verbs";
 import * as T from "../../../types";
-import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
+import {
+  isAdjectiveEntry,
+  isAdverbEntry,
+  isNounEntry,
+} from "../type-predicates";
 import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
 import { splitVarients, undoAaXuPattern } from "../p-text-helpers";
 import { arraysHaveCommon } from "../misc-helpers";
 import { shortVerbEndConsonant } from "./misc";
+import { kawulDyn, kawulStat, kedulDyn, kedulStat, tlul } from "./irreg-verbs";

 export type LookupFunction = typeof lookup;

@ -13,11 +18,13 @@ export function lookup(
  s: Partial<T.DictionaryEntry>,
  type: "nounAdj"
 ): T.DictionaryEntry[];
+export function lookup(s: string, type: "adverb"): T.AdverbEntry[];
+export function lookup(s: string, type: "pPart"): T.VerbEntry[];
 export function lookup(s: string, type: "verb" | "participle"): T.VerbEntry[];
 export function lookup(
  s: string | Partial<T.DictionaryEntry>,
-  type: "nounAdj" | "verb" | "participle"
-): T.DictionaryEntry[] | T.VerbEntry[] {
+  type: "nounAdj" | "verb" | "participle" | "pPart" | "adverb"
+): T.DictionaryEntry[] | T.VerbEntry[] | T.AdverbEntry[] {
  if (type === "nounAdj") {
    if (typeof s !== "object") {
      throw new Error("invalid query for noun / adj lookup");
@ -30,6 +37,12 @@ export function lookup(
  if (type === "verb") {
    return verbLookup(s);
  }
+  if (type === "pPart") {
+    return pPartLookup(s);
+  }
+  if (type === "adverb") {
+    return adverbLookup(s);
+  }
  return participleLookup(s);
 }

@ -60,6 +73,12 @@ function nounAdjLookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
  return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
 }

+function adverbLookup(s: string): T.AdverbEntry[] { // returns the nounsAdjs entries that are adverbs and whose `p` is exactly `s`
+  return nounsAdjs.filter(
+    (a) => isAdverbEntry(a) && a.p === s // strict equality on `p` only; no other field or spelling variant is tried here
+  ) as T.AdverbEntry[]; // the cast only narrows the static type; the runtime check is the isAdverbEntry call above
+}
+
 export function shouldCheckTpp(s: string): boolean {
  return (
    ["د", "ړ", "ت", "ځ", "و", "ډ", "ڼ", "ن", "ه"].includes(s.slice(-1)) ||
@ -85,6 +104,27 @@ function participleLookup(input: string): T.VerbEntry[] {
  return [];
 }

+function pPartLookup(input: string): T.VerbEntry[] { // maps a past-participle base to candidate verb entries; parsePastPart passes the word with its last character removed
+  if (input === "کړ") { // hard-coded irregular base
+    return [kawulStat, kawulDyn]; // both kawul entries are returned as candidates
+  }
+  if (input === "شو") { // hard-coded irregular base
+    return [kedulStat, kedulDyn]; // both kedul entries are returned as candidates
+  }
+  if (input === "تل") { // hard-coded irregular base; checked before the generic branches below
+    // TODO: is also ورتلل، راتلل، درتلل like this?
+    return [tlul];
+  }
+  if (["ست", "ښت"].includes(input.slice(-2))) { // base ends in one of these two-letter clusters (short form)
+    const p = input + "ل"; // re-append the final letter to rebuild the dictionary form
+    return verbs.filter((e) => e.entry.p === p); // exact match against each verb's `p`
+  }
+  if (input.at(-1) === "ل") { // base already ends in the final letter, so it is compared as is
+    return verbs.filter((e) => e.entry.p === input); // exact match against each verb's `p`
+  }
+  return []; // no rule matched: no candidates
+}
+
 function verbLookup(input: string): T.VerbEntry[] {
  // TODO:
  // only look up forms if there's an ending
--- a/src/lib/src/parsing/parse-ap.ts
+++ b/src/lib/src/parsing/parse-ap.ts
@ -1 +1,23 @@
-// TODO: ability to treat a doubled noun as an adverb
+import * as T from "../../../types";
+import { LookupFunction } from "./lookup";
+import { returnParseResultS } from "./utils";
+
+export function parseAP( // tries to read an adverb AP from the first token; returns one result per matching dictionary entry
+  tokens: Readonly<T.Token[]>, // remaining input tokens; not mutated
+  lookup: LookupFunction // dictionary lookup, passed in by the caller
+): T.ParseResult<T.APSelection>[] {
+  if (tokens.length === 0) {
+    return []; // nothing left to parse
+  }
+  const [first, ...rest] = tokens; // only the first token is examined; `rest` is handed on unparsed
+  const adverbs = lookup(first.s, "adverb"); // looks up the token's `s` text with the "adverb" query type
+  return adverbs.map((entry) =>
+    returnParseResultS(rest, { // presumably pairs the leftover tokens with the parsed body; see ./utils to confirm
+      type: "AP",
+      selection: {
+        type: "adverb",
+        entry,
+      },
+    })
+  );
+}
--- a/src/lib/src/parsing/parse-blocks.ts
+++ b/src/lib/src/parsing/parse-blocks.ts
@ -1,5 +1,6 @@
 import * as T from "../../../types";
 import { LookupFunction } from "./lookup";
+import { parseAP } from "./parse-ap";
 import { parseEquative } from "./parse-equative";
 import { parseKidsSection } from "./parse-kids-section";
 import { parseNeg } from "./parse-negative";
@ -25,21 +26,16 @@ export function parseBlocks(
    (b): b is T.ParsedPH => b.type === "PH"
  );
  const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
-  const np = prevPh ? [] : parseNP(tokens, lookup);
-  const ph = vbExists || prevPh ? [] : parsePH(tokens);
-  const vb = parseVerb(tokens, lookup);
-  const vbp = parsePastPart(tokens, lookup);
-  const eq = parseEquative(tokens);
-  const neg = parseNeg(tokens);
-  const kidsR = parseKidsSection(tokens, []);
+
  const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
-    ...np,
-    ...ph,
-    ...neg,
-    ...vb,
-    ...vbp,
-    ...eq,
-    ...kidsR,
+    ...(prevPh ? [] : parseAP(tokens, lookup)),
+    ...(prevPh ? [] : parseNP(tokens, lookup)),
+    ...(vbExists || prevPh ? [] : parsePH(tokens)),
+    ...parseVerb(tokens, lookup),
+    ...parsePastPart(tokens, lookup),
+    ...parseEquative(tokens),
+    ...parseNeg(tokens),
+    ...parseKidsSection(tokens, []),
  ];
  // TODO: is this necessary?
  // if (!allResults.length) {
--- a/src/lib/src/parsing/parse-past-part.test.ts
+++ b/src/lib/src/parsing/parse-past-part.test.ts
@ -0,0 +1,186 @@
+import * as T from "../../../types";
+import { lookup, wordQuery } from "./lookup";
+import { tokenizer } from "./tokenizer";
+import { parsePastPart } from "./parse-past-part";
+import { kawulDyn, kawulStat, kedulDyn, kedulStat } from "./irreg-verbs";
+
+const leedul = wordQuery("لیدل", "verb"); // verb entries fetched once at module load and reused in the expected outputs below
+const akheestul = wordQuery("اخیستل", "verb");
+const wahul = wordQuery("وهل", "verb");
+const awuxtul = wordQuery("اوښتل", "verb");
+const tlul = wordQuery("tlul", "verb"); // NOTE(review): queried with Latin text unlike the entries above; confirm wordQuery matches on this form
+
+const tests: {
+  label: string;
+  cases: {
+    input: string;
+    output: T.ParsedVBP[];
+  }[];
+}[] = [
+  {
+    label: "regular past participles",
+    cases: [
+      {
+        input: "لیدلی",
+        output: [
+          {
+            type: "VB",
+            info: {
+              type: "ppart",
+              genNum: {
+                gender: "masc",
+                number: "singular",
+              },
+              verb: leedul,
+            },
+          },
+        ],
+      },
+      {
+        input: "وهلي",
+        output: [
+          {
+            type: "VB",
+            info: {
+              type: "ppart",
+              genNum: {
+                gender: "masc",
+                number: "plural",
+              },
+              verb: wahul,
+            },
+          },
+        ],
+      },
+      {
+        input: "وهلې",
+        output: (["singular", "plural"] as const).map((number) => ({
+          type: "VB",
+          info: {
+            type: "ppart",
+            genNum: {
+              gender: "fem",
+              number,
+            },
+            verb: wahul,
+          },
+        })),
+      },
+    ],
+  },
+  {
+    label: "past participles with short forms",
+    cases: [
+      {
+        input: "اخیستی",
+        output: [
+          {
+            type: "VB",
+            info: {
+              type: "ppart",
+              genNum: {
+                gender: "masc",
+                number: "singular",
+              },
+              verb: akheestul,
+            },
+          },
+        ],
+      },
+      {
+        input: "اخیستلی",
+        output: [
+          {
+            type: "VB",
+            info: {
+              type: "ppart",
+              genNum: {
+                gender: "masc",
+                number: "singular",
+              },
+              verb: akheestul,
+            },
+          },
+        ],
+      },
+      {
+        input: "اوښتی",
+        output: [
+          {
+            type: "VB",
+            info: {
+              type: "ppart",
+              genNum: {
+                gender: "masc",
+                number: "singular",
+              },
+              verb: awuxtul,
+            },
+          },
+        ],
+      },
+    ],
+  },
+  {
+    label: "irregular past participles",
+    cases: [
+      {
+        input: "تلی",
+        output: [
+          {
+            type: "VB",
+            info: {
+              type: "ppart",
+              genNum: {
+                gender: "masc",
+                number: "singular",
+              },
+              verb: tlul,
+            },
+          },
+        ],
+      },
+      {
+        input: "کړي",
+        output: [kawulStat, kawulDyn].map((verb) => ({
+          type: "VB",
+          info: {
+            type: "ppart",
+            genNum: {
+              gender: "masc",
+              number: "plural",
+            },
+            verb,
+          },
+        })),
+      },
+      {
+        input: "شوي",
+        output: [kedulStat, kedulDyn].map((verb) => ({
+          type: "VB",
+          info: {
+            type: "ppart",
+            genNum: {
+              gender: "masc",
+              number: "plural",
+            },
+            verb,
+          },
+        })),
+      },
+    ],
+  },
+];
+
+describe("parsing past participles", () => { // registers one jest test per labelled group in `tests`
+  tests.forEach(({ label, cases }) => {
+    // eslint-disable-next-line jest/valid-title
+    test(label, () => { // the title comes from the table, hence the lint suppression above
+      cases.forEach(({ input, output }) => {
+        const tokens = tokenizer(input); // raw text -> tokens
+        const res = parsePastPart(tokens, lookup).map(({ body }) => body); // keep only each result's `body`; other result fields are not compared
+        expect(res).toEqual(output); // deep equality on the whole array, so result order matters
+      });
+    });
+  });
+});
--- a/src/lib/src/parsing/parse-past-part.ts
+++ b/src/lib/src/parsing/parse-past-part.ts
@ -16,7 +16,7 @@ export function parsePastPart(
  }
  // TODO: ALSO HANDLE SHORT FORMS
  const wOutEnd = s.slice(0, -1);
-  const matches = lookup(wOutEnd, "participle");
+  const matches = lookup(wOutEnd, "pPart");
  const genNums = endingGenderNum(ending);
  return matches
    .flatMap<T.ParsedVBP>((verb) =>
--- a/src/lib/src/parsing/parse-verb.test.ts
+++ b/src/lib/src/parsing/parse-verb.test.ts
@ -31,7 +31,8 @@ const akheestul = wordQuery("اخیستل", "verb");
 const alwatul = wordQuery("الوتل", "verb");
 // const dartlul = wordQuery("درتلل", "verb")

-// todo alwatul waalwatul akhistul azmoyul etc
+// TODO: azmoyul etc
+// TODO: cleaner and more thorough handling of ا separating verbs ee - wee etc

 const tests: {
  label: string;
@ -394,7 +395,7 @@ const tests: {
          {
            root: {
              persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
-              aspects: ["imperfective", "perfective"],
+              aspects: ["imperfective"],
            },
            verb: akheestul,
          },
@ -419,7 +420,7 @@ const tests: {
          {
            stem: {
              persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
-              aspects: ["imperfective", "perfective"],
+              aspects: ["imperfective"],
            },
            verb: alwatul,
          },
--- a/src/lib/src/parsing/parse-verb.ts
+++ b/src/lib/src/parsing/parse-verb.ts
@ -12,9 +12,6 @@ import {
 import { LookupFunction } from "./lookup";
 import { shortVerbEndConsonant } from "./misc";

-// big problem ما سړی یوړ crashes it !!
-// BIG problem - issue with و being considered a VB for a lot of little verbs like بلل
-
 // TODO: کول verbs!
 // check that aawu stuff is working
 // check oo`azmooy -
@ -102,6 +99,9 @@ function matchVerbs(
            }
          }
        } else if (e.psp) {
+          if (hasBreakawayAlef(e) && startsWithAleph(base)) {
+            return acc;
+          }
          if (e.separationAtP) {
            const bRest = e.psp.slice(e.separationAtP);
            if (bRest === base) {
@ -117,6 +117,8 @@ function matchVerbs(
              return [...acc, entry];
            }
          }
+        } else if (hasBreakawayAlef(e) && startsWithAleph(base)) {
+          return acc;
        } else if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
@ -169,6 +171,8 @@ function matchVerbs(
          if (matchShortOrLong(base, bRest)) {
            return [...acc, entry];
          }
+        } else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
+          return acc;
        } else {
          const p = e.prp || e.p;
          if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
@ -245,6 +249,9 @@ function matchVerbs(
          }
        }
      } else if (!e.prp) {
+        if (hasBreakawayAlef(e) && startsWithAleph(base)) {
+          return acc;
+        }
        if (oEnd) {
          if ([e.p, e.p.slice(0, -1)].includes(base)) {
            return [...acc, entry];
@ -395,3 +402,11 @@ function parseIrregularVerb(s: string): T.ParsedVBE[] {
  }
  return [];
 }
+
+function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean { // true when the entry has no truthy `sepOo` and its `p` begins with one of the two alef letters below
+  return !e.sepOo && ["ا", "آ"].includes(e.p[0]); // only the first character of `p` is inspected
+}
+
+function startsWithAleph(base: string): boolean { // true when `base` begins with one of the two alef letters below
+  return ["ا", "آ"].includes(base[0]); // for an empty string base[0] is undefined, so the result is false
+}
--- a/src/lib/src/parsing/parse-vp.test.ts
+++ b/src/lib/src/parsing/parse-vp.test.ts
@ -86,6 +86,10 @@ const tests: {
        output: [],
        error: true,
      },
+      {
+        input: "ما وانه اخیست",
+        output: [],
+      },
    ],
  },
  {
@ -1005,129 +1009,129 @@ const tests: {
          }))
        ),
      },
-      // {
-      //   input: "ودې وینم",
-      //   output: getPeople(2, "sing").flatMap((objectPerson) =>
-      //     getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
-      //       blocks: [
-      //         {
-      //           key: 1,
-      //           block: makeSubjectSelectionComplete({
-      //             type: "NP",
-      //             selection: makePronounSelection(subjectPerson),
-      //           }),
-      //         },
-      //         {
-      //           key: 2,
-      //           block: makeObjectSelectionComplete({
-      //             type: "NP",
-      //             selection: makePronounSelection(objectPerson),
-      //           }),
-      //         },
-      //       ],
-      //       verb: {
-      //         type: "verb",
-      //         verb: leedul,
-      //         transitivity: "transitive",
-      //         canChangeTransitivity: false,
-      //         canChangeStatDyn: false,
-      //         negative: false,
-      //         tense: "subjunctiveVerb",
-      //         canChangeVoice: true,
-      //         isCompound: false,
-      //         voice: "active",
-      //       },
-      //       externalComplement: undefined,
-      //       form: {
-      //         removeKing: true,
-      //         shrinkServant: true,
-      //       },
-      //     }))
-      //   ),
-      // },
-      // {
-      //   input: "وینم به دې",
-      //   output: getPeople(2, "sing").flatMap((objectPerson) =>
-      //     getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
-      //       blocks: [
-      //         {
-      //           key: 1,
-      //           block: makeSubjectSelectionComplete({
-      //             type: "NP",
-      //             selection: makePronounSelection(subjectPerson),
-      //           }),
-      //         },
-      //         {
-      //           key: 2,
-      //           block: makeObjectSelectionComplete({
-      //             type: "NP",
-      //             selection: makePronounSelection(objectPerson),
-      //           }),
-      //         },
-      //       ],
-      //       verb: {
-      //         type: "verb",
-      //         verb: leedul,
-      //         transitivity: "transitive",
-      //         canChangeTransitivity: false,
-      //         canChangeStatDyn: false,
-      //         negative: false,
-      //         tense: "imperfectiveFuture",
-      //         canChangeVoice: true,
-      //         isCompound: false,
-      //         voice: "active",
-      //       },
-      //       externalComplement: undefined,
-      //       form: {
-      //         removeKing: true,
-      //         shrinkServant: true,
-      //       },
-      //     }))
-      //   ),
-      // },
-      // {
-      //   input: "یو به مې ړلې",
-      //   output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap(
-      //     (objectPerson) =>
-      //       getPeople(1, "sing").map<T.VPSelectionComplete>(
-      //         (subjectPerson) => ({
-      //           blocks: [
-      //             {
-      //               key: 1,
-      //               block: makeSubjectSelectionComplete({
-      //                 type: "NP",
-      //                 selection: makePronounSelection(subjectPerson),
-      //               }),
-      //             },
-      //             {
-      //               key: 2,
-      //               block: makeObjectSelectionComplete({
-      //                 type: "NP",
-      //                 selection: makePronounSelection(objectPerson),
-      //               }),
-      //             },
-      //           ],
-      //           verb: {
-      //             type: "verb",
-      //             verb: wurul,
-      //             transitivity: "transitive",
-      //             canChangeTransitivity: false,
-      //             canChangeStatDyn: false,
-      //             negative: false,
-      //             tense: "habitualPerfectivePast",
-      //             canChangeVoice: true,
-      //             isCompound: false,
-      //             voice: "active",
-      //           },
-      //           externalComplement: undefined,
-      //           form: {
-      //             removeKing: true,
-      //             shrinkServant: true,
-      //           },
-      //         })
-      //       )
-      //   ),
-      // },
+      {
+        input: "ودې وینم",
+        output: getPeople(2, "sing").flatMap((objectPerson) =>
+          getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
+            blocks: [
+              {
+                key: 1,
+                block: makeSubjectSelectionComplete({
+                  type: "NP",
+                  selection: makePronounSelection(subjectPerson),
+                }),
+              },
+              {
+                key: 2,
+                block: makeObjectSelectionComplete({
+                  type: "NP",
+                  selection: makePronounSelection(objectPerson),
+                }),
+              },
+            ],
+            verb: {
+              type: "verb",
+              verb: leedul,
+              transitivity: "transitive",
+              canChangeTransitivity: false,
+              canChangeStatDyn: false,
+              negative: false,
+              tense: "subjunctiveVerb",
+              canChangeVoice: true,
+              isCompound: false,
+              voice: "active",
+            },
+            externalComplement: undefined,
+            form: {
+              removeKing: true,
+              shrinkServant: true,
+            },
+          }))
+        ),
+      },
+      {
+        input: "وینم به دې",
+        output: getPeople(2, "sing").flatMap((objectPerson) =>
+          getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
+            blocks: [
+              {
+                key: 1,
+                block: makeSubjectSelectionComplete({
+                  type: "NP",
+                  selection: makePronounSelection(subjectPerson),
+                }),
+              },
+              {
+                key: 2,
+                block: makeObjectSelectionComplete({
+                  type: "NP",
+                  selection: makePronounSelection(objectPerson),
+                }),
+              },
+            ],
+            verb: {
+              type: "verb",
+              verb: leedul,
+              transitivity: "transitive",
+              canChangeTransitivity: false,
+              canChangeStatDyn: false,
+              negative: false,
+              tense: "imperfectiveFuture",
+              canChangeVoice: true,
+              isCompound: false,
+              voice: "active",
+            },
+            externalComplement: undefined,
+            form: {
+              removeKing: true,
+              shrinkServant: true,
+            },
+          }))
+        ),
+      },
+      {
+        input: "یو به مې ړلې",
+        output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap(
+          (objectPerson) =>
+            getPeople(1, "sing").map<T.VPSelectionComplete>(
+              (subjectPerson) => ({
+                blocks: [
+                  {
+                    key: 1,
+                    block: makeSubjectSelectionComplete({
+                      type: "NP",
+                      selection: makePronounSelection(subjectPerson),
+                    }),
+                  },
+                  {
+                    key: 2,
+                    block: makeObjectSelectionComplete({
+                      type: "NP",
+                      selection: makePronounSelection(objectPerson),
+                    }),
+                  },
+                ],
+                verb: {
+                  type: "verb",
+                  verb: wurul,
+                  transitivity: "transitive",
+                  canChangeTransitivity: false,
+                  canChangeStatDyn: false,
+                  negative: false,
+                  tense: "habitualPerfectivePast",
+                  canChangeVoice: true,
+                  isCompound: false,
+                  voice: "active",
+                },
+                externalComplement: undefined,
+                form: {
+                  removeKing: true,
+                  shrinkServant: true,
+                },
+              })
+            )
+        ),
+      },
    ],
  },
  {
--- a/src/lib/src/parsing/parse-vp.ts
+++ b/src/lib/src/parsing/parse-vp.ts
@ -17,13 +17,6 @@ import { personToGenNum } from "../misc-helpers";
 import { equals } from "rambda";
 // to hide equatives type-doubling issue

-// this should also conjugate to
-//  وامې نه خیسته
-// وامې نه خیستلو
-// waa-me nú kheestulo
-// وامې نه اخیست
-// waa-me nú akheest
-
 // TODO: word query for kawul/kedul/stat/dyn

 // TODO: learn how to yank / use plugin for JSON neovim
@ -37,6 +30,9 @@ import { equals } from "rambda";
 // so we don't get something like ښځو زه خوړلې یم with a hanging
 // یم not used

+// TODO: way to get an error message for past participle and equative
+// not matching up
+
 export function parseVP(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
--- a/src/types.ts
+++ b/src/types.ts
@ -1199,6 +1199,7 @@ export type ParsedBlock =
  | ParsedPH
  | ParsedVBE
  | ParsedVBP
+  | APSelection
  | NegativeBlock;

 export type ParsedKidsSection = {