more on parser

2023-08-17 18:12:09 +04:00 · 2023-08-17 18:12:09 +04:00 · 288718f69a
parent b384771db5
commit 288718f69a
12 changed files with 1064 additions and 283 deletions
--- a/src/lib/src/parsing/parse-ba.ts
+++ b/src/lib/src/parsing/parse-ba.ts
@ -1,21 +0,0 @@
 import * as T from "../../../types";
 /**
  * Tries to read the word "به" from the front of the token list.
  *
  * @param tokens - the tokens that still have to be parsed
  * @returns a single result (body `{ type: "ba" }`, no errors, and the
  * tokens that follow) when the first token is "به"; otherwise an empty
  * array, meaning "no parse"
  */
 export function parseBa(
  tokens: Readonly<T.Token[]>
 ): T.ParseResult<{ type: "ba" }>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [head, ...remaining] = tokens;
  if (head.s !== "به") {
    return [];
  }
  return [
    {
      tokens: remaining,
      body: { type: "ba" },
      errors: [],
    },
  ];
 }
--- a/src/lib/src/parsing/parse-block.ts
+++ b/src/lib/src/parsing/parse-block.ts
@ -0,0 +1,61 @@
 import * as T from "../../../types";
 import { fmapParseResult } from "../fp-ps";
 import { parseNP } from "./parse-np";
 import { parseVerb } from "./parse-verb";
 /**
  * Parses one block from the front of `tokens`, collecting every reading
  * offered by `parseNP` and by `parseVerb`.
  *
  * @param tokens - the tokens that still have to be parsed
  * @param lookup - dictionary lookup handed on to `parseNP`
  * @param verbLookup - verb lookup handed on to `parseVerb`
  * @returns all candidate results: NP results wrapped in a one-element
  * tuple, followed by the verb results (a tuple of an optional "PH" piece
  * and the verb). With no tokens, a single result with an empty body.
  */
 export function parseBlock(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  verbLookup: (s: string) => T.VerbEntry[]
 ): T.ParseResult<
  | [
      {
        inflected: boolean;
        selection: T.NPSelection;
      }
    ]
  | [
      (
        | {
            type: "PH";
            s: string;
          }
        | undefined
      ),
      Omit<T.VBE, "ps">
    ]
  | []
 >[] {
  // local names for the two non-empty body shapes in the return type
  type NPBlock = [{ inflected: boolean; selection: T.NPSelection }];
  type VerbBlock = [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">];

  if (!tokens.length) {
    return [{ tokens: [], body: [], errors: [] }];
  }
  // NP readings first, each body wrapped in a one-element tuple
  const npResults = fmapParseResult(
    (np) => [np],
    parseNP(tokens, lookup)
  ) as T.ParseResult<NPBlock>[];
  // then the verb readings
  const verbResults = parseVerb(
    tokens,
    verbLookup
  ) as T.ParseResult<VerbBlock>[];
  return [...npResults, ...verbResults];
 }
--- a/src/lib/src/parsing/parse-blocks.ts
+++ b/src/lib/src/parsing/parse-blocks.ts
@ -0,0 +1,96 @@
 import * as T from "../../../types";
 import { parseBlock } from "./parse-block";
 import { parseKidsSection } from "./parse-kids-section";
 import { bindParseResult, returnParseResult } from "./utils";
 /**
  * Recursively parses a sequence of blocks and kids' sections, threading
  * the blocks and kids found so far through `prevBlocks` and `kids`.
  *
  * @param tokens - the tokens that still have to be parsed
  * @param lookup - dictionary lookup handed on to `parseBlock`
  * @param verbLookup - verb lookup handed on to `parseBlock`
  * @param prevBlocks - blocks accumulated by earlier recursion steps
  * @param kids - kids accumulated by earlier recursion steps
  * @returns the possible parses, each carrying the accumulated `blocks`
  * and `kids`
  */
 export function parseBlocks(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  verbLookup: (s: string) => T.VerbEntry[],
  prevBlocks: (
    | {
        inflected: boolean;
        selection: T.NPSelection;
      }
    | {
        type: "PH";
        s: string;
      }
    | Omit<T.VBE, "ps">
  )[],
  kids: T.ParsedKid[]
 ): T.ParseResult<{
  kids: T.ParsedKid[];
  blocks: (
    | {
        inflected: boolean;
        selection: T.NPSelection;
      }
    | {
        type: "PH";
        s: string;
      }
    | Omit<T.VBE, "ps">
  )[];
 }>[] {
  // base case: nothing left to read, so hand back what was accumulated
  if (tokens.length === 0) {
    // console.log("at end", { prevBlocks, kids });
    return returnParseResult(tokens, { blocks: prevBlocks, kids });
  }
  // try both readings of the same tokens: a block, or a kids' section
  const block = parseBlock(tokens, lookup, verbLookup);
  const kidsR = parseKidsSection(tokens, []);
  const allResults = [...block, ...kidsR] as T.ParseResult<
    | [
        {
          inflected: boolean;
          selection: T.NPSelection;
        }
      ]
    | [
        (
          | {
              type: "PH";
              s: string;
            }
          | undefined
        ),
        Omit<T.VBE, "ps">
      ]
    | []
    | { kids: T.ParsedKid[] }
  >[];
  // neither reading produced anything: stop with what we have so far
  // NOTE(review): this reports `tokens: []` with no errors even though
  // tokens remain unparsed at this point - confirm that is intended
  if (!allResults.length) {
    return [
      {
        tokens: [],
        body: { blocks: prevBlocks, kids },
        errors: [],
      },
    ];
  }
  // continue from every candidate; the callback's `tokens` shadows the
  // outer parameter (presumably the tokens left after that candidate -
  // verify against bindParseResult)
  return bindParseResult(allResults, (tokens, r) => {
    if ("kids" in r) {
      // a kids' section: append its kids and keep going, adding an error
      // unless exactly one block had been collected before it
      return {
        next: parseBlocks(tokens, lookup, verbLookup, prevBlocks, [
          ...kids,
          ...r.kids,
        ]),
        errors:
          prevBlocks.length !== 1
            ? [{ message: "kids' section out of place" }]
            : [],
      };
    }
    // filter out the empty PH pieces
    // for some reason ts won't let me do filter here
    const newBlocks = r.flatMap((x) => (x ? [x] : []));
    // a block: append its pieces and keep going with the same kids
    return parseBlocks(
      tokens,
      lookup,
      verbLookup,
      [...prevBlocks, ...newBlocks],
      kids
    );
  });
 }
--- a/src/lib/src/parsing/parse-kid.ts
+++ b/src/lib/src/parsing/parse-kid.ts
@ -0,0 +1,27 @@
 import * as T from "../../../types";
 import { returnParseResult } from "./utils";
 /**
  * Reads a single kid from the front of the token list.
  *
  * @param tokens - the tokens that still have to be parsed
  * @returns one result holding the kid's code ("ba", "ye", "me", "de" or
  * "mU") and the tokens that follow it, or an empty array when there are
  * no tokens or the first token is not one of the recognised words
  */
 export function parseKid(
  tokens: Readonly<T.Token[]>
 ): T.ParseResult<T.ParsedKid>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  switch (head.s) {
    case "به":
      return returnParseResult(remaining, "ba");
    case "یې":
      return returnParseResult(remaining, "ye");
    case "مې":
      return returnParseResult(remaining, "me");
    case "دې":
      return returnParseResult(remaining, "de");
    case "مو":
      return returnParseResult(remaining, "mU");
    default:
      return [];
  }
 }
--- a/src/lib/src/parsing/parse-kids-section.test.ts
+++ b/src/lib/src/parsing/parse-kids-section.test.ts
@ -0,0 +1,83 @@
 /* eslint-disable jest/no-conditional-expect */
 /* eslint-disable jest/valid-title */
 import * as T from "../../../types";
 import { parseKidsSection } from "./parse-kids-section";
 import { tokenizer } from "./tokenizer";
 // Fixture table for parseKidsSection. Each group becomes one jest test;
 // each case's `input` is tokenized and parsed, then checked against:
 //  - output: the kids expected in the single parse result
 //  - error: when true the result must carry at least one error,
 //    otherwise it must carry none
 // Cases whose `output` is empty are parsed but have nothing asserted on
 // them by the runner below.
 const tests: {
  label: string;
  cases: {
    input: string;
    output: T.ParsedKid[];
    error?: boolean;
  }[];
 }[] = [
  {
    label: "basic kids section",
    cases: [
      {
        input: "به",
        output: ["ba"],
      },
      {
        input: "به دې",
        output: ["ba", "de"],
      },
      {
        // empty input: no kids expected
        input: "",
        output: [],
      },
      {
        input: "مې دې یې",
        output: ["me", "de", "ye"],
      },
      {
        // still parsed, but must be reported with an error
        input: "دې به مې",
        output: ["de", "ba", "me"],
        error: true,
      },
      {
        input: "مې یې",
        output: ["me", "ye"],
      },
      {
        // still parsed, but must be reported with an error
        input: "دې مې",
        output: ["de", "me"],
        error: true,
      },
    ],
  },
  {
    // only the leading kids are expected in the output
    label: "can parse kids section when tokens come after",
    cases: [
      {
        input: "به سړی",
        output: ["ba"],
      },
      {
        input: "مې دې واخیسته",
        output: ["me", "de"],
      },
    ],
  },
 ];
 // Register one jest test per fixture group and run every case in it.
 for (const { label, cases } of tests) {
  test(label, () => {
    for (const { input, output, error } of cases) {
      const parsed = parseKidsSection(tokenizer(input), []);
      // cases that expect no kids are parsed but not asserted on
      if (output.length === 0) {
        continue;
      }
      // exactly one parse, holding exactly the expected kids
      expect(parsed.length).toBe(1);
      expect(parsed.map((result) => result.body.kids)).toEqual([output]);
      const errorCount = parsed[0].errors.length;
      if (error) {
        expect(errorCount).toBeTruthy();
      } else {
        expect(errorCount).toBe(0);
      }
    }
  });
 }
--- a/src/lib/src/parsing/parse-kids-section.ts
+++ b/src/lib/src/parsing/parse-kids-section.ts
@ -0,0 +1,50 @@
 import * as T from "../../../types";
 import { parseKid } from "./parse-kid";
 import { bindParseResult, returnParseResult } from "./utils";
 /**
  * Reads as many consecutive kids as possible from the front of `tokens`.
  *
  * @param tokens - the tokens that still have to be parsed
  * @param prevKids - kids already read by earlier recursion steps
  * @returns results holding all kids read and the tokens after them, or
  * an empty array if not a single kid was found. A kid that repeats an
  * earlier one, or that ranks before the kid just ahead of it, is still
  * accepted but adds an error to the result.
  */
 export function parseKidsSection(
  tokens: Readonly<T.Token[]>,
  prevKids: T.ParsedKid[]
 ): T.ParseResult<{ kids: T.ParsedKid[] }>[] {
  // the section ends here: succeed only if at least one kid was collected
  const endSection = (): T.ParseResult<{ kids: T.ParsedKid[] }>[] =>
    prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];

  if (!tokens.length) {
    return endSection();
  }
  const kidResults = parseKid(tokens);
  // TODO: is this even necessary ??
  if (kidResults.length === 0) {
    return endSection();
  }
  return bindParseResult(kidResults, (remaining, kid) => {
    const errors: { message: string }[] = [];
    if (kidDoubled(kid, prevKids)) {
      errors.push({ message: `double '${kid}' in kids section` });
    } else if (!kidComesBehind(kid, prevKids.at(-1))) {
      errors.push({ message: "kids section out of order" });
    }
    return {
      errors,
      next: parseKidsSection(remaining, [...prevKids, kid]),
    };
  });
 }
 /** Reports whether kid `k` already occurs among the kids in `prev`. */
 function kidDoubled(k: T.ParsedKid, prev: T.ParsedKid[]): boolean {
  return prev.includes(k);
 }
 // the order in which kids are ranked; a kid's rank is its index here
 const kidsOrder: T.ParsedKid[] = ["ba", "me", "de", "ye"];
 /**
  * Gives the rank of kid `k` within `kidsOrder`. "mU" is not listed
  * there and gets the same rank as "me".
  */
 function getKidRank(k: T.ParsedKid): number {
  const listed: T.ParsedKid = k === "mU" ? "me" : k;
  return kidsOrder.indexOf(listed);
 }
 /**
  * Reports whether kid `k` may follow kid `prev`: true when there is no
  * previous kid, or when the rank of `k` is not lower than that of `prev`.
  */
 function kidComesBehind(
  k: T.ParsedKid,
  prev: T.ParsedKid | undefined
 ): boolean {
  return !prev || getKidRank(prev) <= getKidRank(k);
 }
--- a/src/lib/src/parsing/parse-pronoun.ts
+++ b/src/lib/src/parsing/parse-pronoun.ts
@ -80,7 +80,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
          inflected: false,
          selection: {
            type: "pronoun",
-            person: 4,
+            person: 5,
            distance: "far",
          },
        },
--- a/src/lib/src/parsing/parse-verb.ts
+++ b/src/lib/src/parsing/parse-verb.ts
@ -12,6 +12,11 @@ import {
 // big problem ما سړی یوړ crashes it !!
 // TODO: کول verbs!
 // check that aawu stuff is working
 // check oo`azmooy -
 // check څاته
 export function parseVerb(
  tokens: Readonly<T.Token[]>,
  verbLookup: (s: string) => T.VerbEntry[]
--- a/src/lib/src/parsing/parse-vp.ts
+++ b/src/lib/src/parsing/parse-vp.ts
--- a/src/lib/src/parsing/utils.ts
+++ b/src/lib/src/parsing/utils.ts
@ -20,7 +20,7 @@ import * as T from "../../../types";
 * from the different previous results
 * @returns
 */
-export function bindParseResult<C extends object, D extends object>(
+export function bindParseResult<C, D>(
  previous: T.ParseResult<C>[],
  f: (
    tokens: Readonly<T.Token[]>,
@ -59,18 +59,42 @@ export function bindParseResult<C extends object, D extends object>(
      errors: [...errsPassed, ...x.errors, ...errors],
    }));
  });
-  return cleanOutFails(nextPossibilities);
+  return cleanOutResults(nextPossibilities);
 }
-export function cleanOutFails<C extends object>(
+export function returnParseResult<D>(
  tokens: Readonly<T.Token[]>,
  body: D,
  errors?: T.ParseError[]
 ): T.ParseResult<D>[] {
  return [
    {
      tokens,
      body,
      errors: errors || [],
    },
  ];
 }
 /**
 * finds the most successful path(s) and culls out any other more erroneous
 * or redundant paths
 */
 export function cleanOutResults<C>(
  results: T.ParseResult<C>[]
 ): T.ParseResult<C>[] {
-  // if there's any success anywhere, remove any of the errors
+  if (results.length === 0) {
-  const errorsGone = results.find((x) => x.errors.length === 0)
+    return results;
-    ? results.filter((x) => x.errors.length === 0)
+  }
-    : results;
+  let min = Infinity;
  for (let a of results) {
    if (a.errors.length < min) {
      min = a.errors.length;
    }
  }
  const errorsCulled = results.filter((x) => x.errors.length === min);
  // @ts-ignore
-  return Array.from(new Set(errorsGone.map(JSON.stringify))).map(JSON.parse);
+  return Array.from(new Set(errorsCulled.map(JSON.stringify))).map(JSON.parse);
 }
 export function isCompleteResult<C extends object>(
--- a/src/lib/src/phrase-building/render-vp.ts
+++ b/src/lib/src/phrase-building/render-vp.ts
@ -352,7 +352,7 @@ export function getKingAndServant(
      };
 }
-function isFirstOrSecondPersPronoun(
+export function isFirstOrSecondPersPronoun(
  o: "none" | T.NPSelection | T.Person.ThirdPlurMale
 ): boolean {
  if (typeof o !== "object") return false;
--- a/src/types.ts
+++ b/src/types.ts
@ -1197,6 +1197,8 @@ export type Kid = {
  kid: { type: "ba" } | MiniPronoun;
 };
 export type ParsedKid = "ba" | "me" | "de" | "ye" | "mU";
 export type MiniPronoun = {
  type: "mini-pronoun";
  person: Person;