possessives sort of working

2023-08-01 20:19:03 +04:00 · 2023-08-01 20:19:03 +04:00 · a084433064
parent f0624252bc
commit a084433064
4 changed files with 117 additions and 90 deletions
--- a/src/lib/src/parsing/inflection-query.ts
+++ b/src/lib/src/parsing/inflection-query.ts
@ -1,5 +1,4 @@
 import * as T from "../../../types";
-import { endsInConsonant } from "../p-text-helpers";
 import {
  isPattern1Entry,
  isPattern2Entry,
@ -110,7 +109,10 @@ export function getInflectionQueries(
        },
      });
    }
-    if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) {
+    if (
+      s.endsWith("ان") &&
+      !["ا", "و"].includes(s.charAt(s.length - 3) || "")
+    ) {
      queries.push({
        search: { p: s.slice(0, -2) },
        details: {
@ -127,7 +129,10 @@ export function getInflectionQueries(
        },
      });
    }
-    if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) {
+    if (
+      s.endsWith("انې") &&
+      !["ا", "و"].includes(s.charAt(s.length - 4) || "")
+    ) {
      queries.push({
        search: { p: s.slice(0, -3) },
        details: {
@ -144,7 +149,10 @@ export function getInflectionQueries(
        },
      });
    }
-    if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) {
+    if (
+      s.endsWith("ګان") &&
+      ["ا", "و"].includes(s.charAt(s.length - 4) || "")
+    ) {
      queries.push({
        search: { p: s.slice(0, -3) },
        details: {
@ -160,7 +168,10 @@ export function getInflectionQueries(
        },
      });
    }
-    if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) {
+    if (
+      s.endsWith("ګانې") &&
+      ["ا", "و"].includes(s.charAt(s.length - 5) || "")
+    ) {
      queries.push({
        search: { p: s.slice(0, -4) },
        details: {
@ -176,7 +187,7 @@ export function getInflectionQueries(
        },
      });
    }
-    if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) {
+    if (s.endsWith("وې") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
      queries.push({
        search: { p: s.slice(0, -2) },
        details: {
@ -192,7 +203,7 @@ export function getInflectionQueries(
        },
      });
    }
-    if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) {
+    if (s.endsWith("وو") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
      queries.push({
        search: { p: s.slice(0, -2) },
        details: {
@ -208,7 +219,10 @@ export function getInflectionQueries(
        },
      });
    }
-    if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) {
+    if (
+      s.endsWith("ګانو") &&
+      ["ا", "و"].includes(s.charAt(s.length - 5) || "")
+    ) {
      queries.push({
        search: { p: s.slice(0, -4) },
        details: {
--- a/src/lib/src/parsing/parse-noun.test.ts
+++ b/src/lib/src/parsing/parse-noun.test.ts
@ -1301,7 +1301,7 @@ describe("parsing nouns", () => {
    test(category, () => {
      cases.forEach(({ input, output }) => {
        const tokens = tokenizer(input);
-        const { success } = parseNoun(tokens, lookup, []);
+        const { success } = parseNoun(tokens, lookup, undefined);
        const res = success.map(([tkns, r]) => r);
        expect(res).toEqual(output);
      });
@ -1408,7 +1408,8 @@ const adjsTests: {
          },
        ],
      },
-      // TODO: WHY DOES ADDING زړو break this ???
+      // TODO: testing issue with the parser returning multiple options needs
+      // to be worked out to test double adjectives
      {
        input: "غټو کورونو",
        output: [
@ -1435,9 +1436,9 @@ describe("parsing nouns with adjectives", () => {
    test(category, () => {
      cases.forEach(({ input, output }) => {
        const tokens = tokenizer(input);
-        expect(parseNoun(tokens, lookup, []).success.map((x) => x[1])).toEqual(
-          output
-        );
+        expect(
+          parseNoun(tokens, lookup, undefined).success.map((x) => x[1])
+        ).toEqual(output);
      });
    });
  });
--- a/src/lib/src/parsing/parse-noun.ts
+++ b/src/lib/src/parsing/parse-noun.ts
@ -2,7 +2,6 @@ import * as T from "../../../types";
 import { getInflectionPattern } from "../inflection-pattern";
 import { makeNounSelection } from "../phrase-building/make-selections";
 import {
-  isFemNounEntry,
  isMascNounEntry,
  isNounEntry,
  isPluralNounEntry,
@ -18,6 +17,71 @@ import { parseAdjective } from "./parse-adjective";
 export function parseNoun(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
+  prevPossesor: T.NounSelection | undefined
+): {
+  success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
+  errors: string[];
+} {
+  if (tokens.length === 0) {
+    return {
+      success: [],
+      errors: [],
+    };
+  }
+  const [first, ...rest] = tokens;
+  const possesor =
+    first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
+  if (possesor) {
+    const runsAfterPossesor: [
+      Readonly<T.Token[]>,
+      { inflected: boolean; selection: T.NounSelection } | undefined
+    ][] = possesor ? [...possesor.success] : [[tokens, undefined]];
+    // could be a case for a monad ??
+    return runsAfterPossesor.reduce<ReturnType<typeof parseNoun>>(
+      (acc, [tokens, possesor]) => {
+        if (possesor?.inflected === false) {
+          return {
+            success: [...acc.success],
+            errors: [...acc.errors, "possesor should be inflected"],
+          };
+        }
+        const { success, errors } = parseNoun(
+          tokens,
+          lookup,
+          possesor
+            ? {
+                ...possesor.selection,
+                possesor: prevPossesor
+                  ? {
+                      shrunken: false,
+                      np: {
+                        type: "NP",
+                        selection: prevPossesor,
+                      },
+                    }
+                  : undefined,
+              }
+            : undefined
+        );
+        return {
+          success: [...acc.success, ...success],
+          errors: [...acc.errors, ...errors],
+        };
+      },
+      { success: [], errors: [] }
+    );
+  } else {
+    return parseNounAfterPossesor(tokens, lookup, prevPossesor, []);
+  }
+}
+
+// create NP parsing function for that
+// TODO with possesor, parse an NP not a noun
+
+function parseNounAfterPossesor(
+  tokens: Readonly<T.Token[]>,
+  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
+  possesor: T.NounSelection | undefined,
  adjectives: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
@ -34,16 +98,14 @@ export function parseNoun(
      errors: [],
    };
  }
-  const [first, ...rest] = tokens;
  // TODO: add recognition of او between adjectives
  const adjRes = parseAdjective(tokens, lookup);
  const withAdj = adjRes.map(([tkns, adj]) =>
-    parseNoun(tkns, lookup, [...adjectives, adj])
+    parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
  );
+  const [first, ...rest] = tokens;
  const success: ReturnType<typeof parseNoun>["success"] = [];
  const errors: string[] = [];
-  // const possesor =
-  //   first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined;

  const searches = getInflectionQueries(first.s, true);

@ -52,8 +114,13 @@ export function parseNoun(
    details.forEach((deets) => {
      const fittingEntries = nounEntries.filter(deets.predicate);
      fittingEntries.forEach((entry) => {
-        if (isUnisexNounEntry(entry)) {
-          deets.gender.forEach((gender) => {
+        const genders: T.Gender[] = isUnisexNounEntry(entry)
+          ? ["masc", "fem"]
+          : isMascNounEntry(entry)
+          ? ["masc"]
+          : ["fem"];
+        deets.gender.forEach((gender) => {
+          if (genders.includes(gender)) {
            deets.inflection.forEach((inf) => {
              const { ok, error } = adjsMatch(
                adjectives,
@ -78,6 +145,17 @@ export function parseNoun(
                            ? number
                            : selection.number,
                          adjectives: adjectives.map((a) => a.selection),
+                          // TODO: could be nicer to validate that the possesor is inflected before
+                          // and just pass in the selection
+                          possesor: possesor
+                            ? {
+                                shrunken: false,
+                                np: {
+                                  type: "NP",
+                                  selection: possesor,
+                                },
+                              }
+                            : undefined,
                        },
                      },
                    ]);
@ -89,74 +167,8 @@ export function parseNoun(
                });
              }
            });
-          });
-        } else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
-          deets.inflection.forEach((inf) => {
-            const { ok, error } = adjsMatch(
-              adjectives,
-              "masc",
-              inf,
-              deets.plural
-            );
-            if (ok) {
-              convertInflection(inf, entry, "masc", deets.plural).forEach(
-                ({ inflected, number }) => {
-                  const selection = makeNounSelection(entry, undefined);
-                  success.push([
-                    rest,
-                    {
-                      inflected,
-                      selection: {
-                        ...selection,
-                        number: selection.numberCanChange
-                          ? number
-                          : selection.number,
-                        adjectives: adjectives.map((a) => a.selection),
-                      },
-                    },
-                  ]);
-                }
-              );
-            } else {
-              error.forEach((e) => {
-                errors.push(e);
-              });
-            }
-          });
-        } else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
-          deets.inflection.forEach((inf) => {
-            const { ok, error } = adjsMatch(
-              adjectives,
-              "fem",
-              inf,
-              deets.plural
-            );
-            if (ok) {
-              convertInflection(inf, entry, "fem", deets.plural).forEach(
-                ({ inflected, number }) => {
-                  const selection = makeNounSelection(entry, undefined);
-                  success.push([
-                    rest,
-                    {
-                      inflected,
-                      selection: {
-                        ...selection,
-                        number: selection.numberCanChange
-                          ? number
-                          : selection.number,
-                        adjectives: adjectives.map((a) => a.selection),
-                      },
-                    },
-                  ]);
-                }
-              );
-            } else {
-              error.forEach((e) => {
-                errors.push(e);
-              });
-            }
-          });
-        }
+          }
+        });
      });
    });
  });
@ -167,7 +179,7 @@ export function parseNoun(
 }

 function adjsMatch(
-  adjectives: Parameters<typeof parseNoun>[2],
+  adjectives: Parameters<typeof parseNounAfterPossesor>[3],
  gender: T.Gender,
  inf: 0 | 1 | 2,
  plural: boolean | undefined
--- a/src/lib/src/parsing/parse-phrase.ts
+++ b/src/lib/src/parsing/parse-phrase.ts
@ -12,7 +12,7 @@ export function parsePhrase(
 } {
  const adjsRes = parseAdjective(s, lookup);
  const prnsRes = parsePronoun(s);
-  const nounsRes = parseNoun(s, lookup, []);
+  const nounsRes = parseNoun(s, lookup, undefined);

  const correct = [...adjsRes, ...prnsRes, ...nounsRes.success]
    .filter(([tkns]) => tkns.length === 0)