From a08443306406b078c5b0e0065ef8207b79544d56 Mon Sep 17 00:00:00 2001 From: adueck Date: Tue, 1 Aug 2023 20:19:03 +0400 Subject: [PATCH] possesives sort of working --- src/lib/src/parsing/inflection-query.ts | 30 +++-- src/lib/src/parsing/parse-noun.test.ts | 11 +- src/lib/src/parsing/parse-noun.ts | 164 +++++++++++++----------- src/lib/src/parsing/parse-phrase.ts | 2 +- 4 files changed, 117 insertions(+), 90 deletions(-) diff --git a/src/lib/src/parsing/inflection-query.ts b/src/lib/src/parsing/inflection-query.ts index 0cf2aff..fa41545 100644 --- a/src/lib/src/parsing/inflection-query.ts +++ b/src/lib/src/parsing/inflection-query.ts @@ -1,5 +1,4 @@ import * as T from "../../../types"; -import { endsInConsonant } from "../p-text-helpers"; import { isPattern1Entry, isPattern2Entry, @@ -110,7 +109,10 @@ export function getInflectionQueries( }, }); } - if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) { + if ( + s.endsWith("ان") && + !["ا", "و"].includes(s.charAt(s.length - 3) || "") + ) { queries.push({ search: { p: s.slice(0, -2) }, details: { @@ -127,7 +129,10 @@ export function getInflectionQueries( }, }); } - if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) { + if ( + s.endsWith("انې") && + !["ا", "و"].includes(s.charAt(s.length - 4) || "") + ) { queries.push({ search: { p: s.slice(0, -3) }, details: { @@ -144,7 +149,10 @@ export function getInflectionQueries( }, }); } - if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) { + if ( + s.endsWith("ګان") && + ["ا", "و"].includes(s.charAt(s.length - 4) || "") + ) { queries.push({ search: { p: s.slice(0, -3) }, details: { @@ -160,7 +168,10 @@ export function getInflectionQueries( }, }); } - if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) { + if ( + s.endsWith("ګانې") && + ["ا", "و"].includes(s.charAt(s.length - 5) || "") + ) { queries.push({ search: { p: s.slice(0, -4) }, details: { @@ -176,7 +187,7 @@ export function getInflectionQueries( }, }); } - if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) { + if (s.endsWith("وې") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) { queries.push({ search: { p: s.slice(0, -2) }, details: { @@ -192,7 +203,7 @@ export function getInflectionQueries( }, }); } - if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) { + if (s.endsWith("وو") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) { queries.push({ search: { p: s.slice(0, -2) }, details: { @@ -208,7 +219,10 @@ export function getInflectionQueries( }, }); } - if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) { + if ( + s.endsWith("ګانو") && + ["ا", "و"].includes(s.charAt(s.length - 5) || "") + ) { queries.push({ search: { p: s.slice(0, -4) }, details: { diff --git a/src/lib/src/parsing/parse-noun.test.ts b/src/lib/src/parsing/parse-noun.test.ts index 3ee197f..7fc13a0 100644 --- a/src/lib/src/parsing/parse-noun.test.ts +++ b/src/lib/src/parsing/parse-noun.test.ts @@ -1301,7 +1301,7 @@ describe("parsing nouns", () => { test(category, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); - const { success } = parseNoun(tokens, lookup, []); + const { success } = parseNoun(tokens, lookup, undefined); const res = success.map(([tkns, r]) => r); expect(res).toEqual(output); }); @@ -1408,7 +1408,8 @@ const adjsTests: { }, ], }, - // TODO: WHY DOES ADDING زړو break this ??? + // TODO: testing issue with the parser returning multiple options needs + // to be worked out to test double adjectives { input: "غټو کورونو", output: [ @@ -1435,9 +1436,9 @@ describe("parsing nouns with adjectives", () => { test(category, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); - expect(parseNoun(tokens, lookup, []).success.map((x) => x[1])).toEqual( - output - ); + expect( + parseNoun(tokens, lookup, undefined).success.map((x) => x[1]) + ).toEqual(output); }); }); }); diff --git a/src/lib/src/parsing/parse-noun.ts b/src/lib/src/parsing/parse-noun.ts index 48a41e5..5d0c2b6 100644 --- a/src/lib/src/parsing/parse-noun.ts +++ b/src/lib/src/parsing/parse-noun.ts @@ -2,7 +2,6 @@ import * as T from "../../../types"; import { getInflectionPattern } from "../inflection-pattern"; import { makeNounSelection } from "../phrase-building/make-selections"; import { - isFemNounEntry, isMascNounEntry, isNounEntry, isPluralNounEntry, @@ -18,6 +17,71 @@ import { parseAdjective } from "./parse-adjective"; export function parseNoun( tokens: Readonly, lookup: (s: Partial) => T.DictionaryEntry[], + prevPossesor: T.NounSelection | undefined +): { + success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][]; + errors: string[]; +} { + if (tokens.length === 0) { + return { + success: [], + errors: [], + }; + } + const [first, ...rest] = tokens; + const possesor = + first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined; + if (possesor) { + const runsAfterPossesor: [ + Readonly, + { inflected: boolean; selection: T.NounSelection } | undefined + ][] = possesor ? [...possesor.success] : [[tokens, undefined]]; + // could be a case for a monad ?? + return runsAfterPossesor.reduce>( + (acc, [tokens, possesor]) => { + if (possesor?.inflected === false) { + return { + success: [...acc.success], + errors: [...acc.errors, "possesor should be inflected"], + }; + } + const { success, errors } = parseNoun( + tokens, + lookup, + possesor + ? { + ...possesor.selection, + possesor: prevPossesor + ? { + shrunken: false, + np: { + type: "NP", + selection: prevPossesor, + }, + } + : undefined, + } + : undefined + ); + return { + success: [...acc.success, ...success], + errors: [...acc.errors, ...errors], + }; + }, + { success: [], errors: [] } + ); + } else { + return parseNounAfterPossesor(tokens, lookup, prevPossesor, []); + } +} + +// create NP parsing function for that +// TODO with possesor, parse an NP not a noun + +function parseNounAfterPossesor( + tokens: Readonly, + lookup: (s: Partial) => T.DictionaryEntry[], + possesor: T.NounSelection | undefined, adjectives: { inflection: (0 | 1 | 2)[]; gender: T.Gender[]; @@ -34,16 +98,14 @@ export function parseNoun( errors: [], }; } - const [first, ...rest] = tokens; // TODO: add recognition of او between adjectives const adjRes = parseAdjective(tokens, lookup); const withAdj = adjRes.map(([tkns, adj]) => - parseNoun(tkns, lookup, [...adjectives, adj]) + parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj]) ); + const [first, ...rest] = tokens; const success: ReturnType["success"] = []; const errors: string[] = []; - // const possesor = - // first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined; const searches = getInflectionQueries(first.s, true); @@ -52,8 +114,13 @@ export function parseNoun( details.forEach((deets) => { const fittingEntries = nounEntries.filter(deets.predicate); fittingEntries.forEach((entry) => { - if (isUnisexNounEntry(entry)) { - deets.gender.forEach((gender) => { + const genders: T.Gender[] = isUnisexNounEntry(entry) + ? ["masc", "fem"] + : isMascNounEntry(entry) + ? ["masc"] + : ["fem"]; + deets.gender.forEach((gender) => { + if (genders.includes(gender)) { deets.inflection.forEach((inf) => { const { ok, error } = adjsMatch( adjectives, @@ -78,6 +145,17 @@ export function parseNoun( ? number : selection.number, adjectives: adjectives.map((a) => a.selection), + // TODO: could be nicer to validate that the possesor is inflected before + // and just pass in the selection + possesor: possesor + ? { + shrunken: false, + np: { + type: "NP", + selection: possesor, + }, + } + : undefined, }, }, ]); @@ -89,74 +167,8 @@ export function parseNoun( }); } }); - }); - } else if (isMascNounEntry(entry) && deets.gender.includes("masc")) { - deets.inflection.forEach((inf) => { - const { ok, error } = adjsMatch( - adjectives, - "masc", - inf, - deets.plural - ); - if (ok) { - convertInflection(inf, entry, "masc", deets.plural).forEach( - ({ inflected, number }) => { - const selection = makeNounSelection(entry, undefined); - success.push([ - rest, - { - inflected, - selection: { - ...selection, - number: selection.numberCanChange - ? number - : selection.number, - adjectives: adjectives.map((a) => a.selection), - }, - }, - ]); - } - ); - } else { - error.forEach((e) => { - errors.push(e); - }); - } - }); - } else if (isFemNounEntry(entry) && deets.gender.includes("fem")) { - deets.inflection.forEach((inf) => { - const { ok, error } = adjsMatch( - adjectives, - "fem", - inf, - deets.plural - ); - if (ok) { - convertInflection(inf, entry, "fem", deets.plural).forEach( - ({ inflected, number }) => { - const selection = makeNounSelection(entry, undefined); - success.push([ - rest, - { - inflected, - selection: { - ...selection, - number: selection.numberCanChange - ? number - : selection.number, - adjectives: adjectives.map((a) => a.selection), - }, - }, - ]); - } - ); - } else { - error.forEach((e) => { - errors.push(e); - }); - } - }); - } + } + }); }); }); }); @@ -167,7 +179,7 @@ export function parseNoun( } function adjsMatch( - adjectives: Parameters[2], + adjectives: Parameters[3], gender: T.Gender, inf: 0 | 1 | 2, plural: boolean | undefined diff --git a/src/lib/src/parsing/parse-phrase.ts b/src/lib/src/parsing/parse-phrase.ts index ff5082b..9157bcc 100644 --- a/src/lib/src/parsing/parse-phrase.ts +++ b/src/lib/src/parsing/parse-phrase.ts @@ -12,7 +12,7 @@ export function parsePhrase( } { const adjsRes = parseAdjective(s, lookup); const prnsRes = parsePronoun(s); - const nounsRes = parseNoun(s, lookup, []); + const nounsRes = parseNoun(s, lookup, undefined); const correct = [...adjsRes, ...prnsRes, ...nounsRes.success] .filter(([tkns]) => tkns.length === 0)