From d4db23142e2e45a4195b7fc850d5877e64a333b8 Mon Sep 17 00:00:00 2001 From: adueck Date: Sun, 1 Oct 2023 16:18:29 -0700 Subject: [PATCH] really beta sandwich parsing --- src/lib/src/parsing/compare.ts | 1 - src/lib/src/parsing/parse-ap.ts | 17 +- src/lib/src/parsing/parse-blocks.ts | 7 +- src/lib/src/parsing/parse-noun.test.ts | 6 +- src/lib/src/parsing/parse-noun.ts | 21 +- src/lib/src/parsing/parse-np.ts | 9 +- src/lib/src/parsing/parse-npap.ts | 25 ++ src/lib/src/parsing/parse-participle.test.ts | 31 +- src/lib/src/parsing/parse-participle.ts | 20 +- src/lib/src/parsing/parse-phrase.ts | 3 +- src/lib/src/parsing/parse-possesor.ts | 2 +- src/lib/src/parsing/parse-sandwich.ts | 39 +++ src/lib/src/parsing/parse-vp.test.ts | 342 +++++++++---------- src/lib/src/parsing/parse-vp.ts | 66 ++-- 14 files changed, 318 insertions(+), 271 deletions(-) create mode 100644 src/lib/src/parsing/parse-npap.ts create mode 100644 src/lib/src/parsing/parse-sandwich.ts diff --git a/src/lib/src/parsing/compare.ts b/src/lib/src/parsing/compare.ts index 8b13789..e69de29 100644 --- a/src/lib/src/parsing/compare.ts +++ b/src/lib/src/parsing/compare.ts @@ -1 +0,0 @@ - diff --git a/src/lib/src/parsing/parse-ap.ts b/src/lib/src/parsing/parse-ap.ts index ed1e664..fc0729e 100644 --- a/src/lib/src/parsing/parse-ap.ts +++ b/src/lib/src/parsing/parse-ap.ts @@ -1,13 +1,26 @@ import * as T from "../../../types"; +import { fmapParseResult } from "../fp-ps"; import { LookupFunction } from "./lookup"; import { parseAdverb } from "./parse-adverb"; +import { parseSandwich } from "./parse-sandwich"; export function parseAP( s: Readonly, - lookup: LookupFunction + lookup: LookupFunction, + possesor: T.PossesorSelection | undefined ): T.ParseResult[] { if (s.length === 0) { return []; } - return parseAdverb(s, lookup); + return [ + ...(!possesor ? parseAdverb(s, lookup) : []), + ...fmapParseResult( + (selection) => + ({ + type: "AP", + selection, + } as const), + parseSandwich(s, lookup, possesor) + ), + ]; } diff --git a/src/lib/src/parsing/parse-blocks.ts b/src/lib/src/parsing/parse-blocks.ts index 8a643c9..8574d27 100644 --- a/src/lib/src/parsing/parse-blocks.ts +++ b/src/lib/src/parsing/parse-blocks.ts @@ -1,10 +1,9 @@ import * as T from "../../../types"; import { LookupFunction } from "./lookup"; -import { parseAP } from "./parse-ap"; import { parseEquative } from "./parse-equative"; import { parseKidsSection } from "./parse-kids-section"; import { parseNeg } from "./parse-negative"; -import { parseNP } from "./parse-np"; +import { parseNPAP } from "./parse-npap"; import { parsePastPart } from "./parse-past-part"; import { parsePH } from "./parse-ph"; import { parseVerb } from "./parse-verb"; @@ -34,9 +33,7 @@ export function parseBlocks( ); const allBlocks: T.ParseResult[] = [ - ...(!inVerbSection - ? [...parseAP(tokens, lookup), ...parseNP(tokens, lookup)] - : []), + ...(!inVerbSection ? parseNPAP(tokens, lookup) : []), // ensure at most one of each PH, VBE, VBP ...(prevPh ? [] : parsePH(tokens)), ...(blocks.some(isParsedVBE) diff --git a/src/lib/src/parsing/parse-noun.test.ts b/src/lib/src/parsing/parse-noun.test.ts index 992f9b2..6a8be91 100644 --- a/src/lib/src/parsing/parse-noun.test.ts +++ b/src/lib/src/parsing/parse-noun.test.ts @@ -1371,7 +1371,9 @@ describe("parsing nouns", () => { test(category, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); - const res = parseNoun(tokens, lookup).map(({ body }) => body); + const res = parseNoun(tokens, lookup, undefined, []).map( + ({ body }) => body + ); expect(res).toEqual(output); }); }); @@ -1503,7 +1505,7 @@ describe("parsing nouns with adjectives", () => { test(category, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); - const res = parseNoun(tokens, lookup) + const res = parseNoun(tokens, lookup, undefined, []) .filter(isCompleteResult) .map(({ body }) => body); expect(res).toEqual(output); diff --git a/src/lib/src/parsing/parse-noun.ts b/src/lib/src/parsing/parse-noun.ts index 0e235f3..4252f81 100644 --- a/src/lib/src/parsing/parse-noun.ts +++ b/src/lib/src/parsing/parse-noun.ts @@ -10,28 +10,11 @@ import { import { getInflectionQueries } from "./inflection-query"; import { LookupFunction } from "./lookup"; import { parseAdjective } from "./parse-adjective"; -import { parsePossesor } from "./parse-possesor"; import { bindParseResult } from "./utils"; type NounResult = { inflected: boolean; selection: T.NounSelection }; export function parseNoun( - tokens: Readonly, - lookup: LookupFunction -): T.ParseResult[] { - if (tokens.length === 0) { - return []; - } - const possesor = parsePossesor(tokens, lookup, undefined); - if (possesor.length) { - return bindParseResult(possesor, (tokens, p) => { - return parseNounAfterPossesor(tokens, lookup, p, []); - }); - } - return parseNounAfterPossesor(tokens, lookup, undefined, []); -} - -function parseNounAfterPossesor( tokens: Readonly, lookup: LookupFunction, possesor: T.PossesorSelection | undefined, @@ -48,7 +31,7 @@ function parseNounAfterPossesor( // TODO: add recognition of او between adjectives const adjRes = parseAdjective(tokens, lookup); const withAdj = bindParseResult(adjRes, (tkns, adj) => - parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj]) + parseNoun(tkns, lookup, possesor, [...adjectives, adj]) ); const [first, ...rest] = tokens; const searches = getInflectionQueries(first.s, true); @@ -113,7 +96,7 @@ function parseNounAfterPossesor( } function adjsMatch( - adjectives: Parameters[3], + adjectives: Parameters[3], gender: T.Gender, inf: 0 | 1 | 2, plural: boolean | undefined diff --git a/src/lib/src/parsing/parse-np.ts b/src/lib/src/parsing/parse-np.ts index fd99851..daf2cab 100644 --- a/src/lib/src/parsing/parse-np.ts +++ b/src/lib/src/parsing/parse-np.ts @@ -7,7 +7,8 @@ import { LookupFunction } from "./lookup"; export function parseNP( s: Readonly, - lookup: LookupFunction + lookup: LookupFunction, + possesor: T.PossesorSelection | undefined ): T.ParseResult[] { if (s.length === 0) { return []; @@ -39,8 +40,8 @@ export function parseNP( } return fmapParseResult(makeNPSl, [ - ...parsePronoun(s), - ...parseNoun(s, lookup), - ...parseParticiple(s, lookup), + ...(!possesor ? parsePronoun(s) : []), + ...parseNoun(s, lookup, possesor, []), + ...parseParticiple(s, lookup, possesor), ]); } diff --git a/src/lib/src/parsing/parse-npap.ts b/src/lib/src/parsing/parse-npap.ts new file mode 100644 index 0000000..be2c038 --- /dev/null +++ b/src/lib/src/parsing/parse-npap.ts @@ -0,0 +1,25 @@ +import * as T from "../../../types"; +import { LookupFunction } from "./lookup"; +import { parseAP } from "./parse-ap"; +import { parseNP } from "./parse-np"; +import { parsePossesor } from "./parse-possesor"; +import { bindParseResult } from "./utils"; + +export function parseNPAP( + s: Readonly, + lookup: LookupFunction +): T.ParseResult[] { + if (s.length === 0) { + return []; + } + const possesor = parsePossesor(s, lookup, undefined); + if (!possesor.length) { + return [...parseNP(s, lookup, undefined), ...parseAP(s, lookup, undefined)]; + } + return bindParseResult( + possesor, + (tokens, p) => { + return [...parseNP(tokens, lookup, p), ...parseAP(tokens, lookup, p)]; + } + ); +} diff --git a/src/lib/src/parsing/parse-participle.test.ts b/src/lib/src/parsing/parse-participle.test.ts index d4352a8..aa9fcfb 100644 --- a/src/lib/src/parsing/parse-participle.test.ts +++ b/src/lib/src/parsing/parse-participle.test.ts @@ -6,7 +6,7 @@ import { import * as T from "../../../types"; import { lookup, wordQuery } from "./lookup"; import { tokenizer } from "./tokenizer"; -import { parseParticiple } from "./parse-participle"; +import { parseNPAP } from "./parse-npap"; const leedul = wordQuery("لیدل", "verb"); const akheestul = wordQuery("اخیستل", "verb"); @@ -113,6 +113,20 @@ const tests: { }, ], }, + { + input: "د سړي لیدو", + output: [ + { + inflected: true, + selection: { + ...makeParticipleSelection(leedul), + possesor: makePossesorSelection( + makeNounSelection(saray, undefined) + ), + }, + }, + ], + }, ], }, ]; @@ -123,8 +137,19 @@ describe("parsing participles", () => { test(label, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); - const res = parseParticiple(tokens, lookup).map(({ body }) => body); - expect(res).toEqual(output); + const res = parseNPAP(tokens, lookup).map(({ body }) => body); + expect(res).toEqual( + output.map( + (x): T.ParsedNP => ({ + type: "NP", + inflected: x.inflected, + selection: { + type: "NP", + selection: x.selection, + }, + }) + ) + ); }); }); }); diff --git a/src/lib/src/parsing/parse-participle.ts b/src/lib/src/parsing/parse-participle.ts index ef93d23..b9b5241 100644 --- a/src/lib/src/parsing/parse-participle.ts +++ b/src/lib/src/parsing/parse-participle.ts @@ -1,31 +1,13 @@ import * as T from "../../../types"; import { LookupFunction } from "./lookup"; -import { parsePossesor } from "./parse-possesor"; -import { bindParseResult } from "./utils"; type ParticipleResult = { inflected: boolean; selection: T.ParticipleSelection; }; -export function parseParticiple( - tokens: Readonly, - lookup: LookupFunction -): T.ParseResult[] { - if (tokens.length === 0) { - return []; - } - const possesor = parsePossesor(tokens, lookup, undefined); - if (possesor.length) { - return bindParseResult(possesor, (tokens, p) => { - return parseParticipleAfterPossesor(tokens, lookup, p); - }); - } - return parseParticipleAfterPossesor(tokens, lookup, undefined); -} - // TODO: should have adverbs with participle -function parseParticipleAfterPossesor( +export function parseParticiple( tokens: Readonly, lookup: LookupFunction, possesor: T.PossesorSelection | undefined diff --git a/src/lib/src/parsing/parse-phrase.ts b/src/lib/src/parsing/parse-phrase.ts index 1266572..cbacc15 100644 --- a/src/lib/src/parsing/parse-phrase.ts +++ b/src/lib/src/parsing/parse-phrase.ts @@ -1,6 +1,5 @@ import * as T from "../../../types"; import { lookup } from "./lookup"; -import { parseNP } from "./parse-np"; import { parseVP } from "./parse-vp"; // شو should not be sheyaano !! @@ -17,7 +16,7 @@ export function parsePhrase(s: T.Token[]): { errors: string[]; } { const res = [ - ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length), + // ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length), // ...parseVerb(s, verbLookup), ...parseVP(s, lookup), ]; diff --git a/src/lib/src/parsing/parse-possesor.ts b/src/lib/src/parsing/parse-possesor.ts index 794b4f0..09d65cf 100644 --- a/src/lib/src/parsing/parse-possesor.ts +++ b/src/lib/src/parsing/parse-possesor.ts @@ -50,7 +50,7 @@ export function parsePossesor( })); } if (first.s === "د") { - const np = parseNP(rest, lookup); + const np = parseNP(rest, lookup, undefined); return bindParseResult(np, (tokens, body) => { const possesor: T.PossesorSelection = { shrunken: false, diff --git a/src/lib/src/parsing/parse-sandwich.ts b/src/lib/src/parsing/parse-sandwich.ts new file mode 100644 index 0000000..ed63b93 --- /dev/null +++ b/src/lib/src/parsing/parse-sandwich.ts @@ -0,0 +1,39 @@ +import * as T from "../../../types"; +import { LookupFunction } from "./lookup"; +import { sandwiches } from "../sandwiches"; +import { parseNP } from "./parse-np"; +import { bindParseResult } from "./utils"; + +export function parseSandwich( + s: Readonly, + lookup: LookupFunction, + possesor: T.PossesorSelection | undefined +): T.ParseResult>[] { + if (s.length === 0) { + return []; + } + + const [first, ...rest] = s; + + const startMatches = sandwiches.filter((x) => x.before?.p === first.s); + if (!startMatches) { + return []; + } + // TODO: parse without possesive! + const nps = parseNP(rest, lookup, possesor); + return bindParseResult(nps, (tokens, np) => { + const sandMatches = startMatches.filter((x) => x.after?.p === tokens[0]?.s); + // TODO: allow pattern #1 not inflected + const errors: T.ParseError[] = np.inflected + ? [] + : [{ message: "NP inside sandwich must be inflected" }]; + return sandMatches.map((s) => ({ + tokens: tokens.slice(1), + body: { + ...s, + inside: np.selection, + }, + errors, + })); + }); +} diff --git a/src/lib/src/parsing/parse-vp.test.ts b/src/lib/src/parsing/parse-vp.test.ts index fc9055b..1dfb42a 100644 --- a/src/lib/src/parsing/parse-vp.test.ts +++ b/src/lib/src/parsing/parse-vp.test.ts @@ -1474,177 +1474,177 @@ const tests: { }, })), }, - // { - // input: "ما خندل", - // output: getPeople(1, "sing").map((person) => ({ - // blocks: [ - // { - // key: 1, - // block: makeSubjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(person), - // }), - // }, - // { - // key: 2, - // block: { - // type: "objectSelection", - // selection: T.Person.ThirdPlurMale, - // }, - // }, - // ], - // verb: { - // type: "verb", - // verb: khandul, - // transitivity: "grammatically transitive", - // canChangeTransitivity: false, - // canChangeStatDyn: false, - // negative: false, - // tense: "imperfectivePast", - // canChangeVoice: true, - // isCompound: false, - // voice: "active", - // }, - // externalComplement: undefined, - // form: { - // removeKing: false, - // shrinkServant: false, - // }, - // })), - // }, - // { - // input: "خندل مې", - // output: getPeople(1, "sing").map((person) => ({ - // blocks: [ - // { - // key: 1, - // block: makeSubjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(person), - // }), - // }, - // { - // key: 2, - // block: { - // type: "objectSelection", - // selection: T.Person.ThirdPlurMale, - // }, - // }, - // ], - // verb: { - // type: "verb", - // verb: khandul, - // transitivity: "grammatically transitive", - // canChangeTransitivity: false, - // canChangeStatDyn: false, - // negative: false, - // tense: "imperfectivePast", - // canChangeVoice: true, - // isCompound: false, - // voice: "active", - // }, - // externalComplement: undefined, - // form: { - // removeKing: false, - // shrinkServant: true, - // }, - // })), - // }, - // { - // input: "خندل", - // output: [], - // }, - // { - // input: "خاندم مې", - // output: [], - // error: true, - // }, - // { - // input: "زه وینم", - // output: getPeople(1, "sing").map((person) => ({ - // blocks: [ - // { - // key: 1, - // block: makeSubjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(person), - // }), - // }, - // { - // key: 2, - // block: { - // type: "objectSelection", - // selection: T.Person.ThirdPlurMale, - // }, - // }, - // ], - // verb: { - // type: "verb", - // verb: leedul, - // transitivity: "grammatically transitive", - // canChangeTransitivity: false, - // canChangeStatDyn: false, - // negative: false, - // tense: "presentVerb", - // canChangeVoice: true, - // isCompound: false, - // voice: "active", - // }, - // externalComplement: undefined, - // form: { - // removeKing: false, - // shrinkServant: false, - // }, - // })), - // }, - // { - // input: "ما ولیدل", - // output: getPeople(1, "sing").flatMap((person) => - // ( - // ["transitive", "grammatically transitive"] as const - // ).map((transitivity) => ({ - // blocks: [ - // { - // key: 1, - // block: makeSubjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(person), - // }), - // }, - // { - // key: 2, - // block: - // transitivity === "grammatically transitive" - // ? { - // type: "objectSelection", - // selection: T.Person.ThirdPlurMale, - // } - // : makeObjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(T.Person.ThirdPlurMale), - // }), - // }, - // ], - // verb: { - // type: "verb", - // verb: leedul, - // transitivity, - // canChangeTransitivity: false, - // canChangeStatDyn: false, - // negative: false, - // tense: "perfectivePast", - // canChangeVoice: true, - // isCompound: false, - // voice: "active", - // }, - // externalComplement: undefined, - // form: { - // removeKing: transitivity === "transitive", - // shrinkServant: false, - // }, - // })) - // ), - // }, + { + input: "ما خندل", + output: getPeople(1, "sing").map((person) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(person), + }), + }, + { + key: 2, + block: { + type: "objectSelection", + selection: T.Person.ThirdPlurMale, + }, + }, + ], + verb: { + type: "verb", + verb: khandul, + transitivity: "grammatically transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "imperfectivePast", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + })), + }, + { + input: "خندل مې", + output: getPeople(1, "sing").map((person) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(person), + }), + }, + { + key: 2, + block: { + type: "objectSelection", + selection: T.Person.ThirdPlurMale, + }, + }, + ], + verb: { + type: "verb", + verb: khandul, + transitivity: "grammatically transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "imperfectivePast", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: true, + }, + })), + }, + { + input: "خندل", + output: [], + }, + { + input: "خاندم مې", + output: [], + error: true, + }, + { + input: "زه وینم", + output: getPeople(1, "sing").map((person) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(person), + }), + }, + { + key: 2, + block: { + type: "objectSelection", + selection: T.Person.ThirdPlurMale, + }, + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity: "grammatically transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "presentVerb", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + })), + }, + { + input: "ما ولیدل", + output: getPeople(1, "sing").flatMap((person) => + ( + ["transitive", "grammatically transitive"] as const + ).map((transitivity) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(person), + }), + }, + { + key: 2, + block: + transitivity === "grammatically transitive" + ? { + type: "objectSelection", + selection: T.Person.ThirdPlurMale, + } + : makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(T.Person.ThirdPlurMale), + }), + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity, + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "perfectivePast", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: transitivity === "transitive", + shrinkServant: false, + }, + })) + ), + }, ], }, ]; diff --git a/src/lib/src/parsing/parse-vp.ts b/src/lib/src/parsing/parse-vp.ts index 444e1a4..4dd9304 100644 --- a/src/lib/src/parsing/parse-vp.ts +++ b/src/lib/src/parsing/parse-vp.ts @@ -28,14 +28,11 @@ import { equals, zip } from "rambda"; // TODO: word query for kawul/kedul/stat/dyn -// TODO: test grammatically transitive stuff -// test raaba ye wree +// TODO: test all types with pronouns // TODO: way to get an error message for past participle and equative // not matching up -// TODO: negative with perfect forms - export function parseVP( tokens: Readonly, lookup: LookupFunction @@ -400,7 +397,6 @@ function finishTransitive({ ) ); } - // TODO: allow APs for this if (nps.length === 1) { const np = nps[0]; // possibilities @@ -476,27 +472,39 @@ function finishTransitive({ }); } } - const blocksOps: T.VPSBlockComplete[][] = servants.map((servant) => - !isPast + const blocksOps: T.VPSBlockComplete[][] = servants.map< + T.VPSBlockComplete[] + >((servant) => + !isPast && form.removeKing ? [ { - key: 1, + key: 2345, block: makeSubjectSelectionComplete(king), }, + ...mapOutnpsAndAps(["O"], npsAndAps), + ] + : !isPast && form.shrinkServant + ? [ + ...mapOutnpsAndAps(["S"], npsAndAps), { - key: 2, + key: 2345, block: makeObjectSelectionComplete(servant), }, ] + : isPast && form.removeKing + ? [ + ...mapOutnpsAndAps(["S"], npsAndAps), + { + key: 2345, + block: makeObjectSelectionComplete(king), + }, + ] : [ { - key: 1, + key: 2345, block: makeSubjectSelectionComplete(servant), }, - { - key: 2, - block: makeObjectSelectionComplete(king), - }, + ...mapOutnpsAndAps(["O"], npsAndAps), ] ); return blocksOps.map((blocks) => ({ @@ -547,23 +555,10 @@ function finishTransitive({ message: "past tense transitive verb must agree with the object", }); } - let blocks: T.VPSBlockComplete[] = [ - { - key: 1, - block: makeSubjectSelectionComplete(s.selection), - }, - { - key: 2, - block: makeObjectSelectionComplete(o.selection), - }, - ]; - if (flip) { - blocks = blocks.reverse(); - } return returnParseResult( tokens, { - blocks, + blocks: mapOutnpsAndAps(!flip ? ["S", "O"] : ["O", "S"], npsAndAps), verb: v, externalComplement: undefined, form: { @@ -619,23 +614,10 @@ function finishTransitive({ "non-past tense transitive verb must agree with the subject", }); } - let blocks: T.VPSBlockComplete[] = [ - { - key: 1, - block: makeSubjectSelectionComplete(s.selection), - }, - { - key: 2, - block: makeObjectSelectionComplete(o.selection), - }, - ]; - if (flip) { - blocks = blocks.reverse(); - } return returnParseResult( tokens, { - blocks, + blocks: mapOutnpsAndAps(!flip ? ["S", "O"] : ["O", "S"], npsAndAps), verb: v, externalComplement: undefined, form: {