From 56890cf4b97a2815be4fe5846b8ddceb8faf04c0 Mon Sep 17 00:00:00 2001 From: adueck Date: Mon, 28 Aug 2023 14:18:14 +0400 Subject: [PATCH] parsing participles, and improved participle rendering --- src/components/src/blocks/Block.tsx | 7 +- .../src/vp-explorer/NewVerbFormDisplay.tsx | 9 +- .../src/vp-explorer/VPExplorerQuiz.tsx | 9 +- src/components/src/vp-explorer/VPPicker.tsx | 1 - src/demo-components/ParserDemo.tsx | 7 +- src/lib/src/parsing/lookup.tsx | 18 + src/lib/src/parsing/misc.ts | 7 + src/lib/src/parsing/parse-blocks.ts | 25 +- src/lib/src/parsing/parse-noun.test.ts | 10 +- src/lib/src/parsing/parse-noun.ts | 5 +- src/lib/src/parsing/parse-np.ts | 11 +- src/lib/src/parsing/parse-participle.test.ts | 133 ++++++ src/lib/src/parsing/parse-participle.ts | 54 +++ src/lib/src/parsing/parse-phrase.ts | 13 +- src/lib/src/parsing/parse-possesor.test.ts | 6 +- src/lib/src/parsing/parse-possesor.ts | 12 +- src/lib/src/parsing/parse-verb.ts | 3 +- src/lib/src/parsing/parse-vp.test.ts | 4 +- src/lib/src/parsing/parse-vp.ts | 12 +- src/lib/src/phrase-building/compile.ts | 2 +- src/lib/src/phrase-building/np-tools.ts | 9 +- src/lib/src/phrase-building/render-np.ts | 425 +++++++++++------- 22 files changed, 582 insertions(+), 200 deletions(-) create mode 100644 src/lib/src/parsing/misc.ts create mode 100644 src/lib/src/parsing/parse-participle.test.ts create mode 100644 src/lib/src/parsing/parse-participle.ts diff --git a/src/components/src/blocks/Block.tsx b/src/components/src/blocks/Block.tsx index 5e32c93..157eaed 100644 --- a/src/components/src/blocks/Block.tsx +++ b/src/components/src/blocks/Block.tsx @@ -10,6 +10,7 @@ import { useState } from "react"; import { getLength } from "../../../lib/src/p-text-helpers"; import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal"; import { negativeParticle } from "../../../lib/src/grammar-units"; +import { flattenLengths } from "../../library"; function Block({ opts, @@ -493,7 +494,7 @@ function ComplementBlock({ 
}) { return (
- {adv.ps[0][script]} + {flattenLengths(adv.ps)[0][script]}
Loc. Adv.
{adv.e}
@@ -614,7 +615,7 @@ function CompNounBlock({ extraClassName={`!inside && hasPossesor ? "pt-2" : ""`} padding={"1rem"} > - {noun.ps[0][script]} + {flattenLengths(noun.ps)[0][script]}
Comp. Noun
{noun.e} @@ -656,7 +657,7 @@ export function NPBlock({ ,
{" "} - {np.selection.ps[0][script]} + {flattenLengths(np.selection.ps)[0][script]}
, ]; const el = script === "p" ? elements.reverse() : elements; diff --git a/src/components/src/vp-explorer/NewVerbFormDisplay.tsx b/src/components/src/vp-explorer/NewVerbFormDisplay.tsx index f228619..daf7646 100644 --- a/src/components/src/vp-explorer/NewVerbFormDisplay.tsx +++ b/src/components/src/vp-explorer/NewVerbFormDisplay.tsx @@ -1,6 +1,9 @@ import { useEffect, useState } from "react"; import ButtonSelect from "../ButtonSelect"; -import { combineIntoText } from "../../../lib/src/phrase-building/compile"; +import { + combineIntoText, + flattenLengths, +} from "../../../lib/src/phrase-building/compile"; import { insertNegative } from "../../../lib/src/phrase-building/render-vp"; import * as T from "../../../types"; import TableCell from "../TableCell"; @@ -327,7 +330,9 @@ function AgreementInfo({ {transitivity === "transitive" && past && objNP && (
- {objNP.selection.ps[0]} + + {flattenLengths(objNP.selection.ps)[0]} + {` `}({printGenNum(personToGenNum(objNP.selection.person))})
)} diff --git a/src/components/src/vp-explorer/VPExplorerQuiz.tsx b/src/components/src/vp-explorer/VPExplorerQuiz.tsx index b51518e..c3e0d70 100644 --- a/src/components/src/vp-explorer/VPExplorerQuiz.tsx +++ b/src/components/src/vp-explorer/VPExplorerQuiz.tsx @@ -8,7 +8,10 @@ import shuffleArray from "../../../lib/src/shuffle-array"; import InlinePs from "../InlinePs"; import { psStringEquals } from "../../../lib/src/p-text-helpers"; import { renderVP } from "../../../lib/src/phrase-building/render-vp"; -import { compileVP } from "../../../lib/src/phrase-building/compile"; +import { + compileVP, + flattenLengths, +} from "../../../lib/src/phrase-building/compile"; import { getRandomTense } from "./TensePicker"; import { getTenseFromVerbSelection, @@ -386,7 +389,9 @@ function QuizNPDisplay({
{stage === "blanks" && (
- {children.selection.ps[0]} + + {flattenLengths(children.selection.ps)[0]} +
)}
{children.selection.e}
diff --git a/src/components/src/vp-explorer/VPPicker.tsx b/src/components/src/vp-explorer/VPPicker.tsx index 8df6c93..3c96f18 100644 --- a/src/components/src/vp-explorer/VPPicker.tsx +++ b/src/components/src/vp-explorer/VPPicker.tsx @@ -421,7 +421,6 @@ function isGenStatCompNoun( | undefined ) { if (!block) return false; - console.log({ block }); if ( block.type === "objectSelection" && typeof block.selection === "object" && diff --git a/src/demo-components/ParserDemo.tsx b/src/demo-components/ParserDemo.tsx index 97119cf..19805a5 100644 --- a/src/demo-components/ParserDemo.tsx +++ b/src/demo-components/ParserDemo.tsx @@ -1,7 +1,6 @@ import { useState } from "react"; import * as T from "../types"; import { parsePhrase } from "../lib/src/parsing/parse-phrase"; -import { lookup } from "../lib/src/parsing/lookup"; import { tokenizer } from "../lib/src/parsing/tokenizer"; import { CompiledPTextDisplay, @@ -14,15 +13,15 @@ const working = [ "limited demo vocab", "phrases with simple verbs", "basic verb tenses", - "noun phrases (except participles)", + "noun phrases", "mini-pronouns for shrunken servants", "grammar error correction", "negatives", ]; const todo = [ - "participles", "compound verbs", + "adjectival participles", "adverbial phrases", "relative clauses", "equative verbs", @@ -60,7 +59,7 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) { setErrors([]); return; } - const { success, errors } = parsePhrase(tokenizer(value), lookup); + const { success, errors } = parsePhrase(tokenizer(value)); setText(value); setErrors(errors); setResult(success); diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index e145ea9..807c491 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -5,6 +5,7 @@ import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; import { splitVarients, undoAaXuPattern } from "../p-text-helpers"; import { 
arraysHaveCommon } from "../misc-helpers"; +import { shortVerbEndConsonant } from "./misc"; export function lookup(s: Partial): T.DictionaryEntry[] { const [key, value] = Object.entries(s)[0]; @@ -41,6 +42,23 @@ export function shouldCheckTpp(s: string): boolean { ); } +export function participleLookup(input: string): T.VerbEntry[] { + if (input.endsWith("ل")) { + return verbs.filter((e) => e.entry.p === input); + } + // TODO: short forms + if (input.endsWith("و")) { + const s = input.slice(0, -1); + return [ + ...verbs.filter((e) => e.entry.p === s), + ...(shortVerbEndConsonant.includes(s[s.length - 1]) + ? verbs.filter((e) => e.entry.p === s + "ل") + : []), + ]; + } + return []; +} + export function verbLookup(input: string): T.VerbEntry[] { // TODO: // only look up forms if there's an ending diff --git a/src/lib/src/parsing/misc.ts b/src/lib/src/parsing/misc.ts new file mode 100644 index 0000000..c37d9cb --- /dev/null +++ b/src/lib/src/parsing/misc.ts @@ -0,0 +1,7 @@ +/** + * These are the consonants that a short verb root can end with + * to make it possible to have 3rd person masc sing past + * congugations without an ending, (ie. ولید) or participles without the + * ل (ie. اخیستو, لیدو) + */ +export const shortVerbEndConsonant = ["د", "ت", "ړ"]; diff --git a/src/lib/src/parsing/parse-blocks.ts b/src/lib/src/parsing/parse-blocks.ts index bb559fb..2156ae7 100644 --- a/src/lib/src/parsing/parse-blocks.ts +++ b/src/lib/src/parsing/parse-blocks.ts @@ -10,6 +10,7 @@ export function parseBlocks( tokens: Readonly, lookup: (s: Partial) => T.DictionaryEntry[], verbLookup: (s: string) => T.VerbEntry[], + participleLookup: (s: string) => T.VerbEntry[], blocks: T.ParsedBlock[], kids: T.ParsedKid[] ): T.ParseResult<{ @@ -23,8 +24,7 @@ export function parseBlocks( (b): b is T.ParsedPH => b.type === "PH" ); const vbExists = blocks.some((b) => "type" in b && b.type === "VB"); - const np = prevPh ? [] : parseNP(tokens, lookup); - // UHOH... This could cause double paths ... 
maybe don't parse the PH in the parse VB! + const np = prevPh ? [] : parseNP(tokens, lookup, participleLookup); const ph = vbExists || prevPh ? [] : parsePH(tokens); const vb = parseVerb(tokens, verbLookup); const neg = parseNeg(tokens); @@ -50,10 +50,14 @@ export function parseBlocks( const errors: T.ParseError[] = []; if (r.type === "kids") { return { - next: parseBlocks(tokens, lookup, verbLookup, blocks, [ - ...kids, - ...r.kids, - ]), + next: parseBlocks( + tokens, + lookup, + verbLookup, + participleLookup, + blocks, + [...kids, ...r.kids] + ), errors: blocks.length !== 1 ? [{ message: "kids' section out of place" }] @@ -74,7 +78,14 @@ export function parseBlocks( return []; } return { - next: parseBlocks(tokens, lookup, verbLookup, [...blocks, r], kids), + next: parseBlocks( + tokens, + lookup, + verbLookup, + participleLookup, + [...blocks, r], + kids + ), errors, }; }); diff --git a/src/lib/src/parsing/parse-noun.test.ts b/src/lib/src/parsing/parse-noun.test.ts index bbbef7e..c6c38d0 100644 --- a/src/lib/src/parsing/parse-noun.test.ts +++ b/src/lib/src/parsing/parse-noun.test.ts @@ -3,7 +3,7 @@ import { makeNounSelection, } from "../phrase-building/make-selections"; import * as T from "../../../types"; -import { lookup, wordQuery } from "./lookup"; +import { lookup, participleLookup, wordQuery } from "./lookup"; import { parseNoun } from "./parse-noun"; import { tokenizer } from "./tokenizer"; import { isCompleteResult } from "./utils"; @@ -41,7 +41,7 @@ const nabee = wordQuery("نبي", "noun"); const lafz = wordQuery("لفظ", "noun"); // TODO: test for adjective errors etc - +// TODO: زړو should not be hearts // bundled plural const tests: { @@ -1371,7 +1371,9 @@ describe("parsing nouns", () => { test(category, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); - const res = parseNoun(tokens, lookup).map(({ body }) => body); + const res = parseNoun(tokens, lookup, participleLookup).map( + ({ body }) => body + ); 
expect(res).toEqual(output); }); }); @@ -1503,7 +1505,7 @@ describe("parsing nouns with adjectives", () => { test(category, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); - const res = parseNoun(tokens, lookup) + const res = parseNoun(tokens, lookup, participleLookup) .filter(isCompleteResult) .map(({ body }) => body); expect(res).toEqual(output); diff --git a/src/lib/src/parsing/parse-noun.ts b/src/lib/src/parsing/parse-noun.ts index cd2f1c9..df6fb08 100644 --- a/src/lib/src/parsing/parse-noun.ts +++ b/src/lib/src/parsing/parse-noun.ts @@ -16,12 +16,13 @@ type NounResult = { inflected: boolean; selection: T.NounSelection }; export function parseNoun( tokens: Readonly, - lookup: (s: Partial) => T.DictionaryEntry[] + lookup: (s: Partial) => T.DictionaryEntry[], + pariticipleLookup: (s: string) => T.VerbEntry[] ): T.ParseResult[] { if (tokens.length === 0) { return []; } - const possesor = parsePossesor(tokens, lookup, undefined); + const possesor = parsePossesor(tokens, lookup, pariticipleLookup, undefined); if (possesor.length) { return bindParseResult(possesor, (tokens, p) => { return parseNounAfterPossesor(tokens, lookup, p, []); diff --git a/src/lib/src/parsing/parse-np.ts b/src/lib/src/parsing/parse-np.ts index fb4a483..a88cf2b 100644 --- a/src/lib/src/parsing/parse-np.ts +++ b/src/lib/src/parsing/parse-np.ts @@ -2,10 +2,12 @@ import * as T from "../../../types"; import { parsePronoun } from "./parse-pronoun"; import { parseNoun } from "./parse-noun"; import { fmapParseResult } from "../fp-ps"; +import { parseParticiple } from "./parse-participle"; export function parseNP( s: Readonly, - lookup: (s: Partial) => T.DictionaryEntry[] + lookup: (s: Partial) => T.DictionaryEntry[], + participleLookup: (input: string) => T.VerbEntry[] ): T.ParseResult[] { if (s.length === 0) { return []; @@ -21,6 +23,10 @@ export function parseNP( inflected: boolean; selection: T.NounSelection; } + | { + inflected: boolean; + selection: 
T.ParticipleSelection; + } ): T.ParsedNP { return { type: "NP", @@ -34,6 +40,7 @@ export function parseNP( return fmapParseResult(makeNPSl, [ ...parsePronoun(s), - ...parseNoun(s, lookup), + ...parseNoun(s, lookup, participleLookup), + ...parseParticiple(s, lookup, participleLookup), ]); } diff --git a/src/lib/src/parsing/parse-participle.test.ts b/src/lib/src/parsing/parse-participle.test.ts new file mode 100644 index 0000000..54d0ffa --- /dev/null +++ b/src/lib/src/parsing/parse-participle.test.ts @@ -0,0 +1,133 @@ +import { + makeNounSelection, + makeParticipleSelection, + makePossesorSelection, +} from "../phrase-building/make-selections"; +import * as T from "../../../types"; +import { lookup, participleLookup, wordQuery } from "./lookup"; +import { tokenizer } from "./tokenizer"; +import { parseParticiple } from "./parse-participle"; + +const leedul = wordQuery("لیدل", "verb"); +const akheestul = wordQuery("اخیستل", "verb"); +const wahul = wordQuery("وهل", "verb"); +const saray = wordQuery("سړی", "noun"); + +const tests: { + label: string; + cases: { + input: string; + output: { + inflected: boolean; + selection: T.ParticipleSelection; + }[]; + }[]; +}[] = [ + { + label: "uninflected participles", + cases: [ + { + input: "وهل", + output: [ + { + inflected: false, + selection: makeParticipleSelection(wahul), + }, + ], + }, + { + input: "لیدل", + output: [ + { + inflected: false, + selection: makeParticipleSelection(leedul), + }, + ], + }, + ], + }, + { + label: "inflected participles", + cases: [ + { + input: "وهلو", + output: [ + { + inflected: true, + selection: makeParticipleSelection(wahul), + }, + ], + }, + { + input: "اخیستلو", + output: [ + { + inflected: true, + selection: makeParticipleSelection(akheestul), + }, + ], + }, + ], + }, + { + label: "short forms of inflected participles", + cases: [ + { + input: "لیدو", + output: [ + { + inflected: true, + selection: makeParticipleSelection(leedul), + }, + ], + }, + { + input: "اخیستو", + output: [ + { + 
inflected: true, + selection: makeParticipleSelection(akheestul), + }, + ], + }, + { + input: "وهو", + output: [], + }, + ], + }, + { + label: "with subj/obj", + cases: [ + { + input: "د سړي لیدل", + output: [ + { + inflected: false, + selection: { + ...makeParticipleSelection(leedul), + possesor: makePossesorSelection( + makeNounSelection(saray, undefined) + ), + }, + }, + ], + }, + ], + }, +]; + +describe("parsing participles", () => { + tests.forEach(({ label, cases }) => { + // eslint-disable-next-line jest/valid-title + test(label, () => { + cases.forEach(({ input, output }) => { + const tokens = tokenizer(input); + const res = parseParticiple(tokens, lookup, participleLookup).map( + ({ body }) => body + ); + expect(res).toEqual(output); + }); + }); + }); +}); diff --git a/src/lib/src/parsing/parse-participle.ts b/src/lib/src/parsing/parse-participle.ts new file mode 100644 index 0000000..b6938fb --- /dev/null +++ b/src/lib/src/parsing/parse-participle.ts @@ -0,0 +1,54 @@ +import * as T from "../../../types"; +import { parsePossesor } from "./parse-possesor"; +import { bindParseResult } from "./utils"; + +type ParticipleResult = { + inflected: boolean; + selection: T.ParticipleSelection; +}; + +export function parseParticiple( + tokens: Readonly, + lookup: (s: Partial) => T.DictionaryEntry[], + participleLookup: (s: string) => T.VerbEntry[] +): T.ParseResult[] { + if (tokens.length === 0) { + return []; + } + const possesor = parsePossesor(tokens, lookup, participleLookup, undefined); + if (possesor.length) { + return bindParseResult(possesor, (tokens, p) => { + return parseParticipleAfterPossesor(tokens, participleLookup, p); + }); + } + return parseParticipleAfterPossesor(tokens, participleLookup, undefined); +} + +// TODO: should have adverbs with participle +function parseParticipleAfterPossesor( + tokens: Readonly, + participleLookup: (s: string) => T.VerbEntry[], + possesor: T.PossesorSelection | undefined +): T.ParseResult[] { + if (tokens.length === 0) 
{ + return []; + } + const [first, ...rest] = tokens; + if (!["ل", "و"].includes(first.s.at(-1) || "")) { + return []; + } + const inflected = first.s.endsWith("و"); + const matches = participleLookup(first.s); + return matches.map>((verb) => ({ + tokens: rest, + body: { + inflected, + selection: { + type: "participle", + verb, + possesor, + }, + }, + errors: [], + })); +} diff --git a/src/lib/src/parsing/parse-phrase.ts b/src/lib/src/parsing/parse-phrase.ts index 980eee6..8c2c74f 100644 --- a/src/lib/src/parsing/parse-phrase.ts +++ b/src/lib/src/parsing/parse-phrase.ts @@ -1,14 +1,11 @@ import * as T from "../../../types"; -import { verbLookup } from "./lookup"; +import { verbLookup, lookup, participleLookup } from "./lookup"; import { parseNP } from "./parse-np"; import { parseVP } from "./parse-vp"; // شو should not be sheyaano !! -export function parsePhrase( - s: T.Token[], - lookup: (s: Partial) => T.DictionaryEntry[] -): { +export function parsePhrase(s: T.Token[]): { success: ( | { inflected: boolean; @@ -20,9 +17,11 @@ export function parsePhrase( errors: string[]; } { const res = [ - ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length), + ...parseNP(s, lookup, participleLookup).filter( + ({ tokens }) => !tokens.length + ), // ...parseVerb(s, verbLookup), - ...parseVP(s, lookup, verbLookup), + ...parseVP(s, lookup, verbLookup, participleLookup), ]; const success = res.map((x) => x.body); diff --git a/src/lib/src/parsing/parse-possesor.test.ts b/src/lib/src/parsing/parse-possesor.test.ts index b0b644c..64291de 100644 --- a/src/lib/src/parsing/parse-possesor.test.ts +++ b/src/lib/src/parsing/parse-possesor.test.ts @@ -5,7 +5,7 @@ import { makeNounSelection, makePronounSelection, } from "../phrase-building/make-selections"; -import { lookup, wordQuery } from "./lookup"; +import { lookup, participleLookup, wordQuery } from "./lookup"; import { parsePossesor } from "./parse-possesor"; import { tokenizer } from "./tokenizer"; import { isCompleteResult } 
from "./utils"; @@ -110,12 +110,12 @@ const tests: { test("parse possesor", () => { tests.forEach(({ input, output }) => { const tokens = tokenizer(input); - const parsed = parsePossesor(tokens, lookup, undefined); + const parsed = parsePossesor(tokens, lookup, participleLookup, undefined); if (output === "error") { expect(parsed.some((x) => x.errors.length)).toBe(true); } else { expect( - parsePossesor(tokens, lookup, undefined) + parsePossesor(tokens, lookup, participleLookup, undefined) .filter(isCompleteResult) .map((x) => x.body.np.selection) ).toEqual(output); diff --git a/src/lib/src/parsing/parse-possesor.ts b/src/lib/src/parsing/parse-possesor.ts index ad4712f..c873d19 100644 --- a/src/lib/src/parsing/parse-possesor.ts +++ b/src/lib/src/parsing/parse-possesor.ts @@ -19,6 +19,7 @@ const contractions: [string[], T.Person[]][] = [ export function parsePossesor( tokens: Readonly, lookup: (s: Partial) => T.DictionaryEntry[], + participleLookup: (s: string) => T.VerbEntry[], prevPossesor: T.PossesorSelection | undefined ): T.ParseResult[] { if (tokens.length === 0) { @@ -42,14 +43,14 @@ export function parsePossesor( ? 
[{ message: "a pronoun cannot have a possesor" }] : []; return contractions - .flatMap((p) => parsePossesor(rest, lookup, p)) + .flatMap((p) => parsePossesor(rest, lookup, participleLookup, p)) .map((x) => ({ ...x, errors: [...errors, ...x.errors], })); } if (first.s === "د") { - const np = parseNP(rest, lookup); + const np = parseNP(rest, lookup, participleLookup); return bindParseResult(np, (tokens, body) => { const possesor: T.PossesorSelection = { shrunken: false, @@ -62,7 +63,12 @@ export function parsePossesor( [{ message: `possesor should be inflected` }] : [], // add and check error - can't add possesor to pronoun - next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)), + next: parsePossesor( + tokens, + lookup, + participleLookup, + addPoss(prevPossesor, possesor) + ), }; }); } diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 1a58570..28d02a3 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -9,6 +9,7 @@ import { tlul, wartlul, } from "./irreg-verbs"; +import { shortVerbEndConsonant } from "./misc"; // big problem ما سړی یوړ crashes it !! 
// BIG problem - issue with و being considered a VB for a lot of little verbs like بلل @@ -194,7 +195,7 @@ function matchVerbs( } const hamzaEnd = s.at(-1) === "ه"; const oEnd = s.at(-1) === "و"; - const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1)); + const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1)); const tppMatches = { imperfective: entries.filter( ({ entry: e }) => diff --git a/src/lib/src/parsing/parse-vp.test.ts b/src/lib/src/parsing/parse-vp.test.ts index d055671..954b6cd 100644 --- a/src/lib/src/parsing/parse-vp.test.ts +++ b/src/lib/src/parsing/parse-vp.test.ts @@ -9,7 +9,7 @@ import { makeNounSelection, makePronounSelection, } from "../phrase-building/make-selections"; -import { lookup, verbLookup, wordQuery } from "./lookup"; +import { lookup, participleLookup, verbLookup, wordQuery } from "./lookup"; import { parseVP } from "./parse-vp"; import { tokenizer } from "./tokenizer"; import { tlul } from "./irreg-verbs"; @@ -1382,7 +1382,7 @@ tests.forEach(({ label, cases }) => { test(label, () => { cases.forEach(({ input, output, error }) => { const tokens = tokenizer(input); - const parsed = parseVP(tokens, lookup, verbLookup); + const parsed = parseVP(tokens, lookup, verbLookup, participleLookup); if (error) { expect(parsed.filter((x) => x.errors.length).length).toBeTruthy(); } else { diff --git a/src/lib/src/parsing/parse-vp.ts b/src/lib/src/parsing/parse-vp.ts index e772e7f..3b4987f 100644 --- a/src/lib/src/parsing/parse-vp.ts +++ b/src/lib/src/parsing/parse-vp.ts @@ -31,12 +31,20 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp"; export function parseVP( tokens: Readonly, lookup: (s: Partial) => T.DictionaryEntry[], - verbLookup: (s: string) => T.VerbEntry[] + verbLookup: (s: string) => T.VerbEntry[], + participleLookup: (s: string) => T.VerbEntry[] ): T.ParseResult[] { if (tokens.length === 0) { return []; } - const blocks = parseBlocks(tokens, lookup, verbLookup, [], []); + const blocks = parseBlocks( + 
tokens, + lookup, + verbLookup, + participleLookup, + [], + [] + ); return bindParseResult(blocks, (tokens, { blocks, kids }) => { const phIndex = blocks.findIndex((x) => x.type === "PH"); const vbeIndex = blocks.findIndex((x) => x.type === "VB"); diff --git a/src/lib/src/phrase-building/compile.ts b/src/lib/src/phrase-building/compile.ts index ab5c568..3cd4b4f 100644 --- a/src/lib/src/phrase-building/compile.ts +++ b/src/lib/src/phrase-building/compile.ts @@ -327,7 +327,7 @@ function getPsFromPiece( false ); } - return piece.block.selection.ps; + return flattenLengths(piece.block.selection.ps); } // welded return getPsFromWelded(piece.block); diff --git a/src/lib/src/phrase-building/np-tools.ts b/src/lib/src/phrase-building/np-tools.ts index 4f00039..52af8a5 100644 --- a/src/lib/src/phrase-building/np-tools.ts +++ b/src/lib/src/phrase-building/np-tools.ts @@ -1,6 +1,7 @@ import { isFirstPerson, isSecondPerson } from "../misc-helpers"; import * as T from "../../../types"; import { concatPsString } from "../p-text-helpers"; +import { flattenLengths } from "./compile"; function getBaseAndAdjectives({ selection, @@ -12,9 +13,9 @@ function getBaseAndAdjectives({ } const adjs = "adjectives" in selection && selection.adjectives; if (!adjs) { - return selection.ps; + return flattenLengths(selection.ps); } - return selection.ps.map((p) => + return flattenLengths(selection.ps).map((p) => concatPsString( adjs.reduce( (accum, curr) => @@ -61,9 +62,9 @@ function contractPronoun( n: T.Rendered ): T.PsString | undefined { return isFirstPerson(n.person) - ? concatPsString({ p: "ز", f: "z" }, n.ps[0]) + ? concatPsString({ p: "ز", f: "z" }, flattenLengths(n.ps)[0]) : isSecondPerson(n.person) - ? concatPsString({ p: "س", f: "s" }, n.ps[0]) + ? 
concatPsString({ p: "س", f: "s" }, flattenLengths(n.ps)[0]) : undefined; } diff --git a/src/lib/src/phrase-building/render-np.ts b/src/lib/src/phrase-building/render-np.ts index bbc03ad..c05e769 100644 --- a/src/lib/src/phrase-building/render-np.ts +++ b/src/lib/src/phrase-building/render-np.ts @@ -1,174 +1,299 @@ import * as T from "../../../types"; import { inflectWord } from "../pashto-inflector"; import * as grammarUnits from "../grammar-units"; -import { - getVerbBlockPosFromPerson, - getPersonNumber, -} from "../misc-helpers"; -import { - concatPsString, - psStringFromEntry, -} from "../p-text-helpers"; -import { - getEnglishParticiple, -} from "../np-tools"; +import { getVerbBlockPosFromPerson, getPersonNumber } from "../misc-helpers"; +import { concatPsString, psStringFromEntry } from "../p-text-helpers"; +import { getEnglishParticiple } from "../np-tools"; import { getEnglishWord } from "../get-english-word"; import { renderAdjectiveSelection } from "./render-adj"; -import { isPattern5Entry, isAnimNounEntry, isPattern1Entry } from "../type-predicates"; +import { + isPattern5Entry, + isAnimNounEntry, + isPattern1Entry, +} from "../type-predicates"; +import { shortVerbEndConsonant } from "../parsing/misc"; +import { removeL } from "../new-verb-engine/rs-helpers"; +import { applySingleOrLengthOpts, fmapSingleOrLengthOpts } from "../fp-ps"; +import { accentOnNFromEnd } from "../accent-helpers"; -export function renderNPSelection(NP: T.NPSelection, inflected: boolean, inflectEnglish: boolean, role: "subject", soRole: "servant" | "king" | "none", isPuSandwich: boolean): T.Rendered; -export function renderNPSelection(NP: T.NPSelection, inflected: boolean, inflectEnglish: boolean, role: "object", soRole: "servant" | "king" | "none", isPuSandwich: boolean): T.Rendered; -export function renderNPSelection(NP: T.NPSelection, inflected: boolean, inflectEnglish: boolean, role: "subject" | "object", soRole: "servant" | "king" | "none", isPuSandwich: boolean): T.Rendered 
{ - if (typeof NP !== "object") { - if (role !== "object") { - throw new Error("ObjectNP only allowed for objects"); - } - return NP; - } - if (NP.selection.type === "noun") { - return { - type: "NP", - selection: renderNounSelection(NP.selection, inflected, soRole, undefined, isPuSandwich), - }; - } - if (NP.selection.type === "pronoun") { - return { - type: "NP", - selection: renderPronounSelection(NP.selection, inflected, inflectEnglish, soRole), - }; - } - if (NP.selection.type === "participle") { - return { - type: "NP", - selection: renderParticipleSelection(NP.selection, inflected, soRole), - }; - } - throw new Error("unknown NP type"); -}; +// TODO: can have subject and objects in possesors!! -export function renderNounSelection(n: T.NounSelection, inflected: boolean, role: "servant" | "king" | "none", noArticles?: true | "noArticles", isPuSandwich?: boolean): T.Rendered { - const english = getEnglishFromNoun(n.entry, n.number, noArticles); - const nounInflects = inflected && !(isPuSandwich && isPattern1Entry(n.entry) && n.number === "singular"); - const pashto = ((): T.PsString[] => { - const infs = inflectWord(n.entry); - const ps = n.number === "singular" - ? getInf(infs, "inflections", n.gender, false, nounInflects) - : (() => { - const plural = getInf(infs, "plural", n.gender, true, inflected); - return [ - ...plural, - ...getInf(infs, "arabicPlural", n.gender, true, inflected), - ...(!plural.length || n.gender === "fem") - // allow for plurals like ډاکټرې as well as ډاکټرانې - ? getInf(infs, "inflections", n.gender, true, inflected) - : [], - ]; - })(); - return ps.length > 0 - ? ps - : [psStringFromEntry(n.entry)]; - })(); - const person = getPersonNumber(n.gender, n.number); +// like زما د ښځو لیدل +// my seeing women... 
/**
 * Renders an NP selection by dispatching on the kind of selection it holds
 * (noun, pronoun or participle) and wrapping the result as `{ type: "NP" }`.
 *
 * @param NP - the selection to render. A non-object value is handed back
 *   untouched, which is only permitted when `role` is `"object"`.
 * @param inflected - whether the Pashto should be rendered inflected
 * @param inflectEnglish - only forwarded to pronoun rendering, where it picks
 *   the English "object" label instead of the "subject" one
 * @param role - grammatical slot the NP fills
 * @param soRole - "servant" / "king" / "none", stored on the rendered selection
 * @param isPuSandwich - only forwarded to noun rendering
 * @throws Error if a non-object NP is given for a non-object role, or if the
 *   selection type is not noun / pronoun / participle
 */
export function renderNPSelection(
  NP: T.NPSelection,
  inflected: boolean,
  inflectEnglish: boolean,
  role: "subject",
  soRole: "servant" | "king" | "none",
  isPuSandwich: boolean
): T.Rendered;
export function renderNPSelection(
  NP: T.NPSelection,
  inflected: boolean,
  inflectEnglish: boolean,
  role: "object",
  soRole: "servant" | "king" | "none",
  isPuSandwich: boolean
): T.Rendered;
export function renderNPSelection(
  NP: T.NPSelection,
  inflected: boolean,
  inflectEnglish: boolean,
  role: "subject" | "object",
  soRole: "servant" | "king" | "none",
  isPuSandwich: boolean
): T.Rendered {
  if (typeof NP !== "object") {
    if (role !== "object") {
      throw new Error("ObjectNP only allowed for objects");
    }
    return NP;
  }
  if (NP.selection.type === "noun") {
    return {
      type: "NP",
      selection: renderNounSelection(
        NP.selection,
        inflected,
        soRole,
        undefined,
        isPuSandwich
      ),
    };
  }
  if (NP.selection.type === "pronoun") {
    return {
      type: "NP",
      selection: renderPronounSelection(
        NP.selection,
        inflected,
        inflectEnglish,
        soRole
      ),
    };
  }
  if (NP.selection.type === "participle") {
    return {
      type: "NP",
      selection: renderParticipleSelection(NP.selection, inflected, soRole),
    };
  }
  throw new Error("unknown NP type");
}

/**
 * Renders a noun selection: picks the Pashto form(s) for its number, gender
 * and inflection, builds the English gloss, and renders its adjectives,
 * possesor and demonstrative.
 *
 * @param n - the noun selection
 * @param inflected - whether the noun phrase is inflected
 * @param role - stored on the result and passed on to the possesor
 * @param noArticles - when truthy the English is produced without an article
 * @param isPuSandwich - when truthy, a singular pattern-1 noun is looked up
 *   uninflected even if `inflected` is true
 */
export function renderNounSelection(
  n: T.NounSelection,
  inflected: boolean,
  role: "servant" | "king" | "none",
  noArticles?: true | "noArticles",
  isPuSandwich?: boolean
): T.Rendered {
  const english = getEnglishFromNoun(n.entry, n.number, noArticles);
  // only consulted for the singular lookup below; the plural lookups and the
  // `inflected` flag on the result keep using the raw `inflected` value
  const nounInflects =
    inflected &&
    !(isPuSandwich && isPattern1Entry(n.entry) && n.number === "singular");
  const pashto = ((): T.PsString[] => {
    const infs = inflectWord(n.entry);
    const ps =
      n.number === "singular"
        ? getInf(infs, "inflections", n.gender, false, nounInflects)
        : (() => {
            const plural = getInf(infs, "plural", n.gender, true, inflected);
            return [
              ...plural,
              ...getInf(infs, "arabicPlural", n.gender, true, inflected),
              ...(!plural.length || n.gender === "fem"
                ? // allow for plurals like ډاکټرې as well as ډاکټرانې
                  getInf(infs, "inflections", n.gender, true, inflected)
                : []),
            ];
          })();
    // nothing found in the inflector output: fall back to the bare entry
    return ps.length > 0 ? ps : [psStringFromEntry(n.entry)];
  })();
  const person = getPersonNumber(n.gender, n.number);
  return {
    ...n,
    adjectives: n.adjectives.map((a) =>
      renderAdjectiveSelection(
        a,
        person,
        inflected,
        isPuSandwich && n.number === "singular"
      )
    ),
    person,
    inflected,
    role,
    ps: pashto,
    e: english,
    possesor: renderPossesor(n.possesor, role),
    demonstrative: renderDemonstrative(
      n.demonstrative,
      inflected && n.number === "plural"
    ),
  };
}

/**
 * Attaches the Pashto form to a demonstrative selection.
 *
 * @param demonstrative - the selection, or `undefined` when the noun has none
 * @param plurInflected - true when the noun is both inflected and plural
 *   (that is what `renderNounSelection` passes in)
 * @returns the selection with `ps` filled in, or `undefined` if none was given
 */
function renderDemonstrative(
  demonstrative: T.DemonstrativeSelection | undefined,
  plurInflected: boolean
): T.Rendered | undefined {
  if (!demonstrative) {
    return undefined;
  }
  // Every branch lists the inflected-plural form first. The "dagha" and
  // fallback branches previously had the two forms swapped, yielding the
  // plain form for inflected plurals and the plural-oblique form otherwise.
  return {
    ...demonstrative,
    ps:
      demonstrative.demonstrative === "daa"
        ? plurInflected
          ? { p: "دې", f: "de" }
          : { p: "دا", f: "daa" }
        : demonstrative.demonstrative === "dagha"
        ? plurInflected
          ? { p: "دغو", f: "dágho" }
          : { p: "دغه", f: "dágha" }
        : plurInflected
        ? { p: "هغو", f: "hágho" }
        : { p: "هغه", f: "hágha" },
  };
}

/**
 * Renders a pronoun selection by looking up its Pashto form in
 * `grammarUnits.pronouns` and its English label in `grammarUnits.persons`.
 *
 * @param p - the pronoun selection (its `distance` and `person` drive the lookup)
 * @param inflected - selects the "inflected" rather than "plain" pronoun table
 * @param englishInflected - selects the English "object" rather than "subject" label
 * @param role - stored on the result
 */
function renderPronounSelection(
  p: T.PronounSelection,
  inflected: boolean,
  englishInflected: boolean,
  role: "servant" | "king" | "none"
): T.Rendered {
  const [row, col] = getVerbBlockPosFromPerson(p.person);
  return {
    ...p,
    inflected,
    role,
    ps: grammarUnits.pronouns[p.distance][inflected ? "inflected" : "plain"][
      row
    ][col],
    e: grammarUnits.persons[p.person].label[
      englishInflected ? "object" : "subject"
    ],
  };
}

/**
 * Renders a participle selection. The result's person is always
 * `T.Person.ThirdPlurMale`.
 *
 * Uninflected, the Pashto is the verb entry with the accent placed via
 * `accentOnNFromEnd(..., 0)`. Inflected, "و" is appended; when the
 * second-to-last letter is one of `shortVerbEndConsonant`, a short variant
 * (`removeL` applied, accented "ó" ending) is offered alongside the long one.
 *
 * @param p - the participle selection
 * @param inflected - whether to append the inflecting ending
 * @param role - stored on the result
 */
function renderParticipleSelection(
  p: T.ParticipleSelection,
  inflected: boolean,
  role: "servant" | "king" | "none"
): T.Rendered {
  const o = { p: "و", f: "o" };
  const accentedO = { p: "و", f: "ó" };
  const v = accentOnNFromEnd(psStringFromEntry(p.verb.entry), 0);
  // short forms are only relevant when inflecting
  const hasShortForm =
    inflected && shortVerbEndConsonant.includes(v.p[v.p.length - 2]);
  const base: T.SingleOrLengthOpts = hasShortForm
    ? {
        long: v,
        short: removeL(v),
      }
    : v;
  const ps: T.SingleOrLengthOpts = inflected
    ? applySingleOrLengthOpts(
        {
          long: (x) => [concatPsString(x, o)],
          short: (x) => [concatPsString(x, accentedO)],
        },
        base
      )
    : [v];
  return {
    ...p,
    inflected,
    role,
    person: T.Person.ThirdPlurMale,
    ps,
    e: getEnglishParticiple(p.verb.entry),
    possesor: renderPossesor(p.possesor, "subj/obj"),
  };
}

/**
 * Renders the possesor of an NP, if there is one.
 *
 * The possesor NP is rendered inflected, except when it is a singular noun
 * whose entry is both animate and pattern 5. With a role of `"subj/obj"` the
 * English is inflected and the possesor's own role becomes `"none"`;
 * otherwise the given role is passed through and the English is left plain.
 *
 * @param possesor - the possesor selection, or `undefined`
 * @param possesorRole - role of the NP that owns the possesor
 * @returns the rendered possesor, or `undefined` when none was given
 */
function renderPossesor(
  possesor: T.PossesorSelection | undefined,
  possesorRole: "servant" | "king" | "none" | "subj/obj"
): T.RenderedPossesorSelection | undefined {
  if (!possesor) return undefined;
  const isSingUnisexAnim5PatternNoun =
    possesor.np.selection.type === "noun" &&
    possesor.np.selection.number === "singular" &&
    isAnimNounEntry(possesor.np.selection.entry) &&
    isPattern5Entry(possesor.np.selection.entry);
  return {
    shrunken: possesor.shrunken,
    np: renderNPSelection(
      possesor.np,
      !isSingUnisexAnim5PatternNoun,
      possesorRole === "subj/obj",
      "subject",
      possesorRole === "subj/obj" ? "none" : possesorRole,
      false
    ),
  };
}

/**
 * Pulls one slot out of the inflector output for the given set and gender.
 *
 * The slot index is `(inflected ? 1 : 0)`, plus one more when reading plural
 * forms out of the `"inflections"` set. For `"inflections"` that gives
 * 0 = plain singular, 1 = inflected singular or plain plural,
 * 2 = inflected plural; for the other sets, 0 = plain and 1 = inflected.
 *
 * @returns the forms in that slot, or `[]` when the inflector output has no
 *   such set or no entry for that gender
 */
function getInf(
  infs: T.InflectorOutput,
  t: "plural" | "arabicPlural" | "inflections",
  gender: T.Gender,
  plural: boolean,
  inflected: boolean
): T.PsString[] {
  // TODO: make this safe!!
  // @ts-ignore
  if (
    infs &&
    t in infs &&
    // @ts-ignore
    infs[t] !== undefined &&
    // @ts-ignore
    gender in infs[t] &&
    // @ts-ignore
    infs[t][gender] !== undefined
  ) {
    // @ts-ignore
    const iset = infs[t][gender] as T.InflectionSet;
    const inflectionNumber =
      (inflected ? 1 : 0) + (t === "inflections" && plural ? 1 : 0);
    return iset[inflectionNumber];
  }
  return [];
}

/**
 * Builds the English gloss for a noun entry, prefixed with "(a/the)" for
 * singular or "(the)" for plural unless `noArticles` is truthy.
 *
 * @throws Error when no English word is available for the entry, or when a
 *   singular form is requested but the entry has none
 */
function getEnglishFromNoun(
  entry: T.DictionaryEntry,
  number: T.NounNumber,
  noArticles?: true | "noArticles"
): string {
  const articles = {
    singular: "(a/the)",
    plural: "(the)",
  };
  const article = articles[number];
  function addArticle(s: string) {
    if (noArticles) return s;
    return `${article} ${s}`;
  }
  const e = getEnglishWord(entry);
  if (!e)
    throw new Error(
      `unable to get english from subject ${entry.f} - ${entry.ts}`
    );

  // A plain-string translation is returned with a leading space and no
  // article, regardless of `noArticles`.
  // NOTE(review): possibly meant to go through addArticle - confirm before changing.
  if (typeof e === "string") return ` ${e}`;
  if (number === "plural") return addArticle(e.plural);
  if (!e.singular) {
    throw new Error(
      `unable to get english from subject ${entry.f} - ${entry.ts}`
    );
  }
  return addArticle(e.singular);
}