parsing participles, and improved participle rendering

This commit is contained in:
adueck 2023-08-28 14:18:14 +04:00
parent 730369a3e6
commit 56890cf4b9
22 changed files with 582 additions and 200 deletions

View File

@ -10,6 +10,7 @@ import { useState } from "react";
import { getLength } from "../../../lib/src/p-text-helpers"; import { getLength } from "../../../lib/src/p-text-helpers";
import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal"; import { roleIcon } from "../vp-explorer/VPExplorerExplanationModal";
import { negativeParticle } from "../../../lib/src/grammar-units"; import { negativeParticle } from "../../../lib/src/grammar-units";
import { flattenLengths } from "../../library";
function Block({ function Block({
opts, opts,
@ -493,7 +494,7 @@ function ComplementBlock({
}) { }) {
return ( return (
<div className="text-center"> <div className="text-center">
<Border>{adv.ps[0][script]}</Border> <Border>{flattenLengths(adv.ps)[0][script]}</Border>
<div>Loc. Adv.</div> <div>Loc. Adv.</div>
<SubText>{adv.e}</SubText> <SubText>{adv.e}</SubText>
</div> </div>
@ -614,7 +615,7 @@ function CompNounBlock({
extraClassName={`!inside && hasPossesor ? "pt-2" : ""`} extraClassName={`!inside && hasPossesor ? "pt-2" : ""`}
padding={"1rem"} padding={"1rem"}
> >
{noun.ps[0][script]} {flattenLengths(noun.ps)[0][script]}
</Border> </Border>
<div>Comp. Noun</div> <div>Comp. Noun</div>
<SubText>{noun.e}</SubText> <SubText>{noun.e}</SubText>
@ -656,7 +657,7 @@ export function NPBlock({
</Adjectives>, </Adjectives>,
<div className={np.selection.adjectives?.length ? "mx-1" : ""}> <div className={np.selection.adjectives?.length ? "mx-1" : ""}>
{" "} {" "}
{np.selection.ps[0][script]} {flattenLengths(np.selection.ps)[0][script]}
</div>, </div>,
]; ];
const el = script === "p" ? elements.reverse() : elements; const el = script === "p" ? elements.reverse() : elements;

View File

@ -1,6 +1,9 @@
import { useEffect, useState } from "react"; import { useEffect, useState } from "react";
import ButtonSelect from "../ButtonSelect"; import ButtonSelect from "../ButtonSelect";
import { combineIntoText } from "../../../lib/src/phrase-building/compile"; import {
combineIntoText,
flattenLengths,
} from "../../../lib/src/phrase-building/compile";
import { insertNegative } from "../../../lib/src/phrase-building/render-vp"; import { insertNegative } from "../../../lib/src/phrase-building/render-vp";
import * as T from "../../../types"; import * as T from "../../../types";
import TableCell from "../TableCell"; import TableCell from "../TableCell";
@ -327,7 +330,9 @@ function AgreementInfo({
</div> </div>
{transitivity === "transitive" && past && objNP && ( {transitivity === "transitive" && past && objNP && (
<div> <div>
<InlinePs opts={opts}>{objNP.selection.ps[0]}</InlinePs> <InlinePs opts={opts}>
{flattenLengths(objNP.selection.ps)[0]}
</InlinePs>
{` `}({printGenNum(personToGenNum(objNP.selection.person))}) {` `}({printGenNum(personToGenNum(objNP.selection.person))})
</div> </div>
)} )}

View File

@ -8,7 +8,10 @@ import shuffleArray from "../../../lib/src/shuffle-array";
import InlinePs from "../InlinePs"; import InlinePs from "../InlinePs";
import { psStringEquals } from "../../../lib/src/p-text-helpers"; import { psStringEquals } from "../../../lib/src/p-text-helpers";
import { renderVP } from "../../../lib/src/phrase-building/render-vp"; import { renderVP } from "../../../lib/src/phrase-building/render-vp";
import { compileVP } from "../../../lib/src/phrase-building/compile"; import {
compileVP,
flattenLengths,
} from "../../../lib/src/phrase-building/compile";
import { getRandomTense } from "./TensePicker"; import { getRandomTense } from "./TensePicker";
import { import {
getTenseFromVerbSelection, getTenseFromVerbSelection,
@ -386,7 +389,9 @@ function QuizNPDisplay({
<div className="text-centered" style={{ fontSize: "large" }}> <div className="text-centered" style={{ fontSize: "large" }}>
{stage === "blanks" && ( {stage === "blanks" && (
<div> <div>
<InlinePs opts={opts}>{children.selection.ps[0]}</InlinePs> <InlinePs opts={opts}>
{flattenLengths(children.selection.ps)[0]}
</InlinePs>
</div> </div>
)} )}
<div>{children.selection.e}</div> <div>{children.selection.e}</div>

View File

@ -421,7 +421,6 @@ function isGenStatCompNoun(
| undefined | undefined
) { ) {
if (!block) return false; if (!block) return false;
console.log({ block });
if ( if (
block.type === "objectSelection" && block.type === "objectSelection" &&
typeof block.selection === "object" && typeof block.selection === "object" &&

View File

@ -1,7 +1,6 @@
import { useState } from "react"; import { useState } from "react";
import * as T from "../types"; import * as T from "../types";
import { parsePhrase } from "../lib/src/parsing/parse-phrase"; import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { lookup } from "../lib/src/parsing/lookup";
import { tokenizer } from "../lib/src/parsing/tokenizer"; import { tokenizer } from "../lib/src/parsing/tokenizer";
import { import {
CompiledPTextDisplay, CompiledPTextDisplay,
@ -14,15 +13,15 @@ const working = [
"limited demo vocab", "limited demo vocab",
"phrases with simple verbs", "phrases with simple verbs",
"basic verb tenses", "basic verb tenses",
"noun phrases (except participles)", "noun phrases",
"mini-pronouns for shrunken servants", "mini-pronouns for shrunken servants",
"grammar error correction", "grammar error correction",
"negatives", "negatives",
]; ];
const todo = [ const todo = [
"participles",
"compound verbs", "compound verbs",
"adjectival participles",
"adverbial phrases", "adverbial phrases",
"relative clauses", "relative clauses",
"equative verbs", "equative verbs",
@ -60,7 +59,7 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
setErrors([]); setErrors([]);
return; return;
} }
const { success, errors } = parsePhrase(tokenizer(value), lookup); const { success, errors } = parsePhrase(tokenizer(value));
setText(value); setText(value);
setErrors(errors); setErrors(errors);
setResult(success); setResult(success);

View File

@ -5,6 +5,7 @@ import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
import { splitVarients, undoAaXuPattern } from "../p-text-helpers"; import { splitVarients, undoAaXuPattern } from "../p-text-helpers";
import { arraysHaveCommon } from "../misc-helpers"; import { arraysHaveCommon } from "../misc-helpers";
import { shortVerbEndConsonant } from "./misc";
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] { export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
const [key, value] = Object.entries(s)[0]; const [key, value] = Object.entries(s)[0];
@ -41,6 +42,23 @@ export function shouldCheckTpp(s: string): boolean {
); );
} }
/**
 * Finds the verbs whose participle could be spelled as `input`.
 *
 * - `input` ending in ل: verbs whose dictionary form (`entry.p`) is exactly
 *   `input`.
 * - `input` ending in و: the و is stripped; verbs whose dictionary form equals
 *   the remainder come first, and, when the remainder ends in one of
 *   `shortVerbEndConsonant`, they are followed by verbs whose dictionary form
 *   is the remainder plus ل (the short form, e.g. لیدو for لیدل).
 * - anything else: no matches.
 *
 * @param input - the Pashto text of a single token
 * @returns the matching verb entries, possibly empty
 */
export function participleLookup(input: string): T.VerbEntry[] {
  // exact match on the Pashto dictionary form
  const withDictionaryForm = (p: string) =>
    verbs.filter(({ entry }) => entry.p === p);

  if (input.endsWith("ل")) {
    return withDictionaryForm(input);
  }
  if (!input.endsWith("و")) {
    return [];
  }
  // TODO (carried over from the original): short forms
  // NOTE(review): short forms after a trailing و are handled below; confirm
  // whether this TODO still refers to anything else
  const stem = input.slice(0, -1);
  const fullFormMatches = withDictionaryForm(stem);
  if (!shortVerbEndConsonant.includes(stem.slice(-1))) {
    return fullFormMatches;
  }
  return [...fullFormMatches, ...withDictionaryForm(stem + "ل")];
}
export function verbLookup(input: string): T.VerbEntry[] { export function verbLookup(input: string): T.VerbEntry[] {
// TODO: // TODO:
// only look up forms if there's an ending // only look up forms if there's an ending

View File

@ -0,0 +1,7 @@
/**
 * The consonants a short verb root can end with. A root ending in one of
 * these allows:
 *  - 3rd person masc. sing. past conjugations without an ending (ie. ولید)
 *  - participles without the ل (ie. اخیستو, لیدو)
 */
export const shortVerbEndConsonant: string[] = [
  "د",
  "ت",
  "ړ",
];

View File

@ -10,6 +10,7 @@ export function parseBlocks(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[], lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
verbLookup: (s: string) => T.VerbEntry[], verbLookup: (s: string) => T.VerbEntry[],
participleLookup: (s: string) => T.VerbEntry[],
blocks: T.ParsedBlock[], blocks: T.ParsedBlock[],
kids: T.ParsedKid[] kids: T.ParsedKid[]
): T.ParseResult<{ ): T.ParseResult<{
@ -23,8 +24,7 @@ export function parseBlocks(
(b): b is T.ParsedPH => b.type === "PH" (b): b is T.ParsedPH => b.type === "PH"
); );
const vbExists = blocks.some((b) => "type" in b && b.type === "VB"); const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
const np = prevPh ? [] : parseNP(tokens, lookup); const np = prevPh ? [] : parseNP(tokens, lookup, participleLookup);
// UHOH... This could cause double paths ... maybe don't parse the PH in the parse VB!
const ph = vbExists || prevPh ? [] : parsePH(tokens); const ph = vbExists || prevPh ? [] : parsePH(tokens);
const vb = parseVerb(tokens, verbLookup); const vb = parseVerb(tokens, verbLookup);
const neg = parseNeg(tokens); const neg = parseNeg(tokens);
@ -50,10 +50,14 @@ export function parseBlocks(
const errors: T.ParseError[] = []; const errors: T.ParseError[] = [];
if (r.type === "kids") { if (r.type === "kids") {
return { return {
next: parseBlocks(tokens, lookup, verbLookup, blocks, [ next: parseBlocks(
...kids, tokens,
...r.kids, lookup,
]), verbLookup,
participleLookup,
blocks,
[...kids, ...r.kids]
),
errors: errors:
blocks.length !== 1 blocks.length !== 1
? [{ message: "kids' section out of place" }] ? [{ message: "kids' section out of place" }]
@ -74,7 +78,14 @@ export function parseBlocks(
return []; return [];
} }
return { return {
next: parseBlocks(tokens, lookup, verbLookup, [...blocks, r], kids), next: parseBlocks(
tokens,
lookup,
verbLookup,
participleLookup,
[...blocks, r],
kids
),
errors, errors,
}; };
}); });

View File

@ -3,7 +3,7 @@ import {
makeNounSelection, makeNounSelection,
} from "../phrase-building/make-selections"; } from "../phrase-building/make-selections";
import * as T from "../../../types"; import * as T from "../../../types";
import { lookup, wordQuery } from "./lookup"; import { lookup, participleLookup, wordQuery } from "./lookup";
import { parseNoun } from "./parse-noun"; import { parseNoun } from "./parse-noun";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils"; import { isCompleteResult } from "./utils";
@ -41,7 +41,7 @@ const nabee = wordQuery("نبي", "noun");
const lafz = wordQuery("لفظ", "noun"); const lafz = wordQuery("لفظ", "noun");
// TODO: test for adjective errors etc // TODO: test for adjective errors etc
// TODO: زړو should not be hearts
// bundled plural // bundled plural
const tests: { const tests: {
@ -1371,7 +1371,9 @@ describe("parsing nouns", () => {
test(category, () => { test(category, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup).map(({ body }) => body); const res = parseNoun(tokens, lookup, participleLookup).map(
({ body }) => body
);
expect(res).toEqual(output); expect(res).toEqual(output);
}); });
}); });
@ -1503,7 +1505,7 @@ describe("parsing nouns with adjectives", () => {
test(category, () => { test(category, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup) const res = parseNoun(tokens, lookup, participleLookup)
.filter(isCompleteResult) .filter(isCompleteResult)
.map(({ body }) => body); .map(({ body }) => body);
expect(res).toEqual(output); expect(res).toEqual(output);

View File

@ -16,12 +16,13 @@ type NounResult = { inflected: boolean; selection: T.NounSelection };
export function parseNoun( export function parseNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[] lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
pariticipleLookup: (s: string) => T.VerbEntry[]
): T.ParseResult<NounResult>[] { ): T.ParseResult<NounResult>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
const possesor = parsePossesor(tokens, lookup, undefined); const possesor = parsePossesor(tokens, lookup, pariticipleLookup, undefined);
if (possesor.length) { if (possesor.length) {
return bindParseResult(possesor, (tokens, p) => { return bindParseResult(possesor, (tokens, p) => {
return parseNounAfterPossesor(tokens, lookup, p, []); return parseNounAfterPossesor(tokens, lookup, p, []);

View File

@ -2,10 +2,12 @@ import * as T from "../../../types";
import { parsePronoun } from "./parse-pronoun"; import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun"; import { parseNoun } from "./parse-noun";
import { fmapParseResult } from "../fp-ps"; import { fmapParseResult } from "../fp-ps";
import { parseParticiple } from "./parse-participle";
export function parseNP( export function parseNP(
s: Readonly<T.Token[]>, s: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[] lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
participleLookup: (input: string) => T.VerbEntry[]
): T.ParseResult<T.ParsedNP>[] { ): T.ParseResult<T.ParsedNP>[] {
if (s.length === 0) { if (s.length === 0) {
return []; return [];
@ -21,6 +23,10 @@ export function parseNP(
inflected: boolean; inflected: boolean;
selection: T.NounSelection; selection: T.NounSelection;
} }
| {
inflected: boolean;
selection: T.ParticipleSelection;
}
): T.ParsedNP { ): T.ParsedNP {
return { return {
type: "NP", type: "NP",
@ -34,6 +40,7 @@ export function parseNP(
return fmapParseResult(makeNPSl, [ return fmapParseResult(makeNPSl, [
...parsePronoun(s), ...parsePronoun(s),
...parseNoun(s, lookup), ...parseNoun(s, lookup, participleLookup),
...parseParticiple(s, lookup, participleLookup),
]); ]);
} }

View File

@ -0,0 +1,133 @@
import {
makeNounSelection,
makeParticipleSelection,
makePossesorSelection,
} from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup, participleLookup, wordQuery } from "./lookup";
import { tokenizer } from "./tokenizer";
import { parseParticiple } from "./parse-participle";
// Dictionary entries that the expected results below are built from
const leedul = wordQuery("لیدل", "verb");
const akheestul = wordQuery("اخیستل", "verb");
const wahul = wordQuery("وهل", "verb");
const saray = wordQuery("سړی", "noun");

/** One input phrase together with every result the parser should produce */
type ParticipleCase = {
  input: string;
  output: {
    inflected: boolean;
    selection: T.ParticipleSelection;
  }[];
};

/** Expected result for a participle of `verb` that has no possesor */
function plainParticiple(
  verb: Parameters<typeof makeParticipleSelection>[0],
  inflected: boolean
): ParticipleCase["output"][number] {
  return { inflected, selection: makeParticipleSelection(verb) };
}

const tests: {
  label: string;
  cases: ParticipleCase[];
}[] = [
  {
    label: "uninflected participles",
    cases: [
      { input: "وهل", output: [plainParticiple(wahul, false)] },
      { input: "لیدل", output: [plainParticiple(leedul, false)] },
    ],
  },
  {
    label: "inflected participles",
    cases: [
      { input: "وهلو", output: [plainParticiple(wahul, true)] },
      { input: "اخیستلو", output: [plainParticiple(akheestul, true)] },
    ],
  },
  {
    label: "short forms of inflected participles",
    cases: [
      { input: "لیدو", output: [plainParticiple(leedul, true)] },
      { input: "اخیستو", output: [plainParticiple(akheestul, true)] },
      // no result expected: the stem ends in ه, which is not one of the
      // consonants that allow the ل to be dropped
      { input: "وهو", output: [] },
    ],
  },
  {
    label: "with subj/obj",
    cases: [
      {
        input: "د سړي لیدل",
        output: [
          {
            inflected: false,
            selection: {
              ...makeParticipleSelection(leedul),
              possesor: makePossesorSelection(
                makeNounSelection(saray, undefined)
              ),
            },
          },
        ],
      },
    ],
  },
];
// Registers one jest test per labelled group; every case in a group is
// tokenized, parsed, and compared (bodies only) against its expected output.
describe("parsing participles", () => {
  for (const { label, cases } of tests) {
    // eslint-disable-next-line jest/valid-title
    test(label, () => {
      for (const { input, output } of cases) {
        const parsed = parseParticiple(
          tokenizer(input),
          lookup,
          participleLookup
        );
        const bodies = parsed.map((result) => result.body);
        expect(bodies).toEqual(output);
      }
    });
  }
});

View File

@ -0,0 +1,54 @@
import * as T from "../../../types";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
/** A recognised participle plus whether it appeared in its inflected form */
type ParticipleResult = {
  inflected: boolean;
  selection: T.ParticipleSelection;
};

/**
 * Parses a participle from the front of `tokens`, optionally preceded by a
 * possesor.
 *
 * A possesor is attempted first. If at least one possesor parse comes back,
 * the participle is parsed from the tokens remaining after each of them;
 * otherwise the participle is parsed directly from `tokens` with no possesor.
 *
 * @param tokens - the tokens still to be consumed
 * @param lookup - dictionary lookup, handed on to the possesor parser
 * @param participleLookup - finds the verb entries matching a participle
 * @returns every possible parse; empty when there are no tokens
 */
export function parseParticiple(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  participleLookup: (s: string) => T.VerbEntry[]
): T.ParseResult<ParticipleResult>[] {
  if (!tokens.length) {
    return [];
  }
  const possesors = parsePossesor(tokens, lookup, participleLookup, undefined);
  return possesors.length === 0
    ? parseParticipleAfterPossesor(tokens, participleLookup, undefined)
    : bindParseResult(possesors, (remaining, possesor) =>
        parseParticipleAfterPossesor(remaining, participleLookup, possesor)
      );
}
// TODO: should have adverbs with participle

/**
 * Parses the participle itself, once any possesor has been consumed.
 *
 * Only the first token is examined. It must end in ل or و; a trailing و marks
 * the participle as inflected. Each verb returned by `participleLookup` for
 * that token yields one result, all sharing the same remaining tokens.
 *
 * @param tokens - the tokens still to be consumed
 * @param participleLookup - finds the verb entries matching a participle
 * @param possesor - an already-parsed possesor to attach, if any
 * @returns one error-free result per matching verb; empty when nothing matches
 */
function parseParticipleAfterPossesor(
  tokens: Readonly<T.Token[]>,
  participleLookup: (s: string) => T.VerbEntry[],
  possesor: T.PossesorSelection | undefined
): T.ParseResult<ParticipleResult>[] {
  if (!tokens.length) {
    return [];
  }
  const word = tokens[0].s;
  const inflected = word.endsWith("و");
  if (!inflected && !word.endsWith("ل")) {
    return [];
  }
  const rest = tokens.slice(1);
  return participleLookup(word).map(
    (verb): T.ParseResult<ParticipleResult> => ({
      tokens: rest,
      body: {
        inflected,
        selection: {
          type: "participle",
          verb,
          possesor,
        },
      },
      errors: [],
    })
  );
}

View File

@ -1,14 +1,11 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { verbLookup } from "./lookup"; import { verbLookup, lookup, participleLookup } from "./lookup";
import { parseNP } from "./parse-np"; import { parseNP } from "./parse-np";
import { parseVP } from "./parse-vp"; import { parseVP } from "./parse-vp";
// شو should not be sheyaano !! // شو should not be sheyaano !!
export function parsePhrase( export function parsePhrase(s: T.Token[]): {
s: T.Token[],
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): {
success: ( success: (
| { | {
inflected: boolean; inflected: boolean;
@ -20,9 +17,11 @@ export function parsePhrase(
errors: string[]; errors: string[];
} { } {
const res = [ const res = [
...parseNP(s, lookup).filter(({ tokens }) => !tokens.length), ...parseNP(s, lookup, participleLookup).filter(
({ tokens }) => !tokens.length
),
// ...parseVerb(s, verbLookup), // ...parseVerb(s, verbLookup),
...parseVP(s, lookup, verbLookup), ...parseVP(s, lookup, verbLookup, participleLookup),
]; ];
const success = res.map((x) => x.body); const success = res.map((x) => x.body);

View File

@ -5,7 +5,7 @@ import {
makeNounSelection, makeNounSelection,
makePronounSelection, makePronounSelection,
} from "../phrase-building/make-selections"; } from "../phrase-building/make-selections";
import { lookup, wordQuery } from "./lookup"; import { lookup, participleLookup, wordQuery } from "./lookup";
import { parsePossesor } from "./parse-possesor"; import { parsePossesor } from "./parse-possesor";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils"; import { isCompleteResult } from "./utils";
@ -110,12 +110,12 @@ const tests: {
test("parse possesor", () => { test("parse possesor", () => {
tests.forEach(({ input, output }) => { tests.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const parsed = parsePossesor(tokens, lookup, undefined); const parsed = parsePossesor(tokens, lookup, participleLookup, undefined);
if (output === "error") { if (output === "error") {
expect(parsed.some((x) => x.errors.length)).toBe(true); expect(parsed.some((x) => x.errors.length)).toBe(true);
} else { } else {
expect( expect(
parsePossesor(tokens, lookup, undefined) parsePossesor(tokens, lookup, participleLookup, undefined)
.filter(isCompleteResult) .filter(isCompleteResult)
.map((x) => x.body.np.selection) .map((x) => x.body.np.selection)
).toEqual(output); ).toEqual(output);

View File

@ -19,6 +19,7 @@ const contractions: [string[], T.Person[]][] = [
export function parsePossesor( export function parsePossesor(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[], lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
participleLookup: (s: string) => T.VerbEntry[],
prevPossesor: T.PossesorSelection | undefined prevPossesor: T.PossesorSelection | undefined
): T.ParseResult<T.PossesorSelection>[] { ): T.ParseResult<T.PossesorSelection>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -42,14 +43,14 @@ export function parsePossesor(
? [{ message: "a pronoun cannot have a possesor" }] ? [{ message: "a pronoun cannot have a possesor" }]
: []; : [];
return contractions return contractions
.flatMap((p) => parsePossesor(rest, lookup, p)) .flatMap((p) => parsePossesor(rest, lookup, participleLookup, p))
.map((x) => ({ .map((x) => ({
...x, ...x,
errors: [...errors, ...x.errors], errors: [...errors, ...x.errors],
})); }));
} }
if (first.s === "د") { if (first.s === "د") {
const np = parseNP(rest, lookup); const np = parseNP(rest, lookup, participleLookup);
return bindParseResult(np, (tokens, body) => { return bindParseResult(np, (tokens, body) => {
const possesor: T.PossesorSelection = { const possesor: T.PossesorSelection = {
shrunken: false, shrunken: false,
@ -62,7 +63,12 @@ export function parsePossesor(
[{ message: `possesor should be inflected` }] [{ message: `possesor should be inflected` }]
: [], : [],
// add and check error - can't add possesor to pronoun // add and check error - can't add possesor to pronoun
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)), next: parsePossesor(
tokens,
lookup,
participleLookup,
addPoss(prevPossesor, possesor)
),
}; };
}); });
} }

View File

@ -9,6 +9,7 @@ import {
tlul, tlul,
wartlul, wartlul,
} from "./irreg-verbs"; } from "./irreg-verbs";
import { shortVerbEndConsonant } from "./misc";
// big problem ما سړی یوړ crashes it !! // big problem ما سړی یوړ crashes it !!
// BIG problem - issue with و being considered a VB for a lot of little verbs like بلل // BIG problem - issue with و being considered a VB for a lot of little verbs like بلل
@ -194,7 +195,7 @@ function matchVerbs(
} }
const hamzaEnd = s.at(-1) === "ه"; const hamzaEnd = s.at(-1) === "ه";
const oEnd = s.at(-1) === "و"; const oEnd = s.at(-1) === "و";
const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1)); const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
const tppMatches = { const tppMatches = {
imperfective: entries.filter( imperfective: entries.filter(
({ entry: e }) => ({ entry: e }) =>

View File

@ -9,7 +9,7 @@ import {
makeNounSelection, makeNounSelection,
makePronounSelection, makePronounSelection,
} from "../phrase-building/make-selections"; } from "../phrase-building/make-selections";
import { lookup, verbLookup, wordQuery } from "./lookup"; import { lookup, participleLookup, verbLookup, wordQuery } from "./lookup";
import { parseVP } from "./parse-vp"; import { parseVP } from "./parse-vp";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { tlul } from "./irreg-verbs"; import { tlul } from "./irreg-verbs";
@ -1382,7 +1382,7 @@ tests.forEach(({ label, cases }) => {
test(label, () => { test(label, () => {
cases.forEach(({ input, output, error }) => { cases.forEach(({ input, output, error }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const parsed = parseVP(tokens, lookup, verbLookup); const parsed = parseVP(tokens, lookup, verbLookup, participleLookup);
if (error) { if (error) {
expect(parsed.filter((x) => x.errors.length).length).toBeTruthy(); expect(parsed.filter((x) => x.errors.length).length).toBeTruthy();
} else { } else {

View File

@ -31,12 +31,20 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
export function parseVP( export function parseVP(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[], lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
verbLookup: (s: string) => T.VerbEntry[] verbLookup: (s: string) => T.VerbEntry[],
participleLookup: (s: string) => T.VerbEntry[]
): T.ParseResult<T.VPSelectionComplete>[] { ): T.ParseResult<T.VPSelectionComplete>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
const blocks = parseBlocks(tokens, lookup, verbLookup, [], []); const blocks = parseBlocks(
tokens,
lookup,
verbLookup,
participleLookup,
[],
[]
);
return bindParseResult(blocks, (tokens, { blocks, kids }) => { return bindParseResult(blocks, (tokens, { blocks, kids }) => {
const phIndex = blocks.findIndex((x) => x.type === "PH"); const phIndex = blocks.findIndex((x) => x.type === "PH");
const vbeIndex = blocks.findIndex((x) => x.type === "VB"); const vbeIndex = blocks.findIndex((x) => x.type === "VB");

View File

@ -327,7 +327,7 @@ function getPsFromPiece(
false false
); );
} }
return piece.block.selection.ps; return flattenLengths(piece.block.selection.ps);
} }
// welded // welded
return getPsFromWelded(piece.block); return getPsFromWelded(piece.block);

View File

@ -1,6 +1,7 @@
import { isFirstPerson, isSecondPerson } from "../misc-helpers"; import { isFirstPerson, isSecondPerson } from "../misc-helpers";
import * as T from "../../../types"; import * as T from "../../../types";
import { concatPsString } from "../p-text-helpers"; import { concatPsString } from "../p-text-helpers";
import { flattenLengths } from "./compile";
function getBaseAndAdjectives({ function getBaseAndAdjectives({
selection, selection,
@ -12,9 +13,9 @@ function getBaseAndAdjectives({
} }
const adjs = "adjectives" in selection && selection.adjectives; const adjs = "adjectives" in selection && selection.adjectives;
if (!adjs) { if (!adjs) {
return selection.ps; return flattenLengths(selection.ps);
} }
return selection.ps.map((p) => return flattenLengths(selection.ps).map((p) =>
concatPsString( concatPsString(
adjs.reduce( adjs.reduce(
(accum, curr) => (accum, curr) =>
@ -61,9 +62,9 @@ function contractPronoun(
n: T.Rendered<T.PronounSelection> n: T.Rendered<T.PronounSelection>
): T.PsString | undefined { ): T.PsString | undefined {
return isFirstPerson(n.person) return isFirstPerson(n.person)
? concatPsString({ p: "ز", f: "z" }, n.ps[0]) ? concatPsString({ p: "ز", f: "z" }, flattenLengths(n.ps)[0])
: isSecondPerson(n.person) : isSecondPerson(n.person)
? concatPsString({ p: "س", f: "s" }, n.ps[0]) ? concatPsString({ p: "س", f: "s" }, flattenLengths(n.ps)[0])
: undefined; : undefined;
} }

View File

@ -1,174 +1,299 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { inflectWord } from "../pashto-inflector"; import { inflectWord } from "../pashto-inflector";
import * as grammarUnits from "../grammar-units"; import * as grammarUnits from "../grammar-units";
import { import { getVerbBlockPosFromPerson, getPersonNumber } from "../misc-helpers";
getVerbBlockPosFromPerson, import { concatPsString, psStringFromEntry } from "../p-text-helpers";
getPersonNumber, import { getEnglishParticiple } from "../np-tools";
} from "../misc-helpers";
import {
concatPsString,
psStringFromEntry,
} from "../p-text-helpers";
import {
getEnglishParticiple,
} from "../np-tools";
import { getEnglishWord } from "../get-english-word"; import { getEnglishWord } from "../get-english-word";
import { renderAdjectiveSelection } from "./render-adj"; import { renderAdjectiveSelection } from "./render-adj";
import { isPattern5Entry, isAnimNounEntry, isPattern1Entry } from "../type-predicates"; import {
isPattern5Entry,
isAnimNounEntry,
isPattern1Entry,
} from "../type-predicates";
import { shortVerbEndConsonant } from "../parsing/misc";
import { removeL } from "../new-verb-engine/rs-helpers";
import { applySingleOrLengthOpts, fmapSingleOrLengthOpts } from "../fp-ps";
import { accentOnNFromEnd } from "../accent-helpers";
export function renderNPSelection(NP: T.NPSelection, inflected: boolean, inflectEnglish: boolean, role: "subject", soRole: "servant" | "king" | "none", isPuSandwich: boolean): T.Rendered<T.NPSelection>; // TODO: can have subject and objects in possesors!!
export function renderNPSelection(NP: T.NPSelection, inflected: boolean, inflectEnglish: boolean, role: "object", soRole: "servant" | "king" | "none", isPuSandwich: boolean): T.Rendered<T.NPSelection>;
export function renderNPSelection(NP: T.NPSelection, inflected: boolean, inflectEnglish: boolean, role: "subject" | "object", soRole: "servant" | "king" | "none", isPuSandwich: boolean): T.Rendered<T.NPSelection> {
if (typeof NP !== "object") {
if (role !== "object") {
throw new Error("ObjectNP only allowed for objects");
}
return NP;
}
if (NP.selection.type === "noun") {
return {
type: "NP",
selection: renderNounSelection(NP.selection, inflected, soRole, undefined, isPuSandwich),
};
}
if (NP.selection.type === "pronoun") {
return {
type: "NP",
selection: renderPronounSelection(NP.selection, inflected, inflectEnglish, soRole),
};
}
if (NP.selection.type === "participle") {
return {
type: "NP",
selection: renderParticipleSelection(NP.selection, inflected, soRole),
};
}
throw new Error("unknown NP type");
};
export function renderNounSelection(n: T.NounSelection, inflected: boolean, role: "servant" | "king" | "none", noArticles?: true | "noArticles", isPuSandwich?: boolean): T.Rendered<T.NounSelection> { // like زما د ښځو لیدل
const english = getEnglishFromNoun(n.entry, n.number, noArticles); // my seeing women...
const nounInflects = inflected && !(isPuSandwich && isPattern1Entry(n.entry) && n.number === "singular");
const pashto = ((): T.PsString[] => { export function renderNPSelection(
const infs = inflectWord(n.entry); NP: T.NPSelection,
const ps = n.number === "singular" inflected: boolean,
? getInf(infs, "inflections", n.gender, false, nounInflects) inflectEnglish: boolean,
: (() => { role: "subject",
const plural = getInf(infs, "plural", n.gender, true, inflected); soRole: "servant" | "king" | "none",
return [ isPuSandwich: boolean
...plural, ): T.Rendered<T.NPSelection>;
...getInf(infs, "arabicPlural", n.gender, true, inflected), export function renderNPSelection(
...(!plural.length || n.gender === "fem") NP: T.NPSelection,
// allow for plurals like ډاکټرې as well as ډاکټرانې inflected: boolean,
? getInf(infs, "inflections", n.gender, true, inflected) inflectEnglish: boolean,
: [], role: "object",
]; soRole: "servant" | "king" | "none",
})(); isPuSandwich: boolean
return ps.length > 0 ): T.Rendered<T.NPSelection>;
? ps export function renderNPSelection(
: [psStringFromEntry(n.entry)]; NP: T.NPSelection,
})(); inflected: boolean,
const person = getPersonNumber(n.gender, n.number); inflectEnglish: boolean,
role: "subject" | "object",
soRole: "servant" | "king" | "none",
isPuSandwich: boolean
): T.Rendered<T.NPSelection> {
if (typeof NP !== "object") {
if (role !== "object") {
throw new Error("ObjectNP only allowed for objects");
}
return NP;
}
if (NP.selection.type === "noun") {
return { return {
...n, type: "NP",
adjectives: n.adjectives.map(a => renderAdjectiveSelection(a, person, inflected, isPuSandwich && n.number === "singular")), selection: renderNounSelection(
NP.selection,
inflected,
soRole,
undefined,
isPuSandwich
),
};
}
if (NP.selection.type === "pronoun") {
return {
type: "NP",
selection: renderPronounSelection(
NP.selection,
inflected,
inflectEnglish,
soRole
),
};
}
if (NP.selection.type === "participle") {
return {
type: "NP",
selection: renderParticipleSelection(NP.selection, inflected, soRole),
};
}
throw new Error("unknown NP type");
}
/**
 * Renders a noun selection: works out its Pashto forms and English gloss and
 * renders the attached adjectives, possesor and demonstrative.
 *
 * @param n - the noun selection to render
 * @param inflected - whether the noun phrase is in an inflecting position
 * @param role - role recorded on the result and handed on to the possesor
 * @param noArticles - when set, the English gloss is built without an article
 * @param isPuSandwich - when set, a singular pattern-1 noun is left
 *   uninflected, and adjectives of a singular noun are told about it as well
 * @returns the selection with `ps`, `e`, `person`, `inflected` and `role`
 *   filled in
 */
export function renderNounSelection(
  n: T.NounSelection,
  inflected: boolean,
  role: "servant" | "king" | "none",
  noArticles?: true | "noArticles",
  isPuSandwich?: boolean
): T.Rendered<T.NounSelection> {
  const e = getEnglishFromNoun(n.entry, n.number, noArticles);
  // the noun itself stays plain when it is a singular pattern-1 entry in a
  // "pu" sandwich, even though the phrase as a whole is inflected
  const nounInflects =
    inflected &&
    (!isPuSandwich || !isPattern1Entry(n.entry) || n.number !== "singular");

  const infs = inflectWord(n.entry);
  let forms: T.PsString[];
  if (n.number === "singular") {
    forms = getInf(infs, "inflections", n.gender, false, nounInflects);
  } else {
    const regularPlural = getInf(infs, "plural", n.gender, true, inflected);
    const arabicPlural = getInf(infs, "arabicPlural", n.gender, true, inflected);
    // allow for plurals like ډاکټرې as well as ډاکټرانې
    const inflectionPlural =
      !regularPlural.length || n.gender === "fem"
        ? getInf(infs, "inflections", n.gender, true, inflected)
        : [];
    forms = [...regularPlural, ...arabicPlural, ...inflectionPlural];
  }
  // fall back on the dictionary form when the inflector produced nothing
  const ps = forms.length > 0 ? forms : [psStringFromEntry(n.entry)];

  const person = getPersonNumber(n.gender, n.number);
  const adjectives = n.adjectives.map((adjective) =>
    renderAdjectiveSelection(
      adjective,
      person,
      inflected,
      isPuSandwich && n.number === "singular"
    )
  );
  return {
    ...n,
    adjectives,
    person,
    inflected,
    role,
    ps,
    e,
    possesor: renderPossesor(n.possesor, role),
    demonstrative: renderDemonstrative(
      n.demonstrative,
      inflected && n.number === "plural"
    ),
  };
}
/**
 * Adds the Pashto form (`ps`) to a demonstrative selection.
 *
 * @param demonstrative - the selection to render; `undefined` is passed
 *   straight through
 * @param plurInflected - whether the inflected plural form is wanted
 *   (renderNounSelection passes `inflected && n.number === "plural"`)
 * @returns the selection with `ps` added, or `undefined` when there is no
 *   demonstrative
 */
function renderDemonstrative(
  demonstrative: T.DemonstrativeSelection | undefined,
  plurInflected: boolean
): T.Rendered<T.DemonstrativeSelection> | undefined {
  if (!demonstrative) {
    return undefined;
  }
  // picks between the plain form and the inflected plural form
  const pick = (plain: T.PsString, plural: T.PsString): T.PsString =>
    plurInflected ? plural : plain;
  // The "dagha" and "hagha" branches previously had their two forms swapped,
  // so دغو / هغو came out when plurInflected was false and the base forms
  // دغه / هغه when it was true - the opposite of the "daa" branch.
  const ps =
    demonstrative.demonstrative === "daa"
      ? pick({ p: "دا", f: "daa" }, { p: "دې", f: "de" })
      : demonstrative.demonstrative === "dagha"
      ? pick({ p: "دغه", f: "dágha" }, { p: "دغو", f: "dágho" })
      : pick({ p: "هغه", f: "hágha" }, { p: "هغو", f: "hágho" });
  return {
    ...demonstrative,
    ps,
  };
}
/**
 * Renders a pronoun selection by looking up its Pashto form and English
 * label in `grammarUnits`.
 *
 * @param p - the pronoun selection (its `person` and `distance` are read)
 * @param inflected - selects the "inflected" rather than the "plain" Pashto
 *   pronoun table
 * @param englishInflected - selects the English "object" label rather than
 *   the "subject" label
 * @param role - role recorded on the result
 * @returns the selection with `inflected`, `role`, `ps` and `e` added
 */
function renderPronounSelection(
  p: T.PronounSelection,
  inflected: boolean,
  englishInflected: boolean,
  role: "servant" | "king" | "none"
): T.Rendered<T.PronounSelection> {
  const [row, col] = getVerbBlockPosFromPerson(p.person);
  const table = inflected ? "inflected" : "plain";
  const labelKind = englishInflected ? "object" : "subject";
  const ps = grammarUnits.pronouns[p.distance][table][row][col];
  const e = grammarUnits.persons[p.person].label[labelKind];
  return { ...p, inflected, role, ps, e };
}
/**
 * Renders a participle selection (a verb used as a noun phrase).
 *
 * The plain form is the verb's dictionary form with the accent moved by
 * `accentOnNFromEnd(..., 0)`. When inflected, "و" is appended; if the
 * second-to-last letter is listed in `shortVerbEndConsonant`, both a long
 * form and a short form (built from `removeL`) are produced.
 *
 * @param p - the participle selection
 * @param inflected - whether the participle is in an inflecting position
 * @param role - role recorded on the result
 * @returns the selection with `ps`, `e`, `possesor`, `inflected` and `role`
 *   filled in; `person` is always `T.Person.ThirdPlurMale`
 */
function renderParticipleSelection(
  p: T.ParticipleSelection,
  inflected: boolean,
  role: "servant" | "king" | "none"
): T.Rendered<T.ParticipleSelection> {
  const infinitive = accentOnNFromEnd(psStringFromEntry(p.verb.entry), 0);
  let ps: T.SingleOrLengthOpts<T.PsString[]> = [infinitive];
  if (inflected) {
    const plainEnding = { p: "و", f: "o" };
    // the short form carries the accent on the ending
    const accentedEnding = { p: "و", f: "ó" };
    const secondLastLetter = infinitive.p[infinitive.p.length - 2];
    const stem: T.SingleOrLengthOpts<T.PsString> =
      shortVerbEndConsonant.includes(secondLastLetter)
        ? { long: infinitive, short: removeL(infinitive) }
        : infinitive;
    ps = applySingleOrLengthOpts(
      {
        long: (x) => [concatPsString(x, plainEnding)],
        short: (x) => [concatPsString(x, accentedEnding)],
      },
      stem
    );
  }
  return {
    ...p,
    inflected,
    role,
    person: T.Person.ThirdPlurMale,
    ps,
    e: getEnglishParticiple(p.verb.entry),
    possesor: renderPossesor(p.possesor, "subj/obj"),
  };
}
/**
 * Renders the possesor attached to a noun or participle, if there is one.
 *
 * The possesor NP is rendered as a "subject" and is inflected unless it is a
 * singular, animate, pattern-5 noun. With a `possesorRole` of "subj/obj" the
 * English is inflected and the NP is rendered with the role "none";
 * otherwise the given role is passed on unchanged.
 *
 * @param possesor - the possesor selection, or `undefined`
 * @param possesorRole - role of the phrase that owns the possesor
 * @returns the rendered possesor, or `undefined` when none was given
 */
function renderPossesor(
  possesor: T.PossesorSelection | undefined,
  possesorRole: "servant" | "king" | "none" | "subj/obj"
): T.RenderedPossesorSelection | undefined {
  if (!possesor) return undefined;
  const { shrunken, np } = possesor;
  const owner = np.selection;
  const isSingUnisexAnim5PatternNoun =
    owner.type === "noun" &&
    owner.number === "singular" &&
    isAnimNounEntry(owner.entry) &&
    isPattern5Entry(owner.entry);
  const inflectEnglish = possesorRole === "subj/obj";
  const soRole = possesorRole === "subj/obj" ? "none" : possesorRole;
  return {
    shrunken,
    np: renderNPSelection(
      np,
      !isSingUnisexAnim5PatternNoun,
      inflectEnglish,
      "subject",
      soRole,
      false
    ),
  };
}
/**
 * Safely pulls one set of forms out of the inflector's output.
 *
 * @param infs - output of `inflectWord`; may be falsy when nothing was
 *   produced
 * @param t - which table to read: "plural", "arabicPlural" or "inflections"
 * @param gender - which gender's inflection set to read from that table
 * @param plural - only relevant for "inflections", where it moves one step
 *   further along the set
 * @param inflected - whether the inflected rather than the plain form is
 *   wanted
 * @returns the requested forms, or an empty array when the table, the
 *   gender, or the requested index is missing
 */
function getInf(
  infs: T.InflectorOutput,
  t: "plural" | "arabicPlural" | "inflections",
  gender: T.Gender,
  plural: boolean,
  inflected: boolean
): T.PsString[] {
  if (!infs) return [];
  // Not every shape of inflector output declares every table, so this single
  // lookup is done untyped and the result is validated at runtime instead of
  // suppressing the type checker on every following line.
  // @ts-ignore
  const byGender: unknown = infs[t];
  if (typeof byGender !== "object" || byGender === null) return [];
  const iset = (byGender as Partial<Record<T.Gender, T.InflectionSet>>)[gender];
  if (!Array.isArray(iset)) return [];
  // index 0 = plain, 1 = inflected; within "inflections" a plural shifts the
  // index one further
  const inflectionNumber =
    (inflected ? 1 : 0) + (t === "inflections" && plural ? 1 : 0);
  // never hand back undefined when the set is shorter than expected
  return iset[inflectionNumber] ?? [];
}
/**
 * Builds the English gloss for a noun entry in the given number.
 *
 * An article placeholder - "(a/the)" for singular, "(the)" for plural - is
 * put in front unless `noArticles` is set. When `getEnglishWord` returns a
 * plain string it is returned with a single leading space and no article.
 *
 * @param entry - the dictionary entry of the noun
 * @param number - the number of the noun, used to choose the article and
 *   the English form
 * @param noArticles - when set, no article is added
 * @returns the English gloss
 * @throws Error when no English word is available for the entry, or when
 *   the singular is requested but missing
 */
function getEnglishFromNoun(
  entry: T.DictionaryEntry,
  number: T.NounNumber,
  noArticles?: true | "noArticles"
): string {
  const articleFor = {
    singular: "(a/the)",
    plural: "(the)",
  };
  const withArticle = (word: string): string =>
    noArticles ? word : `${articleFor[number]} ${word}`;
  const missingEnglish = `unable to get english from subject ${entry.f} - ${entry.ts}`;

  const english = getEnglishWord(entry);
  if (!english) {
    throw new Error(missingEnglish);
  }
  if (typeof english === "string") {
    return ` ${english}`;
  }
  if (number === "plural") {
    return withArticle(english.plural);
  }
  if (!english.singular) {
    throw new Error(missingEnglish);
  }
  return withArticle(english.singular);
}