diff --git a/src/components/src/vp-explorer/AllTensesDisplay.tsx b/src/components/src/vp-explorer/AllTensesDisplay.tsx index 2ded1a0..08e725d 100644 --- a/src/components/src/vp-explorer/AllTensesDisplay.tsx +++ b/src/components/src/vp-explorer/AllTensesDisplay.tsx @@ -151,6 +151,7 @@ function AllTensesDisplay({ x.trim()); + const tppf = verb.entry.tppf.split(",").map((x) => x.trim()); + const tpps = zipWith((p, f) => ({ p, f }), tppp, tppf); + return tpps.map(({ p, f }) => { + const tip = removeAccents( + verb.entry.separationAtP !== undefined + ? makePsString( + p.slice(verb.entry.separationAtP), + f.slice(verb.entry.separationAtF) + ) + : makePsString(p, f) + ); + return aspect === "imperfective" ? accentOnNFromEnd(tip, 0) : tip; + }); // if it ends in a consonant, the special form will also have another // variation ending with a ه - u // const endsInAConsonant = (pashtoConsonants.includes(tip.p.slice(-1)) || tip.f.slice(-1) === "w"); diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index 61866f8..4edfab2 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -31,9 +31,23 @@ export function lookup(s: Partial): T.DictionaryEntry[] { return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[]; } +export function shouldCheckTpp(s: string): boolean { + return ( + ["د", "ړ", "ت", "ځ", "و", "ډ", "ڼ", "ن", "ه"].includes(s.slice(-1)) || + ["ست", "ښت"].includes(s.slice(-2)) || + ["ښود"].includes(s.slice(-3)) + ); +} + export function verbLookup(input: string): T.VerbEntry[] { const s = input.slice(0, -1); + // check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING + // if theres no legit verb ending and no tpp possibilities, just return an empty array const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined; + const checkTpp = shouldCheckTpp(input); + const inputWoutOo = + checkTpp && input.startsWith("و") ? 
input.slice(1) : undefined; + // TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp) if (s.endsWith("ېږ")) { return verbs.filter( sWoutOo @@ -43,12 +57,14 @@ export function verbLookup(input: string): T.VerbEntry[] { entry.p ) || [s, sWoutOo].includes(entry.p) || + (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || (entry.psp && [s, sWoutOo].includes(entry.psp)) || entry.prp === s || entry.ssp === s : ({ entry }) => entry.p.slice(0, -1) === s || entry.p === s.slice(0, -1) + "دل" || + (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || entry.p === s || entry.psp === s || entry.prp === s || @@ -63,6 +79,7 @@ export function verbLookup(input: string): T.VerbEntry[] { [s, sWoutOo].includes(entry.p.slice(0, -3)) || [s, sWoutOo].includes(entry.p) || (entry.psp && [s, sWoutOo].includes(entry.psp)) || + (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || entry.prp === s || entry.ssp === s || (entry.separationAtP && @@ -73,6 +90,7 @@ export function verbLookup(input: string): T.VerbEntry[] { // for short intransitive forms entry.p.slice(0, -3) === s || entry.p === s || + (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || entry.psp === s || entry.prp === s || entry.ssp === s || diff --git a/src/lib/src/parsing/parse-ba.ts b/src/lib/src/parsing/parse-ba.ts new file mode 100644 index 0000000..538f519 --- /dev/null +++ b/src/lib/src/parsing/parse-ba.ts @@ -0,0 +1,21 @@ +import * as T from "../../../types"; + +export function parseBa( + tokens: Readonly +): T.ParseResult<{ type: "ba" }>[] { + if (!tokens.length) { + return []; + } + const [first, ...rest] = tokens; + if (first.s === "به") { + return [ + { + body: { + type: "ba", + }, + errors: [], + tokens: rest, + }, + ]; + } else return []; +} diff --git a/src/lib/src/parsing/parse-verb.test.ts b/src/lib/src/parsing/parse-verb.test.ts index 2d11d15..3ffaf79 100644 --- a/src/lib/src/parsing/parse-verb.test.ts +++ 
b/src/lib/src/parsing/parse-verb.test.ts @@ -15,6 +15,7 @@ const kenaastul = wordQuery("کېناستل", "verb"); const prexodul = wordQuery("پرېښودل", "verb"); const xodul = wordQuery("ښودل", "verb"); const kexodul = wordQuery("کېښودل", "verb"); +const katul = wordQuery("کتل", "verb"); const tests: { label: string; @@ -376,6 +377,7 @@ const tests: { }, ], }, + // TODO!! THESE COULD ALSO BE MALE { input: "لیده", output: [ @@ -402,6 +404,7 @@ const tests: { }, ], }, + // BUT NOT THIS ONE { input: "ولیدله", output: [ @@ -471,6 +474,33 @@ const tests: { }, ], }, + { + input: "خوړ", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: khorul, + }, + ], + }, + // TODO: should do کاته as well... what to do, have multiple tpp forms ? YES! + { + input: "وکوت", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: katul, + }, + ], + }, ], }, { @@ -633,194 +663,3 @@ tests.forEach(({ label, cases }) => { }); }); }); - -const b = [ - [ - undefined, - { - info: { - aspect: "imperfective", - base: "stem", - type: "verb", - verb: { - entry: { - c: "v. trans.", - e: "to put, to put down, to set in place", - ec: "put,puts,putting,put,put", - f: "kexodul", - g: "kexodul", - i: 11193, - noOo: true, - p: "کېښودل", - psf: "Gd", - psp: "ږد", - r: 4, - separationAtF: 2, - separationAtP: 2, - ssf: "kéGd", - ssp: "کېږد", - ts: 1527812284, - }, - }, - }, - person: 6, - type: "VB", - }, - ], - [ - undefined, - { - info: { - aspect: "imperfective", - base: "stem", - type: "verb", - verb: { - entry: { - c: "v. 
trans.", - e: "to put, to put down, to set in place", - ec: "put,puts,putting,put,put", - f: "kexodul", - g: "kexodul", - i: 11193, - noOo: true, - p: "کېښودل", - psf: "Gd", - psp: "ږد", - r: 4, - separationAtF: 2, - separationAtP: 2, - ssf: "kéGd", - ssp: "کېږد", - ts: 1527812284, - }, - }, - }, - person: 7, - type: "VB", - }, - ], - [ - undefined, - { - info: { - aspect: "perfective", - base: "stem", - type: "verb", - verb: { - entry: { - c: "v. trans.", - e: "to leave, abandon, forsake, let go, allow", - ec: "abandon", - f: "prexodúl", - g: "prexodul", - i: 2516, - noOo: true, - p: "پرېښودل", - psf: "preGd", - psp: "پرېږد", - r: 4, - separationAtF: 3, - separationAtP: 3, - ts: 1527815190, - }, - }, - }, - person: 6, - type: "VB", - }, - ], - [ - undefined, - { - info: { - aspect: "perfective", - base: "stem", - type: "verb", - verb: { - entry: { - c: "v. trans.", - e: "to leave, abandon, forsake, let go, allow", - ec: "abandon", - f: "prexodúl", - g: "prexodul", - i: 2516, - noOo: true, - p: "پرېښودل", - psf: "preGd", - psp: "پرېږد", - r: 4, - separationAtF: 3, - separationAtP: 3, - ts: 1527815190, - }, - }, - }, - person: 7, - type: "VB", - }, - ], - [ - { s: "کې", type: "PH" }, - { - info: { - aspect: "perfective", - base: "stem", - type: "verb", - verb: { - entry: { - c: "v. trans.", - e: "to put, to put down, to set in place", - ec: "put,puts,putting,put,put", - f: "kexodul", - g: "kexodul", - i: 11193, - noOo: true, - p: "کېښودل", - psf: "Gd", - psp: "ږد", - r: 4, - separationAtF: 2, - separationAtP: 2, - ssf: "kéGd", - ssp: "کېږد", - ts: 1527812284, - }, - }, - }, - person: 6, - type: "VB", - }, - ], - [ - { s: "کې", type: "PH" }, - { - info: { - aspect: "perfective", - base: "stem", - type: "verb", - verb: { - entry: { - c: "v. 
trans.", - e: "to put, to put down, to set in place", - ec: "put,puts,putting,put,put", - f: "kexodul", - g: "kexodul", - i: 11193, - noOo: true, - p: "کېښودل", - psf: "Gd", - psp: "ږد", - r: 4, - separationAtF: 2, - separationAtP: 2, - ssf: "kéGd", - ssp: "کېږد", - ts: 1527812284, - }, - }, - }, - person: 7, - type: "VB", - }, - ], -]; diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 95fdf45..0ee3151 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -1,5 +1,27 @@ import * as T from "../../../types"; +// third person idiosyncratic +// if it ends in a dental or ه - look for tttp +// +// if not having tttp +// automatic things: (with blank or u) +// ېد ست ښت +// ښود +// +// ول - اوه + +// وېشه ? + +// test ګالو ❌ vs ګاللو ✅ + +// واخیست / واخیسته / واخیستلو +// ولید // ولیده // ولیدو +// +// ووت / واته +// +// also write the rules for the third pers sing endings in the grammar +// multiple third pers sing options + export function parseVerb( tokens: Readonly, verbLookup: (s: string) => T.VerbEntry[] @@ -258,6 +280,52 @@ function matchVerbs( }); }); } + const tppMatches = { + imperfective: entries.filter( + ({ entry: e }) => !e.c.includes("comp") && s === e.tppp + ), + perfective: entries.reduce< + { ph: string | undefined; entry: T.VerbEntry }[] + >((acc, entry) => { + const e = entry.entry; + const sNoOo = s.startsWith("و") && s.slice(1); + if (sNoOo && sNoOo === e.tppp) { + return [ + ...acc, + { + ph: "و", + entry, + }, + ]; + } else if (s === e.tppp) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } + return acc; + }, []), + }; + Object.entries(tppMatches).forEach(([aspect, entries]) => { + entries.forEach((verb) => { + w.push([ + "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined, + { + type: "VB", + person: T.Person.ThirdSingMale, + info: { + type: "verb", + aspect: aspect as T.Aspect, + base: "root", + verb: "ph" in verb ? 
verb.entry : verb, + }, + }, + ]); + }); + }); return w; } diff --git a/src/lib/src/parsing/parse-vp.ts b/src/lib/src/parsing/parse-vp.ts index ed04e03..43c6d91 100644 --- a/src/lib/src/parsing/parse-vp.ts +++ b/src/lib/src/parsing/parse-vp.ts @@ -8,6 +8,7 @@ import { } from "../phrase-building/blocks-utils"; import { vEntry } from "../new-verb-engine/rs-helpers"; import { getPersonFromNP, isThirdPerson } from "../phrase-building/vp-tools"; +import { parseBa } from "./parse-ba"; // to hide equatives type-doubling issue const kedulStat = vEntry({ ts: 1581086654898, @@ -40,16 +41,44 @@ export function parseVP( } // how to make this into a nice pipeline... 🤔 const NP1 = parseNP(tokens, lookup).filter(({ errors }) => !errors.length); + const ba = bindParseResult(NP1, (tokens, np1) => { + const b = parseBa(tokens); + if (!b.length) { + return [ + { + tokens, + body: { + np1, + ba: false, + }, + errors: [], + }, + ]; + } else { + return b.map(({ tokens, errors }) => ({ + body: { + np1, + ba: true, + }, + errors, + tokens, + })); + } + }); const NP2 = bindParseResult< { - inflected: boolean; - selection: T.NPSelection; + np1: { + inflected: boolean; + selection: T.NPSelection; + }; + ba: boolean; }, { np1: { inflected: boolean; selection: T.NPSelection; }; + ba: boolean; np2: | { inflected: boolean; @@ -57,7 +86,7 @@ export function parseVP( } | undefined; } - >(NP1, (tokens, np1) => { + >(ba, (tokens, { np1, ba }) => { const np2s = parseNP(tokens, lookup); if (!np2s.length) { const r: T.ParseResult<{ @@ -65,6 +94,7 @@ export function parseVP( inflected: boolean; selection: T.NPSelection; }; + ba: boolean; np2: undefined; }>[] = [ { @@ -72,6 +102,7 @@ export function parseVP( body: { np1, np2: undefined, + ba, }, errors: [], }, @@ -83,6 +114,7 @@ export function parseVP( body: { np1, np2: p.body, + ba, }, errors: p.errors, })); @@ -96,12 +128,13 @@ export function parseVP( np2: nps.np2, v: p.body, np1: nps.np1, + ba: nps.ba, }, errors: p.errors, })); }).filter(({ errors 
}) => !errors.length); // TODO: be able to bind mulitple vals - return bindParseResult(vb, (tokens, { np1, np2, v: [ph, v] }) => { + return bindParseResult(vb, (tokens, { np1, np2, v: [ph, v], ba }) => { const w: T.ParseResult[] = []; if (v.info.type === "equative") { throw new Error("not yet implemented"); @@ -143,7 +176,7 @@ export function parseVP( canChangeTransitivity: false, canChangeStatDyn: false, negative: false, - tense: getTenseFromRootsStems(false, v.info.base, v.info.aspect), + tense: getTenseFromRootsStems(ba, v.info.base, v.info.aspect), canChangeVoice: true, isCompound: false, voice: "active", @@ -240,7 +273,7 @@ export function parseVP( canChangeTransitivity: false, canChangeStatDyn: false, negative: false, - tense: getTenseFromRootsStems(false, v.info.base, v.info.aspect), + tense: getTenseFromRootsStems(ba, v.info.base, v.info.aspect), canChangeVoice: true, isCompound: false, voice: "active",