From 56b92a912ca86d41472e4f73338d7f792a87cb3a Mon Sep 17 00:00:00 2001 From: adueck Date: Sat, 19 Aug 2023 20:14:30 +0400 Subject: [PATCH] more with cleaner demo --- src/demo-components/ParserDemo.tsx | 66 +++++++++- src/lib/src/p-text-helpers.test.ts | 1 - src/lib/src/parsing/lookup.tsx | 6 + src/lib/src/parsing/parse-blocks.ts | 14 +- src/lib/src/parsing/parse-verb.test.ts | 33 ++++- src/lib/src/parsing/parse-verb.ts | 4 +- src/lib/src/parsing/parse-vp.test.ts | 176 ++++++++++++++++++++++++- src/lib/src/parsing/parse-vp.ts | 6 +- 8 files changed, 282 insertions(+), 24 deletions(-) diff --git a/src/demo-components/ParserDemo.tsx b/src/demo-components/ParserDemo.tsx index ea36e83..81a342f 100644 --- a/src/demo-components/ParserDemo.tsx +++ b/src/demo-components/ParserDemo.tsx @@ -10,14 +10,47 @@ import { renderVP, } from "../components/library"; +const working = [ + "limited demo vocab", + "phrases with simple verbs", + "basic verb tenses", + "noun phrases (except participles)", + "mini-pronouns for shrunken servants", + "grammar error correction", +]; + +const todo = [ + "participles", + "compound verbs", + "adverbial phrases", + "relative clauses", + "equative verbs", + "perfect tenses", + "ability verbs", + "imperative verbs", + "passive verbs", + "quantifiers", + "demonstrative pronouns", + "mini-pronouns for possesives", + "approximate spelling", +]; + +const examples = [ + "سړي زه ولیدم", + "تلم به", + "یو به مې ړلې", + "د غټې ماشومې زاړه پلار ولیدم", + "ستا پخواني ملګري مې ولیدل", + "ما ډوډۍ خوړله", +]; + function ParserDemo({ opts }: { opts: T.TextOptions }) { const [text, setText] = useState(""); const [result, setResult] = useState< ReturnType["success"] >([]); const [errors, setErrors] = useState([]); - function handleChange(e: React.ChangeEvent) { - const value = e.target.value; + function handleInput(value: string) { if (!value) { setText(""); setResult([]); @@ -31,11 +64,30 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) { } return (
-
Type a sentence to parse
-
- (NOT DONE!! limited vocab, and not working for APs, compound verbs, or - grammatically transitive verbs... yet 👷) +
Type a sentence to parse
+
+
+ NOT DONE: sort of works with:{` `} + {working.map((x) => ( + + ✅ {x} + + ))} + {todo.map((x) => ( + + ❌ {x} + + ))} +
+
working examples
+
    + {examples.map((ex) => ( +
  • handleInput(ex)}> + {ex} +
  • + ))} +
handleInput(e.target.value)} />
{errors.length > 0 && ( diff --git a/src/lib/src/p-text-helpers.test.ts b/src/lib/src/p-text-helpers.test.ts index 79367ef..e6c2ab3 100644 --- a/src/lib/src/p-text-helpers.test.ts +++ b/src/lib/src/p-text-helpers.test.ts @@ -26,7 +26,6 @@ import { endsWith, trimOffPs, undoAaXuPattern, - prevValNotA, lastVowelNotA, } from "./p-text-helpers"; import * as T from "../../types"; diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index 2967c59..6e5fd7e 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -42,6 +42,9 @@ export function shouldCheckTpp(s: string): boolean { } export function verbLookup(input: string): T.VerbEntry[] { + // TODO: + // only look up forms if there's an ending + // or is third person thing const s = input.slice(0, -1); // IMPORTANT TODO FOR EFFECIANCY! // check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING @@ -127,7 +130,10 @@ export function verbLookup(input: string): T.VerbEntry[] { ) || [s, sAddedAa, "و" + s].includes(entry.ssp || "") || (entry.separationAtP && + // TODO this is super ugly, do check of short and long function (entry.p.slice(entry.separationAtP) === s || + entry.p.slice(entry.separationAtP, -1) === s || + (checkTpp && entry.p.slice(entry.separationAtP, -1) === input) || entry.psp?.slice(entry.separationAtP) === s || (entry.prp && [ diff --git a/src/lib/src/parsing/parse-blocks.ts b/src/lib/src/parsing/parse-blocks.ts index 212e00a..fd1515d 100644 --- a/src/lib/src/parsing/parse-blocks.ts +++ b/src/lib/src/parsing/parse-blocks.ts @@ -73,18 +73,22 @@ function phMatches(ph: T.ParsedPH | undefined, vb: T.ParsedVBE | undefined) { if (!ph) { return true; } - const v = vb && vb.type === "VB" && vb.info.type === "verb" && vb.info.verb; - if (!v) { + if (!vb) { return true; } - const verbPh = getPhFromVerb(v); + if (vb.info.type !== "verb") { + return false; + } + const verbPh = getPhFromVerb(vb.info.verb, vb.info.base); return verbPh === ph.s; } -function getPhFromVerb(v: T.VerbEntry): string { +function getPhFromVerb(v: T.VerbEntry, base: "root" | "stem"): string { // TODO!! what to do about yo / bo ??? if (v.entry.separationAtP) { - return v.entry.p.slice(0, v.entry.separationAtP); + const p = + base === "root" ? v.entry.prp || v.entry.p : v.entry.ssp || v.entry.p; + return p.slice(0, v.entry.separationAtP); } // TODO or آ if (v.entry.p.startsWith("ا")) { diff --git a/src/lib/src/parsing/parse-verb.test.ts b/src/lib/src/parsing/parse-verb.test.ts index 0ec0312..ecd2e72 100644 --- a/src/lib/src/parsing/parse-verb.test.ts +++ b/src/lib/src/parsing/parse-verb.test.ts @@ -11,7 +11,7 @@ import { import { verbLookup, wordQuery } from "./lookup"; import { parseVerb } from "./parse-verb"; import { tokenizer } from "./tokenizer"; -import { removeKeys } from "./utils"; +import { getPeople, removeKeys } from "./utils"; const wahul = wordQuery("وهل", "verb"); const leekul = wordQuery("لیکل", "verb"); @@ -383,6 +383,11 @@ const tests: { }, ], }, + // but not for kedul + { + input: "کې", + output: [], + }, ], }, { @@ -729,6 +734,32 @@ const tests: { }, ], }, + { + input: "کېناست", + output: [ + { + ph: "کې", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: kenaastul, + }, + ], + }, + { + input: "ناست", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: kenaastul, + }, + ], + }, { input: "پرېږدو", output: [ diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 543a5e7..633be70 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -73,7 +73,7 @@ function matchVerbs( return e.psp === base; } if (e.c.includes("intrans.")) { - const miniRoot = e.p.slice(0, -3); + const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3); return miniRoot + "ېږ" === base || miniRoot === base; } else { return e.p.slice(0, -1) === base; @@ -168,7 +168,7 @@ function matchVerbs( ]; } } else if (e.c.includes("intrans.")) { - const miniRoot = e.p.slice(0, -3); + const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3); const miniRootEg = miniRoot + "ېږ"; if ([miniRoot, miniRootEg].includes(base)) { return [ diff --git a/src/lib/src/parsing/parse-vp.test.ts b/src/lib/src/parsing/parse-vp.test.ts index 374fc1e..f8a30c8 100644 --- a/src/lib/src/parsing/parse-vp.test.ts +++ b/src/lib/src/parsing/parse-vp.test.ts @@ -3,7 +3,6 @@ import * as T from "../../../types"; import { makeObjectSelectionComplete, - makeSubjectSelection, makeSubjectSelectionComplete, } from "../phrase-building/blocks-utils"; import { @@ -20,6 +19,8 @@ const sarey = wordQuery("سړی", "noun"); const rasedul = wordQuery("رسېدل", "verb"); const maashoom = wordQuery("ماشوم", "noun"); const leedul = wordQuery("لیدل", "verb"); +const kenaastul = wordQuery("کېناستل", "verb"); +const wurul = wordQuery("وړل", "verb"); const tests: { label: string; @@ -91,7 +92,7 @@ const tests: { verb: { type: "verb", verb: tlul, - transitivity: "transitive", + transitivity: "intransitive", canChangeTransitivity: false, canChangeStatDyn: false, negative: false, @@ -129,7 +130,7 @@ const tests: { verb: { type: "verb", verb: tlul, - transitivity: "transitive", + transitivity: "intransitive", canChangeTransitivity: false, canChangeStatDyn: false, negative: false, @@ -168,7 +169,7 @@ const tests: { verb: { type: "verb", verb: rasedul, - transitivity: "transitive", + transitivity: "intransitive", canChangeTransitivity: false, canChangeStatDyn: false, negative: false, @@ -208,7 +209,7 @@ const tests: { verb: { type: "verb", verb: rasedul, - transitivity: "transitive", + transitivity: "intransitive", canChangeTransitivity: false, canChangeStatDyn: false, negative: false, @@ -252,7 +253,7 @@ const tests: { verb: { type: "verb", verb: tlul, - transitivity: "transitive", + transitivity: "intransitive", canChangeTransitivity: false, canChangeStatDyn: false, negative: false, @@ -268,6 +269,46 @@ const tests: { }, })), }, + { + input: "کې به ناست", + output: [ + { + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(T.Person.ThirdSingMale), + }), + }, + { + key: 2, + block: { + type: "objectSelection", + selection: "none", + }, + }, + ], + verb: { + type: "verb", + verb: kenaastul, + transitivity: "intransitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "habitualPerfectivePast", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: false, + }, + }, + ], + }, ], }, { @@ -938,6 +979,129 @@ const tests: { })) ), }, + { + input: "ودې وینم", + output: getPeople(2, "sing").flatMap((objectPerson) => + getPeople(1, "sing").map((subjectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "subjunctiveVerb", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: true, + }, + })) + ), + }, + { + input: "وینم به دې", + output: getPeople(2, "sing").flatMap((objectPerson) => + getPeople(1, "sing").map((subjectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "imperfectiveFuture", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: true, + }, + })) + ), + }, + { + input: "یو به مې ړلې", + output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap( + (objectPerson) => + getPeople(1, "sing").map( + (subjectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: wurul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "habitualPerfectivePast", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: true, + }, + }) + ) + ), + }, ], }, ]; diff --git a/src/lib/src/parsing/parse-vp.ts b/src/lib/src/parsing/parse-vp.ts index ad72382..222fee0 100644 --- a/src/lib/src/parsing/parse-vp.ts +++ b/src/lib/src/parsing/parse-vp.ts @@ -32,7 +32,7 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp"; // make impossible subjects like I saw me, error -// کې به ناست not working! +// TODO: learn how to yank / use plugin for JSON neovim // TODO: transitivity options @@ -72,7 +72,9 @@ export function parseVP( const v: T.VerbSelectionComplete = { type: "verb", verb: verb.info.verb, - transitivity: "transitive", + transitivity: verb.info.verb.entry.c.includes("intrans") + ? "intransitive" + : "transitive", canChangeTransitivity: false, canChangeStatDyn: false, negative: false,