From a9f93dc717f8f4df03af936b58590273af44fb8e Mon Sep 17 00:00:00 2001 From: adueck Date: Mon, 4 Sep 2023 14:26:24 +0400 Subject: [PATCH] some verb bug fixes and starting with AP parsing --- src/lib/src/parsing/{lookup.tsx => lookup.ts} | 46 +++- src/lib/src/parsing/parse-ap.ts | 24 +- src/lib/src/parsing/parse-blocks.ts | 24 +- src/lib/src/parsing/parse-past-part.test.ts | 186 +++++++++++++ src/lib/src/parsing/parse-past-part.ts | 2 +- src/lib/src/parsing/parse-verb.test.ts | 7 +- src/lib/src/parsing/parse-verb.ts | 21 +- src/lib/src/parsing/parse-vp.test.ts | 250 +++++++++--------- src/lib/src/parsing/parse-vp.ts | 10 +- src/types.ts | 1 + 10 files changed, 416 insertions(+), 155 deletions(-) rename src/lib/src/parsing/{lookup.tsx => lookup.ts} (83%) create mode 100644 src/lib/src/parsing/parse-past-part.test.ts diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.ts similarity index 83% rename from src/lib/src/parsing/lookup.tsx rename to src/lib/src/parsing/lookup.ts index a47aa82..d4a50f3 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.ts @@ -1,11 +1,16 @@ import nounsAdjs from "../../../nouns-adjs"; import verbs from "../../../verbs"; import * as T from "../../../types"; -import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; +import { + isAdjectiveEntry, + isAdverbEntry, + isNounEntry, +} from "../type-predicates"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; import { splitVarients, undoAaXuPattern } from "../p-text-helpers"; import { arraysHaveCommon } from "../misc-helpers"; import { shortVerbEndConsonant } from "./misc"; +import { kawulDyn, kawulStat, kedulDyn, kedulStat, tlul } from "./irreg-verbs"; export type LookupFunction = typeof lookup; @@ -13,11 +18,13 @@ export function lookup( s: Partial, type: "nounAdj" ): T.DictionaryEntry[]; +export function lookup(s: string, type: "adverb"): T.AdverbEntry[]; +export function lookup(s: string, type: "pPart"): T.VerbEntry[]; export function lookup(s: string, type: "verb" | "participle"): T.VerbEntry[]; export function lookup( s: string | Partial, - type: "nounAdj" | "verb" | "participle" -): T.DictionaryEntry[] | T.VerbEntry[] { + type: "nounAdj" | "verb" | "participle" | "pPart" | "adverb" +): T.DictionaryEntry[] | T.VerbEntry[] | T.AdverbEntry[] { if (type === "nounAdj") { if (typeof s !== "object") { throw new Error("invalid query for noun / adj lookup"); @@ -30,6 +37,12 @@ export function lookup( if (type === "verb") { return verbLookup(s); } + if (type === "pPart") { + return pPartLookup(s); + } + if (type === "adverb") { + return adverbLookup(s); + } return participleLookup(s); } @@ -60,6 +73,12 @@ function nounAdjLookup(s: Partial): T.DictionaryEntry[] { return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[]; } +function adverbLookup(s: string): T.AdverbEntry[] { + return nounsAdjs.filter( + (a) => isAdverbEntry(a) && a.p === s + ) as T.AdverbEntry[]; +} + export function shouldCheckTpp(s: string): boolean { return ( ["د", "ړ", "ت", "ځ", "و", "ډ", "ڼ", "ن", "ه"].includes(s.slice(-1)) || @@ -85,6 +104,27 @@ function participleLookup(input: string): T.VerbEntry[] { return []; } +function pPartLookup(input: string): T.VerbEntry[] { + if (input === "کړ") { + return [kawulStat, kawulDyn]; + } + if (input === "شو") { + return [kedulStat, kedulDyn]; + } + if (input === "تل") { + // TODO: is also ورتلل، راتلل، درتلل like this? + return [tlul]; + } + if (["ست", "ښت"].includes(input.slice(-2))) { + const p = input + "ل"; + return verbs.filter((e) => e.entry.p === p); + } + if (input.at(-1) === "ل") { + return verbs.filter((e) => e.entry.p === input); + } + return []; +} + function verbLookup(input: string): T.VerbEntry[] { // TODO: // only look up forms if there's an ending diff --git a/src/lib/src/parsing/parse-ap.ts b/src/lib/src/parsing/parse-ap.ts index f2a29df..9ed2dae 100644 --- a/src/lib/src/parsing/parse-ap.ts +++ b/src/lib/src/parsing/parse-ap.ts @@ -1 +1,23 @@ -// TODO: ability to treat a doubled noun as an adverb +import * as T from "../../../types"; +import { LookupFunction } from "./lookup"; +import { returnParseResultS } from "./utils"; + +export function parseAP( + tokens: Readonly, + lookup: LookupFunction +): T.ParseResult[] { + if (tokens.length === 0) { + return []; + } + const [first, ...rest] = tokens; + const adverbs = lookup(first.s, "adverb"); + return adverbs.map((entry) => + returnParseResultS(rest, { + type: "AP", + selection: { + type: "adverb", + entry, + }, + }) + ); +} diff --git a/src/lib/src/parsing/parse-blocks.ts b/src/lib/src/parsing/parse-blocks.ts index f0007a6..a721103 100644 --- a/src/lib/src/parsing/parse-blocks.ts +++ b/src/lib/src/parsing/parse-blocks.ts @@ -1,5 +1,6 @@ import * as T from "../../../types"; import { LookupFunction } from "./lookup"; +import { parseAP } from "./parse-ap"; import { parseEquative } from "./parse-equative"; import { parseKidsSection } from "./parse-kids-section"; import { parseNeg } from "./parse-negative"; @@ -25,21 +26,16 @@ export function parseBlocks( (b): b is T.ParsedPH => b.type === "PH" ); const vbExists = blocks.some((b) => "type" in b && b.type === "VB"); - const np = prevPh ? [] : parseNP(tokens, lookup); - const ph = vbExists || prevPh ? [] : parsePH(tokens); - const vb = parseVerb(tokens, lookup); - const vbp = parsePastPart(tokens, lookup); - const eq = parseEquative(tokens); - const neg = parseNeg(tokens); - const kidsR = parseKidsSection(tokens, []); + const allResults: T.ParseResult[] = [ - ...np, - ...ph, - ...neg, - ...vb, - ...vbp, - ...eq, - ...kidsR, + ...(prevPh ? [] : parseAP(tokens, lookup)), + ...(prevPh ? [] : parseNP(tokens, lookup)), + ...(vbExists || prevPh ? [] : parsePH(tokens)), + ...parseVerb(tokens, lookup), + ...parsePastPart(tokens, lookup), + ...parseEquative(tokens), + ...parseNeg(tokens), + ...parseKidsSection(tokens, []), ]; // TODO: is this necessary? // if (!allResults.length) { diff --git a/src/lib/src/parsing/parse-past-part.test.ts b/src/lib/src/parsing/parse-past-part.test.ts new file mode 100644 index 0000000..8ba67b9 --- /dev/null +++ b/src/lib/src/parsing/parse-past-part.test.ts @@ -0,0 +1,186 @@ +import * as T from "../../../types"; +import { lookup, wordQuery } from "./lookup"; +import { tokenizer } from "./tokenizer"; +import { parsePastPart } from "./parse-past-part"; +import { kawulDyn, kawulStat, kedulDyn, kedulStat } from "./irreg-verbs"; + +const leedul = wordQuery("لیدل", "verb"); +const akheestul = wordQuery("اخیستل", "verb"); +const wahul = wordQuery("وهل", "verb"); +const awuxtul = wordQuery("اوښتل", "verb"); +const tlul = wordQuery("tlul", "verb"); + +const tests: { + label: string; + cases: { + input: string; + output: T.ParsedVBP[]; + }[]; +}[] = [ + { + label: "regular past participles", + cases: [ + { + input: "لیدلی", + output: [ + { + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "singular", + }, + verb: leedul, + }, + }, + ], + }, + { + input: "وهلي", + output: [ + { + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "plural", + }, + verb: wahul, + }, + }, + ], + }, + { + input: "وهلې", + output: (["singular", "plural"] as const).map((number) => ({ + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "fem", + number, + }, + verb: wahul, + }, + })), + }, + ], + }, + { + label: "past participles with short forms", + cases: [ + { + input: "اخیستی", + output: [ + { + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "singular", + }, + verb: akheestul, + }, + }, + ], + }, + { + input: "اخیستلی", + output: [ + { + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "singular", + }, + verb: akheestul, + }, + }, + ], + }, + { + input: "اوښتی", + output: [ + { + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "singular", + }, + verb: awuxtul, + }, + }, + ], + }, + ], + }, + { + label: "irregular past participles", + cases: [ + { + input: "تلی", + output: [ + { + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "singular", + }, + verb: tlul, + }, + }, + ], + }, + { + input: "کړي", + output: [kawulStat, kawulDyn].map((verb) => ({ + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "plural", + }, + verb, + }, + })), + }, + { + input: "شوي", + output: [kedulStat, kedulDyn].map((verb) => ({ + type: "VB", + info: { + type: "ppart", + genNum: { + gender: "masc", + number: "plural", + }, + verb, + }, + })), + }, + ], + }, +]; + +describe("parsing past participles", () => { + tests.forEach(({ label, cases }) => { + // eslint-disable-next-line jest/valid-title + test(label, () => { + cases.forEach(({ input, output }) => { + const tokens = tokenizer(input); + const res = parsePastPart(tokens, lookup).map(({ body }) => body); + expect(res).toEqual(output); + }); + }); + }); +}); diff --git a/src/lib/src/parsing/parse-past-part.ts b/src/lib/src/parsing/parse-past-part.ts index b7bfb11..c3857c0 100644 --- a/src/lib/src/parsing/parse-past-part.ts +++ b/src/lib/src/parsing/parse-past-part.ts @@ -16,7 +16,7 @@ export function parsePastPart( } // TODO: ALSO HANDLE SHORT FORMS const wOutEnd = s.slice(0, -1); - const matches = lookup(wOutEnd, "participle"); + const matches = lookup(wOutEnd, "pPart"); const genNums = endingGenderNum(ending); return matches .flatMap((verb) => diff --git a/src/lib/src/parsing/parse-verb.test.ts b/src/lib/src/parsing/parse-verb.test.ts index c3a2dea..f3d6014 100644 --- a/src/lib/src/parsing/parse-verb.test.ts +++ b/src/lib/src/parsing/parse-verb.test.ts @@ -31,7 +31,8 @@ const akheestul = wordQuery("اخیستل", "verb"); const alwatul = wordQuery("الوتل", "verb"); // const dartlul = wordQuery("درتلل", "verb") -// todo alwatul waalwatul akhistul azmoyul etc +// TODO: azmoyul etc +// TODO: cleaner and more thorough handling of ا seperating verbs ee - wee etc const tests: { label: string; @@ -394,7 +395,7 @@ const tests: { { root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: akheestul, }, @@ -419,7 +420,7 @@ const tests: { { stem: { persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: alwatul, }, diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 7417ab0..a00c5a3 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -12,9 +12,6 @@ import { import { LookupFunction } from "./lookup"; import { shortVerbEndConsonant } from "./misc"; -// big problem ما سړی یوړ crashes it !! -// BIG problem - issue with و being considered a VB for a lot of little verbs like بلل - // TODO: کول verbs! // check that aawu stuff is working // check oo`azmooy - @@ -102,6 +99,9 @@ function matchVerbs( } } } else if (e.psp) { + if (hasBreakawayAlef(e) && startsWithAleph(base)) { + return acc; + } if (e.separationAtP) { const bRest = e.psp.slice(e.separationAtP); if (bRest === base) { @@ -117,6 +117,8 @@ function matchVerbs( return [...acc, entry]; } } + } else if (hasBreakawayAlef(e) && startsWithAleph(base)) { + return acc; } else if (e.c.includes("intrans.")) { const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3); const miniRootEg = miniRoot + "ېږ"; @@ -169,6 +171,8 @@ function matchVerbs( if (matchShortOrLong(base, bRest)) { return [...acc, entry]; } + } else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) { + return acc; } else { const p = e.prp || e.p; if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) { @@ -245,6 +249,9 @@ function matchVerbs( } } } else if (!e.prp) { + if (hasBreakawayAlef(e) && startsWithAleph(base)) { + return acc; + } if (oEnd) { if ([e.p, e.p.slice(0, -1)].includes(base)) { return [...acc, entry]; @@ -395,3 +402,11 @@ function parseIrregularVerb(s: string): T.ParsedVBE[] { } return []; } + +function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean { + return !e.sepOo && ["ا", "آ"].includes(e.p[0]); +} + +function startsWithAleph(base: string): boolean { + return ["ا", "آ"].includes(base[0]); +} diff --git a/src/lib/src/parsing/parse-vp.test.ts b/src/lib/src/parsing/parse-vp.test.ts index 78a7cda..db2c10d 100644 --- a/src/lib/src/parsing/parse-vp.test.ts +++ b/src/lib/src/parsing/parse-vp.test.ts @@ -86,6 +86,10 @@ const tests: { output: [], error: true, }, + { + input: "ما وانه اخیست", + output: [], + }, ], }, { @@ -1005,129 +1009,129 @@ const tests: { })) ), }, - // { - // input: "ودې وینم", - // output: getPeople(2, "sing").flatMap((objectPerson) => - // getPeople(1, "sing").map((subjectPerson) => ({ - // blocks: [ - // { - // key: 1, - // block: makeSubjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(subjectPerson), - // }), - // }, - // { - // key: 2, - // block: makeObjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(objectPerson), - // }), - // }, - // ], - // verb: { - // type: "verb", - // verb: leedul, - // transitivity: "transitive", - // canChangeTransitivity: false, - // canChangeStatDyn: false, - // negative: false, - // tense: "subjunctiveVerb", - // canChangeVoice: true, - // isCompound: false, - // voice: "active", - // }, - // externalComplement: undefined, - // form: { - // removeKing: true, - // shrinkServant: true, - // }, - // })) - // ), - // }, - // { - // input: "وینم به دې", - // output: getPeople(2, "sing").flatMap((objectPerson) => - // getPeople(1, "sing").map((subjectPerson) => ({ - // blocks: [ - // { - // key: 1, - // block: makeSubjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(subjectPerson), - // }), - // }, - // { - // key: 2, - // block: makeObjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(objectPerson), - // }), - // }, - // ], - // verb: { - // type: "verb", - // verb: leedul, - // transitivity: "transitive", - // canChangeTransitivity: false, - // canChangeStatDyn: false, - // negative: false, - // tense: "imperfectiveFuture", - // canChangeVoice: true, - // isCompound: false, - // voice: "active", - // }, - // externalComplement: undefined, - // form: { - // removeKing: true, - // shrinkServant: true, - // }, - // })) - // ), - // }, - // { - // input: "یو به مې ړلې", - // output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap( - // (objectPerson) => - // getPeople(1, "sing").map( - // (subjectPerson) => ({ - // blocks: [ - // { - // key: 1, - // block: makeSubjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(subjectPerson), - // }), - // }, - // { - // key: 2, - // block: makeObjectSelectionComplete({ - // type: "NP", - // selection: makePronounSelection(objectPerson), - // }), - // }, - // ], - // verb: { - // type: "verb", - // verb: wurul, - // transitivity: "transitive", - // canChangeTransitivity: false, - // canChangeStatDyn: false, - // negative: false, - // tense: "habitualPerfectivePast", - // canChangeVoice: true, - // isCompound: false, - // voice: "active", - // }, - // externalComplement: undefined, - // form: { - // removeKing: true, - // shrinkServant: true, - // }, - // }) - // ) - // ), - // }, + { + input: "ودې وینم", + output: getPeople(2, "sing").flatMap((objectPerson) => + getPeople(1, "sing").map((subjectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "subjunctiveVerb", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: true, + }, + })) + ), + }, + { + input: "وینم به دې", + output: getPeople(2, "sing").flatMap((objectPerson) => + getPeople(1, "sing").map((subjectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "imperfectiveFuture", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: true, + }, + })) + ), + }, + { + input: "یو به مې ړلې", + output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap( + (objectPerson) => + getPeople(1, "sing").map( + (subjectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: wurul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: false, + tense: "habitualPerfectivePast", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: true, + }, + }) + ) + ), + }, ], }, { diff --git a/src/lib/src/parsing/parse-vp.ts b/src/lib/src/parsing/parse-vp.ts index 2634ffe..ef2d966 100644 --- a/src/lib/src/parsing/parse-vp.ts +++ b/src/lib/src/parsing/parse-vp.ts @@ -17,13 +17,6 @@ import { personToGenNum } from "../misc-helpers"; import { equals } from "rambda"; // to hide equatives type-doubling issue -// this should also conjugate to -// وامې نه خیسته -// وامې نه خیستلو -// waa-me nú kheestulo -// وامې نه اخیست -// waa-me nú akheest - // TODO: word query for kawul/kedul/stat/dyn // TODO: learn how to yank / use plugin for JSON neovim @@ -37,6 +30,9 @@ import { equals } from "rambda"; // so we don't get something like ښځو زه خوړلې یم with a hanging // یم not used +// TODO: way to get an error message for past participle and equative +// not matching up + export function parseVP( tokens: Readonly, lookup: LookupFunction diff --git a/src/types.ts b/src/types.ts index bd48f37..ee80f26 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1199,6 +1199,7 @@ export type ParsedBlock = | ParsedPH | ParsedVBE | ParsedVBP + | APSelection | NegativeBlock; export type ParsedKidsSection = {