diff --git a/diagrams/diagram-light.png b/diagrams/diagram-light.png index ef2b5dd..b5379ed 100644 Binary files a/diagrams/diagram-light.png and b/diagrams/diagram-light.png differ diff --git a/src/lib/library.ts b/src/lib/library.ts index 9421c9b..0e59d74 100644 --- a/src/lib/library.ts +++ b/src/lib/library.ts @@ -15,7 +15,10 @@ import { } from "./src/verb-info"; import { makeVPSelectionState } from "./src/phrase-building/verb-selection"; import { vpsReducer } from "./src/phrase-building/vps-reducer"; -import { isPastTense } from "./src/phrase-building/vp-tools"; +import { + isPastTense, + isInvalidSubjObjCombo, +} from "./src/phrase-building/vp-tools"; import { getInflectionPattern } from "./src/inflection-pattern"; import { makePsString, removeFVarients } from "./src/accent-and-ps-utils"; @@ -45,12 +48,7 @@ import { standardizePhonetics, } from "./src/standardize-pashto"; import { phoneticsToDiacritics } from "./src/phonetics-to-diacritics"; -import { - randomPerson, - isInvalidSubjObjCombo, - randomSubjObj, - getEnglishVerb, -} from "./src/np-tools"; +import { randomPerson, randomSubjObj, getEnglishVerb } from "./src/np-tools"; import { getEnglishFromRendered, getPashtoFromRendered, diff --git a/src/lib/src/np-tools.ts b/src/lib/src/np-tools.ts index 73583bb..15cb3d7 100644 --- a/src/lib/src/np-tools.ts +++ b/src/lib/src/np-tools.ts @@ -1,80 +1,79 @@ import * as T from "../../types"; -import { isFirstPerson, parseEc, isSecondPerson } from "./misc-helpers"; +import { parseEc } from "./misc-helpers"; +import { isInvalidSubjObjCombo } from "./phrase-building/vp-tools"; function getRandPers(): T.Person { - return Math.floor(Math.random() * 12); + return Math.floor(Math.random() * 12); } -export function randomPerson(a?: { prev?: T.Person, counterPart?: T.VerbObject | T.NPSelection }) { - // no restrictions, just get any person - if (!a) { - return getRandPers(); - } - if (a.counterPart !== undefined && typeof a.counterPart === "object" && a.counterPart.selection.type === "pronoun") { - // with counterpart pronoun - let newP = 0; - do { - newP = getRandPers(); - } while ( - isInvalidSubjObjCombo(a.counterPart.selection.person, newP) - || - (newP === a.prev) - ); - return newP; - } - // without counterpart pronoun, just previous +export function randomPerson(a?: { + prev?: T.Person; + counterPart?: T.VerbObject | T.NPSelection; +}) { + // no restrictions, just get any person + if (!a) { + return getRandPers(); + } + if ( + a.counterPart !== undefined && + typeof a.counterPart === "object" && + a.counterPart.selection.type === "pronoun" + ) { + // with counterpart pronoun let newP = 0; do { - newP = getRandPers(); - } while (newP === a.prev); - return newP; -} - -export function isInvalidSubjObjCombo(subj: T.Person, obj: T.Person): boolean { - return ( - (isFirstPerson(subj) && isFirstPerson(obj)) - || - (isSecondPerson(subj) && isSecondPerson(obj)) - ); -} - -export function randomSubjObj(old?: { subj: T.Person, obj?: T.Person }): { subj: T.Person, obj: T.Person } { - let subj = 0; - let obj = 0; - do { - subj = getRandPers(); - obj = getRandPers(); + newP = getRandPers(); } while ( - (old && ((old.subj === subj) || (old.obj === obj))) - || - isInvalidSubjObjCombo(subj, obj) + isInvalidSubjObjCombo(a.counterPart.selection.person, newP) || + newP === a.prev ); - return { subj, obj }; + return newP; + } + // without counterpart pronoun, just previous + let newP = 0; + do { + newP = getRandPers(); + } while (newP === a.prev); + return newP; +} + +export function randomSubjObj(old?: { subj: T.Person; obj?: T.Person }): { + subj: T.Person; + obj: T.Person; +} { + let subj = 0; + let obj = 0; + do { + subj = getRandPers(); + obj = getRandPers(); + } while ( + (old && (old.subj === subj || old.obj === obj)) || + isInvalidSubjObjCombo(subj, obj) + ); + return { subj, obj }; } export function getEnglishVerb(entry: T.DictionaryEntry): string { - if (!entry.ec) { - console.error("errored verb"); - console.error(entry); - throw new Error("no english information for verb"); - } - if (entry.ep) { - const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec; - return `to ${ec} ${entry.ep}`; - } - const ec = parseEc(entry.ec); - return `to ${ec[0]}`; + if (!entry.ec) { + console.error("errored verb"); + console.error(entry); + throw new Error("no english information for verb"); + } + if (entry.ep) { + const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec; + return `to ${ec} ${entry.ep}`; + } + const ec = parseEc(entry.ec); + return `to ${ec[0]}`; } export function getEnglishParticiple(entry: T.DictionaryEntry): string { - if (!entry.ec) { - throw new Error("no english information for participle"); - } - const ec = parseEc(entry.ec); - if (entry.ep && ec[0] === "am") { - return `to be/being ${entry.ep}`; - } - const participle = `${ec[2]} / to ${ec[0]}`; - return (entry.ep) - ? `${participle} ${entry.ep}` - : participle; -} \ No newline at end of file + if (!entry.ec) { + throw new Error("no english information for participle"); + } + const ec = parseEc(entry.ec); + if (entry.ep && ec[0] === "am") { + return `to be/being ${entry.ep}`; + } + const participle = `${ec[2]} / to ${ec[0]}`; + return entry.ep ? `${participle} ${entry.ep}` : participle; +} diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index 6e5fd7e..e145ea9 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -49,7 +49,7 @@ export function verbLookup(input: string): T.VerbEntry[] { // IMPORTANT TODO FOR EFFECIANCY! // check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING // if theres no legit verb ending and no tpp possibilities, just return an empty array - const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined; + // const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined; const checkTpp = shouldCheckTpp(input); const fromAawu = checkTpp && undoAaXuPattern(input); const inputWoutOo = @@ -61,86 +61,45 @@ export function verbLookup(input: string): T.VerbEntry[] { // TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp) if (s.endsWith("ېږ")) { return verbs.filter( - sWoutOo - ? ({ entry }) => - [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) || - [ - s.slice(0, -1) + "دل", - sWoutOo.slice(0, -1) + "دل", - sAddedAa.slice(0, -1) + "دل", - ].includes(entry.p) || - [s, sWoutOo, sAddedAa].includes(entry.p) || - (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) || - entry.prp === s || - entry.ssp === s - : ({ entry }) => - [s, sAddedAa].includes(entry.p.slice(0, -1)) || - [s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes( - entry.p - ) || - [s, sAddedAa].includes(entry.p) || - [s, sAddedAa].includes(entry.psp || "") || - [s, sAddedAa].includes(entry.prp || "") || - [s, sAddedAa].includes(entry.ssp || "") + ({ entry }) => + [s, sAddedAa].includes(entry.p.slice(0, -1)) || + [s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes( + entry.p + ) || + [s, sAddedAa].includes(entry.p) || + [s, sAddedAa].includes(entry.psp || "") || + [s, sAddedAa].includes(entry.prp || "") || + [s, sAddedAa].includes(entry.ssp || "") ); } return verbs.filter( - sWoutOo - ? ({ entry }) => - [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) || - // for short intransitive forms - [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -3)) || - [s, sWoutOo, sAddedAa].includes(entry.p) || - (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) || - (checkTpp && + ({ entry }) => + [s, sAddedAa].includes(entry.p.slice(0, -1)) || + // for short intransitive forms + [s, sAddedAa].includes(entry.p.slice(0, -3)) || + [s, sAddedAa].includes(entry.p) || + (checkTpp && + [input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) || + (entry.tppp && + arraysHaveCommon( + [input, inputWoutOo, sAddedAa, inputAddedAa], + splitVarients(entry.tppp) + )) || + [s, sAddedAa].includes(entry.psp || "") || + arraysHaveCommon([entry.prp, entry.prp?.slice(0, -1)], [s, sAddedAa]) || + [s, sAddedAa].includes(entry.ssp || "") || + (entry.separationAtP && + // TODO this is super ugly, do check of short and long function + (entry.p.slice(entry.separationAtP) === s || + entry.p.slice(entry.separationAtP, -1) === s || + (checkTpp && entry.p.slice(entry.separationAtP, -1) === input) || + entry.psp?.slice(entry.separationAtP) === s || + (entry.prp && [ - input.slice(1), - fromAawu && fromAawu.slice(-1), - inputAddedAa, - ].includes(entry.p.slice(0, -1))) || - (entry.tppp && - arraysHaveCommon( - [input, inputWoutOo, sAddedAa], - splitVarients(entry.tppp) - )) || - arraysHaveCommon( - [s, sAddedAa, "و" + s], - [entry.prp, entry.prp?.slice(0, -1)] - ) || - [s, sAddedAa].includes(entry.ssp || "") || - (entry.separationAtP && - (entry.p.slice(entry.separationAtP) === s || - entry.psp?.slice(entry.separationAtP) === s)) - : ({ entry }) => - [s, sAddedAa].includes(entry.p.slice(0, -1)) || - // for short intransitive forms - [s, sAddedAa].includes(entry.p.slice(0, -3)) || - [s, sAddedAa].includes(entry.p) || - (checkTpp && - [input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) || - (entry.tppp && - arraysHaveCommon( - [input, inputWoutOo, sAddedAa, inputAddedAa], - splitVarients(entry.tppp) - )) || - [s, sAddedAa].includes(entry.psp || "") || - arraysHaveCommon( - [entry.prp, entry.prp?.slice(0, -1)], - [s, sAddedAa, "و" + s] - ) || - [s, sAddedAa, "و" + s].includes(entry.ssp || "") || - (entry.separationAtP && - // TODO this is super ugly, do check of short and long function - (entry.p.slice(entry.separationAtP) === s || - entry.p.slice(entry.separationAtP, -1) === s || - (checkTpp && entry.p.slice(entry.separationAtP, -1) === input) || - entry.psp?.slice(entry.separationAtP) === s || - (entry.prp && - [ - entry.prp.slice(entry.separationAtP), - entry.prp.slice(entry.separationAtP).slice(0, -1), - ].includes(s)) || - (entry.ssp && entry.ssp.slice(entry.separationAtP) === s))) + entry.prp.slice(entry.separationAtP), + entry.prp.slice(entry.separationAtP).slice(0, -1), + ].includes(s)) || + (entry.ssp && entry.ssp.slice(entry.separationAtP) === s))) ); } diff --git a/src/lib/src/parsing/parse-blocks.ts b/src/lib/src/parsing/parse-blocks.ts index 0ef4b47..bb559fb 100644 --- a/src/lib/src/parsing/parse-blocks.ts +++ b/src/lib/src/parsing/parse-blocks.ts @@ -1,5 +1,4 @@ import * as T from "../../../types"; -import { fmapParseResult } from "../fp-ps"; import { parseKidsSection } from "./parse-kids-section"; import { parseNeg } from "./parse-negative"; import { parseNP } from "./parse-np"; @@ -21,22 +20,22 @@ export function parseBlocks( return returnParseResult(tokens, { blocks, kids }); } const prevPh: T.ParsedPH | undefined = blocks.find( - (b): b is T.ParsedPH => "type" in b && b.type === "PH" + (b): b is T.ParsedPH => b.type === "PH" ); const vbExists = blocks.some((b) => "type" in b && b.type === "VB"); - const np = prevPh ? [] : fmapParseResult((x) => [x], parseNP(tokens, lookup)); + const np = prevPh ? [] : parseNP(tokens, lookup); // UHOH... This could cause double paths ... maybe don't parse the PH in the parse VB! - const ph = - vbExists || prevPh ? [] : fmapParseResult((x) => [x], parsePH(tokens)); - const vb = fmapParseResult( - ([ph, v]) => (ph ? [ph, v] : [v]), - parseVerb(tokens, verbLookup) - ); - const neg = fmapParseResult((x) => [x], parseNeg(tokens)); + const ph = vbExists || prevPh ? [] : parsePH(tokens); + const vb = parseVerb(tokens, verbLookup); + const neg = parseNeg(tokens); const kidsR = parseKidsSection(tokens, []); - const allResults = [...np, ...ph, ...neg, ...vb, ...kidsR] as T.ParseResult< - T.ParsedBlock[] | { kids: T.ParsedKid[] } - >[]; + const allResults: T.ParseResult[] = [ + ...np, + ...ph, + ...neg, + ...vb, + ...kidsR, + ]; // TODO: is this necessary? // if (!allResults.length) { // return [ @@ -47,10 +46,9 @@ export function parseBlocks( // }, // ]; // } - console.log({ allResults }); return bindParseResult(allResults, (tokens, r) => { const errors: T.ParseError[] = []; - if ("kids" in r) { + if (r.type === "kids") { return { next: parseBlocks(tokens, lookup, verbLookup, blocks, [ ...kids, @@ -62,23 +60,21 @@ export function parseBlocks( : [], }; } - if (prevPh && r.some((x) => "type" in x && x.type === "PH")) { + if (prevPh && r.type === "PH") { return []; } - const vb = r.find((x): x is T.ParsedVBE => "type" in x && x.type === "VB"); - if (!phMatches(prevPh, vb)) { - return []; + // TODO: will have to handle welded + if (r.type === "VB") { + if (!phMatches(prevPh, r)) { + return []; + } } // don't allow two negatives - if ( - "type" in r[0] && - r[0].type === "negative" && - blocks.some((b) => "type" in b && b.type === "negative") - ) { + if (r.type === "negative" && blocks.some((b) => b.type === "negative")) { return []; } return { - next: parseBlocks(tokens, lookup, verbLookup, [...blocks, ...r], kids), + next: parseBlocks(tokens, lookup, verbLookup, [...blocks, r], kids), errors, }; }); diff --git a/src/lib/src/parsing/parse-kids-section.ts b/src/lib/src/parsing/parse-kids-section.ts index 286845a..69140b0 100644 --- a/src/lib/src/parsing/parse-kids-section.ts +++ b/src/lib/src/parsing/parse-kids-section.ts @@ -5,14 +5,18 @@ import { bindParseResult, returnParseResult } from "./utils"; export function parseKidsSection( tokens: Readonly, prevKids: T.ParsedKid[] -): T.ParseResult<{ kids: T.ParsedKid[] }>[] { +): T.ParseResult[] { if (tokens.length === 0) { - return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : []; + return prevKids.length + ? returnParseResult(tokens, { type: "kids", kids: prevKids }) + : []; } const parsedKid = parseKid(tokens); // TODO: is this even necessary ?? if (!parsedKid.length) { - return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : []; + return prevKids.length + ? returnParseResult(tokens, { type: "kids", kids: prevKids }) + : []; } return bindParseResult(parsedKid, (tokens, r) => { // return parseKidsSection(tokens, [...prevKids, r]); diff --git a/src/lib/src/parsing/parse-np.ts b/src/lib/src/parsing/parse-np.ts index 67978be..fb4a483 100644 --- a/src/lib/src/parsing/parse-np.ts +++ b/src/lib/src/parsing/parse-np.ts @@ -21,11 +21,9 @@ export function parseNP( inflected: boolean; selection: T.NounSelection; } - ): { - inflected: boolean; - selection: T.NPSelection; - } { + ): T.ParsedNP { return { + type: "NP", inflected: a.inflected, selection: { type: "NP", diff --git a/src/lib/src/parsing/parse-ph.ts b/src/lib/src/parsing/parse-ph.ts index e39eaa8..2981b4f 100644 --- a/src/lib/src/parsing/parse-ph.ts +++ b/src/lib/src/parsing/parse-ph.ts @@ -18,7 +18,7 @@ const phs = [ export function parsePH( tokens: Readonly -): T.ParseResult<{ type: "PH"; s: string }>[] { +): T.ParseResult[] { if (tokens.length === 0) { return []; } diff --git a/src/lib/src/parsing/parse-verb.test.ts b/src/lib/src/parsing/parse-verb.test.ts index ecd2e72..2281631 100644 --- a/src/lib/src/parsing/parse-verb.test.ts +++ b/src/lib/src/parsing/parse-verb.test.ts @@ -11,7 +11,7 @@ import { import { verbLookup, wordQuery } from "./lookup"; import { parseVerb } from "./parse-verb"; import { tokenizer } from "./tokenizer"; -import { getPeople, removeKeys } from "./utils"; +import { removeKeys } from "./utils"; const wahul = wordQuery("وهل", "verb"); const leekul = wordQuery("لیکل", "verb"); @@ -29,7 +29,7 @@ const watul = wordQuery("وتل", "verb"); const wurul = wordQuery("وړل", "verb"); const akheestul = wordQuery("اخیستل", "verb"); const alwatul = wordQuery("الوتل", "verb"); -// const dartlul = wordQuery("درتلل", "verb"); +// const dartlul = wordQuery("درتلل", "verb") // todo alwatul waalwatul akhistul azmoyul etc @@ -38,7 +38,6 @@ const tests: { cases: { input: string; output: { - ph: string | undefined; root?: { persons: T.Person[]; aspects: T.Aspect[]; @@ -58,10 +57,9 @@ const tests: { input: "وهلم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["perfective", "imperfective"], }, verb: wahul, }, @@ -71,14 +69,13 @@ const tests: { input: "وهم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["perfective", "imperfective"], }, stem: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["perfective", "imperfective"], }, verb: wahul, }, @@ -88,18 +85,17 @@ const tests: { input: "وهې", output: [ { - ph: undefined, root: { persons: [ T.Person.SecondSingMale, T.Person.SecondSingFemale, T.Person.ThirdPlurFemale, ], - aspects: ["imperfective", "perfective"], + aspects: ["perfective", "imperfective"], }, stem: { persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["perfective", "imperfective"], }, verb: wahul, }, @@ -109,14 +105,13 @@ const tests: { input: "لیکم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["perfective", "imperfective"], }, stem: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["perfective", "imperfective"], }, verb: leekul, }, @@ -126,31 +121,13 @@ const tests: { input: "لیکلو", output: [ { - ph: undefined, root: { persons: [ T.Person.FirstPlurMale, T.Person.FirstPlurFemale, T.Person.ThirdSingMale, ], - aspects: ["imperfective", "perfective"], - }, - verb: leekul, - }, - ], - }, - { - input: "ولیکلو", - output: [ - { - ph: "و", - root: { - persons: [ - T.Person.FirstPlurMale, - T.Person.FirstPlurFemale, - T.Person.ThirdSingMale, - ], - aspects: ["perfective"], + aspects: ["perfective", "imperfective"], }, verb: leekul, }, @@ -160,7 +137,6 @@ const tests: { input: "لیکل", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdPlurMale], aspects: ["imperfective", "perfective"], @@ -177,7 +153,6 @@ const tests: { input: "منله", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["imperfective", "perfective"], @@ -190,7 +165,6 @@ const tests: { input: "مني", output: [ { - ph: undefined, stem: { persons: [ T.Person.ThirdSingMale, @@ -208,7 +182,6 @@ const tests: { input: "منئ", output: [ { - ph: undefined, stem: { persons: [T.Person.SecondPlurMale, T.Person.SecondPlurFemale], aspects: ["imperfective", "perfective"], @@ -221,25 +194,11 @@ const tests: { }, ], }, - // with perfective head - { - input: "ومنلم", - output: [ - { - ph: "و", - root: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: manul, - }, - ], - }, + { input: "منلم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective", "imperfective"], @@ -248,44 +207,6 @@ const tests: { }, ], }, - { - input: "وګاللې", - output: [ - { - ph: "و", - root: { - persons: [ - T.Person.SecondSingFemale, - T.Person.SecondSingMale, - T.Person.ThirdPlurFemale, - ], - aspects: ["perfective"], - }, - verb: gaalul, - }, - ], - }, - { - input: "وګالې", - output: [ - { - ph: "و", - root: { - persons: [ - T.Person.SecondSingFemale, - T.Person.SecondSingMale, - T.Person.ThirdPlurFemale, - ], - aspects: ["perfective"], - }, - stem: { - persons: [T.Person.SecondSingFemale, T.Person.SecondSingMale], - aspects: ["perfective"], - }, - verb: gaalul, - }, - ], - }, ], }, { @@ -295,7 +216,6 @@ const tests: { input: "رسېدلم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["imperfective", "perfective"], @@ -308,7 +228,6 @@ const tests: { input: "رسېدم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["imperfective", "perfective"], @@ -317,24 +236,10 @@ const tests: { }, ], }, - { - input: "ورسېدم", - output: [ - { - ph: "و", - root: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: rasedul, - }, - ], - }, { input: "رسېږې", output: [ { - ph: undefined, stem: { persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], aspects: ["imperfective", "perfective"], @@ -343,25 +248,11 @@ const tests: { }, ], }, - { - input: "ورسېږې", - output: [ - { - ph: "و", - stem: { - persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], - aspects: ["perfective"], - }, - verb: rasedul, - }, - ], - }, // short version of intransitive as well { input: "رسئ", output: [ { - ph: undefined, stem: { persons: [T.Person.SecondPlurMale, T.Person.SecondPlurFemale], aspects: ["imperfective", "perfective"], @@ -370,19 +261,6 @@ const tests: { }, ], }, - { - input: "ورسئ", - output: [ - { - ph: "و", - stem: { - persons: [T.Person.SecondPlurMale, T.Person.SecondPlurFemale], - aspects: ["perfective"], - }, - verb: rasedul, - }, - ], - }, // but not for kedul { input: "کې", @@ -397,7 +275,6 @@ const tests: { input: "وینم", output: [ { - ph: undefined, stem: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["imperfective", "perfective"], @@ -410,7 +287,6 @@ const tests: { input: "وینم", output: [ { - ph: undefined, stem: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["imperfective", "perfective"], @@ -424,7 +300,6 @@ const tests: { input: "لیده", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], @@ -433,38 +308,11 @@ const tests: { }, ], }, - { - input: "ولیده", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: leedul, - }, - ], - }, - // BUT NOT THIS ONE - { - input: "ولیدله", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingFemale], - aspects: ["perfective"], - }, - verb: leedul, - }, - ], - }, + { input: "خورې", output: [ { - ph: undefined, stem: { persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], aspects: ["imperfective", "perfective"], @@ -473,19 +321,7 @@ const tests: { }, ], }, - { - input: "وخورې", - output: [ - { - ph: "و", - stem: { - persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], - aspects: ["perfective"], - }, - verb: khorul, - }, - ], - }, + { input: "خوړي", output: [], @@ -494,7 +330,6 @@ const tests: { input: "خوړم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["imperfective", "perfective"], @@ -503,24 +338,10 @@ const tests: { }, ], }, - { - input: "وخوړم", - output: [ - { - ph: "و", - root: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: khorul, - }, - ], - }, { input: "خوړ", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], @@ -529,24 +350,11 @@ const tests: { }, ], }, - { - input: "وخوړ", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: khorul, - }, - ], - }, + { input: "کوت", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], @@ -559,7 +367,6 @@ const tests: { input: "کاته", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], @@ -568,37 +375,11 @@ const tests: { }, ], }, - { - input: "وکاته", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: katul, - }, - ], - }, - { - input: "واخلم", - output: [ - { - ph: "وا", - stem: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: akheestul, - }, - ], - }, + { input: "خلم", output: [ { - ph: undefined, stem: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -611,7 +392,6 @@ const tests: { input: "اخیستم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["imperfective", "perfective"], @@ -620,37 +400,11 @@ const tests: { }, ], }, - { - input: "واخیستم", - output: [ - { - ph: "وا", - root: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: akheestul, - }, - ], - }, - { - input: "واخیستلم", - output: [ - { - ph: "وا", - root: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: akheestul, - }, - ], - }, + { input: "خیستلم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -663,7 +417,6 @@ const tests: { input: "الوځې", output: [ { - ph: undefined, stem: { persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], aspects: ["imperfective", "perfective"], @@ -672,24 +425,11 @@ const tests: { }, ], }, - { - input: "والوځې", - output: [ - { - ph: "وا", - stem: { - persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], - aspects: ["perfective"], - }, - verb: alwatul, - }, - ], - }, + { input: "لوځې", output: [ { - ph: undefined, stem: { persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], aspects: ["perfective"], @@ -707,7 +447,6 @@ const tests: { input: "کېني", output: [ { - ph: "کې", stem: { persons: [ T.Person.ThirdSingMale, @@ -715,7 +454,7 @@ const tests: { T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale, ], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: kenaastul, }, @@ -725,7 +464,6 @@ const tests: { input: "نم", output: [ { - ph: undefined, stem: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -738,10 +476,9 @@ const tests: { input: "کېناست", output: [ { - ph: "کې", root: { persons: [T.Person.ThirdSingMale], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: kenaastul, }, @@ -751,7 +488,6 @@ const tests: { input: "ناست", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["perfective"], @@ -764,10 +500,9 @@ const tests: { input: "پرېږدو", output: [ { - ph: "پرې", stem: { persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: prexodul, }, @@ -777,7 +512,6 @@ const tests: { input: "ږدو", output: [ { - ph: undefined, stem: { persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], aspects: ["perfective"], @@ -785,7 +519,6 @@ const tests: { verb: prexodul, }, { - ph: undefined, stem: { persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], aspects: ["imperfective", "perfective"], @@ -798,10 +531,9 @@ const tests: { input: "پرېښوده", output: [ { - ph: "پرې", root: { persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: prexodul, }, @@ -811,7 +543,6 @@ const tests: { input: "ښودله", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["imperfective", "perfective"], @@ -819,7 +550,6 @@ const tests: { verb: xodul, }, { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -827,7 +557,6 @@ const tests: { verb: prexodul, }, { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -836,67 +565,10 @@ const tests: { }, ], }, - { - input: "لاړلم", - output: [ - { - ph: "لا", - root: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: tlul, - }, - ], - }, - { - input: "لاړم", - output: [ - { - ph: "لا", - root: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: tlul, - }, - ], - }, - { - input: "لاړو", - output: [ - { - ph: "لا", - root: { - persons: [ - T.Person.FirstPlurMale, - T.Person.FirstPlurFemale, - T.Person.ThirdSingMale, - ], - aspects: ["perfective"], - }, - verb: tlul, - }, - ], - }, - { - input: "لاړه", - output: [ - { - ph: "لا", - root: { - persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: tlul, - }, - ], - }, { input: "ړلم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -904,7 +576,6 @@ const tests: { verb: tlul, }, { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -917,7 +588,6 @@ const tests: { input: "ړم", output: [ { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -925,7 +595,6 @@ const tests: { verb: tlul, }, { - ph: undefined, root: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -934,37 +603,10 @@ const tests: { }, ], }, - { - input: "والووت", - output: [ - { - ph: "وا", - root: { - persons: [T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: alwatul, - }, - ], - }, - { - input: "والواته", - output: [ - { - ph: "وا", - root: { - persons: [T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: alwatul, - }, - ], - }, { input: "لواته", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["perfective"], @@ -973,50 +615,10 @@ const tests: { }, ], }, - { - input: "راشې", - output: [ - { - ph: "را", - stem: { - persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], - aspects: ["perfective"], - }, - verb: raatlul, - }, - ], - }, - { - input: "ورشې", - output: [ - { - ph: "ور", - stem: { - persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], - aspects: ["perfective"], - }, - verb: wartlul, - }, - ], - }, - { - input: "یوسم", - output: [ - { - ph: "یو", - stem: { - persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], - aspects: ["perfective"], - }, - verb: wurul, - }, - ], - }, { input: "سم", output: [ { - ph: undefined, stem: { persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], aspects: ["perfective"], @@ -1025,21 +627,6 @@ const tests: { }, ], }, - - // TOOD: would be more robust if it looked for this without excetions // same as واخیست etc - // { - // input: "لاړ", - // output: [ - // { - // ph: "لا", - // root: { - // persons: [T.Person.ThirdSingMale], - // aspects: ["perfective"], - // }, - // verb: tlul, - // }, - // ], - // }, ], }, { @@ -1049,7 +636,6 @@ const tests: { input: "رسېد", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], @@ -1058,27 +644,13 @@ const tests: { }, ], }, - { - input: "ورسېد", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: rasedul, - }, - ], - }, { input: "کېناسته", output: [ { - ph: "کې", root: { persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: kenaastul, }, @@ -1088,10 +660,9 @@ const tests: { input: "کېناست", output: [ { - ph: "کې", root: { persons: [T.Person.ThirdSingMale], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: kenaastul, }, @@ -1101,71 +672,22 @@ const tests: { input: "کېناستو", output: [ { - ph: "کې", root: { persons: [ T.Person.ThirdSingMale, T.Person.FirstPlurMale, T.Person.FirstPlurFemale, ], - aspects: ["imperfective", "perfective"], + aspects: ["imperfective"], }, verb: kenaastul, }, ], }, - { - input: "ووتلو", - output: [ - { - ph: "و", - root: { - persons: [ - T.Person.ThirdSingMale, - T.Person.FirstPlurMale, - T.Person.FirstPlurFemale, - ], - aspects: ["perfective"], - }, - verb: watul, - }, - ], - }, - { - input: "ووتو", - output: [ - { - ph: "و", - root: { - persons: [ - T.Person.ThirdSingMale, - T.Person.FirstPlurMale, - T.Person.FirstPlurFemale, - ], - aspects: ["perfective"], - }, - verb: watul, - }, - ], - }, - { - input: "ووته", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingFemale], - aspects: ["perfective"], - }, - verb: watul, - }, - ], - }, { input: "واته", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], @@ -1178,7 +700,6 @@ const tests: { input: "ووت", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], @@ -1187,32 +708,6 @@ const tests: { }, ], }, - { - input: "وووت", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: watul, - }, - ], - }, - { - input: "ورسېد", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingMale], - aspects: ["perfective"], - }, - verb: rasedul, - }, - ], - }, ], }, { @@ -1222,7 +717,6 @@ const tests: { input: "ته", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective"], @@ -1235,7 +729,6 @@ const tests: { input: "راته", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective"], @@ -1248,7 +741,6 @@ const tests: { input: "ورته", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective"], @@ -1261,7 +753,6 @@ const tests: { input: "درته", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingMale], aspects: ["imperfective"], @@ -1274,7 +765,6 @@ const tests: { input: "شو", output: [ { - ph: undefined, root: { persons: [ T.Person.ThirdSingMale, @@ -1290,7 +780,6 @@ const tests: { verb: kedulStat, }, { - ph: undefined, root: { persons: [ T.Person.ThirdSingMale, @@ -1311,7 +800,6 @@ const tests: { input: "شوله", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -1319,7 +807,6 @@ const tests: { verb: kedulStat, }, { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -1332,7 +819,6 @@ const tests: { input: "شوه", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -1340,7 +826,6 @@ const tests: { verb: kedulStat, }, { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -1353,7 +838,6 @@ const tests: { input: "شوله", output: [ { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -1361,7 +845,6 @@ const tests: { verb: kedulStat, }, { - ph: undefined, root: { persons: [T.Person.ThirdSingFemale], aspects: ["perfective"], @@ -1370,50 +853,7 @@ const tests: { }, ], }, - { - input: "وشوله", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingFemale], - aspects: ["perfective"], - }, - verb: kedulDyn, - }, - ], - }, - { - input: "وشوه", - output: [ - { - ph: "و", - root: { - persons: [T.Person.ThirdSingFemale], - aspects: ["perfective"], - }, - verb: kedulDyn, - }, - ], - }, - { - input: "وشي", - output: [ - { - ph: "و", - stem: { - persons: [ - T.Person.ThirdSingMale, - T.Person.ThirdSingFemale, - T.Person.ThirdPlurMale, - T.Person.ThirdPlurFemale, - ], - aspects: ["perfective"], - }, - verb: kedulDyn, - }, - ], - }, + // TODO: It would probably be more effecient just to return the kedul verb options // and then when we put things together with the perfective head parsed they could // become raatlul etc... @@ -1421,7 +861,6 @@ const tests: { input: "شي", output: [ { - ph: undefined, stem: { persons: [ T.Person.ThirdSingMale, @@ -1434,7 +873,6 @@ const tests: { verb: kedulDyn, }, { - ph: undefined, stem: { persons: [ T.Person.ThirdSingMale, @@ -1447,7 +885,6 @@ const tests: { verb: kedulStat, }, { - ph: undefined, stem: { persons: [ T.Person.ThirdSingMale, @@ -1460,7 +897,6 @@ const tests: { verb: raatlul, }, { - ph: undefined, stem: { persons: [ T.Person.ThirdSingMale, @@ -1473,7 +909,6 @@ const tests: { verb: dartlul, }, { - ph: undefined, stem: { persons: [ T.Person.ThirdSingMale, @@ -1491,45 +926,28 @@ const tests: { }, ]; -// Also do سي yo see tests.forEach(({ label, cases }) => { test(label, () => { cases.forEach(({ input, output }) => { const tokens = tokenizer(input); const vbs = parseVerb(tokens, verbLookup).map((r) => r.body); - const madeVbsS = output.reduce< - [{ type: "PH"; s: string } | undefined, Omit][] - >((acc, o) => { + const madeVbsS = output.reduce((acc, o) => { return [ ...acc, ...(["root", "stem"] as const).flatMap((base) => (o[base]?.aspects || []).flatMap((aspect) => - (o[base]?.persons || []).flatMap< - [{ type: "PH"; s: string } | undefined, Omit] - >((person) => { - const r: [ - { type: "PH"; s: string } | undefined, - Omit - ] = [ - aspect === "perfective" && o.ph - ? { - type: "PH", - s: o.ph, - } - : undefined, - { - type: "VB" as const, - person, - info: { - type: "verb" as const, - aspect, - base, - verb: o.verb, - }, + (o[base]?.persons || []).flatMap((person) => [ + { + type: "VB" as const, + person, + info: { + type: "verb" as const, + aspect, + base, + verb: o.verb, }, - ]; - return [r]; - }) + }, + ]) ) ), ]; diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 23aa145..1a58570 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -22,7 +22,7 @@ import { export function parseVerb( tokens: Readonly, verbLookup: (s: string) => T.VerbEntry[] -): T.ParseResult<[{ type: "PH"; s: string } | undefined, Omit]>[] { +): T.ParseResult[] { if (tokens.length === 0) { return []; } @@ -57,8 +57,8 @@ function matchVerbs( root: T.Person[]; stem: T.Person[]; } -): [{ type: "PH"; s: string } | undefined, Omit][] { - const w: ReturnType = []; +): T.ParsedVBE[] { + const w: T.ParsedVBE[] = []; const lEnding = s.endsWith("ل"); const base = s.endsWith("ل") ? s : s.slice(0, -1); const matchShortOrLong = (b: string, x: string) => { @@ -80,167 +80,52 @@ function matchVerbs( return e.p.slice(0, -1) === base; } }), - perfective: entries.reduce< - { ph: string | undefined; entry: T.VerbEntry }[] - >((acc, entry) => { + perfective: entries.reduce((acc, entry) => { const e = entry.entry; const baseWAa = "ا" + base; if (e.c.includes("comp")) { return acc; } if (e.ssp) { - const bRest = e.separationAtP ? e.ssp.slice(e.separationAtP) : ""; - if (bRest === base) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; - } - if (e.ssp === base) { - return [ - ...acc, - { - ph: e.separationAtF - ? e.ssp.slice(0, e.separationAtP) - : undefined, - entry, - }, - ]; + if (e.separationAtP) { + const bRest = e.ssp.slice(e.separationAtP); + if (bRest === base) { + return [...acc, entry]; + } + } else { + if (e.ssp === base) { + return [...acc, entry]; + } } } else if (e.psp) { - const bRest = e.separationAtP ? e.psp.slice(e.separationAtP) : ""; - if (bRest === base) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; - } - if (e.psp === base && e.separationAtP) { - return [ - ...acc, - { - ph: e.psp.slice(0, e.separationAtP), - entry, - }, - ]; - } - if (!e.sepOo) { - if (base.startsWith("وا") && base.slice(1) === e.psp) { - return [ - ...acc, - { - ph: "وا", - entry, - }, - ]; + if (e.separationAtP) { + const bRest = e.psp.slice(e.separationAtP); + if (bRest === base) { + return [...acc, entry]; } - if ((base.startsWith("و") && base.slice(1)) === e.psp) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; + } else { + if (!e.sepOo) { + if (baseWAa === e.psp) { + return [...acc, entry]; + } } - if (baseWAa === e.psp) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + if (base === e.psp) { + return [...acc, entry]; } } - if (base === e.psp) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; - } } else if (e.c.includes("intrans.")) { const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3); const miniRootEg = miniRoot + "ېږ"; if ([miniRoot, miniRootEg].includes(base)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; - } else if (!e.sepOo) { - if ( - base.startsWith("وا") && - [miniRoot, miniRootEg].includes(base.slice(1)) - ) { - return [ - ...acc, - { - ph: "وا", - entry, - }, - ]; - } else if ( - base.startsWith("و") && - [miniRoot, miniRootEg].includes(base.slice(1)) - ) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; - } + return [...acc, entry]; } } else { const eb = e.p.slice(0, -1); if (eb === base) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + return [...acc, entry]; } else if (!e.sepOo) { - if (base.startsWith("وا") && eb === base.slice(1)) { - return [ - ...acc, - { - ph: "وا", - entry, - }, - ]; - } - if (base.startsWith("و") && eb === base.slice(1)) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; - } if (baseWAa === base.slice(1)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + return [...acc, entry]; } } } @@ -250,19 +135,16 @@ function matchVerbs( Object.entries(stemMatches).forEach(([aspect, entries]) => { entries.forEach((verb) => { people.stem.forEach((person) => { - w.push([ - "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined, - { - type: "VB", - person, - info: { - type: "verb", - aspect: aspect as T.Aspect, - base: "stem", - verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb, - }, + w.push({ + type: "VB", + person, + info: { + type: "verb", + aspect: aspect as T.Aspect, + base: "stem", + verb: removeFVarientsFromVerb(verb), }, - ]); + }); }); }); }); @@ -272,56 +154,21 @@ function matchVerbs( imperfective: entries.filter( ({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p) ), - perfective: entries.reduce< - { ph: string | undefined; entry: T.VerbEntry }[] - >((acc, entry) => { + perfective: entries.reduce((acc, entry) => { const e = entry.entry; if (e.c.includes("comp")) { return acc; } if (e.separationAtP) { const b = e.prp || e.p; - const bHead = b.slice(0, e.separationAtP); const bRest = b.slice(e.separationAtP); - if (matchShortOrLong(base, b)) { - return [ - ...acc, - { - ph: bHead, - entry, - }, - ]; - } else if (matchShortOrLong(base, bRest)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + if (matchShortOrLong(base, bRest)) { + return [...acc, entry]; } } else { - const baseNoOo = base.startsWith("و") && base.slice(1); const p = e.prp || e.p; - if (baseNoOo && matchShortOrLong(baseNoOo, p)) { - return [ - ...acc, - { - ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و", - entry, - }, - ]; - } else if ( - matchShortOrLong(base, p) || - matchShortOrLong("ا" + base, p) - ) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) { + return [...acc, entry]; } } return acc; @@ -331,19 +178,16 @@ function matchVerbs( Object.entries(rootMatches).forEach(([aspect, entries]) => { entries.forEach((verb) => { people.root.forEach((person) => { - w.push([ - "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined, - { - type: "VB", - person, - info: { - type: "verb", - aspect: aspect as T.Aspect, - base: "root", - verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb, - }, + w.push({ + type: "VB", + person, + info: { + type: "verb", + aspect: aspect as T.Aspect, + base: "root", + verb: removeFVarientsFromVerb(verb), }, - ]); + }); }); }); }); @@ -351,8 +195,6 @@ function matchVerbs( const hamzaEnd = s.at(-1) === "ه"; const oEnd = s.at(-1) === "و"; const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1)); - const b = hamzaEnd || oEnd ? base : s; - const bNoOo = b.startsWith("و") && b.slice(1); const tppMatches = { imperfective: entries.filter( ({ entry: e }) => @@ -363,163 +205,63 @@ function matchVerbs( (hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1))) // TODO: if check for modified aaXu thing! ), - perfective: entries.reduce< - { ph: string | undefined; entry: T.VerbEntry }[] - >((acc, entry) => { + perfective: entries.reduce((acc, entry) => { const e = entry.entry; if (e.c.includes("comp")) { return acc; } if (e.separationAtP) { const b = e.prp || e.p; - const bHead = b.slice(0, e.separationAtP); const bRest = b.slice(e.separationAtP); if (bRest === "شول") { return acc; } if (abruptEnd) { - if (s === b.slice(0, -1)) { - return [ - ...acc, - { - ph: bHead, - entry, - }, - ]; - } if (s === bRest.slice(0, -1)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + return [...acc, entry]; } } else if (hamzaEnd) { - if (base === b.slice(0, -1)) { - return [ - ...acc, - { - ph: bHead, - entry, - }, - ]; - } if (base === bRest.slice(0, -1)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + return [...acc, entry]; } } else if (oEnd) { - if ([b, b.slice(0, -1)].includes(base)) { - return [ - ...acc, - { - ph: bHead, - entry, - }, - ]; - } if ([bRest, bRest.slice(0, -1)].includes(base)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + return [...acc, entry]; } } } else if (!e.prp) { if (oEnd) { - if (bNoOo && [e.p, e.p.slice(0, -1).includes(bNoOo)]) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; - } else if ([e.p, e.p.slice(0, -1)].includes(base)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + if ([e.p, e.p.slice(0, -1)].includes(base)) { + return [...acc, entry]; } } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) { const b = hamzaEnd ? base : s; const p = e.p.slice(0, -1); - if (bNoOo && bNoOo === p) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; - } else if (b === p) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + if (b === p) { + return [...acc, entry]; } } } - const sNoOo = s.startsWith("و") && s.slice(1); - if (isInVarients(e.tppp, sNoOo)) { - return [ - ...acc, - { - ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و", - entry, - }, - ]; - } else if (isInVarients(e.tppp, s)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + if (isInVarients(e.tppp, s)) { + return [...acc, entry]; } else if (isInVarients(e.tppp, "ا" + s)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + return [...acc, entry]; } return acc; }, []), }; Object.entries(tppMatches).forEach(([aspect, entries]) => { entries.forEach((verb) => { - w.push([ - "ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined, - { - type: "VB", - person: T.Person.ThirdSingMale, - info: { - type: "verb", - aspect: aspect as T.Aspect, - base: "root", - verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb, - }, + w.push({ + type: "VB", + person: T.Person.ThirdSingMale, + info: { + type: "verb", + aspect: aspect as T.Aspect, + base: "root", + verb: removeFVarientsFromVerb(verb), }, - ]); + }); }); }); return w; @@ -580,66 +322,26 @@ function getVerbEnding(p: string): { }; } -// const [ph, rest]: [T.PH | undefined, T.PsString] = v.entry.noOo -// ? [undefined, base] -// : v.entry.sepOo -// ? [{ type: "PH", ps: { p: "و ", f: "óo`" } }, base] -// : ["آ", "ا"].includes(base.p.charAt(0)) && base.f.charAt(0) === "a" -// ? [{ type: "PH", ps: { p: "وا", f: "wáa" } }, removeAStart(base)] -// : ["óo", "oo"].includes(base.f.slice(0, 2)) -// ? [{ type: "PH", ps: { p: "و", f: "wÚ" } }, base] -// : ["ée", "ee"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "ای" -// ? [ -// { type: "PH", ps: { p: "وي", f: "wée" } }, -// { -// p: base.p.slice(2), -// f: base.f.slice(2), -// }, -// ] -// : ["é", "e"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "اې" -// ? [ -// { type: "PH", ps: { p: "وي", f: "wé" } }, -// { -// p: base.p.slice(2), -// f: base.f.slice(1), -// }, -// ] -// : ["ó", "o"].includes(base.f[0]) && base.p.slice(0, 2) === "او" -// ? [{ type: "PH", ps: { p: "و", f: "óo`" } }, base] -// : [{ type: "PH", ps: { p: "و", f: "óo" } }, base]; -// return [ph, removeAccents(rest)]; -// function removeAStart(ps: T.PsString) { -// return { -// p: ps.p.slice(1), -// f: ps.f.slice(ps.f[1] === "a" ? 2 : 1), -// }; -// } - // TODO: could handle all sh- verbs for efficiencies sake -function parseIrregularVerb( - s: string -): [{ type: "PH"; s: string } | undefined, Omit][] { +function parseIrregularVerb(s: string): T.ParsedVBE[] { if (["ته", "راته", "ورته", "درته"].includes(s)) { return [ - [ - undefined, - { - type: "VB", - info: { - aspect: "imperfective", - base: "root", - type: "verb", - verb: s.startsWith("را") - ? raatlul - : s.startsWith("ور") - ? wartlul - : s.startsWith("در") - ? dartlul - : tlul, - }, - person: T.Person.ThirdSingMale, + { + type: "VB", + info: { + aspect: "imperfective", + base: "root", + type: "verb", + verb: s.startsWith("را") + ? raatlul + : s.startsWith("ور") + ? wartlul + : s.startsWith("در") + ? dartlul + : tlul, }, - ], + person: T.Person.ThirdSingMale, + }, ]; } if (s === "شو") { @@ -649,38 +351,28 @@ function parseIrregularVerb( T.Person.FirstPlurMale, T.Person.FirstPlurFemale, ].flatMap((person) => - [kedulStat, kedulDyn].map< - [{ type: "PH"; s: string } | undefined, Omit] - >((verb) => [ - undefined, - { - type: "VB", - info: { - aspect: "perfective", - base: "root", - type: "verb", - verb, - }, - person, + [kedulStat, kedulDyn].map((verb) => ({ + type: "VB", + info: { + aspect: "perfective", + base: "root", + type: "verb", + verb, }, - ]) + person, + })) ), ...[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].flatMap((person) => - [kedulStat, kedulDyn].map< - [{ type: "PH"; s: string } | undefined, Omit] - >((verb) => [ - undefined, - { - type: "VB", - info: { - aspect: "perfective", - base: "stem", - type: "verb", - verb, - }, - person, + [kedulStat, kedulDyn].map((verb) => ({ + type: "VB", + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb, }, - ]) + person, + })) ), ]; } diff --git a/src/lib/src/parsing/parse-vp.test.ts b/src/lib/src/parsing/parse-vp.test.ts index f8a30c8..d055671 100644 --- a/src/lib/src/parsing/parse-vp.test.ts +++ b/src/lib/src/parsing/parse-vp.test.ts @@ -21,6 +21,7 @@ const maashoom = wordQuery("ماشوم", "noun"); const leedul = wordQuery("لیدل", "verb"); const kenaastul = wordQuery("کېناستل", "verb"); const wurul = wordQuery("وړل", "verb"); +const akheestul = wordQuery("اخیستل", "verb"); const tests: { label: string; @@ -65,6 +66,20 @@ const tests: { output: [], error: true, }, + { + input: "زه سړی کور", + output: [], + }, + { + input: "زه دې مې وینم", + output: [], + error: true, + }, + { + input: "وامې دې خیست", + output: [], + error: true, + }, ], }, { @@ -1104,6 +1119,263 @@ const tests: { }, ], }, + { + label: "negatives and ordering", + cases: [ + { + input: "سړی تا نه ویني", + output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makeNounSelection(sarey, undefined), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: true, + tense: "presentVerb", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + })), + }, + { + input: "سړی نه تا ویني", + output: [], + }, + { + input: "سړی تا ونه ویني", + output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makeNounSelection(sarey, undefined), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(objectPerson), + }), + }, + ], + verb: { + type: "verb", + verb: leedul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: true, + tense: "subjunctiveVerb", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + })), + }, + // with regular و or وا perfective heads, the negative needs to be behind the perfective head + { + input: "سړی تا نه وویني", + output: [], + }, + { + input: "سړي وانه خیستله", + output: [ + { + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makeNounSelection(sarey, undefined), + }), + }, + { + key: 2, + block: makeObjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(T.Person.ThirdSingFemale), + }), + }, + ], + verb: { + type: "verb", + verb: akheestul, + transitivity: "transitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: true, + tense: "perfectivePast", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: true, + shrinkServant: false, + }, + }, + ], + }, + { + input: "سړي نه واخیستله", + output: [], + }, + // but for other perfective heads, the negative can go before or after + { + input: "زه نه کېنم", + output: getPeople(1, "sing").flatMap((subjectPerson) => + ( + ["presentVerb", "subjunctiveVerb"] as const + ).map((tense) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: { + type: "objectSelection", + selection: "none", + }, + }, + ], + verb: { + type: "verb", + verb: kenaastul, + transitivity: "intransitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: true, + tense, + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + })) + ), + }, + { + input: "زه کېنه نم", + output: getPeople(1, "sing").map( + (subjectPerson) => ({ + blocks: [ + { + key: 1, + block: makeSubjectSelectionComplete({ + type: "NP", + selection: makePronounSelection(subjectPerson), + }), + }, + { + key: 2, + block: { + type: "objectSelection", + selection: "none", + }, + }, + ], + verb: { + type: "verb", + verb: kenaastul, + transitivity: "intransitive", + canChangeTransitivity: false, + canChangeStatDyn: false, + negative: true, + tense: "subjunctiveVerb", + canChangeVoice: true, + isCompound: false, + voice: "active", + }, + externalComplement: undefined, + form: { + removeKing: false, + shrinkServant: false, + }, + }) + ), + }, + ], + }, + { + label: "should check for subject / object conflicts", + cases: [ + { + input: "زه ما وینم", + output: [], + error: true, + }, + { + input: "ما زه ولیدلم", + output: [], + error: true, + }, + { + input: "تاسو تا ولیدئ", + output: [], + error: true, + }, + { + input: "زه مې وینم", + output: [], + error: true, + }, + { + input: "زه مې ولیدم", + output: [], + error: true, + }, + { + input: "ومې لیدم", + output: [], + error: true, + }, + { + input: "وینم مې", + output: [], + error: true, + }, + ], + }, ]; tests.forEach(({ label, cases }) => { diff --git a/src/lib/src/parsing/parse-vp.ts b/src/lib/src/parsing/parse-vp.ts index 8cb3db6..abb90b4 100644 --- a/src/lib/src/parsing/parse-vp.ts +++ b/src/lib/src/parsing/parse-vp.ts @@ -4,30 +4,16 @@ import { makeObjectSelectionComplete, makeSubjectSelectionComplete, } from "../phrase-building/blocks-utils"; -import { getPersonFromNP, isPastTense } from "../phrase-building/vp-tools"; +import { + getPersonFromNP, + isInvalidSubjObjCombo, + isPastTense, +} from "../phrase-building/vp-tools"; import { parseBlocks } from "./parse-blocks"; import { makePronounSelection } from "../phrase-building/make-selections"; import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp"; // to hide equatives type-doubling issue -// demo - -// ماشوم -// ماشومان -// خوږ -// masc plur - -// past tense -// ماشومانو ښځه ولیدله -// ماشومانو ښځه ولیدله - -// cool examples: -// زه ماشوم وهم -// وهلم // خواږه - -// ومې لیدې -// ویې وهم - // this should also conjugate to // وامې نه خیسته // وامې نه خیستلو @@ -35,19 +21,11 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp"; // وامې نه اخیست // waa-me nú akheest -// TODO: add tests for negatives and negative order -// TODO: imperfective past should also be "was going to / would have" // map over transitivities, to give transitive / gramm. transitive optionns -// make impossible subjects like I saw me, error - // TODO: learn how to yank / use plugin for JSON neovim // learn to use jq to edit selected json in vim ?? COOOL -// TODO: transitivity options - -// TODO: the و is really making it slow down... why? - export function parseVP( tokens: Readonly, lookup: (s: Partial) => T.DictionaryEntry[], @@ -58,11 +36,11 @@ export function parseVP( } const blocks = parseBlocks(tokens, lookup, verbLookup, [], []); return bindParseResult(blocks, (tokens, { blocks, kids }) => { - const phIndex = blocks.findIndex((x) => "type" in x && x.type === "PH"); - const vbeIndex = blocks.findIndex((x) => "type" in x && x.type === "VB"); + const phIndex = blocks.findIndex((x) => x.type === "PH"); + const vbeIndex = blocks.findIndex((x) => x.type === "VB"); const ba = !!kids.find((k) => k === "ba"); const negIndex = blocks.findIndex( - (x) => "type" in x && x.type === "negative" && !x.imperative + (x) => x.type === "negative" && !x.imperative ); const ph = phIndex !== -1 ? (blocks[phIndex] as T.ParsedPH) : undefined; const verb = @@ -110,10 +88,7 @@ export function parseVP( voice: "active", }; - const nps = blocks.filter( - (x): x is { inflected: boolean; selection: T.NPSelection } => - "inflected" in x - ); + const nps = blocks.filter((x): x is T.ParsedNP => x.type === "NP"); // TODO: check that verb and PH match if (verb.info.verb.entry.c.includes("intrans")) { const errors: T.ParseError[] = []; @@ -258,7 +233,9 @@ export function parseVP( shrinkServant: true, }, } as T.VPSelectionComplete, - errors + pronounConflictInBlocks(blocks) + ? [...errors, { message: "invalid subject/object combo" }] + : errors ) ); } @@ -357,7 +334,9 @@ export function parseVP( externalComplement: undefined, form, } as T.VPSelectionComplete, - errors, + errors: pronounConflictInBlocks(blocks) + ? [...errors, { message: "invalid subject/object combo" }] + : errors, })); }); } else { @@ -369,6 +348,16 @@ export function parseVP( ] as const ).flatMap(([s, o, flip]) => { const errors: T.ParseError[] = []; + if ( + isInvalidSubjObjCombo( + getPersonFromNP(s.selection), + getPersonFromNP(o.selection) + ) + ) { + errors.push({ + message: "invalid subject/object combo", + }); + } if (!s.inflected) { errors.push({ message: @@ -422,6 +411,16 @@ export function parseVP( ] as const ).flatMap(([s, o, flip]) => { const errors: T.ParseError[] = []; + if ( + isInvalidSubjObjCombo( + getPersonFromNP(s.selection), + getPersonFromNP(o.selection) + ) + ) { + errors.push({ + message: "invalid subject/object combo", + }); + } if (isFirstOrSecondPersPronoun(o.selection)) { if (!o.inflected) { errors.push({ @@ -563,3 +562,16 @@ function negativeInPlace({ } return true; } + +function pronounConflictInBlocks(blocks: T.VPSBlockComplete[]): boolean { + const subj = blocks.find((b) => b.block.type === "subjectSelection") + ?.block as T.SubjectSelectionComplete; + const obj = blocks.find((b) => b.block.type === "objectSelection") + ?.block as T.ObjectSelectionComplete; + const subjPerson = getPersonFromNP(subj.selection); + const objPerson = getPersonFromNP(obj.selection); + if (objPerson === undefined) { + return false; + } + return isInvalidSubjObjCombo(subjPerson, objPerson); +} diff --git a/src/types.ts b/src/types.ts index d21f194..2ed64f1 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1196,7 +1196,13 @@ export type Block = { export type ParsedBlock = ParsedNP | ParsedPH | ParsedVBE | NegativeBlock; +export type ParsedKidsSection = { + type: "kids"; + kids: ParsedKid[]; +}; + export type ParsedNP = { + type: "NP"; inflected: boolean; selection: NPSelection; };