From f53c81e14dc8920b4f34ba4e7581829cf31b045a Mon Sep 17 00:00:00 2001 From: adueck Date: Mon, 14 Aug 2023 16:52:41 +0400 Subject: [PATCH] =?UTF-8?q?fixed=20problem=20with=20=D9=84=D8=A7=DA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package-lock.json | 4 +- package.json | 2 +- src/components/package-lock.json | 4 +- src/components/package.json | 2 +- src/lib/package.json | 2 +- src/lib/src/misc-helpers.test.ts | 153 ++++++++++++++++----- src/lib/src/misc-helpers.ts | 7 + src/lib/src/new-verb-engine/render-verb.ts | 16 +++ src/lib/src/p-text-helpers.ts | 20 ++- src/lib/src/parsing/lookup.tsx | 17 ++- src/lib/src/parsing/parse-verb.test.ts | 93 ++++++++++--- src/lib/src/parsing/parse-verb.ts | 57 +++++++- src/lib/src/phrase-building/render-vp.ts | 23 +++- 13 files changed, 327 insertions(+), 73 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0fd77fb..7459f74 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "pashto-inflector", - "version": "6.0.5", + "version": "6.0.6", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "pashto-inflector", - "version": "6.0.5", + "version": "6.0.6", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 84a8673..3896904 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pashto-inflector", - "version": "6.0.5", + "version": "6.0.6", "author": "lingdocs.com", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "homepage": "https://verbs.lingdocs.com", diff --git a/src/components/package-lock.json b/src/components/package-lock.json index 6498281..ec926ff 100644 --- a/src/components/package-lock.json +++ b/src/components/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lingdocs/ps-react", - "version": "6.0.5", + "version": "6.0.6", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@lingdocs/ps-react", - "version": "6.0.5", + "version": "6.0.6", "license": "MIT", "dependencies": { "@formkit/auto-animate": "^1.0.0-beta.3", diff --git a/src/components/package.json b/src/components/package.json index 379b2c1..cfcab22 100644 --- a/src/components/package.json +++ b/src/components/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/ps-react", - "version": "6.0.5", + "version": "6.0.6", "description": "Pashto inflector library module with React components", "main": "dist/components/library.js", "module": "dist/components/library.js", diff --git a/src/lib/package.json b/src/lib/package.json index a3c1e95..fba22a0 100644 --- a/src/lib/package.json +++ b/src/lib/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/inflect", - "version": "6.0.5", + "version": "6.0.6", "description": "Pashto inflector library", "main": "dist/index.js", "types": "dist/lib/library.d.ts", diff --git a/src/lib/src/misc-helpers.test.ts b/src/lib/src/misc-helpers.test.ts index 78c5854..9250296 100644 --- a/src/lib/src/misc-helpers.test.ts +++ b/src/lib/src/misc-helpers.test.ts @@ -6,45 +6,126 @@ * */ - import { - parseEc, - personFromVerbBlockPos, +import { + arraysHaveCommon, + parseEc, + personFromVerbBlockPos, } from "./misc-helpers"; import * as T from "../../types"; test("personFromVerbBlockPos should work", () => { - expect(personFromVerbBlockPos([0, 0])).toEqual(T.Person.FirstSingMale); - expect(personFromVerbBlockPos([1, 0])).toEqual(T.Person.FirstSingFemale); - expect(personFromVerbBlockPos([2, 0])).toEqual(T.Person.SecondSingMale); - expect(personFromVerbBlockPos([3, 0])).toEqual(T.Person.SecondSingFemale); - expect(personFromVerbBlockPos([4, 0])).toEqual(T.Person.ThirdSingMale); - expect(personFromVerbBlockPos([5, 0])).toEqual(T.Person.ThirdSingFemale); - expect(personFromVerbBlockPos([0, 1])).toEqual(T.Person.FirstPlurMale); - expect(personFromVerbBlockPos([1, 1])).toEqual(T.Person.FirstPlurFemale); - expect(personFromVerbBlockPos([2, 1])).toEqual(T.Person.SecondPlurMale); - expect(personFromVerbBlockPos([3, 1])).toEqual(T.Person.SecondPlurFemale); - expect(personFromVerbBlockPos([4, 1])).toEqual(T.Person.ThirdPlurMale); - expect(personFromVerbBlockPos([5, 1])).toEqual(T.Person.ThirdPlurFemale); -}) + expect(personFromVerbBlockPos([0, 0])).toEqual(T.Person.FirstSingMale); + expect(personFromVerbBlockPos([1, 0])).toEqual(T.Person.FirstSingFemale); + expect(personFromVerbBlockPos([2, 0])).toEqual(T.Person.SecondSingMale); + expect(personFromVerbBlockPos([3, 0])).toEqual(T.Person.SecondSingFemale); + expect(personFromVerbBlockPos([4, 0])).toEqual(T.Person.ThirdSingMale); + expect(personFromVerbBlockPos([5, 0])).toEqual(T.Person.ThirdSingFemale); + expect(personFromVerbBlockPos([0, 1])).toEqual(T.Person.FirstPlurMale); + expect(personFromVerbBlockPos([1, 1])).toEqual(T.Person.FirstPlurFemale); + expect(personFromVerbBlockPos([2, 1])).toEqual(T.Person.SecondPlurMale); + expect(personFromVerbBlockPos([3, 1])).toEqual(T.Person.SecondPlurFemale); + expect(personFromVerbBlockPos([4, 1])).toEqual(T.Person.ThirdPlurMale); + expect(personFromVerbBlockPos([5, 1])).toEqual(T.Person.ThirdPlurFemale); +}); test("parseEc should work", () => { - expect(parseEc("walk")).toEqual(["walk", "walks", "walking", "walked", "walked"]); - expect(parseEc("scare")).toEqual(["scare", "scares", "scaring", "scared", "scared"]); - expect(parseEc("study")).toEqual(["study","studies","studying","studied","studied"]); - expect(parseEc("cry")).toEqual(["cry", "cries", "crying", "cried", "cried"]); - expect(parseEc("marry")).toEqual(["marry","marries","marrying","married","married"]); - expect(parseEc("get")).toEqual(["get","gets","getting","got","gotten"]); - expect(parseEc("become")).toEqual(["become","becomes","becoming","became","become"]); - expect(parseEc("be")).toEqual(["am","is","being","was","been"]); - expect(parseEc("make")).toEqual(["make","makes","making","made","made"]); - expect(parseEc("have")).toEqual(["have","has","having","had","had"]); - expect(parseEc("die")).toEqual(["die", "dies", "dying", "died", "died"]); - expect(parseEc("stray")).toEqual(["stray","strays","straying","strayed","strayed"]); - expect(parseEc("cross")).toEqual(["cross","crosses","crossing","crossed","crossed"]); - expect(parseEc("raise")).toEqual(["raise","raises","raising","raised","raised"]); - expect(parseEc("play")).toEqual(["play","plays","playing","played","played"]); - // if there are only four items the perfect will be the same as the simple past - expect(parseEc("think,thinks,thinking,thought")).toEqual(["think","thinks","thinking","thought","thought"]); - expect(parseEc("sew,sews,sewing,sewed,sown")).toEqual(["sew", "sews", "sewing", "sewed", "sown"]); - expect(parseEc(" sew, sews,sewing ,sewed, sown")).toEqual(["sew", "sews", "sewing", "sewed", "sown"]); -}); \ No newline at end of file + expect(parseEc("walk")).toEqual([ + "walk", + "walks", + "walking", + "walked", + "walked", + ]); + expect(parseEc("scare")).toEqual([ + "scare", + "scares", + "scaring", + "scared", + "scared", + ]); + expect(parseEc("study")).toEqual([ + "study", + "studies", + "studying", + "studied", + "studied", + ]); + expect(parseEc("cry")).toEqual(["cry", "cries", "crying", "cried", "cried"]); + expect(parseEc("marry")).toEqual([ + "marry", + "marries", + "marrying", + "married", + "married", + ]); + expect(parseEc("get")).toEqual(["get", "gets", "getting", "got", "gotten"]); + expect(parseEc("become")).toEqual([ + "become", + "becomes", + "becoming", + "became", + "become", + ]); + expect(parseEc("be")).toEqual(["am", "is", "being", "was", "been"]); + expect(parseEc("make")).toEqual(["make", "makes", "making", "made", "made"]); + expect(parseEc("have")).toEqual(["have", "has", "having", "had", "had"]); + expect(parseEc("die")).toEqual(["die", "dies", "dying", "died", "died"]); + expect(parseEc("stray")).toEqual([ + "stray", + "strays", + "straying", + "strayed", + "strayed", + ]); + expect(parseEc("cross")).toEqual([ + "cross", + "crosses", + "crossing", + "crossed", + "crossed", + ]); + expect(parseEc("raise")).toEqual([ + "raise", + "raises", + "raising", + "raised", + "raised", + ]); + expect(parseEc("play")).toEqual([ + "play", + "plays", + "playing", + "played", + "played", + ]); + // if there are only four items the perfect will be the same as the simple past + expect(parseEc("think,thinks,thinking,thought")).toEqual([ + "think", + "thinks", + "thinking", + "thought", + "thought", + ]); + expect(parseEc("sew,sews,sewing,sewed,sown")).toEqual([ + "sew", + "sews", + "sewing", + "sewed", + "sown", + ]); + expect(parseEc(" sew, sews,sewing ,sewed, sown")).toEqual([ + "sew", + "sews", + "sewing", + "sewed", + "sown", + ]); +}); + +test("arraysHaveCommon should work", () => { + expect(arraysHaveCommon(["a", "b", "c"], ["d"])).toBe(false); + expect(arraysHaveCommon(["a", "b", "c"], ["f", "b"])).toBe(true); + expect(arraysHaveCommon([], [23])).toBe(false); + expect(arraysHaveCommon([], [])).toBe(false); + expect(arraysHaveCommon([3, 2, 1, 0, 100], [24, 290, 2, 55, 100])).toBe(true); +}); diff --git a/src/lib/src/misc-helpers.ts b/src/lib/src/misc-helpers.ts index 2bcc1bb..9e1596a 100644 --- a/src/lib/src/misc-helpers.ts +++ b/src/lib/src/misc-helpers.ts @@ -346,3 +346,10 @@ export function chooseLength( export function isGivingVerb(v: T.VerbEntry): boolean { return ["raakawul", "darkawul", "warkawul"].includes(v.entry.g); } + +/** + * checks to see if two arrays have a common element + */ +export function arraysHaveCommon(a: X[], b: X[]): boolean { + return a.some((x) => b.includes(x)); +} diff --git a/src/lib/src/new-verb-engine/render-verb.ts b/src/lib/src/new-verb-engine/render-verb.ts index c2ae0eb..b638348 100644 --- a/src/lib/src/new-verb-engine/render-verb.ts +++ b/src/lib/src/new-verb-engine/render-verb.ts @@ -354,6 +354,22 @@ function ensure3rdPast( if (isTlulVerb(verb)) { // should be imperfective at this point // the perfective غی should already be covered in the function this is coming from + if (verb.entry.p === "تلل" && aspect === "perfective") { + return [ + { + p: "ړ", + f: "R", + }, + { + p: "ړه", + f: "Ru", + }, + { + p: "ړو", + f: "Ro", + }, + ]; + } return [ { p: rs[0].p.slice(0, -1) + "ه", diff --git a/src/lib/src/p-text-helpers.ts b/src/lib/src/p-text-helpers.ts index 83ef070..6915ea7 100644 --- a/src/lib/src/p-text-helpers.ts +++ b/src/lib/src/p-text-helpers.ts @@ -1084,13 +1084,25 @@ export function mapPsString( * @returns */ export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore { - function cut(s: string) { - return s.split(/[,|،]/).map((s) => s.trim()); - } - const { p, f } = mapPsString(w, cut); + const { p, f } = mapPsString(w, splitVarients); return zipWith(makePsString, p, f) as T.ArrayOneOrMore; } +export function splitVarients(s: string) { + return s.split(/[,|،]/).map((s) => s.trim()); +} + +/** + * checks to see if a search string exists in a list of comma-seperated varents + */ +export function isInVarients( + vars: string | false | undefined, + search: string | false | undefined +): boolean { + if (!vars || !search) return false; + return splitVarients(vars).includes(search); +} + export function removeEndTick(w: T.PsString): T.PsString; export function removeEndTick(w: string): string; export function removeEndTick(w: T.PsString | string): T.PsString | string { diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index 4edfab2..55fb8bb 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -3,6 +3,8 @@ import verbs from "../../../verbs"; import * as T from "../../../types"; import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; +import { splitVarients } from "../p-text-helpers"; +import { arraysHaveCommon } from "../misc-helpers"; export function lookup(s: Partial): T.DictionaryEntry[] { const [key, value] = Object.entries(s)[0]; @@ -41,6 +43,7 @@ export function shouldCheckTpp(s: string): boolean { export function verbLookup(input: string): T.VerbEntry[] { const s = input.slice(0, -1); + // IMPORTANT TODO FOR EFFECIANCY! // check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING // if theres no legit verb ending and no tpp possibilities, just return an empty array const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined; @@ -57,14 +60,12 @@ export function verbLookup(input: string): T.VerbEntry[] { entry.p ) || [s, sWoutOo].includes(entry.p) || - (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || (entry.psp && [s, sWoutOo].includes(entry.psp)) || entry.prp === s || entry.ssp === s : ({ entry }) => entry.p.slice(0, -1) === s || entry.p === s.slice(0, -1) + "دل" || - (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || entry.p === s || entry.psp === s || entry.prp === s || @@ -79,7 +80,11 @@ export function verbLookup(input: string): T.VerbEntry[] { [s, sWoutOo].includes(entry.p.slice(0, -3)) || [s, sWoutOo].includes(entry.p) || (entry.psp && [s, sWoutOo].includes(entry.psp)) || - (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || + (entry.tppp && + arraysHaveCommon( + [input, inputWoutOo], + splitVarients(entry.tppp) + )) || entry.prp === s || entry.ssp === s || (entry.separationAtP && @@ -90,7 +95,11 @@ export function verbLookup(input: string): T.VerbEntry[] { // for short intransitive forms entry.p.slice(0, -3) === s || entry.p === s || - (checkTpp && [input, inputWoutOo].includes(entry.tppp)) || + (entry.tppp && + arraysHaveCommon( + [input, inputWoutOo], + splitVarients(entry.tppp) + )) || entry.psp === s || entry.prp === s || entry.ssp === s || diff --git a/src/lib/src/parsing/parse-verb.test.ts b/src/lib/src/parsing/parse-verb.test.ts index 1f90b6c..555e787 100644 --- a/src/lib/src/parsing/parse-verb.test.ts +++ b/src/lib/src/parsing/parse-verb.test.ts @@ -16,6 +16,9 @@ const prexodul = wordQuery("پرېښودل", "verb"); const xodul = wordQuery("ښودل", "verb"); const kexodul = wordQuery("کېښودل", "verb"); const katul = wordQuery("کتل", "verb"); +const tlul = wordQuery("تلل", "verb"); + +// todo alwatul waalwatul akhistul azmoyul etc const tests: { label: string; @@ -384,7 +387,7 @@ const tests: { { ph: undefined, root: { - persons: [T.Person.ThirdSingFemale], + persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], }, verb: leedul, @@ -397,7 +400,7 @@ const tests: { { ph: "و", root: { - persons: [T.Person.ThirdSingFemale], + persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], aspects: ["perfective"], }, verb: leedul, @@ -487,20 +490,58 @@ const tests: { }, ], }, - // TODO: should do کاته as well... what to do, have multiple tpp forms ? YES! - // { - // input: "وکوت", - // output: [ - // { - // ph: "و", - // root: { - // persons: [T.Person.ThirdSingMale], - // aspects: ["perfective"], - // }, - // verb: katul, - // }, - // ], - // }, + { + input: "وخوړ", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: khorul, + }, + ], + }, + { + input: "کوت", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: katul, + }, + ], + }, + { + input: "کاته", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: katul, + }, + ], + }, + { + input: "وکاته", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: katul, + }, + ], + }, ], }, { @@ -577,7 +618,7 @@ const tests: { { ph: "پرې", root: { - persons: [T.Person.ThirdSingFemale], + persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], aspects: ["imperfective", "perfective"], }, verb: prexodul, @@ -615,6 +656,24 @@ const tests: { }, ], }, + { + label: "verbs with abrupt 3rd pers sing past endings", + cases: [ + // { + // input: "لاړ", + // output: [ + // { + // ph: undefined, + // root: { + // persons: [T.Person.ThirdSingMale], + // aspects: ["perfective"], + // }, + // verb: tlul, + // }, + // ], + // }, + ], + }, ]; tests.forEach(({ label, cases }) => { diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 0ee3151..50d90b5 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -1,4 +1,5 @@ import * as T from "../../../types"; +import { isInVarients } from "../p-text-helpers"; // third persion idosyncratic // if it ends in a dental or ه - look for tttp @@ -280,16 +281,66 @@ function matchVerbs( }); }); } + const hamzaEnd = s.endsWith("ه"); const tppMatches = { imperfective: entries.filter( - ({ entry: e }) => !e.c.includes("comp") && s === e.tppp + ({ entry: e }) => + !e.c.includes("comp") && + (isInVarients(e.tppp, s) || (hamzaEnd && base === e.p.slice(0, -1))) ), perfective: entries.reduce< { ph: string | undefined; entry: T.VerbEntry }[] >((acc, entry) => { const e = entry.entry; + if (e.c.includes("comp")) { + return acc; + } + if (e.separationAtP && hamzaEnd) { + const b = e.prp || e.p; + const bHead = b.slice(0, e.separationAtP); + const bRest = b.slice(e.separationAtP); + // this is REPETITIVE from above ... but doing it again here because the ه will only match on the SHORT versions for 3rd pers masc sing + // could modify and reuse the code above for this + if (base === b.slice(0, -1)) { + return [ + ...acc, + { + ph: bHead, + entry, + }, + ]; + } + if (base === bRest.slice(0, -1)) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } + } else if (!e.prp && hamzaEnd) { + const baseNoOo = base.startsWith("و") && base.slice(1); + if (baseNoOo && baseNoOo === e.p.slice(0, -1)) { + return [ + ...acc, + { + ph: "و", + entry, + }, + ]; + } else if (base === e.p.slice(0, -1)) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } + } const sNoOo = s.startsWith("و") && s.slice(1); - if (sNoOo && sNoOo === e.tppp) { + if (isInVarients(e.tppp, sNoOo)) { return [ ...acc, { @@ -297,7 +348,7 @@ function matchVerbs( entry, }, ]; - } else if (s === e.tppp) { + } else if (isInVarients(e.tppp, s)) { return [ ...acc, { diff --git a/src/lib/src/phrase-building/render-vp.ts b/src/lib/src/phrase-building/render-vp.ts index 75cbf7f..095cdb6 100644 --- a/src/lib/src/phrase-building/render-vp.ts +++ b/src/lib/src/phrase-building/render-vp.ts @@ -46,7 +46,6 @@ export function renderVP(VP: T.VPSelectionComplete): T.VPRendered { king, complementPerson, }); - // TODO: for dynamic - const { vbs, hasBa } = renderVerb({ verb: VP.verb.isCompound === "generative stative" @@ -164,7 +163,8 @@ export function insertNegative( } if (nonStandPerfectiveSplit) { return [ - insertFromEnd(blocksNoAccentA, neg, 1), + // special case to handle نه لاړ (can't say لا نه ړ) + insertFromEnd(ensureNoHangingR(blocksNoAccentA), neg, 1), insertFromEnd(blocksNoAccentA, neg, 2), ]; } else { @@ -172,6 +172,25 @@ export function insertNegative( } } +function ensureNoHangingR(b: T.Block[]): T.Block[] { + return b.map((x) => + x.block.type === "VB" && + "short" in x.block.ps && + x.block.ps.short.find((x) => x.p === "ړ") + ? { + ...x, + block: { + ...x.block, + ps: { + ...x.block.ps, + short: x.block.ps.short.filter((ps) => ps.p !== "ړ"), + }, + }, + } + : x + ); +} + function swapEndingBlocks(arr: X[], n: number = 1): X[] { return [ ...arr.slice(0, arr.length - (n + 1)),