fixed problem with لاړ

This commit is contained in:
adueck 2023-08-14 16:52:41 +04:00
parent 6b04f67720
commit f53c81e14d
13 changed files with 327 additions and 73 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{
"name": "pashto-inflector",
"version": "6.0.5",
"version": "6.0.6",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "pashto-inflector",
"version": "6.0.5",
"version": "6.0.6",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "pashto-inflector",
"version": "6.0.5",
"version": "6.0.6",
"author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com",

View File

@ -1,12 +1,12 @@
{
"name": "@lingdocs/ps-react",
"version": "6.0.5",
"version": "6.0.6",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@lingdocs/ps-react",
"version": "6.0.5",
"version": "6.0.6",
"license": "MIT",
"dependencies": {
"@formkit/auto-animate": "^1.0.0-beta.3",

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/ps-react",
"version": "6.0.5",
"version": "6.0.6",
"description": "Pashto inflector library module with React components",
"main": "dist/components/library.js",
"module": "dist/components/library.js",

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/inflect",
"version": "6.0.5",
"version": "6.0.6",
"description": "Pashto inflector library",
"main": "dist/index.js",
"types": "dist/lib/library.d.ts",

View File

@ -6,45 +6,126 @@
*
*/
import {
parseEc,
personFromVerbBlockPos,
import {
arraysHaveCommon,
parseEc,
personFromVerbBlockPos,
} from "./misc-helpers";
import * as T from "../../types";
test("personFromVerbBlockPos should work", () => {
expect(personFromVerbBlockPos([0, 0])).toEqual(T.Person.FirstSingMale);
expect(personFromVerbBlockPos([1, 0])).toEqual(T.Person.FirstSingFemale);
expect(personFromVerbBlockPos([2, 0])).toEqual(T.Person.SecondSingMale);
expect(personFromVerbBlockPos([3, 0])).toEqual(T.Person.SecondSingFemale);
expect(personFromVerbBlockPos([4, 0])).toEqual(T.Person.ThirdSingMale);
expect(personFromVerbBlockPos([5, 0])).toEqual(T.Person.ThirdSingFemale);
expect(personFromVerbBlockPos([0, 1])).toEqual(T.Person.FirstPlurMale);
expect(personFromVerbBlockPos([1, 1])).toEqual(T.Person.FirstPlurFemale);
expect(personFromVerbBlockPos([2, 1])).toEqual(T.Person.SecondPlurMale);
expect(personFromVerbBlockPos([3, 1])).toEqual(T.Person.SecondPlurFemale);
expect(personFromVerbBlockPos([4, 1])).toEqual(T.Person.ThirdPlurMale);
expect(personFromVerbBlockPos([5, 1])).toEqual(T.Person.ThirdPlurFemale);
})
expect(personFromVerbBlockPos([0, 0])).toEqual(T.Person.FirstSingMale);
expect(personFromVerbBlockPos([1, 0])).toEqual(T.Person.FirstSingFemale);
expect(personFromVerbBlockPos([2, 0])).toEqual(T.Person.SecondSingMale);
expect(personFromVerbBlockPos([3, 0])).toEqual(T.Person.SecondSingFemale);
expect(personFromVerbBlockPos([4, 0])).toEqual(T.Person.ThirdSingMale);
expect(personFromVerbBlockPos([5, 0])).toEqual(T.Person.ThirdSingFemale);
expect(personFromVerbBlockPos([0, 1])).toEqual(T.Person.FirstPlurMale);
expect(personFromVerbBlockPos([1, 1])).toEqual(T.Person.FirstPlurFemale);
expect(personFromVerbBlockPos([2, 1])).toEqual(T.Person.SecondPlurMale);
expect(personFromVerbBlockPos([3, 1])).toEqual(T.Person.SecondPlurFemale);
expect(personFromVerbBlockPos([4, 1])).toEqual(T.Person.ThirdPlurMale);
expect(personFromVerbBlockPos([5, 1])).toEqual(T.Person.ThirdPlurFemale);
});
test("parseEc should work", () => {
expect(parseEc("walk")).toEqual(["walk", "walks", "walking", "walked", "walked"]);
expect(parseEc("scare")).toEqual(["scare", "scares", "scaring", "scared", "scared"]);
expect(parseEc("study")).toEqual(["study","studies","studying","studied","studied"]);
expect(parseEc("cry")).toEqual(["cry", "cries", "crying", "cried", "cried"]);
expect(parseEc("marry")).toEqual(["marry","marries","marrying","married","married"]);
expect(parseEc("get")).toEqual(["get","gets","getting","got","gotten"]);
expect(parseEc("become")).toEqual(["become","becomes","becoming","became","become"]);
expect(parseEc("be")).toEqual(["am","is","being","was","been"]);
expect(parseEc("make")).toEqual(["make","makes","making","made","made"]);
expect(parseEc("have")).toEqual(["have","has","having","had","had"]);
expect(parseEc("die")).toEqual(["die", "dies", "dying", "died", "died"]);
expect(parseEc("stray")).toEqual(["stray","strays","straying","strayed","strayed"]);
expect(parseEc("cross")).toEqual(["cross","crosses","crossing","crossed","crossed"]);
expect(parseEc("raise")).toEqual(["raise","raises","raising","raised","raised"]);
expect(parseEc("play")).toEqual(["play","plays","playing","played","played"]);
// if there are only four items the perfect will be the same as the simple past
expect(parseEc("think,thinks,thinking,thought")).toEqual(["think","thinks","thinking","thought","thought"]);
expect(parseEc("sew,sews,sewing,sewed,sown")).toEqual(["sew", "sews", "sewing", "sewed", "sown"]);
expect(parseEc(" sew, sews,sewing ,sewed, sown")).toEqual(["sew", "sews", "sewing", "sewed", "sown"]);
});
expect(parseEc("walk")).toEqual([
"walk",
"walks",
"walking",
"walked",
"walked",
]);
expect(parseEc("scare")).toEqual([
"scare",
"scares",
"scaring",
"scared",
"scared",
]);
expect(parseEc("study")).toEqual([
"study",
"studies",
"studying",
"studied",
"studied",
]);
expect(parseEc("cry")).toEqual(["cry", "cries", "crying", "cried", "cried"]);
expect(parseEc("marry")).toEqual([
"marry",
"marries",
"marrying",
"married",
"married",
]);
expect(parseEc("get")).toEqual(["get", "gets", "getting", "got", "gotten"]);
expect(parseEc("become")).toEqual([
"become",
"becomes",
"becoming",
"became",
"become",
]);
expect(parseEc("be")).toEqual(["am", "is", "being", "was", "been"]);
expect(parseEc("make")).toEqual(["make", "makes", "making", "made", "made"]);
expect(parseEc("have")).toEqual(["have", "has", "having", "had", "had"]);
expect(parseEc("die")).toEqual(["die", "dies", "dying", "died", "died"]);
expect(parseEc("stray")).toEqual([
"stray",
"strays",
"straying",
"strayed",
"strayed",
]);
expect(parseEc("cross")).toEqual([
"cross",
"crosses",
"crossing",
"crossed",
"crossed",
]);
expect(parseEc("raise")).toEqual([
"raise",
"raises",
"raising",
"raised",
"raised",
]);
expect(parseEc("play")).toEqual([
"play",
"plays",
"playing",
"played",
"played",
]);
// if there are only four items the perfect will be the same as the simple past
expect(parseEc("think,thinks,thinking,thought")).toEqual([
"think",
"thinks",
"thinking",
"thought",
"thought",
]);
expect(parseEc("sew,sews,sewing,sewed,sown")).toEqual([
"sew",
"sews",
"sewing",
"sewed",
"sown",
]);
expect(parseEc(" sew, sews,sewing ,sewed, sown")).toEqual([
"sew",
"sews",
"sewing",
"sewed",
"sown",
]);
});
// arraysHaveCommon reports whether two arrays share at least one element
test("arraysHaveCommon should work", () => {
  const letters = ["a", "b", "c"];
  // no element in common
  expect(arraysHaveCommon(letters, ["d"])).toBe(false);
  // "b" appears in both
  expect(arraysHaveCommon(letters, ["f", "b"])).toBe(true);
  // an empty array never has anything in common with another array
  expect(arraysHaveCommon([], [23])).toBe(false);
  expect(arraysHaveCommon([], [])).toBe(false);
  // works for numbers too (2 and 100 appear in both)
  expect(arraysHaveCommon([3, 2, 1, 0, 100], [24, 290, 2, 55, 100])).toBe(
    true
  );
});

View File

@ -346,3 +346,10 @@ export function chooseLength<N>(
/**
 * Tells whether a verb entry is marked as one of the three "giving" verbs.
 *
 * @param v - the verb entry to inspect; only `v.entry.g` is read
 * @returns true when `v.entry.g` is exactly "raakawul", "darkawul" or "warkawul"
 */
export function isGivingVerb(v: T.VerbEntry): boolean {
  const { g } = v.entry;
  return g === "raakawul" || g === "darkawul" || g === "warkawul";
}
/**
 * Checks whether two arrays share at least one element.
 *
 * Elements are compared the way `Array.prototype.includes` compares them,
 * and an empty array never shares anything with another array.
 *
 * @param a - the array whose elements are looked for
 * @param b - the array that is searched
 * @returns true as soon as one element of `a` is found in `b`, otherwise false
 */
export function arraysHaveCommon<X>(a: X[], b: X[]): boolean {
  const isAlsoInB = (candidate: X): boolean => b.includes(candidate);
  return a.some(isAlsoInB);
}

View File

@ -354,6 +354,22 @@ function ensure3rdPast(
if (isTlulVerb(verb)) {
// should be imperfective at this point
// the perfective غی should already be covered in the function this is coming from
if (verb.entry.p === "تلل" && aspect === "perfective") {
return [
{
p: "ړ",
f: "R",
},
{
p: "ړه",
f: "Ru",
},
{
p: "ړو",
f: "Ro",
},
];
}
return [
{
p: rs[0].p.slice(0, -1) + "ه",

View File

@ -1084,13 +1084,25 @@ export function mapPsString<T>(
* @returns
*/
export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore<T.PsString> {
function cut(s: string) {
return s.split(/[,|،]/).map((s) => s.trim());
}
const { p, f } = mapPsString(w, cut);
const { p, f } = mapPsString(w, splitVarients);
return zipWith(makePsString, p, f) as T.ArrayOneOrMore<T.PsString>;
}
/**
 * Breaks a string holding several variants into its individual parts,
 * trimming the whitespace around each part.
 *
 * The separators are the Latin comma, the Arabic comma (،) and the pipe
 * character (inside the character class the | is a literal pipe, not a
 * regex alternation). A string with no separator comes back as a
 * one-element array.
 */
export function splitVarients(s: string) {
  const pieces = s.split(/[,|،]/);
  return pieces.map((piece) => piece.trim());
}
/**
 * Checks whether a search string is exactly one of the variants in a
 * separator-delimited list of variants (the list is split with splitVarients).
 *
 * @param vars - the list of variants; a falsy value (false, undefined, "") never matches
 * @param search - the string to look for; a falsy value never matches
 * @returns true only when both arguments are truthy and `search` equals one of the split-out variants
 */
export function isInVarients(
  vars: string | false | undefined,
  search: string | false | undefined
): boolean {
  return !!vars && !!search && splitVarients(vars).includes(search);
}
export function removeEndTick(w: T.PsString): T.PsString;
export function removeEndTick(w: string): string;
export function removeEndTick(w: T.PsString | string): T.PsString | string {

View File

@ -3,6 +3,8 @@ import verbs from "../../../verbs";
import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
import { splitVarients } from "../p-text-helpers";
import { arraysHaveCommon } from "../misc-helpers";
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
const [key, value] = Object.entries(s)[0];
@ -41,6 +43,7 @@ export function shouldCheckTpp(s: string): boolean {
export function verbLookup(input: string): T.VerbEntry[] {
const s = input.slice(0, -1);
// IMPORTANT TODO FOR EFFICIENCY!
// check endings TODO: ONLY LOOK UP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING
// if there's no legit verb ending and no tpp possibilities, just return an empty array
const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
@ -57,14 +60,12 @@ export function verbLookup(input: string): T.VerbEntry[] {
entry.p
) ||
[s, sWoutOo].includes(entry.p) ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) ||
(entry.psp && [s, sWoutOo].includes(entry.psp)) ||
entry.prp === s ||
entry.ssp === s
: ({ entry }) =>
entry.p.slice(0, -1) === s ||
entry.p === s.slice(0, -1) + "دل" ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) ||
entry.p === s ||
entry.psp === s ||
entry.prp === s ||
@ -79,7 +80,11 @@ export function verbLookup(input: string): T.VerbEntry[] {
[s, sWoutOo].includes(entry.p.slice(0, -3)) ||
[s, sWoutOo].includes(entry.p) ||
(entry.psp && [s, sWoutOo].includes(entry.psp)) ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) ||
(entry.tppp &&
arraysHaveCommon(
[input, inputWoutOo],
splitVarients(entry.tppp)
)) ||
entry.prp === s ||
entry.ssp === s ||
(entry.separationAtP &&
@ -90,7 +95,11 @@ export function verbLookup(input: string): T.VerbEntry[] {
// for short intransitive forms
entry.p.slice(0, -3) === s ||
entry.p === s ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) ||
(entry.tppp &&
arraysHaveCommon(
[input, inputWoutOo],
splitVarients(entry.tppp)
)) ||
entry.psp === s ||
entry.prp === s ||
entry.ssp === s ||

View File

@ -16,6 +16,9 @@ const prexodul = wordQuery("پرېښودل", "verb");
const xodul = wordQuery("ښودل", "verb");
const kexodul = wordQuery("کېښودل", "verb");
const katul = wordQuery("کتل", "verb");
const tlul = wordQuery("تلل", "verb");
// todo alwatul waalwatul akhistul azmoyul etc
const tests: {
label: string;
@ -384,7 +387,7 @@ const tests: {
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingFemale],
persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: leedul,
@ -397,7 +400,7 @@ const tests: {
{
ph: "و",
root: {
persons: [T.Person.ThirdSingFemale],
persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: leedul,
@ -487,20 +490,58 @@ const tests: {
},
],
},
// TODO: should do کاته as well... what to do, have multiple tpp forms ? YES!
// {
// input: "وکوت",
// output: [
// {
// ph: "و",
// root: {
// persons: [T.Person.ThirdSingMale],
// aspects: ["perfective"],
// },
// verb: katul,
// },
// ],
// },
{
input: "وخوړ",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: khorul,
},
],
},
{
input: "کوت",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{
input: "کاته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{
input: "وکاته",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: katul,
},
],
},
],
},
{
@ -577,7 +618,7 @@ const tests: {
{
ph: "پرې",
root: {
persons: [T.Person.ThirdSingFemale],
persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: prexodul,
@ -615,6 +656,24 @@ const tests: {
},
],
},
{
label: "verbs with abrupt 3rd pers sing past endings",
cases: [
// {
// input: "لاړ",
// output: [
// {
// ph: undefined,
// root: {
// persons: [T.Person.ThirdSingMale],
// aspects: ["perfective"],
// },
// verb: tlul,
// },
// ],
// },
],
},
];
tests.forEach(({ label, cases }) => {

View File

@ -1,4 +1,5 @@
import * as T from "../../../types";
import { isInVarients } from "../p-text-helpers";
// third person idiosyncratic
// if it ends in a dental or ه - look for tppp
@ -280,16 +281,66 @@ function matchVerbs(
});
});
}
const hamzaEnd = s.endsWith("ه");
const tppMatches = {
imperfective: entries.filter(
({ entry: e }) => !e.c.includes("comp") && s === e.tppp
({ entry: e }) =>
!e.c.includes("comp") &&
(isInVarients(e.tppp, s) || (hamzaEnd && base === e.p.slice(0, -1)))
),
perfective: entries.reduce<
{ ph: string | undefined; entry: T.VerbEntry }[]
>((acc, entry) => {
const e = entry.entry;
if (e.c.includes("comp")) {
return acc;
}
if (e.separationAtP && hamzaEnd) {
const b = e.prp || e.p;
const bHead = b.slice(0, e.separationAtP);
const bRest = b.slice(e.separationAtP);
// this is REPETITIVE from above ... but doing it again here because the ه will only match on the SHORT versions for 3rd pers masc sing
// could modify and reuse the code above for this
if (base === b.slice(0, -1)) {
return [
...acc,
{
ph: bHead,
entry,
},
];
}
if (base === bRest.slice(0, -1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
} else if (!e.prp && hamzaEnd) {
const baseNoOo = base.startsWith("و") && base.slice(1);
if (baseNoOo && baseNoOo === e.p.slice(0, -1)) {
return [
...acc,
{
ph: "و",
entry,
},
];
} else if (base === e.p.slice(0, -1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
}
const sNoOo = s.startsWith("و") && s.slice(1);
if (sNoOo && sNoOo === e.tppp) {
if (isInVarients(e.tppp, sNoOo)) {
return [
...acc,
{
@ -297,7 +348,7 @@ function matchVerbs(
entry,
},
];
} else if (s === e.tppp) {
} else if (isInVarients(e.tppp, s)) {
return [
...acc,
{

View File

@ -46,7 +46,6 @@ export function renderVP(VP: T.VPSelectionComplete): T.VPRendered {
king,
complementPerson,
});
// TODO: for dynamic -
const { vbs, hasBa } = renderVerb({
verb:
VP.verb.isCompound === "generative stative"
@ -164,7 +163,8 @@ export function insertNegative(
}
if (nonStandPerfectiveSplit) {
return [
insertFromEnd(blocksNoAccentA, neg, 1),
// special case to handle نه لاړ (can't say لا نه ړ)
insertFromEnd(ensureNoHangingR(blocksNoAccentA), neg, 1),
insertFromEnd(blocksNoAccentA, neg, 2),
];
} else {
@ -172,6 +172,25 @@ export function insertNegative(
}
}
/**
 * Returns a copy of the blocks in which the bare "ړ" short form is removed
 * from every VB block that has one.
 *
 * A block is only rewritten when its inner block is of type "VB", its `ps`
 * has a `short` list, and that list contains an item whose `p` is exactly
 * "ړ"; every other block is passed through untouched. The input array and
 * its blocks are not mutated.
 */
function ensureNoHangingR(b: T.Block[]): T.Block[] {
  return b.map((wrapped) => {
    const inner = wrapped.block;
    // only VB blocks carrying short forms can contain the lone "ړ"
    if (inner.type !== "VB" || !("short" in inner.ps)) {
      return wrapped;
    }
    const hangingR = inner.ps.short.find((form) => form.p === "ړ");
    if (!hangingR) {
      return wrapped;
    }
    // rebuild the block with the lone "ړ" dropped from the short forms
    return {
      ...wrapped,
      block: {
        ...inner,
        ps: {
          ...inner.ps,
          short: inner.ps.short.filter((form) => form.p !== "ړ"),
        },
      },
    };
  });
}
function swapEndingBlocks<X>(arr: X[], n: number = 1): X[] {
return [
...arr.slice(0, arr.length - (n + 1)),