fixed problem with لاړ

This commit is contained in:
adueck 2023-08-14 16:52:41 +04:00
parent 6b04f67720
commit f53c81e14d
13 changed files with 327 additions and 73 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{ {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "6.0.5", "version": "6.0.6",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "6.0.5", "version": "6.0.6",
"hasInstallScript": true, "hasInstallScript": true,
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {

View File

@ -1,6 +1,6 @@
{ {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "6.0.5", "version": "6.0.6",
"author": "lingdocs.com", "author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com", "homepage": "https://verbs.lingdocs.com",

View File

@ -1,12 +1,12 @@
{ {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "6.0.5", "version": "6.0.6",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "6.0.5", "version": "6.0.6",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@formkit/auto-animate": "^1.0.0-beta.3", "@formkit/auto-animate": "^1.0.0-beta.3",

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "6.0.5", "version": "6.0.6",
"description": "Pashto inflector library module with React components", "description": "Pashto inflector library module with React components",
"main": "dist/components/library.js", "main": "dist/components/library.js",
"module": "dist/components/library.js", "module": "dist/components/library.js",

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/inflect", "name": "@lingdocs/inflect",
"version": "6.0.5", "version": "6.0.6",
"description": "Pashto inflector library", "description": "Pashto inflector library",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/lib/library.d.ts", "types": "dist/lib/library.d.ts",

View File

@ -7,6 +7,7 @@
*/ */
import { import {
arraysHaveCommon,
parseEc, parseEc,
personFromVerbBlockPos, personFromVerbBlockPos,
} from "./misc-helpers"; } from "./misc-helpers";
@ -25,26 +26,106 @@ test("personFromVerbBlockPos should work", () => {
expect(personFromVerbBlockPos([3, 1])).toEqual(T.Person.SecondPlurFemale); expect(personFromVerbBlockPos([3, 1])).toEqual(T.Person.SecondPlurFemale);
expect(personFromVerbBlockPos([4, 1])).toEqual(T.Person.ThirdPlurMale); expect(personFromVerbBlockPos([4, 1])).toEqual(T.Person.ThirdPlurMale);
expect(personFromVerbBlockPos([5, 1])).toEqual(T.Person.ThirdPlurFemale); expect(personFromVerbBlockPos([5, 1])).toEqual(T.Person.ThirdPlurFemale);
}) });
test("parseEc should work", () => { test("parseEc should work", () => {
expect(parseEc("walk")).toEqual(["walk", "walks", "walking", "walked", "walked"]); expect(parseEc("walk")).toEqual([
expect(parseEc("scare")).toEqual(["scare", "scares", "scaring", "scared", "scared"]); "walk",
expect(parseEc("study")).toEqual(["study","studies","studying","studied","studied"]); "walks",
"walking",
"walked",
"walked",
]);
expect(parseEc("scare")).toEqual([
"scare",
"scares",
"scaring",
"scared",
"scared",
]);
expect(parseEc("study")).toEqual([
"study",
"studies",
"studying",
"studied",
"studied",
]);
expect(parseEc("cry")).toEqual(["cry", "cries", "crying", "cried", "cried"]); expect(parseEc("cry")).toEqual(["cry", "cries", "crying", "cried", "cried"]);
expect(parseEc("marry")).toEqual(["marry","marries","marrying","married","married"]); expect(parseEc("marry")).toEqual([
"marry",
"marries",
"marrying",
"married",
"married",
]);
expect(parseEc("get")).toEqual(["get", "gets", "getting", "got", "gotten"]); expect(parseEc("get")).toEqual(["get", "gets", "getting", "got", "gotten"]);
expect(parseEc("become")).toEqual(["become","becomes","becoming","became","become"]); expect(parseEc("become")).toEqual([
"become",
"becomes",
"becoming",
"became",
"become",
]);
expect(parseEc("be")).toEqual(["am", "is", "being", "was", "been"]); expect(parseEc("be")).toEqual(["am", "is", "being", "was", "been"]);
expect(parseEc("make")).toEqual(["make", "makes", "making", "made", "made"]); expect(parseEc("make")).toEqual(["make", "makes", "making", "made", "made"]);
expect(parseEc("have")).toEqual(["have", "has", "having", "had", "had"]); expect(parseEc("have")).toEqual(["have", "has", "having", "had", "had"]);
expect(parseEc("die")).toEqual(["die", "dies", "dying", "died", "died"]); expect(parseEc("die")).toEqual(["die", "dies", "dying", "died", "died"]);
expect(parseEc("stray")).toEqual(["stray","strays","straying","strayed","strayed"]); expect(parseEc("stray")).toEqual([
expect(parseEc("cross")).toEqual(["cross","crosses","crossing","crossed","crossed"]); "stray",
expect(parseEc("raise")).toEqual(["raise","raises","raising","raised","raised"]); "strays",
expect(parseEc("play")).toEqual(["play","plays","playing","played","played"]); "straying",
"strayed",
"strayed",
]);
expect(parseEc("cross")).toEqual([
"cross",
"crosses",
"crossing",
"crossed",
"crossed",
]);
expect(parseEc("raise")).toEqual([
"raise",
"raises",
"raising",
"raised",
"raised",
]);
expect(parseEc("play")).toEqual([
"play",
"plays",
"playing",
"played",
"played",
]);
// if there are only four items the perfect will be the same as the simple past // if there are only four items the perfect will be the same as the simple past
expect(parseEc("think,thinks,thinking,thought")).toEqual(["think","thinks","thinking","thought","thought"]); expect(parseEc("think,thinks,thinking,thought")).toEqual([
expect(parseEc("sew,sews,sewing,sewed,sown")).toEqual(["sew", "sews", "sewing", "sewed", "sown"]); "think",
expect(parseEc(" sew, sews,sewing ,sewed, sown")).toEqual(["sew", "sews", "sewing", "sewed", "sown"]); "thinks",
"thinking",
"thought",
"thought",
]);
expect(parseEc("sew,sews,sewing,sewed,sown")).toEqual([
"sew",
"sews",
"sewing",
"sewed",
"sown",
]);
expect(parseEc(" sew, sews,sewing ,sewed, sown")).toEqual([
"sew",
"sews",
"sewing",
"sewed",
"sown",
]);
});
test("arraysHaveCommon should work", () => {
expect(arraysHaveCommon(["a", "b", "c"], ["d"])).toBe(false);
expect(arraysHaveCommon(["a", "b", "c"], ["f", "b"])).toBe(true);
expect(arraysHaveCommon([], [23])).toBe(false);
expect(arraysHaveCommon([], [])).toBe(false);
expect(arraysHaveCommon([3, 2, 1, 0, 100], [24, 290, 2, 55, 100])).toBe(true);
}); });

View File

@ -346,3 +346,10 @@ export function chooseLength<N>(
export function isGivingVerb(v: T.VerbEntry): boolean { export function isGivingVerb(v: T.VerbEntry): boolean {
return ["raakawul", "darkawul", "warkawul"].includes(v.entry.g); return ["raakawul", "darkawul", "warkawul"].includes(v.entry.g);
} }
/**
 * Reports whether the two arrays share at least one element.
 *
 * Membership is tested with `Array.prototype.includes`, so elements are
 * compared by SameValueZero (primitives by value, objects by reference).
 *
 * @param a - the array whose elements are looked for
 * @param b - the array that is searched
 * @returns `true` if some element of `a` is also present in `b`; `false`
 * otherwise (including when either array is empty)
 */
export function arraysHaveCommon<X>(a: X[], b: X[]): boolean {
  const isInB = (candidate: X): boolean => b.includes(candidate);
  return a.some(isInB);
}

View File

@ -354,6 +354,22 @@ function ensure3rdPast(
if (isTlulVerb(verb)) { if (isTlulVerb(verb)) {
// should be imperfective at this point // should be imperfective at this point
// the perfective غی should already be covered in the function this is coming from // the perfective غی should already be covered in the function this is coming from
if (verb.entry.p === "تلل" && aspect === "perfective") {
return [
{
p: "ړ",
f: "R",
},
{
p: "ړه",
f: "Ru",
},
{
p: "ړو",
f: "Ro",
},
];
}
return [ return [
{ {
p: rs[0].p.slice(0, -1) + "ه", p: rs[0].p.slice(0, -1) + "ه",

View File

@ -1084,11 +1084,23 @@ export function mapPsString<T>(
* @returns * @returns
*/ */
export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore<T.PsString> { export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore<T.PsString> {
function cut(s: string) { const { p, f } = mapPsString(w, splitVarients);
return zipWith(makePsString, p, f) as T.ArrayOneOrMore<T.PsString>;
}
export function splitVarients(s: string) {
return s.split(/[,|،]/).map((s) => s.trim()); return s.split(/[,|،]/).map((s) => s.trim());
} }
const { p, f } = mapPsString(w, cut);
return zipWith(makePsString, p, f) as T.ArrayOneOrMore<T.PsString>; /**
* checks to see if a search string exists in a list of comma-separated variants
*/
export function isInVarients(
vars: string | false | undefined,
search: string | false | undefined
): boolean {
if (!vars || !search) return false;
return splitVarients(vars).includes(search);
} }
export function removeEndTick(w: T.PsString): T.PsString; export function removeEndTick(w: T.PsString): T.PsString;

View File

@ -3,6 +3,8 @@ import verbs from "../../../verbs";
import * as T from "../../../types"; import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
import { splitVarients } from "../p-text-helpers";
import { arraysHaveCommon } from "../misc-helpers";
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] { export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
const [key, value] = Object.entries(s)[0]; const [key, value] = Object.entries(s)[0];
@ -41,6 +43,7 @@ export function shouldCheckTpp(s: string): boolean {
export function verbLookup(input: string): T.VerbEntry[] { export function verbLookup(input: string): T.VerbEntry[] {
const s = input.slice(0, -1); const s = input.slice(0, -1);
// IMPORTANT TODO FOR EFFICIENCY!
// check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING // check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING
// if theres no legit verb ending and no tpp possibilities, just return an empty array // if theres no legit verb ending and no tpp possibilities, just return an empty array
const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined; const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
@ -57,14 +60,12 @@ export function verbLookup(input: string): T.VerbEntry[] {
entry.p entry.p
) || ) ||
[s, sWoutOo].includes(entry.p) || [s, sWoutOo].includes(entry.p) ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) ||
(entry.psp && [s, sWoutOo].includes(entry.psp)) || (entry.psp && [s, sWoutOo].includes(entry.psp)) ||
entry.prp === s || entry.prp === s ||
entry.ssp === s entry.ssp === s
: ({ entry }) => : ({ entry }) =>
entry.p.slice(0, -1) === s || entry.p.slice(0, -1) === s ||
entry.p === s.slice(0, -1) + "دل" || entry.p === s.slice(0, -1) + "دل" ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) ||
entry.p === s || entry.p === s ||
entry.psp === s || entry.psp === s ||
entry.prp === s || entry.prp === s ||
@ -79,7 +80,11 @@ export function verbLookup(input: string): T.VerbEntry[] {
[s, sWoutOo].includes(entry.p.slice(0, -3)) || [s, sWoutOo].includes(entry.p.slice(0, -3)) ||
[s, sWoutOo].includes(entry.p) || [s, sWoutOo].includes(entry.p) ||
(entry.psp && [s, sWoutOo].includes(entry.psp)) || (entry.psp && [s, sWoutOo].includes(entry.psp)) ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) || (entry.tppp &&
arraysHaveCommon(
[input, inputWoutOo],
splitVarients(entry.tppp)
)) ||
entry.prp === s || entry.prp === s ||
entry.ssp === s || entry.ssp === s ||
(entry.separationAtP && (entry.separationAtP &&
@ -90,7 +95,11 @@ export function verbLookup(input: string): T.VerbEntry[] {
// for short intransitive forms // for short intransitive forms
entry.p.slice(0, -3) === s || entry.p.slice(0, -3) === s ||
entry.p === s || entry.p === s ||
(checkTpp && [input, inputWoutOo].includes(entry.tppp)) || (entry.tppp &&
arraysHaveCommon(
[input, inputWoutOo],
splitVarients(entry.tppp)
)) ||
entry.psp === s || entry.psp === s ||
entry.prp === s || entry.prp === s ||
entry.ssp === s || entry.ssp === s ||

View File

@ -16,6 +16,9 @@ const prexodul = wordQuery("پرېښودل", "verb");
const xodul = wordQuery("ښودل", "verb"); const xodul = wordQuery("ښودل", "verb");
const kexodul = wordQuery("کېښودل", "verb"); const kexodul = wordQuery("کېښودل", "verb");
const katul = wordQuery("کتل", "verb"); const katul = wordQuery("کتل", "verb");
const tlul = wordQuery("تلل", "verb");
// todo alwatul waalwatul akhistul azmoyul etc
const tests: { const tests: {
label: string; label: string;
@ -384,7 +387,7 @@ const tests: {
{ {
ph: undefined, ph: undefined,
root: { root: {
persons: [T.Person.ThirdSingFemale], persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"], aspects: ["imperfective", "perfective"],
}, },
verb: leedul, verb: leedul,
@ -397,7 +400,7 @@ const tests: {
{ {
ph: "و", ph: "و",
root: { root: {
persons: [T.Person.ThirdSingFemale], persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale],
aspects: ["perfective"], aspects: ["perfective"],
}, },
verb: leedul, verb: leedul,
@ -487,20 +490,58 @@ const tests: {
}, },
], ],
}, },
// TODO: should do کاته as well... what to do, have multiple tpp forms ? YES! {
// { input: "وخوړ",
// input: "وکوت", output: [
// output: [ {
// { ph: "و",
// ph: "و", root: {
// root: { persons: [T.Person.ThirdSingMale],
// persons: [T.Person.ThirdSingMale], aspects: ["perfective"],
// aspects: ["perfective"], },
// }, verb: khorul,
// verb: katul, },
// }, ],
// ], },
// }, {
input: "کوت",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{
input: "کاته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{
input: "وکاته",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: katul,
},
],
},
], ],
}, },
{ {
@ -577,7 +618,7 @@ const tests: {
{ {
ph: "پرې", ph: "پرې",
root: { root: {
persons: [T.Person.ThirdSingFemale], persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"], aspects: ["imperfective", "perfective"],
}, },
verb: prexodul, verb: prexodul,
@ -615,6 +656,24 @@ const tests: {
}, },
], ],
}, },
{
label: "verbs with abrupt 3rd pers sing past endings",
cases: [
// {
// input: "لاړ",
// output: [
// {
// ph: undefined,
// root: {
// persons: [T.Person.ThirdSingMale],
// aspects: ["perfective"],
// },
// verb: tlul,
// },
// ],
// },
],
},
]; ];
tests.forEach(({ label, cases }) => { tests.forEach(({ label, cases }) => {

View File

@ -1,4 +1,5 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { isInVarients } from "../p-text-helpers";
// third persion idosyncratic // third persion idosyncratic
// if it ends in a dental or ه - look for tttp // if it ends in a dental or ه - look for tttp
@ -280,16 +281,47 @@ function matchVerbs(
}); });
}); });
} }
const hamzaEnd = s.endsWith("ه");
const tppMatches = { const tppMatches = {
imperfective: entries.filter( imperfective: entries.filter(
({ entry: e }) => !e.c.includes("comp") && s === e.tppp ({ entry: e }) =>
!e.c.includes("comp") &&
(isInVarients(e.tppp, s) || (hamzaEnd && base === e.p.slice(0, -1)))
), ),
perfective: entries.reduce< perfective: entries.reduce<
{ ph: string | undefined; entry: T.VerbEntry }[] { ph: string | undefined; entry: T.VerbEntry }[]
>((acc, entry) => { >((acc, entry) => {
const e = entry.entry; const e = entry.entry;
const sNoOo = s.startsWith("و") && s.slice(1); if (e.c.includes("comp")) {
if (sNoOo && sNoOo === e.tppp) { return acc;
}
if (e.separationAtP && hamzaEnd) {
const b = e.prp || e.p;
const bHead = b.slice(0, e.separationAtP);
const bRest = b.slice(e.separationAtP);
// this is REPETITIVE from above ... but doing it again here because the ه will only match on the SHORT versions for 3rd pers masc sing
// could modify and reuse the code above for this
if (base === b.slice(0, -1)) {
return [
...acc,
{
ph: bHead,
entry,
},
];
}
if (base === bRest.slice(0, -1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
} else if (!e.prp && hamzaEnd) {
const baseNoOo = base.startsWith("و") && base.slice(1);
if (baseNoOo && baseNoOo === e.p.slice(0, -1)) {
return [ return [
...acc, ...acc,
{ {
@ -297,7 +329,26 @@ function matchVerbs(
entry, entry,
}, },
]; ];
} else if (s === e.tppp) { } else if (base === e.p.slice(0, -1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
}
const sNoOo = s.startsWith("و") && s.slice(1);
if (isInVarients(e.tppp, sNoOo)) {
return [
...acc,
{
ph: "و",
entry,
},
];
} else if (isInVarients(e.tppp, s)) {
return [ return [
...acc, ...acc,
{ {

View File

@ -46,7 +46,6 @@ export function renderVP(VP: T.VPSelectionComplete): T.VPRendered {
king, king,
complementPerson, complementPerson,
}); });
// TODO: for dynamic -
const { vbs, hasBa } = renderVerb({ const { vbs, hasBa } = renderVerb({
verb: verb:
VP.verb.isCompound === "generative stative" VP.verb.isCompound === "generative stative"
@ -164,7 +163,8 @@ export function insertNegative(
} }
if (nonStandPerfectiveSplit) { if (nonStandPerfectiveSplit) {
return [ return [
insertFromEnd(blocksNoAccentA, neg, 1), // special case to handle نه لاړ (can't say لا نه ړ)
insertFromEnd(ensureNoHangingR(blocksNoAccentA), neg, 1),
insertFromEnd(blocksNoAccentA, neg, 2), insertFromEnd(blocksNoAccentA, neg, 2),
]; ];
} else { } else {
@ -172,6 +172,25 @@ export function insertNegative(
} }
} }
/**
 * Removes the bare "ړ" form from the short variants of verb blocks.
 *
 * Every block is passed through unchanged unless it is a "VB" block whose
 * `ps` has a `short` list containing an item with `p === "ړ"`; in that case
 * a copy of the block is returned with those items filtered out of `short`.
 * The input array and its blocks are not mutated.
 *
 * @param b - the blocks to process
 * @returns a new array of blocks with no "ړ" entry in any VB `short` list
 */
function ensureNoHangingR(b: T.Block[]): T.Block[] {
  const hangingR = "ړ";
  return b.map((blk) => {
    if (
      blk.block.type === "VB" &&
      "short" in blk.block.ps &&
      blk.block.ps.short.some((form) => form.p === hangingR)
    ) {
      // rebuild only the path down to `short`; everything else is copied as-is
      return {
        ...blk,
        block: {
          ...blk.block,
          ps: {
            ...blk.block.ps,
            short: blk.block.ps.short.filter((form) => form.p !== hangingR),
          },
        },
      };
    }
    return blk;
  });
}
function swapEndingBlocks<X>(arr: X[], n: number = 1): X[] { function swapEndingBlocks<X>(arr: X[], n: number = 1): X[] {
return [ return [
...arr.slice(0, arr.length - (n + 1)), ...arr.slice(0, arr.length - (n + 1)),