more work on basic verb parsing

This commit is contained in:
adueck 2023-08-16 11:48:48 +04:00
parent f53c81e14d
commit b384771db5
12 changed files with 1995 additions and 140 deletions

View File

@ -28,7 +28,7 @@
"react-bootstrap": "^1.5.1", "react-bootstrap": "^1.5.1",
"react-dom": "^17.0.1", "react-dom": "^17.0.1",
"react-scripts": "4.0.3", "react-scripts": "4.0.3",
"typescript": "^4.2.3", "typescript": "^5.1.6",
"web-vitals": "^1.0.1" "web-vitals": "^1.0.1"
}, },
"scripts": { "scripts": {

View File

@ -471,7 +471,7 @@ function getPassiveRs(
} }
// TODO: This is a nasty and messy way to do it with the length options included // TODO: This is a nasty and messy way to do it with the length options included
function getPerfectiveHead( export function getPerfectiveHead(
base: T.PsString, base: T.PsString,
v: T.VerbEntryNoFVars v: T.VerbEntryNoFVars
): [T.PH, T.PsString] | [undefined, T.PsString] { ): [T.PH, T.PsString] | [undefined, T.PsString] {

View File

@ -25,6 +25,9 @@ import {
splitPsByVarients, splitPsByVarients,
endsWith, endsWith,
trimOffPs, trimOffPs,
undoAaXuPattern,
prevValNotA,
lastVowelNotA,
} from "./p-text-helpers"; } from "./p-text-helpers";
import * as T from "../../types"; import * as T from "../../types";
import { pastEndings } from "./grammar-units"; import { pastEndings } from "./grammar-units";
@ -1665,3 +1668,17 @@ test("endsWith", () => {
); );
expect(endsWith({ f: ["d", "D"] })({ p: "چت", f: "chat" })).toBe(false); expect(endsWith({ f: ["d", "D"] })({ p: "چت", f: "chat" })).toBe(false);
}); });
// Table-driven check of undoAaXuPattern: each row is [input, expected result].
test("undoAaXuPattern", () => {
  const cases: [string, string | false][] = [
    ["تور", false],
    ["پښتان", false],
    ["کاوه", "کو"],
    ["وواته", "ووت"],
    ["واسته", "وست"],
    ["لیده", false],
  ];
  for (const [input, expected] of cases) {
    expect(undoAaXuPattern(input)).toBe(expected);
  }
});
// Table-driven check of lastVowelNotA: each row is [input, expected result].
test("lastVowelNotA", () => {
  const cases: [string, boolean][] = [
    ["raat", true],
    ["oowat", false],
  ];
  for (const [input, expected] of cases) {
    expect(lastVowelNotA(input)).toBe(expected);
  }
});

View File

@ -1224,3 +1224,29 @@ export function getShort<U extends object>(a: T.SingleOrLengthOpts<U>): U {
export function capitalizeFirstLetter(string: string) { export function capitalizeFirstLetter(string: string) {
return string.charAt(0).toUpperCase() + string.slice(1); return string.charAt(0).toUpperCase() + string.slice(1);
} }
/**
 * Tries to undo the "ا ... ه" ending pattern on a word written in Pashto script.
 *
 * If `p` ends in ه and the vowel letter (ی, ې or ا) closest to that ending is ا,
 * the result is `p` with that ا and the final ه removed, e.g. "کاوه" gives "کو".
 *
 * @param p - the word in Pashto script
 * @returns the word with the pattern undone, or `false` when `p` does not end
 * in ه, contains none of the vowel letters, or its last vowel letter is not ا
 */
export function undoAaXuPattern(p: string): false | string {
  if (!p.endsWith("ه")) {
    return false;
  }
  // Only the vowel letter nearest the ending is relevant. Looking from the end
  // keeps an earlier ا (as in "الواته") from being removed by mistake.
  const prevVowel = Math.max(
    p.lastIndexOf("ی"),
    p.lastIndexOf("ې"),
    p.lastIndexOf("ا")
  );
  if (prevVowel === -1 || p[prevVowel] !== "ا") {
    return false;
  }
  // drop the ا at prevVowel as well as the trailing ه
  return p.slice(0, prevVowel) + p.slice(prevVowel + 1, -1);
}
/**
 * Reports whether the last vowel token found in `g` is something other than a
 * plain "a".
 *
 * Tokens are found with the alternation below. Because `o` is listed before
 * `oo`, a written "oo" is picked up as two separate "o" tokens; that does not
 * change the answer, since only "is the final token exactly a" is tested.
 *
 * @param g - romanized text to scan
 * @returns `true` when no vowel token is found or the last one is not "a";
 * `false` when the last one is "a"
 */
export function lastVowelNotA(g: string): boolean {
  const vowels = g.match(/ee|aa|i|u|o|oo|U|e|a/g) ?? [];
  if (vowels.length === 0) {
    return true;
  }
  const [finalVowel] = vowels.slice(-1);
  return finalVowel !== "a";
}

View File

@ -0,0 +1,554 @@
// This file consists solely of this unexported constant: 16 pairs of
// [null, verb block]. Every block is a perfective-stem "VB" for one of four
// dictionary entries, repeated for person codes 4, 5, 10 and 11.
// NOTE(review): looks like a captured dump of parser output kept for reference — confirm it is still needed.
const b = [
  {
    c: "v. intrans.",
    e: "to come",
    ec: "come,comes,coming,came,come",
    f: "raatlúl",
    g: "raatlul",
    noOo: true,
    p: "راتلل",
    pprtf: "raaghúlay",
    pprtp: "راغلی",
    prf: "ráaghlul",
    prp: "راغلل",
    psf: "raadz",
    psp: "راځ",
    r: 4,
    separationAtF: 3,
    separationAtP: 2,
    ssf: "ráash",
    ssp: "راش",
    tppf: "ráaghay",
    tppp: "راغی",
    ts: 1527815216,
  },
  {
    c: "v. intrans.",
    e: "to become _____",
    ec: "become",
    f: "kedul",
    g: "kedul",
    noOo: true,
    p: "کېدل",
    pprtf: "shúway",
    pprtp: "شوی",
    prf: "shwul",
    prp: "شول",
    r: 2,
    ssf: "sh",
    ssp: "ش",
    ts: 1581086654898,
  },
  {
    c: "v. intrans.",
    diacExcept: true,
    e: "to happen, occur",
    ec: "happen",
    f: "kedul",
    g: "kedul",
    p: "کېدل",
    pprtf: "shúway",
    pprtp: "شوی",
    prf: "óoshwul",
    prp: "وشول",
    r: 2,
    separationAtF: 2,
    separationAtP: 1,
    ssf: "óosh",
    ssp: "وش",
    ts: 1527812754,
  },
  {
    c: "v. intrans.",
    e: "to come / go over to (third person or place)",
    ec: "come,comes,coming,came,come",
    f: "wărtlul",
    g: "wartlul",
    noOo: true,
    p: "ورتلل",
    pprtf: "wărghúlay",
    pprtp: "ورغلی",
    prf: "wárghlul",
    prp: "ورغلل",
    psf: "wărdz",
    psp: "ورځ",
    r: 4,
    separationAtF: 3,
    separationAtP: 2,
    ssf: "wársh",
    ssp: "ورش",
    tppf: "wărghay",
    tppp: "ورغی",
    ts: 1585228579997,
  },
].flatMap((entry) =>
  // one pair per person code, in the order 4, 5, 10, 11
  [4, 5, 10, 11].map((person) => [
    null,
    {
      info: {
        aspect: "perfective",
        base: "stem",
        type: "verb",
        // fresh copy so no two elements share the same entry object
        verb: { entry: { ...entry } },
      },
      person,
      type: "VB",
    },
  ])
);

View File

@ -0,0 +1,193 @@
import * as T from "../../../types";
/**
 * Hard-coded verb entry for راتلل (raatlúl), glossed "to come".
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const raatlul = {
entry: {
ts: 1527815216,
i: 6926,
p: "راتلل",
f: "raatlúl",
g: "raatlul",
e: "to come",
r: 4,
c: "v. intrans.",
psp: "راځ",
psf: "raadz",
ssp: "راش",
ssf: "ráash",
prp: "راغلل",
prf: "ráaghlul",
pprtp: "راغلی",
pprtf: "raaghúlay",
tppp: "راغی",
tppf: "ráaghay",
noOo: true,
separationAtP: 2,
separationAtF: 3,
ec: "come,comes,coming,came,come",
},
} as T.VerbEntry;
/**
 * Hard-coded verb entry for تلل (tlul), glossed "to go".
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const tlul = {
entry: {
ts: 1527815348,
i: 3820,
p: "تلل",
f: "tlul",
g: "tlul",
e: "to go",
r: 4,
c: "v. intrans.",
psp: "ځ",
psf: "dz",
ssp: "لاړ ش",
ssf: "láaR sh",
prp: "لاړل",
prf: "láaRul",
tppp: "لاړ",
tppf: "laaR",
separationAtP: 2,
separationAtF: 3,
ec: "go,goes,going,went,gone",
},
} as T.VerbEntry;
/**
 * Hard-coded verb entry for ورتلل (wărtlul), glossed
 * "to come / go over to (third person or place)".
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const wartlul = {
entry: {
ts: 1585228579997,
i: 14924,
p: "ورتلل",
f: "wărtlul",
g: "wartlul",
e: "to come / go over to (third person or place)",
r: 4,
c: "v. intrans.",
psp: "ورځ",
psf: "wărdz",
ssp: "ورش",
ssf: "wársh",
prp: "ورغلل",
prf: "wárghlul",
pprtp: "ورغلی",
pprtf: "wărghúlay",
tppp: "ورغی",
tppf: "wărghay",
noOo: true,
separationAtP: 2,
separationAtF: 3,
ec: "come,comes,coming,came,come",
},
} as T.VerbEntry;
/**
 * Hard-coded verb entry for درتلل (dărtlul), glossed
 * "to come (to you / second person)".
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const dartlul = {
entry: {
ts: 1585228551150,
i: 6320,
p: "درتلل",
f: "dărtlul",
g: "dartlul",
e: "to come (to you / second person)",
// NOTE(review): r is 3 here but 4 on the sibling raatlul / wartlul entries — confirm this is intended
r: 3,
c: "v. intrans.",
psp: "درځ",
psf: "dărdz",
ssp: "درش",
ssf: "dársh",
prp: "درغلل",
prf: "dárghlul",
pprtp: "درغلی",
pprtf: "dărghúlay",
tppp: "درغی",
tppf: "dărghay",
noOo: true,
separationAtP: 2,
separationAtF: 3,
ec: "come,comes,coming,came,come",
},
} as T.VerbEntry;
/**
 * Hard-coded verb entry for کېدل (kedul) in the sense "to become _____".
 * It shares its `p`/`f`/`g` spelling with `kedulDyn` but has different
 * `ssp`/`prp` forms (no leading و) and sets `noOo`.
 * NOTE(review): the "Stat" suffix presumably means the stative sense — confirm.
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const kedulStat = {
entry: {
ts: 1581086654898,
i: 11100,
p: "کېدل",
f: "kedul",
g: "kedul",
e: "to become _____",
r: 2,
c: "v. intrans.",
ssp: "ش",
ssf: "sh",
prp: "شول",
prf: "shwul",
pprtp: "شوی",
pprtf: "shúway",
noOo: true,
ec: "become",
},
} as T.VerbEntry;
/**
 * Hard-coded verb entry for کېدل (kedul) in the sense "to happen, occur".
 * It shares its `p`/`f`/`g` spelling with `kedulStat` but its `ssp`/`prp`
 * forms start with و and it carries separation points.
 * NOTE(review): the "Dyn" suffix presumably means the dynamic sense — confirm.
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const kedulDyn = {
entry: {
ts: 1527812754,
i: 11101,
p: "کېدل",
f: "kedul",
g: "kedul",
e: "to happen, occur",
r: 2,
c: "v. intrans.",
ssp: "وش",
ssf: "óosh",
prp: "وشول",
prf: "óoshwul",
pprtp: "شوی",
pprtf: "shúway",
diacExcept: true,
ec: "happen",
separationAtP: 1,
separationAtF: 2,
},
} as T.VerbEntry;
/**
 * Hard-coded verb entry for کول (kawul) in the sense "to make ____ ____".
 * It shares its `p`/`f`/`g` spelling with `kawulDyn` but has different
 * `ssp`/`prp` forms (no leading و) and sets `noOo`.
 * NOTE(review): the "Stat" suffix presumably means the stative sense — confirm.
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const kawulStat = {
entry: {
ts: 1579015359582,
i: 11112,
p: "کول",
f: "kawul",
g: "kawul",
e: 'to make ____ ____ (as in "He\'s making me angry.")',
r: 4,
c: "v. trans.",
ssp: "کړ",
ssf: "kR",
prp: "کړل",
prf: "kRul",
pprtp: "کړی",
pprtf: "kúRay",
noOo: true,
ec: "make,makes,making,made,made",
},
} as T.VerbEntry;
/**
 * Hard-coded verb entry for کول (kawul) in the sense
 * "to do (an action or activity)".
 * It shares its `p`/`f`/`g` spelling with `kawulStat` but its `ssp`/`prp`
 * forms start with و and it carries separation points.
 * NOTE(review): the "Dyn" suffix presumably means the dynamic sense — confirm.
 * The literal is asserted to `T.VerbEntry` with `as`.
 */
export const kawulDyn = {
entry: {
ts: 1527812752,
i: 11113,
p: "کول",
f: "kawul",
g: "kawul",
e: "to do (an action or activity)",
r: 4,
c: "v. trans./gramm. trans.",
ssp: "وکړ",
ssf: "óokR",
prp: "وکړل",
prf: "óokRul",
pprtp: "کړی",
pprtf: "kúRay",
separationAtP: 1,
separationAtF: 2,
diacExcept: true,
ec: "do,does,doing,did,done",
},
} as T.VerbEntry;

View File

@ -3,7 +3,7 @@ import verbs from "../../../verbs";
import * as T from "../../../types"; import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
import { splitVarients } from "../p-text-helpers"; import { splitVarients, undoAaXuPattern } from "../p-text-helpers";
import { arraysHaveCommon } from "../misc-helpers"; import { arraysHaveCommon } from "../misc-helpers";
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] { export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
@ -48,64 +48,93 @@ export function verbLookup(input: string): T.VerbEntry[] {
// if theres no legit verb ending and no tpp possibilities, just return an empty array // if theres no legit verb ending and no tpp possibilities, just return an empty array
const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined; const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
const checkTpp = shouldCheckTpp(input); const checkTpp = shouldCheckTpp(input);
const fromAawu = checkTpp && undoAaXuPattern(input);
const inputWoutOo = const inputWoutOo =
checkTpp && input.startsWith("و") ? input.slice(1) : undefined; checkTpp && input.startsWith("و") ? input.slice(1) : undefined;
// TODO: don't do this blindly, but check if it could actually be added
const sAddedAa = "ا" + s;
// for لواته -> to search for tpp الواته
const inputAddedAa = "ا" + input;
// TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp) // TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp)
if (s.endsWith("ېږ")) { if (s.endsWith("ېږ")) {
return verbs.filter( return verbs.filter(
sWoutOo sWoutOo
? ({ entry }) => ? ({ entry }) =>
[s, sWoutOo].includes(entry.p.slice(0, -1)) || [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
[s.slice(0, -1) + "دل", sWoutOo.slice(0, -1) + "دل"].includes( [
entry.p s.slice(0, -1) + "دل",
) || sWoutOo.slice(0, -1) + "دل",
[s, sWoutOo].includes(entry.p) || sAddedAa.slice(0, -1) + "دل",
(entry.psp && [s, sWoutOo].includes(entry.psp)) || ].includes(entry.p) ||
[s, sWoutOo, sAddedAa].includes(entry.p) ||
(entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
entry.prp === s || entry.prp === s ||
entry.ssp === s entry.ssp === s
: ({ entry }) => : ({ entry }) =>
entry.p.slice(0, -1) === s || [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
entry.p === s.slice(0, -1) + "دل" || [s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
entry.p === s || entry.p
entry.psp === s || ) ||
entry.prp === s || [s, sAddedAa].includes(entry.p) ||
entry.ssp === s [s, sAddedAa].includes(entry.psp || "") ||
[s, sAddedAa].includes(entry.prp || "") ||
[s, sAddedAa].includes(entry.ssp || "")
); );
} }
return verbs.filter( return verbs.filter(
sWoutOo sWoutOo
? ({ entry }) => ? ({ entry }) =>
[s, sWoutOo].includes(entry.p.slice(0, -1)) || [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
// for short intransitive forms // for short intransitive forms
[s, sWoutOo].includes(entry.p.slice(0, -3)) || [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -3)) ||
[s, sWoutOo].includes(entry.p) || [s, sWoutOo, sAddedAa].includes(entry.p) ||
(entry.psp && [s, sWoutOo].includes(entry.psp)) || (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
(checkTpp &&
[
input.slice(1),
fromAawu && fromAawu.slice(-1),
inputAddedAa,
].includes(entry.p.slice(0, -1))) ||
(entry.tppp && (entry.tppp &&
arraysHaveCommon( arraysHaveCommon(
[input, inputWoutOo], [input, inputWoutOo, sAddedAa],
splitVarients(entry.tppp) splitVarients(entry.tppp)
)) || )) ||
entry.prp === s || arraysHaveCommon(
entry.ssp === s || [s, sAddedAa, "و" + s],
[entry.prp, entry.prp?.slice(0, -1)]
) ||
[s, sAddedAa].includes(entry.ssp || "") ||
(entry.separationAtP && (entry.separationAtP &&
(entry.p.slice(entry.separationAtP) === s || (entry.p.slice(entry.separationAtP) === s ||
entry.psp?.slice(entry.separationAtP) === s)) entry.psp?.slice(entry.separationAtP) === s))
: ({ entry }) => : ({ entry }) =>
entry.p.slice(0, -1) === s || [s, sAddedAa].includes(entry.p.slice(0, -1)) ||
// for short intransitive forms // for short intransitive forms
entry.p.slice(0, -3) === s || [s, sAddedAa].includes(entry.p.slice(0, -3)) ||
entry.p === s || [s, sAddedAa].includes(entry.p) ||
(checkTpp &&
[input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) ||
(entry.tppp && (entry.tppp &&
arraysHaveCommon( arraysHaveCommon(
[input, inputWoutOo], [input, inputWoutOo, sAddedAa, inputAddedAa],
splitVarients(entry.tppp) splitVarients(entry.tppp)
)) || )) ||
entry.psp === s || [s, sAddedAa].includes(entry.psp || "") ||
entry.prp === s || arraysHaveCommon(
entry.ssp === s || [entry.prp, entry.prp?.slice(0, -1)],
[s, sAddedAa, "و" + s]
) ||
[s, sAddedAa, "و" + s].includes(entry.ssp || "") ||
(entry.separationAtP && (entry.separationAtP &&
(entry.p.slice(entry.separationAtP) === s || (entry.p.slice(entry.separationAtP) === s ||
entry.psp?.slice(entry.separationAtP) === s)) entry.psp?.slice(entry.separationAtP) === s ||
(entry.prp &&
[
entry.prp.slice(entry.separationAtP),
entry.prp.slice(entry.separationAtP).slice(0, -1),
].includes(s)) ||
(entry.ssp && entry.ssp.slice(entry.separationAtP) === s)))
); );
} }

View File

@ -3,6 +3,8 @@ import { verbLookup } from "./lookup";
import { parseNP } from "./parse-np"; import { parseNP } from "./parse-np";
import { parseVP } from "./parse-vp"; import { parseVP } from "./parse-vp";
// شو should not be sheyaano !!
export function parsePhrase( export function parsePhrase(
s: T.Token[], s: T.Token[],
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[] lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]

View File

@ -1,5 +1,13 @@
/* eslint-disable jest/valid-title */ /* eslint-disable jest/valid-title */
import * as T from "../../../types"; import * as T from "../../../types";
import {
dartlul,
kedulDyn,
kedulStat,
tlul,
wartlul,
raatlul,
} from "./irreg-verbs";
import { verbLookup, wordQuery } from "./lookup"; import { verbLookup, wordQuery } from "./lookup";
import { parseVerb } from "./parse-verb"; import { parseVerb } from "./parse-verb";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
@ -16,7 +24,11 @@ const prexodul = wordQuery("پرېښودل", "verb");
const xodul = wordQuery("ښودل", "verb"); const xodul = wordQuery("ښودل", "verb");
const kexodul = wordQuery("کېښودل", "verb"); const kexodul = wordQuery("کېښودل", "verb");
const katul = wordQuery("کتل", "verb"); const katul = wordQuery("کتل", "verb");
const tlul = wordQuery("تلل", "verb"); const watul = wordQuery("وتل", "verb");
const wurul = wordQuery("وړل", "verb");
const akheestul = wordQuery("اخیستل", "verb");
const alwatul = wordQuery("الوتل", "verb");
// const dartlul = wordQuery("درتلل", "verb");
// todo alwatul waalwatul akhistul azmoyul etc // todo alwatul waalwatul akhistul azmoyul etc
@ -115,13 +127,34 @@ const tests: {
{ {
ph: undefined, ph: undefined,
root: { root: {
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], persons: [
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
T.Person.ThirdSingMale,
],
aspects: ["imperfective", "perfective"], aspects: ["imperfective", "perfective"],
}, },
verb: leekul, verb: leekul,
}, },
], ],
}, },
{
input: "ولیکلو",
output: [
{
ph: "و",
root: {
persons: [
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
T.Person.ThirdSingMale,
],
aspects: ["perfective"],
},
verb: leekul,
},
],
},
{ {
input: "لیکل", input: "لیکل",
output: [ output: [
@ -542,6 +575,123 @@ const tests: {
}, },
], ],
}, },
{
input: "واخلم",
output: [
{
ph: "وا",
stem: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: akheestul,
},
],
},
{
input: "خلم",
output: [
{
ph: undefined,
stem: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: akheestul,
},
],
},
{
input: "اخیستم",
output: [
{
ph: undefined,
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["imperfective", "perfective"],
},
verb: akheestul,
},
],
},
{
input: "واخیستم",
output: [
{
ph: "وا",
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: akheestul,
},
],
},
{
input: "واخیستلم",
output: [
{
ph: "وا",
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: akheestul,
},
],
},
{
input: "خیستلم",
output: [
{
ph: undefined,
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: akheestul,
},
],
},
{
input: "الوځې",
output: [
{
ph: undefined,
stem: {
persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
aspects: ["imperfective", "perfective"],
},
verb: alwatul,
},
],
},
{
input: "والوځې",
output: [
{
ph: "وا",
stem: {
persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
aspects: ["perfective"],
},
verb: alwatul,
},
],
},
{
input: "لوځې",
output: [
{
ph: undefined,
stem: {
persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
aspects: ["perfective"],
},
verb: alwatul,
},
],
},
], ],
}, },
{ {
@ -654,16 +804,202 @@ const tests: {
}, },
], ],
}, },
{
input: "لاړلم",
output: [
{
ph: "لا",
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: tlul,
},
], ],
}, },
{ {
label: "verbs with abrupt 3rd pers sing past endings", input: "لاړم",
cases: [ output: [
{
ph: "لا",
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: tlul,
},
],
},
{
input: "لاړو",
output: [
{
ph: "لا",
root: {
persons: [
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
T.Person.ThirdSingMale,
],
aspects: ["perfective"],
},
verb: tlul,
},
],
},
{
input: "لاړه",
output: [
{
ph: "لا",
root: {
persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: tlul,
},
],
},
{
input: "ړلم",
output: [
{
ph: undefined,
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: tlul,
},
{
ph: undefined,
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: wurul,
},
],
},
{
input: "ړم",
output: [
{
ph: undefined,
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: tlul,
},
{
ph: undefined,
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: wurul,
},
],
},
{
input: "والووت",
output: [
{
ph: "وا",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: alwatul,
},
],
},
{
input: "والواته",
output: [
{
ph: "وا",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: alwatul,
},
],
},
{
input: "لواته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: alwatul,
},
],
},
{
input: "راشې",
output: [
{
ph: "را",
stem: {
persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
aspects: ["perfective"],
},
verb: raatlul,
},
],
},
{
input: "ورشې",
output: [
{
ph: "ور",
stem: {
persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
aspects: ["perfective"],
},
verb: wartlul,
},
],
},
{
input: "یوسم",
output: [
{
ph: "یو",
stem: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: wurul,
},
],
},
{
input: "سم",
output: [
{
ph: undefined,
stem: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: wurul,
},
],
},
// TOOD: would be more robust if it looked for this without excetions // same as واخیست etc
// { // {
// input: "لاړ", // input: "لاړ",
// output: [ // output: [
// { // {
// ph: undefined, // ph: "لا",
// root: { // root: {
// persons: [T.Person.ThirdSingMale], // persons: [T.Person.ThirdSingMale],
// aspects: ["perfective"], // aspects: ["perfective"],
@ -674,8 +1010,456 @@ const tests: {
// }, // },
], ],
}, },
{
label: "verbs with different 3rd pers sing past endings",
cases: [
{
input: "رسېد",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: rasedul,
},
],
},
{
input: "ورسېد",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: rasedul,
},
],
},
{
input: "کېناسته",
output: [
{
ph: "کې",
root: {
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["imperfective", "perfective"],
},
verb: kenaastul,
},
],
},
{
input: "کېناست",
output: [
{
ph: "کې",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: kenaastul,
},
],
},
{
input: "کېناستو",
output: [
{
ph: "کې",
root: {
persons: [
T.Person.ThirdSingMale,
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
],
aspects: ["imperfective", "perfective"],
},
verb: kenaastul,
},
],
},
{
input: "ووتلو",
output: [
{
ph: "و",
root: {
persons: [
T.Person.ThirdSingMale,
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
],
aspects: ["perfective"],
},
verb: watul,
},
],
},
{
input: "ووتو",
output: [
{
ph: "و",
root: {
persons: [
T.Person.ThirdSingMale,
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
],
aspects: ["perfective"],
},
verb: watul,
},
],
},
{
input: "ووته",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: watul,
},
],
},
{
input: "واته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: watul,
},
],
},
{
input: "ووت",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: watul,
},
],
},
{
input: "وووت",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: watul,
},
],
},
{
input: "ورسېد",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: rasedul,
},
],
},
],
},
{
label: "irregular verbs",
cases: [
{
input: "ته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective"],
},
verb: tlul,
},
],
},
{
input: "راته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective"],
},
verb: raatlul,
},
],
},
{
input: "ورته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective"],
},
verb: wartlul,
},
],
},
{
input: "درته",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective"],
},
verb: dartlul,
},
],
},
{
input: "شو",
output: [
{
ph: undefined,
root: {
persons: [
T.Person.ThirdSingMale,
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
],
aspects: ["perfective"],
},
stem: {
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
aspects: ["perfective"],
},
verb: kedulStat,
},
{
ph: undefined,
root: {
persons: [
T.Person.ThirdSingMale,
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
],
aspects: ["perfective"],
},
stem: {
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
aspects: ["perfective"],
},
verb: kedulDyn,
},
],
},
{
input: "شوله",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulStat,
},
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulDyn,
},
],
},
{
input: "شوه",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulStat,
},
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulDyn,
},
],
},
{
input: "شوله",
output: [
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulStat,
},
{
ph: undefined,
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulDyn,
},
],
},
{
input: "وشوله",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulDyn,
},
],
},
{
input: "وشوه",
output: [
{
ph: "و",
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kedulDyn,
},
],
},
{
input: "وشي",
output: [
{
ph: "و",
stem: {
persons: [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
],
aspects: ["perfective"],
},
verb: kedulDyn,
},
],
},
// TODO: It would probably be more efficient just to return the kedul verb options
// and then when we put things together with the perfective head parsed they could
// become raatlul etc...
{
input: "شي",
output: [
{
ph: undefined,
stem: {
persons: [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
],
aspects: ["perfective"],
},
verb: kedulDyn,
},
{
ph: undefined,
stem: {
persons: [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
],
aspects: ["perfective"],
},
verb: kedulStat,
},
{
ph: undefined,
stem: {
persons: [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
],
aspects: ["perfective"],
},
verb: raatlul,
},
{
ph: undefined,
stem: {
persons: [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
],
aspects: ["perfective"],
},
verb: dartlul,
},
{
ph: undefined,
stem: {
persons: [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
],
aspects: ["perfective"],
},
verb: wartlul,
},
],
},
],
},
]; ];
// Also do سي yo see
tests.forEach(({ label, cases }) => { tests.forEach(({ label, cases }) => {
test(label, () => { test(label, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
@ -718,7 +1502,11 @@ tests.forEach(({ label, cases }) => {
), ),
]; ];
}, []); }, []);
expect(vbs).toIncludeSameMembers(madeVbsS); expect(removeIs(vbs)).toIncludeSameMembers(removeIs(madeVbsS));
}); });
}); });
}); });
/**
 * Returns a deep copy of `a`, produced by a JSON round trip, in which every
 * property whose key is exactly "i" has been dropped at any depth.
 * As with any JSON round trip, properties holding `undefined` are dropped too.
 *
 * @param a - any JSON-serializable value
 * @returns the copy without "i" properties
 */
function removeIs(a: any): any {
  // returning undefined from a replacer omits that property from the output
  const dropIKey = (key: string, value: unknown): unknown =>
    key === "i" ? undefined : value;
  const serialized = JSON.stringify(a, dropIKey);
  return JSON.parse(serialized);
}

View File

@ -1,27 +1,16 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { isInVarients } from "../p-text-helpers"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
import { isInVarients, lastVowelNotA } from "../p-text-helpers";
import {
dartlul,
kedulDyn,
kedulStat,
raatlul,
tlul,
wartlul,
} from "./irreg-verbs";
// third persion idosyncratic // big problem ما سړی یوړ crashes it !!
// if it ends in a dental or ه - look for tttp
//
// if not having tttp
// automatic things: (with blank or u)
// ېد ست ښت
// ښود
//
// ول - اوه
// وېشه ?
// test ګالو ❌ vs ګاللو ✅
// واخیست / واخیسته / واخیستلو
// ولید // ولیده // ولیدو
//
// ووت / واته
//
// also write the rules for the third pers sing endings in the grammar
// multiple third pers sing options
export function parseVerb( export function parseVerb(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
@ -31,9 +20,22 @@ export function parseVerb(
return []; return [];
} }
const [first, ...rest] = tokens; const [first, ...rest] = tokens;
const irregResults = parseIrregularVerb(first.s);
if (irregResults.length) {
return irregResults.map((body) => ({
tokens: rest,
body,
errors: [],
}));
}
const people = getVerbEnding(first.s); const people = getVerbEnding(first.s);
// First do rough verb lookup, grab wide pool of possible verbs (low searching complexity for fast lookup)
// TODO: can optimize this to not have to look for possible stems/roots if none // TODO: can optimize this to not have to look for possible stems/roots if none
const verbs = verbLookup(first.s); const verbs = verbLookup(first.s);
// if (first.s === "سم") {
// console.log({ verbs: JSON.stringify(verbs) });
// }
// Then find out which ones match exactly and how
return matchVerbs(first.s, verbs, people).map((body) => ({ return matchVerbs(first.s, verbs, people).map((body) => ({
tokens: rest, tokens: rest,
body, body,
@ -75,6 +77,7 @@ function matchVerbs(
{ ph: string | undefined; entry: T.VerbEntry }[] { ph: string | undefined; entry: T.VerbEntry }[]
>((acc, entry) => { >((acc, entry) => {
const e = entry.entry; const e = entry.entry;
const baseWAa = "ا" + base;
if (e.c.includes("comp")) { if (e.c.includes("comp")) {
return acc; return acc;
} }
@ -120,6 +123,16 @@ function matchVerbs(
}, },
]; ];
} }
if (!e.sepOo) {
if (base.startsWith("وا") && base.slice(1) === e.psp) {
return [
...acc,
{
ph: "وا",
entry,
},
];
}
if ((base.startsWith("و") && base.slice(1)) === e.psp) { if ((base.startsWith("و") && base.slice(1)) === e.psp) {
return [ return [
...acc, ...acc,
@ -129,6 +142,16 @@ function matchVerbs(
}, },
]; ];
} }
if (baseWAa === e.psp) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
}
if (base === e.psp) { if (base === e.psp) {
return [ return [
...acc, ...acc,
@ -149,6 +172,18 @@ function matchVerbs(
entry, entry,
}, },
]; ];
} else if (!e.sepOo) {
if (
base.startsWith("وا") &&
[miniRoot, miniRootEg].includes(base.slice(1))
) {
return [
...acc,
{
ph: "وا",
entry,
},
];
} else if ( } else if (
base.startsWith("و") && base.startsWith("و") &&
[miniRoot, miniRootEg].includes(base.slice(1)) [miniRoot, miniRootEg].includes(base.slice(1))
@ -156,11 +191,12 @@ function matchVerbs(
return [ return [
...acc, ...acc,
{ {
ph: "و", // TODO: check for وا etc ph: "و",
entry, entry,
}, },
]; ];
} }
}
} else { } else {
const eb = e.p.slice(0, -1); const eb = e.p.slice(0, -1);
if (eb === base) { if (eb === base) {
@ -171,7 +207,17 @@ function matchVerbs(
entry, entry,
}, },
]; ];
} else if (base.startsWith("و") && eb === base.slice(1)) { } else if (!e.sepOo) {
if (base.startsWith("وا") && eb === base.slice(1)) {
return [
...acc,
{
ph: "وا",
entry,
},
];
}
if (base.startsWith("و") && eb === base.slice(1)) {
return [ return [
...acc, ...acc,
{ {
@ -180,6 +226,16 @@ function matchVerbs(
}, },
]; ];
} }
if (baseWAa === base.slice(1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
}
} }
return acc; return acc;
}, []), }, []),
@ -196,7 +252,7 @@ function matchVerbs(
type: "verb", type: "verb",
aspect: aspect as T.Aspect, aspect: aspect as T.Aspect,
base: "stem", base: "stem",
verb: "ph" in verb ? verb.entry : verb, verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
}, },
}, },
]); ]);
@ -237,17 +293,21 @@ function matchVerbs(
}, },
]; ];
} }
} else if (!e.prp) { } else {
const baseNoOo = base.startsWith("و") && base.slice(1); const baseNoOo = base.startsWith("و") && base.slice(1);
if (baseNoOo && matchShortOrLong(baseNoOo, e.p)) { const p = e.prp || e.p;
if (baseNoOo && matchShortOrLong(baseNoOo, p)) {
return [ return [
...acc, ...acc,
{ {
ph: "و", ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
entry, entry,
}, },
]; ];
} else if (matchShortOrLong(base, e.p)) { } else if (
matchShortOrLong(base, p) ||
matchShortOrLong("ا" + base, p)
) {
return [ return [
...acc, ...acc,
{ {
@ -273,7 +333,7 @@ function matchVerbs(
type: "verb", type: "verb",
aspect: aspect as T.Aspect, aspect: aspect as T.Aspect,
base: "root", base: "root",
verb: "ph" in verb ? verb.entry : verb, verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
}, },
}, },
]); ]);
@ -281,12 +341,20 @@ function matchVerbs(
}); });
}); });
} }
const hamzaEnd = s.endsWith("ه"); const hamzaEnd = s.at(-1) === "ه";
const oEnd = s.at(-1) === "و";
const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1));
const b = hamzaEnd || oEnd ? base : s;
const bNoOo = b.startsWith("و") && b.slice(1);
const tppMatches = { const tppMatches = {
imperfective: entries.filter( imperfective: entries.filter(
({ entry: e }) => ({ entry: e }) =>
!e.c.includes("comp") && !e.c.includes("comp") &&
(isInVarients(e.tppp, s) || (hamzaEnd && base === e.p.slice(0, -1))) (isInVarients(e.tppp, s) ||
(oEnd && [e.p, e.p.slice(0, -1)].includes(base)) ||
(lastVowelNotA(e.g.slice(0, -2)) &&
(hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1)))
// TODO: if check for modified aaXu thing!
), ),
perfective: entries.reduce< perfective: entries.reduce<
{ ph: string | undefined; entry: T.VerbEntry }[] { ph: string | undefined; entry: T.VerbEntry }[]
@ -295,12 +363,33 @@ function matchVerbs(
if (e.c.includes("comp")) { if (e.c.includes("comp")) {
return acc; return acc;
} }
if (e.separationAtP && hamzaEnd) { if (e.separationAtP) {
const b = e.prp || e.p; const b = e.prp || e.p;
const bHead = b.slice(0, e.separationAtP); const bHead = b.slice(0, e.separationAtP);
const bRest = b.slice(e.separationAtP); const bRest = b.slice(e.separationAtP);
// this is REPETITIVE from above ... but doing it again here because the ه will only match on the SHORT versions for 3rd pers masc sing if (bRest === "شول") {
// could modify and reuse the code above for this return acc;
}
if (abruptEnd) {
if (s === b.slice(0, -1)) {
return [
...acc,
{
ph: bHead,
entry,
},
];
}
if (s === bRest.slice(0, -1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
} else if (hamzaEnd) {
if (base === b.slice(0, -1)) { if (base === b.slice(0, -1)) {
return [ return [
...acc, ...acc,
@ -319,17 +408,17 @@ function matchVerbs(
}, },
]; ];
} }
} else if (!e.prp && hamzaEnd) { } else if (oEnd) {
const baseNoOo = base.startsWith("و") && base.slice(1); if ([b, b.slice(0, -1)].includes(base)) {
if (baseNoOo && baseNoOo === e.p.slice(0, -1)) {
return [ return [
...acc, ...acc,
{ {
ph: "و", ph: bHead,
entry, entry,
}, },
]; ];
} else if (base === e.p.slice(0, -1)) { }
if ([bRest, bRest.slice(0, -1)].includes(base)) {
return [ return [
...acc, ...acc,
{ {
@ -339,8 +428,9 @@ function matchVerbs(
]; ];
} }
} }
const sNoOo = s.startsWith("و") && s.slice(1); } else if (!e.prp) {
if (isInVarients(e.tppp, sNoOo)) { if (oEnd) {
if (bNoOo && [e.p, e.p.slice(0, -1).includes(bNoOo)]) {
return [ return [
...acc, ...acc,
{ {
@ -348,6 +438,46 @@ function matchVerbs(
entry, entry,
}, },
]; ];
} else if ([e.p, e.p.slice(0, -1)].includes(base)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
} else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
const b = hamzaEnd ? base : s;
const p = e.p.slice(0, -1);
if (bNoOo && bNoOo === p) {
return [
...acc,
{
ph: "و",
entry,
},
];
} else if (b === p) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
}
}
const sNoOo = s.startsWith("و") && s.slice(1);
if (isInVarients(e.tppp, sNoOo)) {
return [
...acc,
{
ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
entry,
},
];
} else if (isInVarients(e.tppp, s)) { } else if (isInVarients(e.tppp, s)) {
return [ return [
...acc, ...acc,
@ -356,6 +486,14 @@ function matchVerbs(
entry, entry,
}, },
]; ];
} else if (isInVarients(e.tppp, "ا" + s)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
} }
return acc; return acc;
}, []), }, []),
@ -371,7 +509,7 @@ function matchVerbs(
type: "verb", type: "verb",
aspect: aspect as T.Aspect, aspect: aspect as T.Aspect,
base: "root", base: "root",
verb: "ph" in verb ? verb.entry : verb, verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
}, },
}, },
]); ]);
@ -434,3 +572,110 @@ function getVerbEnding(p: string): {
stem: [], stem: [],
}; };
} }
// const [ph, rest]: [T.PH | undefined, T.PsString] = v.entry.noOo
// ? [undefined, base]
// : v.entry.sepOo
// ? [{ type: "PH", ps: { p: "و ", f: "óo`" } }, base]
// : ["آ", "ا"].includes(base.p.charAt(0)) && base.f.charAt(0) === "a"
// ? [{ type: "PH", ps: { p: "وا", f: "wáa" } }, removeAStart(base)]
// : ["óo", "oo"].includes(base.f.slice(0, 2))
// ? [{ type: "PH", ps: { p: "و", f: "wÚ" } }, base]
// : ["ée", "ee"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "ای"
// ? [
// { type: "PH", ps: { p: "وي", f: "wée" } },
// {
// p: base.p.slice(2),
// f: base.f.slice(2),
// },
// ]
// : ["é", "e"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "اې"
// ? [
// { type: "PH", ps: { p: "وي", f: "wé" } },
// {
// p: base.p.slice(2),
// f: base.f.slice(1),
// },
// ]
// : ["ó", "o"].includes(base.f[0]) && base.p.slice(0, 2) === "او"
// ? [{ type: "PH", ps: { p: "و", f: "óo`" } }, base]
// : [{ type: "PH", ps: { p: "و", f: "óo" } }, base];
// return [ph, removeAccents(rest)];
// function removeAStart(ps: T.PsString) {
// return {
// p: ps.p.slice(1),
// f: ps.f.slice(ps.f[1] === "a" ? 2 : 1),
// };
// }
// TODO: could handle all sh- verbs here for efficiency's sake
/**
 * Recognizes a small set of irregular verb forms by exact string match.
 *
 * - "ته", "راته", "ورته", "درته": a single imperfective-root result in the
 *   third person singular masculine, for tlul / raatlul / wartlul / dartlul
 *   respectively (chosen by the را / ور / در prefix).
 * - "شو": perfective results for both kedulStat and kedulDyn; root-based for
 *   third singular masculine and first plural (male and female), followed by
 *   stem-based for first plural (male and female).
 *
 * @param s - the token text to check
 * @returns one `[perfective head, verb block]` tuple per possible reading;
 *   the head is always `undefined` here. Empty array when `s` is not one of
 *   the forms above.
 */
function parseIrregularVerb(
  s: string
): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
  type IrregularMatch = [
    { type: "PH"; s: string } | undefined,
    Omit<T.VBE, "ps">
  ];

  // picks the verb for one of the ته forms based on its directional prefix
  const pickTlulVerb = () => {
    if (s.startsWith("را")) {
      return raatlul;
    }
    if (s.startsWith("ور")) {
      return wartlul;
    }
    if (s.startsWith("در")) {
      return dartlul;
    }
    return tlul;
  };

  // builds one perfective result per (person, verb) pair; persons are the
  // outer loop so the output order is person-major
  const kedulForms = (
    base: "root" | "stem",
    people: T.Person[]
  ): IrregularMatch[] =>
    people.flatMap((person) =>
      [kedulStat, kedulDyn].map<IrregularMatch>((verb) => [
        undefined,
        {
          type: "VB",
          info: {
            aspect: "perfective",
            base,
            type: "verb",
            verb,
          },
          person,
        },
      ])
    );

  switch (s) {
    case "ته":
    case "راته":
    case "ورته":
    case "درته":
      return [
        [
          undefined,
          {
            type: "VB",
            info: {
              aspect: "imperfective",
              base: "root",
              type: "verb",
              verb: pickTlulVerb(),
            },
            person: T.Person.ThirdSingMale,
          },
        ],
      ];
    case "شو": {
      const firstPlural = [T.Person.FirstPlurMale, T.Person.FirstPlurFemale];
      const rootForms = kedulForms("root", [
        T.Person.ThirdSingMale,
        ...firstPlural,
      ]);
      const stemForms = kedulForms("stem", firstPlural);
      return rootForms.concat(stemForms);
    }
    default:
      return [];
  }
}

View File

@ -10,6 +10,7 @@ module.exports = [
1527815139, // osedul 1527815139, // osedul
1585228579997, // ورتلل 1585228579997, // ورتلل
1527815216, // راتلل - to come 1527815216, // راتلل - to come
1585228551150, // درتلل
1527813473, // الوتل - to fly 1527813473, // الوتل - to fly
1527814012, // اوښتل - to pass over, overturn, be flipped over, spill over, shift, change, diverge, pass, cross, abandon 1527814012, // اوښتل - to pass over, overturn, be flipped over, spill over, shift, change, diverge, pass, cross, abandon
1527822843, // برېښېدل - to appear, seem; to shine, sparkle; to smart, have a pricking pain 1527822843, // برېښېدل - to appear, seem; to shine, sparkle; to smart, have a pricking pain

View File

@ -11095,10 +11095,10 @@ typedarray@^0.0.6:
resolved "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz" resolved "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz"
integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c= integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=
typescript@^4.2.3: typescript@^5.1.6:
version "4.4.3" version "5.1.6"
resolved "https://registry.npmjs.org/typescript/-/typescript-4.4.3.tgz" resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.1.6.tgz#02f8ac202b6dad2c0dd5e0913745b47a37998274"
integrity sha512-4xfscpisVgqqDfPaJo5vkd+Qd/ItkoagnHpufr+i2QCHBsNYp+G7UAoyFl8aPtx879u38wPV65rZ8qbGZijalA== integrity sha512-zaWCozRZ6DLEWAWFrVDz1H6FVXzUSfTy5FUMWsQlU8Ym5JP9eO4xkTIROFCQvhQf61z6O/G6ugw3SgAnvvm+HA==
unbox-primitive@^1.0.1: unbox-primitive@^1.0.1:
version "1.0.1" version "1.0.1"