From b384771db5712433c9357365780b2de8e9a973a9 Mon Sep 17 00:00:00 2001 From: adueck Date: Wed, 16 Aug 2023 11:48:48 +0400 Subject: [PATCH] more work on basic verb parsing --- package.json | 4 +- .../src/new-verb-engine/roots-and-stems.ts | 2 +- src/lib/src/p-text-helpers.test.ts | 17 + src/lib/src/p-text-helpers.ts | 26 + src/lib/src/parsing/compare.ts | 554 ++++++++++++ src/lib/src/parsing/irreg-verbs.ts | 193 +++++ src/lib/src/parsing/lookup.tsx | 85 +- src/lib/src/parsing/parse-phrase.ts | 2 + src/lib/src/parsing/parse-verb.test.ts | 806 +++++++++++++++++- src/lib/src/parsing/parse-verb.ts | 437 +++++++--- vocab/verbs/simple-intrans.js | 1 + yarn.lock | 8 +- 12 files changed, 1995 insertions(+), 140 deletions(-) create mode 100644 src/lib/src/parsing/compare.ts create mode 100644 src/lib/src/parsing/irreg-verbs.ts diff --git a/package.json b/package.json index 3896904..889c744 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ "react-bootstrap": "^1.5.1", "react-dom": "^17.0.1", "react-scripts": "4.0.3", - "typescript": "^4.2.3", + "typescript": "^5.1.6", "web-vitals": "^1.0.1" }, "scripts": { @@ -77,4 +77,4 @@ ] }, "dependencies": {} -} +} \ No newline at end of file diff --git a/src/lib/src/new-verb-engine/roots-and-stems.ts b/src/lib/src/new-verb-engine/roots-and-stems.ts index b75ab17..72b6715 100644 --- a/src/lib/src/new-verb-engine/roots-and-stems.ts +++ b/src/lib/src/new-verb-engine/roots-and-stems.ts @@ -471,7 +471,7 @@ function getPassiveRs( } // TODO: This is a nasty and messy way to do it with the length options included -function getPerfectiveHead( +export function getPerfectiveHead( base: T.PsString, v: T.VerbEntryNoFVars ): [T.PH, T.PsString] | [undefined, T.PsString] { diff --git a/src/lib/src/p-text-helpers.test.ts b/src/lib/src/p-text-helpers.test.ts index 4af2ece..79367ef 100644 --- a/src/lib/src/p-text-helpers.test.ts +++ b/src/lib/src/p-text-helpers.test.ts @@ -25,6 +25,9 @@ import { splitPsByVarients, endsWith, trimOffPs, + 
undoAaXuPattern, + prevValNotA, + lastVowelNotA, } from "./p-text-helpers"; import * as T from "../../types"; import { pastEndings } from "./grammar-units"; @@ -1665,3 +1668,17 @@ test("endsWith", () => { ); expect(endsWith({ f: ["d", "D"] })({ p: "چت", f: "chat" })).toBe(false); }); + +test("undoAaXuPattern", () => { + expect(undoAaXuPattern("تور")).toBe(false); + expect(undoAaXuPattern("پښتان")).toBe(false); + expect(undoAaXuPattern("کاوه")).toBe("کو"); + expect(undoAaXuPattern("وواته")).toBe("ووت"); + expect(undoAaXuPattern("واسته")).toBe("وست"); + expect(undoAaXuPattern("لیده")).toBe(false); +}); + +test("lastVowelNotA", () => { + expect(lastVowelNotA("raat")).toBe(true); + expect(lastVowelNotA("oowat")).toBe(false); +}); diff --git a/src/lib/src/p-text-helpers.ts b/src/lib/src/p-text-helpers.ts index 6915ea7..6ced86e 100644 --- a/src/lib/src/p-text-helpers.ts +++ b/src/lib/src/p-text-helpers.ts @@ -1224,3 +1224,29 @@ export function getShort(a: T.SingleOrLengthOpts): U { export function capitalizeFirstLetter(string: string) { return string.charAt(0).toUpperCase() + string.slice(1); } + +/** + * For use with the + */ +export function undoAaXuPattern(p: string): false | string { + if (p.at(-1) !== "ه") { + return false; + } + const chars = p.split(""); + const prevVowel = chars.findIndex((c) => ["ی", "ې", "ا"].includes(c)); + if (prevVowel === -1) { + return false; + } + if (p[prevVowel] !== "ا") { + return false; + } + return p.slice(0, prevVowel) + p.slice(prevVowel + 1, -1); +} + +export function lastVowelNotA(g: string): boolean { + const matches = g.match(/ee|aa|i|u|o|oo|U|e|a/g); + if (!matches) { + return true; + } + return matches[matches.length - 1] !== "a"; +} diff --git a/src/lib/src/parsing/compare.ts b/src/lib/src/parsing/compare.ts new file mode 100644 index 0000000..6db1022 --- /dev/null +++ b/src/lib/src/parsing/compare.ts @@ -0,0 +1,554 @@ +const b = [ + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + 
entry: { + c: "v. intrans.", + e: "to come", + ec: "come,comes,coming,came,come", + f: "raatlúl", + g: "raatlul", + noOo: true, + p: "راتلل", + pprtf: "raaghúlay", + pprtp: "راغلی", + prf: "ráaghlul", + prp: "راغلل", + psf: "raadz", + psp: "راځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "ráash", + ssp: "راش", + tppf: "ráaghay", + tppp: "راغی", + ts: 1527815216, + }, + }, + }, + person: 4, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to come", + ec: "come,comes,coming,came,come", + f: "raatlúl", + g: "raatlul", + noOo: true, + p: "راتلل", + pprtf: "raaghúlay", + pprtp: "راغلی", + prf: "ráaghlul", + prp: "راغلل", + psf: "raadz", + psp: "راځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "ráash", + ssp: "راش", + tppf: "ráaghay", + tppp: "راغی", + ts: 1527815216, + }, + }, + }, + person: 5, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to come", + ec: "come,comes,coming,came,come", + f: "raatlúl", + g: "raatlul", + noOo: true, + p: "راتلل", + pprtf: "raaghúlay", + pprtp: "راغلی", + prf: "ráaghlul", + prp: "راغلل", + psf: "raadz", + psp: "راځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "ráash", + ssp: "راش", + tppf: "ráaghay", + tppp: "راغی", + ts: 1527815216, + }, + }, + }, + person: 10, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. 
intrans.", + e: "to come", + ec: "come,comes,coming,came,come", + f: "raatlúl", + g: "raatlul", + noOo: true, + p: "راتلل", + pprtf: "raaghúlay", + pprtp: "راغلی", + prf: "ráaghlul", + prp: "راغلل", + psf: "raadz", + psp: "راځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "ráash", + ssp: "راش", + tppf: "ráaghay", + tppp: "راغی", + ts: 1527815216, + }, + }, + }, + person: 11, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to become _____", + ec: "become", + f: "kedul", + g: "kedul", + noOo: true, + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "shwul", + prp: "شول", + r: 2, + ssf: "sh", + ssp: "ش", + ts: 1581086654898, + }, + }, + }, + person: 4, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to become _____", + ec: "become", + f: "kedul", + g: "kedul", + noOo: true, + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "shwul", + prp: "شول", + r: 2, + ssf: "sh", + ssp: "ش", + ts: 1581086654898, + }, + }, + }, + person: 5, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to become _____", + ec: "become", + f: "kedul", + g: "kedul", + noOo: true, + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "shwul", + prp: "شول", + r: 2, + ssf: "sh", + ssp: "ش", + ts: 1581086654898, + }, + }, + }, + person: 10, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. 
intrans.", + e: "to become _____", + ec: "become", + f: "kedul", + g: "kedul", + noOo: true, + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "shwul", + prp: "شول", + r: 2, + ssf: "sh", + ssp: "ش", + ts: 1581086654898, + }, + }, + }, + person: 11, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + diacExcept: true, + e: "to happen, occur", + ec: "happen", + f: "kedul", + g: "kedul", + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "óoshwul", + prp: "وشول", + r: 2, + separationAtF: 2, + separationAtP: 1, + ssf: "óosh", + ssp: "وش", + ts: 1527812754, + }, + }, + }, + person: 4, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + diacExcept: true, + e: "to happen, occur", + ec: "happen", + f: "kedul", + g: "kedul", + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "óoshwul", + prp: "وشول", + r: 2, + separationAtF: 2, + separationAtP: 1, + ssf: "óosh", + ssp: "وش", + ts: 1527812754, + }, + }, + }, + person: 5, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + diacExcept: true, + e: "to happen, occur", + ec: "happen", + f: "kedul", + g: "kedul", + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "óoshwul", + prp: "وشول", + r: 2, + separationAtF: 2, + separationAtP: 1, + ssf: "óosh", + ssp: "وش", + ts: 1527812754, + }, + }, + }, + person: 10, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. 
intrans.", + diacExcept: true, + e: "to happen, occur", + ec: "happen", + f: "kedul", + g: "kedul", + p: "کېدل", + pprtf: "shúway", + pprtp: "شوی", + prf: "óoshwul", + prp: "وشول", + r: 2, + separationAtF: 2, + separationAtP: 1, + ssf: "óosh", + ssp: "وش", + ts: 1527812754, + }, + }, + }, + person: 11, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to come / go over to (third person or place)", + ec: "come,comes,coming,came,come", + f: "wărtlul", + g: "wartlul", + noOo: true, + p: "ورتلل", + pprtf: "wărghúlay", + pprtp: "ورغلی", + prf: "wárghlul", + prp: "ورغلل", + psf: "wărdz", + psp: "ورځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "wársh", + ssp: "ورش", + tppf: "wărghay", + tppp: "ورغی", + ts: 1585228579997, + }, + }, + }, + person: 4, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to come / go over to (third person or place)", + ec: "come,comes,coming,came,come", + f: "wărtlul", + g: "wartlul", + noOo: true, + p: "ورتلل", + pprtf: "wărghúlay", + pprtp: "ورغلی", + prf: "wárghlul", + prp: "ورغلل", + psf: "wărdz", + psp: "ورځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "wársh", + ssp: "ورش", + tppf: "wărghay", + tppp: "ورغی", + ts: 1585228579997, + }, + }, + }, + person: 5, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. 
intrans.", + e: "to come / go over to (third person or place)", + ec: "come,comes,coming,came,come", + f: "wărtlul", + g: "wartlul", + noOo: true, + p: "ورتلل", + pprtf: "wărghúlay", + pprtp: "ورغلی", + prf: "wárghlul", + prp: "ورغلل", + psf: "wărdz", + psp: "ورځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "wársh", + ssp: "ورش", + tppf: "wărghay", + tppp: "ورغی", + ts: 1585228579997, + }, + }, + }, + person: 10, + type: "VB", + }, + ], + [ + null, + { + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb: { + entry: { + c: "v. intrans.", + e: "to come / go over to (third person or place)", + ec: "come,comes,coming,came,come", + f: "wărtlul", + g: "wartlul", + noOo: true, + p: "ورتلل", + pprtf: "wărghúlay", + pprtp: "ورغلی", + prf: "wárghlul", + prp: "ورغلل", + psf: "wărdz", + psp: "ورځ", + r: 4, + separationAtF: 3, + separationAtP: 2, + ssf: "wársh", + ssp: "ورش", + tppf: "wărghay", + tppp: "ورغی", + ts: 1585228579997, + }, + }, + }, + person: 11, + type: "VB", + }, + ], +]; diff --git a/src/lib/src/parsing/irreg-verbs.ts b/src/lib/src/parsing/irreg-verbs.ts new file mode 100644 index 0000000..3cf10ef --- /dev/null +++ b/src/lib/src/parsing/irreg-verbs.ts @@ -0,0 +1,193 @@ +import * as T from "../../../types"; + +export const raatlul = { + entry: { + ts: 1527815216, + i: 6926, + p: "راتلل", + f: "raatlúl", + g: "raatlul", + e: "to come", + r: 4, + c: "v. intrans.", + psp: "راځ", + psf: "raadz", + ssp: "راش", + ssf: "ráash", + prp: "راغلل", + prf: "ráaghlul", + pprtp: "راغلی", + pprtf: "raaghúlay", + tppp: "راغی", + tppf: "ráaghay", + noOo: true, + separationAtP: 2, + separationAtF: 3, + ec: "come,comes,coming,came,come", + }, +} as T.VerbEntry; + +export const tlul = { + entry: { + ts: 1527815348, + i: 3820, + p: "تلل", + f: "tlul", + g: "tlul", + e: "to go", + r: 4, + c: "v. 
intrans.", + psp: "ځ", + psf: "dz", + ssp: "لاړ ش", + ssf: "láaR sh", + prp: "لاړل", + prf: "láaRul", + tppp: "لاړ", + tppf: "laaR", + separationAtP: 2, + separationAtF: 3, + ec: "go,goes,going,went,gone", + }, +} as T.VerbEntry; + +export const wartlul = { + entry: { + ts: 1585228579997, + i: 14924, + p: "ورتلل", + f: "wărtlul", + g: "wartlul", + e: "to come / go over to (third person or place)", + r: 4, + c: "v. intrans.", + psp: "ورځ", + psf: "wărdz", + ssp: "ورش", + ssf: "wársh", + prp: "ورغلل", + prf: "wárghlul", + pprtp: "ورغلی", + pprtf: "wărghúlay", + tppp: "ورغی", + tppf: "wărghay", + noOo: true, + separationAtP: 2, + separationAtF: 3, + ec: "come,comes,coming,came,come", + }, +} as T.VerbEntry; + +export const dartlul = { + entry: { + ts: 1585228551150, + i: 6320, + p: "درتلل", + f: "dărtlul", + g: "dartlul", + e: "to come (to you / second person)", + r: 3, + c: "v. intrans.", + psp: "درځ", + psf: "dărdz", + ssp: "درش", + ssf: "dársh", + prp: "درغلل", + prf: "dárghlul", + pprtp: "درغلی", + pprtf: "dărghúlay", + tppp: "درغی", + tppf: "dărghay", + noOo: true, + separationAtP: 2, + separationAtF: 3, + ec: "come,comes,coming,came,come", + }, +} as T.VerbEntry; + +export const kedulStat = { + entry: { + ts: 1581086654898, + i: 11100, + p: "کېدل", + f: "kedul", + g: "kedul", + e: "to become _____", + r: 2, + c: "v. intrans.", + ssp: "ش", + ssf: "sh", + prp: "شول", + prf: "shwul", + pprtp: "شوی", + pprtf: "shúway", + noOo: true, + ec: "become", + }, +} as T.VerbEntry; +export const kedulDyn = { + entry: { + ts: 1527812754, + i: 11101, + p: "کېدل", + f: "kedul", + g: "kedul", + e: "to happen, occur", + r: 2, + c: "v. 
intrans.", + ssp: "وش", + ssf: "óosh", + prp: "وشول", + prf: "óoshwul", + pprtp: "شوی", + pprtf: "shúway", + diacExcept: true, + ec: "happen", + separationAtP: 1, + separationAtF: 2, + }, +} as T.VerbEntry; + +export const kawulStat = { + entry: { + ts: 1579015359582, + i: 11112, + p: "کول", + f: "kawul", + g: "kawul", + e: 'to make ____ ____ (as in "He\'s making me angry.")', + r: 4, + c: "v. trans.", + ssp: "کړ", + ssf: "kR", + prp: "کړل", + prf: "kRul", + pprtp: "کړی", + pprtf: "kúRay", + noOo: true, + ec: "make,makes,making,made,made", + }, +} as T.VerbEntry; + +export const kawulDyn = { + entry: { + ts: 1527812752, + i: 11113, + p: "کول", + f: "kawul", + g: "kawul", + e: "to do (an action or activity)", + r: 4, + c: "v. trans./gramm. trans.", + ssp: "وکړ", + ssf: "óokR", + prp: "وکړل", + prf: "óokRul", + pprtp: "کړی", + pprtf: "kúRay", + separationAtP: 1, + separationAtF: 2, + diacExcept: true, + ec: "do,does,doing,did,done", + }, +} as T.VerbEntry; diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index 55fb8bb..2967c59 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -3,7 +3,7 @@ import verbs from "../../../verbs"; import * as T from "../../../types"; import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; -import { splitVarients } from "../p-text-helpers"; +import { splitVarients, undoAaXuPattern } from "../p-text-helpers"; import { arraysHaveCommon } from "../misc-helpers"; export function lookup(s: Partial): T.DictionaryEntry[] { @@ -48,64 +48,93 @@ export function verbLookup(input: string): T.VerbEntry[] { // if theres no legit verb ending and no tpp possibilities, just return an empty array const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined; const checkTpp = shouldCheckTpp(input); + const fromAawu = checkTpp && undoAaXuPattern(input); const inputWoutOo = checkTpp && input.startsWith("و") ? 
input.slice(1) : undefined; + // TODO: don't do this blindly, but check if it could actually be added + const sAddedAa = "ا" + s; + // for لواته -> to search for tpp الواته + const inputAddedAa = "ا" + input; // TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp) if (s.endsWith("ېږ")) { return verbs.filter( sWoutOo ? ({ entry }) => - [s, sWoutOo].includes(entry.p.slice(0, -1)) || - [s.slice(0, -1) + "دل", sWoutOo.slice(0, -1) + "دل"].includes( - entry.p - ) || - [s, sWoutOo].includes(entry.p) || - (entry.psp && [s, sWoutOo].includes(entry.psp)) || + [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) || + [ + s.slice(0, -1) + "دل", + sWoutOo.slice(0, -1) + "دل", + sAddedAa.slice(0, -1) + "دل", + ].includes(entry.p) || + [s, sWoutOo, sAddedAa].includes(entry.p) || + (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) || entry.prp === s || entry.ssp === s : ({ entry }) => - entry.p.slice(0, -1) === s || - entry.p === s.slice(0, -1) + "دل" || - entry.p === s || - entry.psp === s || - entry.prp === s || - entry.ssp === s + [s, sAddedAa].includes(entry.p.slice(0, -1)) || + [s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes( + entry.p + ) || + [s, sAddedAa].includes(entry.p) || + [s, sAddedAa].includes(entry.psp || "") || + [s, sAddedAa].includes(entry.prp || "") || + [s, sAddedAa].includes(entry.ssp || "") ); } return verbs.filter( sWoutOo ? 
({ entry }) => - [s, sWoutOo].includes(entry.p.slice(0, -1)) || + [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) || // for short intransitive forms - [s, sWoutOo].includes(entry.p.slice(0, -3)) || - [s, sWoutOo].includes(entry.p) || - (entry.psp && [s, sWoutOo].includes(entry.psp)) || + [s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -3)) || + [s, sWoutOo, sAddedAa].includes(entry.p) || + (entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) || + (checkTpp && + [ + input.slice(1), + fromAawu && fromAawu.slice(-1), + inputAddedAa, + ].includes(entry.p.slice(0, -1))) || (entry.tppp && arraysHaveCommon( - [input, inputWoutOo], + [input, inputWoutOo, sAddedAa], splitVarients(entry.tppp) )) || - entry.prp === s || - entry.ssp === s || + arraysHaveCommon( + [s, sAddedAa, "و" + s], + [entry.prp, entry.prp?.slice(0, -1)] + ) || + [s, sAddedAa].includes(entry.ssp || "") || (entry.separationAtP && (entry.p.slice(entry.separationAtP) === s || entry.psp?.slice(entry.separationAtP) === s)) : ({ entry }) => - entry.p.slice(0, -1) === s || + [s, sAddedAa].includes(entry.p.slice(0, -1)) || // for short intransitive forms - entry.p.slice(0, -3) === s || - entry.p === s || + [s, sAddedAa].includes(entry.p.slice(0, -3)) || + [s, sAddedAa].includes(entry.p) || + (checkTpp && + [input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) || (entry.tppp && arraysHaveCommon( - [input, inputWoutOo], + [input, inputWoutOo, sAddedAa, inputAddedAa], splitVarients(entry.tppp) )) || - entry.psp === s || - entry.prp === s || - entry.ssp === s || + [s, sAddedAa].includes(entry.psp || "") || + arraysHaveCommon( + [entry.prp, entry.prp?.slice(0, -1)], + [s, sAddedAa, "و" + s] + ) || + [s, sAddedAa, "و" + s].includes(entry.ssp || "") || (entry.separationAtP && (entry.p.slice(entry.separationAtP) === s || - entry.psp?.slice(entry.separationAtP) === s)) + entry.psp?.slice(entry.separationAtP) === s || + (entry.prp && + [ + entry.prp.slice(entry.separationAtP), + 
entry.prp.slice(entry.separationAtP).slice(0, -1), + ].includes(s)) || + (entry.ssp && entry.ssp.slice(entry.separationAtP) === s))) ); } diff --git a/src/lib/src/parsing/parse-phrase.ts b/src/lib/src/parsing/parse-phrase.ts index 2b7223c..980eee6 100644 --- a/src/lib/src/parsing/parse-phrase.ts +++ b/src/lib/src/parsing/parse-phrase.ts @@ -3,6 +3,8 @@ import { verbLookup } from "./lookup"; import { parseNP } from "./parse-np"; import { parseVP } from "./parse-vp"; +// شو should not be sheyaano !! + export function parsePhrase( s: T.Token[], lookup: (s: Partial) => T.DictionaryEntry[] diff --git a/src/lib/src/parsing/parse-verb.test.ts b/src/lib/src/parsing/parse-verb.test.ts index 555e787..280bf6a 100644 --- a/src/lib/src/parsing/parse-verb.test.ts +++ b/src/lib/src/parsing/parse-verb.test.ts @@ -1,5 +1,13 @@ /* eslint-disable jest/valid-title */ import * as T from "../../../types"; +import { + dartlul, + kedulDyn, + kedulStat, + tlul, + wartlul, + raatlul, +} from "./irreg-verbs"; import { verbLookup, wordQuery } from "./lookup"; import { parseVerb } from "./parse-verb"; import { tokenizer } from "./tokenizer"; @@ -16,7 +24,11 @@ const prexodul = wordQuery("پرېښودل", "verb"); const xodul = wordQuery("ښودل", "verb"); const kexodul = wordQuery("کېښودل", "verb"); const katul = wordQuery("کتل", "verb"); -const tlul = wordQuery("تلل", "verb"); +const watul = wordQuery("وتل", "verb"); +const wurul = wordQuery("وړل", "verb"); +const akheestul = wordQuery("اخیستل", "verb"); +const alwatul = wordQuery("الوتل", "verb"); +// const dartlul = wordQuery("درتلل", "verb"); // todo alwatul waalwatul akhistul azmoyul etc @@ -115,13 +127,34 @@ const tests: { { ph: undefined, root: { - persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], + persons: [ + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + T.Person.ThirdSingMale, + ], aspects: ["imperfective", "perfective"], }, verb: leekul, }, ], }, + { + input: "ولیکلو", + output: [ + { + ph: "و", + root: { + persons: [ + 
T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + T.Person.ThirdSingMale, + ], + aspects: ["perfective"], + }, + verb: leekul, + }, + ], + }, { input: "لیکل", output: [ @@ -542,6 +575,123 @@ const tests: { }, ], }, + { + input: "واخلم", + output: [ + { + ph: "وا", + stem: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: akheestul, + }, + ], + }, + { + input: "خلم", + output: [ + { + ph: undefined, + stem: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: akheestul, + }, + ], + }, + { + input: "اخیستم", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["imperfective", "perfective"], + }, + verb: akheestul, + }, + ], + }, + { + input: "واخیستم", + output: [ + { + ph: "وا", + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: akheestul, + }, + ], + }, + { + input: "واخیستلم", + output: [ + { + ph: "وا", + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: akheestul, + }, + ], + }, + { + input: "خیستلم", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: akheestul, + }, + ], + }, + { + input: "الوځې", + output: [ + { + ph: undefined, + stem: { + persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], + aspects: ["imperfective", "perfective"], + }, + verb: alwatul, + }, + ], + }, + { + input: "والوځې", + output: [ + { + ph: "وا", + stem: { + persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], + aspects: ["perfective"], + }, + verb: alwatul, + }, + ], + }, + { + input: "لوځې", + output: [ + { + ph: undefined, + stem: { + persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], + aspects: ["perfective"], + }, + verb: alwatul, + }, + ], + }, ], }, { @@ 
-654,16 +804,202 @@ const tests: { }, ], }, - ], - }, - { - label: "verbs with abrupt 3rd pers sing past endings", - cases: [ + { + input: "لاړلم", + output: [ + { + ph: "لا", + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: tlul, + }, + ], + }, + { + input: "لاړم", + output: [ + { + ph: "لا", + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: tlul, + }, + ], + }, + { + input: "لاړو", + output: [ + { + ph: "لا", + root: { + persons: [ + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + T.Person.ThirdSingMale, + ], + aspects: ["perfective"], + }, + verb: tlul, + }, + ], + }, + { + input: "لاړه", + output: [ + { + ph: "لا", + root: { + persons: [T.Person.ThirdSingFemale, T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: tlul, + }, + ], + }, + { + input: "ړلم", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: tlul, + }, + { + ph: undefined, + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: wurul, + }, + ], + }, + { + input: "ړم", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: tlul, + }, + { + ph: undefined, + root: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: wurul, + }, + ], + }, + { + input: "والووت", + output: [ + { + ph: "وا", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: alwatul, + }, + ], + }, + { + input: "والواته", + output: [ + { + ph: "وا", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: alwatul, + }, + ], + }, + { + input: "لواته", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: 
["perfective"], + }, + verb: alwatul, + }, + ], + }, + { + input: "راشې", + output: [ + { + ph: "را", + stem: { + persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], + aspects: ["perfective"], + }, + verb: raatlul, + }, + ], + }, + { + input: "ورشې", + output: [ + { + ph: "ور", + stem: { + persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale], + aspects: ["perfective"], + }, + verb: wartlul, + }, + ], + }, + { + input: "یوسم", + output: [ + { + ph: "یو", + stem: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: wurul, + }, + ], + }, + { + input: "سم", + output: [ + { + ph: undefined, + stem: { + persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale], + aspects: ["perfective"], + }, + verb: wurul, + }, + ], + }, + + // TOOD: would be more robust if it looked for this without excetions // same as واخیست etc // { // input: "لاړ", // output: [ // { - // ph: undefined, + // ph: "لا", // root: { // persons: [T.Person.ThirdSingMale], // aspects: ["perfective"], @@ -674,8 +1010,456 @@ const tests: { // }, ], }, + { + label: "verbs with different 3rd pers sing past endings", + cases: [ + { + input: "رسېد", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: rasedul, + }, + ], + }, + { + input: "ورسېد", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: rasedul, + }, + ], + }, + { + input: "کېناسته", + output: [ + { + ph: "کې", + root: { + persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale], + aspects: ["imperfective", "perfective"], + }, + verb: kenaastul, + }, + ], + }, + { + input: "کېناست", + output: [ + { + ph: "کې", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: kenaastul, + }, + ], + }, + { + input: "کېناستو", + output: [ + { + ph: "کې", + root: { + persons: [ + 
T.Person.ThirdSingMale, + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + ], + aspects: ["imperfective", "perfective"], + }, + verb: kenaastul, + }, + ], + }, + { + input: "ووتلو", + output: [ + { + ph: "و", + root: { + persons: [ + T.Person.ThirdSingMale, + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + ], + aspects: ["perfective"], + }, + verb: watul, + }, + ], + }, + { + input: "ووتو", + output: [ + { + ph: "و", + root: { + persons: [ + T.Person.ThirdSingMale, + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + ], + aspects: ["perfective"], + }, + verb: watul, + }, + ], + }, + { + input: "ووته", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: watul, + }, + ], + }, + { + input: "واته", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: watul, + }, + ], + }, + { + input: "ووت", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective", "perfective"], + }, + verb: watul, + }, + ], + }, + { + input: "وووت", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: watul, + }, + ], + }, + { + input: "ورسېد", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["perfective"], + }, + verb: rasedul, + }, + ], + }, + ], + }, + { + label: "irregular verbs", + cases: [ + { + input: "ته", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective"], + }, + verb: tlul, + }, + ], + }, + { + input: "راته", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective"], + }, + verb: raatlul, + }, + ], + }, + { + input: "ورته", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective"], + }, + verb: wartlul, + }, + ], + }, + { + input: 
"درته", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingMale], + aspects: ["imperfective"], + }, + verb: dartlul, + }, + ], + }, + { + input: "شو", + output: [ + { + ph: undefined, + root: { + persons: [ + T.Person.ThirdSingMale, + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + ], + aspects: ["perfective"], + }, + stem: { + persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], + aspects: ["perfective"], + }, + verb: kedulStat, + }, + { + ph: undefined, + root: { + persons: [ + T.Person.ThirdSingMale, + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + ], + aspects: ["perfective"], + }, + stem: { + persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + ], + }, + { + input: "شوله", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulStat, + }, + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + ], + }, + { + input: "شوه", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulStat, + }, + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + ], + }, + { + input: "شوله", + output: [ + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulStat, + }, + { + ph: undefined, + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + ], + }, + { + input: "وشوله", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + ], + }, + { + input: "وشوه", + output: [ + { + ph: "و", + root: { + persons: [T.Person.ThirdSingFemale], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + ], + }, + { + input: "وشي", 
+ output: [ + { + ph: "و", + stem: { + persons: [ + T.Person.ThirdSingMale, + T.Person.ThirdSingFemale, + T.Person.ThirdPlurMale, + T.Person.ThirdPlurFemale, + ], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + ], + }, + // TODO: It would probably be more effecient just to return the kedul verb options + // and then when we put things together with the perfective head parsed they could + // become raatlul etc... + { + input: "شي", + output: [ + { + ph: undefined, + stem: { + persons: [ + T.Person.ThirdSingMale, + T.Person.ThirdSingFemale, + T.Person.ThirdPlurMale, + T.Person.ThirdPlurFemale, + ], + aspects: ["perfective"], + }, + verb: kedulDyn, + }, + { + ph: undefined, + stem: { + persons: [ + T.Person.ThirdSingMale, + T.Person.ThirdSingFemale, + T.Person.ThirdPlurMale, + T.Person.ThirdPlurFemale, + ], + aspects: ["perfective"], + }, + verb: kedulStat, + }, + { + ph: undefined, + stem: { + persons: [ + T.Person.ThirdSingMale, + T.Person.ThirdSingFemale, + T.Person.ThirdPlurMale, + T.Person.ThirdPlurFemale, + ], + aspects: ["perfective"], + }, + verb: raatlul, + }, + { + ph: undefined, + stem: { + persons: [ + T.Person.ThirdSingMale, + T.Person.ThirdSingFemale, + T.Person.ThirdPlurMale, + T.Person.ThirdPlurFemale, + ], + aspects: ["perfective"], + }, + verb: dartlul, + }, + { + ph: undefined, + stem: { + persons: [ + T.Person.ThirdSingMale, + T.Person.ThirdSingFemale, + T.Person.ThirdPlurMale, + T.Person.ThirdPlurFemale, + ], + aspects: ["perfective"], + }, + verb: wartlul, + }, + ], + }, + ], + }, ]; +// Also do سي yo see tests.forEach(({ label, cases }) => { test(label, () => { cases.forEach(({ input, output }) => { @@ -718,7 +1502,11 @@ tests.forEach(({ label, cases }) => { ), ]; }, []); - expect(vbs).toIncludeSameMembers(madeVbsS); + expect(removeIs(vbs)).toIncludeSameMembers(removeIs(madeVbsS)); }); }); }); + +function removeIs(a: any): any { + return JSON.parse(JSON.stringify(a, (k, v) => (k === "i" ? 
undefined : v))); +} diff --git a/src/lib/src/parsing/parse-verb.ts b/src/lib/src/parsing/parse-verb.ts index 50d90b5..7b73d8c 100644 --- a/src/lib/src/parsing/parse-verb.ts +++ b/src/lib/src/parsing/parse-verb.ts @@ -1,27 +1,16 @@ import * as T from "../../../types"; -import { isInVarients } from "../p-text-helpers"; +import { removeFVarientsFromVerb } from "../accent-and-ps-utils"; +import { isInVarients, lastVowelNotA } from "../p-text-helpers"; +import { + dartlul, + kedulDyn, + kedulStat, + raatlul, + tlul, + wartlul, +} from "./irreg-verbs"; -// third persion idosyncratic -// if it ends in a dental or ه - look for tttp -// -// if not having tttp -// automatic things: (with blank or u) -// ېد ست ښت -// ښود -// -// ول - اوه - -// وېشه ? - -// test ګالو ❌ vs ګاللو ✅ - -// واخیست / واخیسته / واخیستلو -// ولید // ولیده // ولیدو -// -// ووت / واته -// -// also write the rules for the third pers sing endings in the grammar -// multiple third pers sing options +// big problem ما سړی یوړ crashes it !! 
export function parseVerb( tokens: Readonly, @@ -31,9 +20,22 @@ export function parseVerb( return []; } const [first, ...rest] = tokens; + const irregResults = parseIrregularVerb(first.s); + if (irregResults.length) { + return irregResults.map((body) => ({ + tokens: rest, + body, + errors: [], + })); + } const people = getVerbEnding(first.s); + // First do rough verb lookup, grab wide pool of possible verbs (low searching complexity for fast lookup) // TODO: can optimize this to not have to look for possible stems/roots if none const verbs = verbLookup(first.s); + // if (first.s === "سم") { + // console.log({ verbs: JSON.stringify(verbs) }); + // } + // Then find out which ones match exactly and how return matchVerbs(first.s, verbs, people).map((body) => ({ tokens: rest, body, @@ -75,6 +77,7 @@ function matchVerbs( { ph: string | undefined; entry: T.VerbEntry }[] >((acc, entry) => { const e = entry.entry; + const baseWAa = "ا" + base; if (e.c.includes("comp")) { return acc; } @@ -120,14 +123,34 @@ function matchVerbs( }, ]; } - if ((base.startsWith("و") && base.slice(1)) === e.psp) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; + if (!e.sepOo) { + if (base.startsWith("وا") && base.slice(1) === e.psp) { + return [ + ...acc, + { + ph: "وا", + entry, + }, + ]; + } + if ((base.startsWith("و") && base.slice(1)) === e.psp) { + return [ + ...acc, + { + ph: "و", + entry, + }, + ]; + } + if (baseWAa === e.psp) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } } if (base === e.psp) { return [ @@ -149,17 +172,30 @@ function matchVerbs( entry, }, ]; - } else if ( - base.startsWith("و") && - [miniRoot, miniRootEg].includes(base.slice(1)) - ) { - return [ - ...acc, - { - ph: "و", // TODO: check for وا etc - entry, - }, - ]; + } else if (!e.sepOo) { + if ( + base.startsWith("وا") && + [miniRoot, miniRootEg].includes(base.slice(1)) + ) { + return [ + ...acc, + { + ph: "وا", + entry, + }, + ]; + } else if ( + base.startsWith("و") && + [miniRoot, 
miniRootEg].includes(base.slice(1)) + ) { + return [ + ...acc, + { + ph: "و", + entry, + }, + ]; + } } } else { const eb = e.p.slice(0, -1); @@ -171,14 +207,34 @@ function matchVerbs( entry, }, ]; - } else if (base.startsWith("و") && eb === base.slice(1)) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; + } else if (!e.sepOo) { + if (base.startsWith("وا") && eb === base.slice(1)) { + return [ + ...acc, + { + ph: "وا", + entry, + }, + ]; + } + if (base.startsWith("و") && eb === base.slice(1)) { + return [ + ...acc, + { + ph: "و", + entry, + }, + ]; + } + if (baseWAa === eb) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } } } return acc; @@ -196,7 +252,7 @@ function matchVerbs( type: "verb", aspect: aspect as T.Aspect, base: "stem", - verb: "ph" in verb ? verb.entry : verb, + verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb, }, }, ]); @@ -237,17 +293,21 @@ function matchVerbs( }, ]; } - } else if (!e.prp) { + } else { const baseNoOo = base.startsWith("و") && base.slice(1); - if (baseNoOo && matchShortOrLong(baseNoOo, e.p)) { + const p = e.prp || e.p; + if (baseNoOo && matchShortOrLong(baseNoOo, p)) { return [ ...acc, { - ph: "و", + ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و", entry, }, ]; - } else if (matchShortOrLong(base, e.p)) { + } else if ( + matchShortOrLong(base, p) || + matchShortOrLong("ا" + base, p) + ) { return [ ...acc, { @@ -273,7 +333,7 @@ function matchVerbs( type: "verb", aspect: aspect as T.Aspect, base: "root", - verb: "ph" in verb ? verb.entry : verb, + verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb, }, }, ]); @@ -281,12 +341,20 @@ function matchVerbs( }); }); } - const hamzaEnd = s.endsWith("ه"); + const hamzaEnd = s.at(-1) === "ه"; + const oEnd = s.at(-1) === "و"; + const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1)); + const b = hamzaEnd || oEnd ? 
base : s; + const bNoOo = b.startsWith("و") && b.slice(1); const tppMatches = { imperfective: entries.filter( ({ entry: e }) => !e.c.includes("comp") && - (isInVarients(e.tppp, s) || (hamzaEnd && base === e.p.slice(0, -1))) + (isInVarients(e.tppp, s) || + (oEnd && [e.p, e.p.slice(0, -1)].includes(base)) || + (lastVowelNotA(e.g.slice(0, -2)) && + (hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1))) + // TODO: if check for modified aaXu thing! ), perfective: entries.reduce< { ph: string | undefined; entry: T.VerbEntry }[] @@ -295,48 +363,110 @@ function matchVerbs( if (e.c.includes("comp")) { return acc; } - if (e.separationAtP && hamzaEnd) { + if (e.separationAtP) { const b = e.prp || e.p; const bHead = b.slice(0, e.separationAtP); const bRest = b.slice(e.separationAtP); - // this is REPETITIVE from above ... but doing it again here because the ه will only match on the SHORT versions for 3rd pers masc sing - // could modify and reuse the code above for this - if (base === b.slice(0, -1)) { - return [ - ...acc, - { - ph: bHead, - entry, - }, - ]; + if (bRest === "شول") { + return acc; } - if (base === bRest.slice(0, -1)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + if (abruptEnd) { + if (s === b.slice(0, -1)) { + return [ + ...acc, + { + ph: bHead, + entry, + }, + ]; + } + if (s === bRest.slice(0, -1)) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } + } else if (hamzaEnd) { + if (base === b.slice(0, -1)) { + return [ + ...acc, + { + ph: bHead, + entry, + }, + ]; + } + if (base === bRest.slice(0, -1)) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } + } else if (oEnd) { + if ([b, b.slice(0, -1)].includes(base)) { + return [ + ...acc, + { + ph: bHead, + entry, + }, + ]; + } + if ([bRest, bRest.slice(0, -1)].includes(base)) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } } - } else if (!e.prp && hamzaEnd) { - const baseNoOo = base.startsWith("و") && base.slice(1); - if (baseNoOo && 
baseNoOo === e.p.slice(0, -1)) { - return [ - ...acc, - { - ph: "و", - entry, - }, - ]; - } else if (base === e.p.slice(0, -1)) { - return [ - ...acc, - { - ph: undefined, - entry, - }, - ]; + } else if (!e.prp) { + if (oEnd) { + if (bNoOo && [e.p, e.p.slice(0, -1)].includes(bNoOo)) { + return [ + ...acc, + { + ph: "و", + entry, + }, + ]; + } else if ([e.p, e.p.slice(0, -1)].includes(base)) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } + } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) { + const b = hamzaEnd ? base : s; + const p = e.p.slice(0, -1); + if (bNoOo && bNoOo === p) { + return [ + ...acc, + { + ph: "و", + entry, + }, + ]; + } else if (b === p) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; + } } } const sNoOo = s.startsWith("و") && s.slice(1); @@ -344,7 +474,7 @@ function matchVerbs( return [ ...acc, { - ph: "و", + ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و", entry, }, ]; @@ -356,6 +486,14 @@ function matchVerbs( entry, }, ]; + } else if (isInVarients(e.tppp, "ا" + s)) { + return [ + ...acc, + { + ph: undefined, + entry, + }, + ]; } return acc; }, []), @@ -371,7 +509,7 @@ function matchVerbs( type: "verb", aspect: aspect as T.Aspect, base: "root", - verb: "ph" in verb ? verb.entry : verb, + verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb, }, }, ]); @@ -434,3 +572,110 @@ function getVerbEnding(p: string): { stem: [], }; } + +// const [ph, rest]: [T.PH | undefined, T.PsString] = v.entry.noOo +// ? [undefined, base] +// : v.entry.sepOo +// ? [{ type: "PH", ps: { p: "و ", f: "óo`" } }, base] +// : ["آ", "ا"].includes(base.p.charAt(0)) && base.f.charAt(0) === "a" +// ? [{ type: "PH", ps: { p: "وا", f: "wáa" } }, removeAStart(base)] +// : ["óo", "oo"].includes(base.f.slice(0, 2)) +// ? [{ type: "PH", ps: { p: "و", f: "wÚ" } }, base] +// : ["ée", "ee"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "ای" +// ? 
[ +// { type: "PH", ps: { p: "وي", f: "wée" } }, +// { +// p: base.p.slice(2), +// f: base.f.slice(2), +// }, +// ] +// : ["é", "e"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "اې" +// ? [ +// { type: "PH", ps: { p: "وي", f: "wé" } }, +// { +// p: base.p.slice(2), +// f: base.f.slice(1), +// }, +// ] +// : ["ó", "o"].includes(base.f[0]) && base.p.slice(0, 2) === "او" +// ? [{ type: "PH", ps: { p: "و", f: "óo`" } }, base] +// : [{ type: "PH", ps: { p: "و", f: "óo" } }, base]; +// return [ph, removeAccents(rest)]; +// function removeAStart(ps: T.PsString) { +// return { +// p: ps.p.slice(1), +// f: ps.f.slice(ps.f[1] === "a" ? 2 : 1), +// }; +// } + +// TODO: could handle all sh- verbs for efficiency's sake +function parseIrregularVerb( + s: string +): [{ type: "PH"; s: string } | undefined, Omit][] { + if (["ته", "راته", "ورته", "درته"].includes(s)) { + return [ + [ + undefined, + { + type: "VB", + info: { + aspect: "imperfective", + base: "root", + type: "verb", + verb: s.startsWith("را") + ? raatlul + : s.startsWith("ور") + ? wartlul + : s.startsWith("در") + ? 
dartlul + : tlul, + }, + person: T.Person.ThirdSingMale, + }, + ], + ]; + } + if (s === "شو") { + return [ + ...[ + T.Person.ThirdSingMale, + T.Person.FirstPlurMale, + T.Person.FirstPlurFemale, + ].flatMap((person) => + [kedulStat, kedulDyn].map< + [{ type: "PH"; s: string } | undefined, Omit] + >((verb) => [ + undefined, + { + type: "VB", + info: { + aspect: "perfective", + base: "root", + type: "verb", + verb, + }, + person, + }, + ]) + ), + ...[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].flatMap((person) => + [kedulStat, kedulDyn].map< + [{ type: "PH"; s: string } | undefined, Omit] + >((verb) => [ + undefined, + { + type: "VB", + info: { + aspect: "perfective", + base: "stem", + type: "verb", + verb, + }, + person, + }, + ]) + ), + ]; + } + return []; +} diff --git a/vocab/verbs/simple-intrans.js b/vocab/verbs/simple-intrans.js index 6e070e3..54dafd9 100644 --- a/vocab/verbs/simple-intrans.js +++ b/vocab/verbs/simple-intrans.js @@ -10,6 +10,7 @@ module.exports = [ 1527815139, // osedul 1585228579997, // ورتلل 1527815216, // راتلل - to come + 1585228551150, // درتلل 1527813473, // الوتل - to fly 1527814012, // اوښتل - to pass over, overturn, be flipped over, spill over, shift, change, diverge, pass, cross, abandon 1527822843, // برېښېدل - to appear, seem; to shine, sparkle; to smart, have a pricking pain diff --git a/yarn.lock b/yarn.lock index e5cfd5a..344fc8b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11095,10 +11095,10 @@ typedarray@^0.0.6: resolved "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz" integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c= -typescript@^4.2.3: - version "4.4.3" - resolved "https://registry.npmjs.org/typescript/-/typescript-4.4.3.tgz" - integrity sha512-4xfscpisVgqqDfPaJo5vkd+Qd/ItkoagnHpufr+i2QCHBsNYp+G7UAoyFl8aPtx879u38wPV65rZ8qbGZijalA== +typescript@^5.1.6: + version "5.1.6" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.1.6.tgz#02f8ac202b6dad2c0dd5e0913745b47a37998274" + integrity 
sha512-zaWCozRZ6DLEWAWFrVDz1H6FVXzUSfTy5FUMWsQlU8Ym5JP9eO4xkTIROFCQvhQf61z6O/G6ugw3SgAnvvm+HA== unbox-primitive@^1.0.1: version "1.0.1"