some verb bug fixes and starting with AP parsing

This commit is contained in:
adueck 2023-09-04 14:26:24 +04:00
parent ded0030395
commit a9f93dc717
10 changed files with 416 additions and 155 deletions

View File

@ -1,11 +1,16 @@
import nounsAdjs from "../../../nouns-adjs";
import verbs from "../../../verbs";
import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import {
isAdjectiveEntry,
isAdverbEntry,
isNounEntry,
} from "../type-predicates";
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
import { splitVarients, undoAaXuPattern } from "../p-text-helpers";
import { arraysHaveCommon } from "../misc-helpers";
import { shortVerbEndConsonant } from "./misc";
import { kawulDyn, kawulStat, kedulDyn, kedulStat, tlul } from "./irreg-verbs";
export type LookupFunction = typeof lookup;
@ -13,11 +18,13 @@ export function lookup(
s: Partial<T.DictionaryEntry>,
type: "nounAdj"
): T.DictionaryEntry[];
export function lookup(s: string, type: "adverb"): T.AdverbEntry[];
export function lookup(s: string, type: "pPart"): T.VerbEntry[];
export function lookup(s: string, type: "verb" | "participle"): T.VerbEntry[];
export function lookup(
s: string | Partial<T.DictionaryEntry>,
type: "nounAdj" | "verb" | "participle"
): T.DictionaryEntry[] | T.VerbEntry[] {
type: "nounAdj" | "verb" | "participle" | "pPart" | "adverb"
): T.DictionaryEntry[] | T.VerbEntry[] | T.AdverbEntry[] {
if (type === "nounAdj") {
if (typeof s !== "object") {
throw new Error("invalid query for noun / adj lookup");
@ -30,6 +37,12 @@ export function lookup(
if (type === "verb") {
return verbLookup(s);
}
if (type === "pPart") {
return pPartLookup(s);
}
if (type === "adverb") {
return adverbLookup(s);
}
return participleLookup(s);
}
@ -60,6 +73,12 @@ function nounAdjLookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
}
/**
 * Searches the `nounsAdjs` word list for adverb entries.
 *
 * @param s - text that must be strictly equal to an entry's `p` field
 * @returns every entry that passes `isAdverbEntry` and whose `p` equals `s`
 */
function adverbLookup(s: string): T.AdverbEntry[] {
  const matches = nounsAdjs.filter((candidate) => {
    if (!isAdverbEntry(candidate)) {
      return false;
    }
    return candidate.p === s;
  });
  return matches as T.AdverbEntry[];
}
export function shouldCheckTpp(s: string): boolean {
return (
["د", "ړ", "ت", "ځ", "و", "ډ", "ڼ", "ن", "ه"].includes(s.slice(-1)) ||
@ -85,6 +104,27 @@ function participleLookup(input: string): T.VerbEntry[] {
return [];
}
/**
 * Finds the verbs that a past-participle base may belong to.
 *
 * Three hard-coded bases map straight to irregular verbs. Otherwise the
 * `verbs` list is searched for entries whose `p` equals either the input
 * with "ل" appended (when the input ends in "ست" or "ښت") or the input
 * itself (when it already ends in "ل").
 *
 * @param input - the base text to look up
 * @returns the matching verb entries, or an empty array when nothing applies
 */
function pPartLookup(input: string): T.VerbEntry[] {
  switch (input) {
    case "کړ":
      return [kawulStat, kawulDyn];
    case "شو":
      return [kedulStat, kedulDyn];
    case "تل":
      // TODO: are ورتلل، راتلل، درتلل handled like this as well?
      return [tlul];
  }
  const lastTwo = input.slice(-2);
  const needsLaam = lastTwo === "ست" || lastTwo === "ښت";
  if (!needsLaam && !input.endsWith("ل")) {
    return [];
  }
  // bases ending in ست / ښت are matched against the entry with ل added back on
  const target = needsLaam ? input + "ل" : input;
  return verbs.filter(({ entry }) => entry.p === target);
}
function verbLookup(input: string): T.VerbEntry[] {
// TODO:
// only look up forms if there's an ending

View File

@ -1 +1,23 @@
// TODO: ability to treat a doubled noun as an adverb
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { returnParseResultS } from "./utils";
/**
 * Attempts to read an AP block from the front of the token list.
 *
 * The first token's `s` is looked up as an adverb; each hit becomes one
 * parse result carrying the remaining tokens.
 *
 * @param tokens - tokens still to be parsed
 * @param lookup - dictionary lookup function used to find adverb entries
 * @returns one result per adverb found, or an empty array when there are no
 * tokens or no adverb matches the first token
 */
export function parseAP(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.APSelection>[] {
  const head = tokens[0];
  if (head === undefined) {
    return [];
  }
  const remaining = tokens.slice(1);
  const results: T.ParseResult<T.APSelection>[] = [];
  for (const entry of lookup(head.s, "adverb")) {
    results.push(
      returnParseResultS(remaining, {
        type: "AP",
        selection: {
          type: "adverb",
          entry,
        },
      })
    );
  }
  return results;
}

View File

@ -1,5 +1,6 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseAP } from "./parse-ap";
import { parseEquative } from "./parse-equative";
import { parseKidsSection } from "./parse-kids-section";
import { parseNeg } from "./parse-negative";
@ -25,21 +26,16 @@ export function parseBlocks(
(b): b is T.ParsedPH => b.type === "PH"
);
const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
const np = prevPh ? [] : parseNP(tokens, lookup);
const ph = vbExists || prevPh ? [] : parsePH(tokens);
const vb = parseVerb(tokens, lookup);
const vbp = parsePastPart(tokens, lookup);
const eq = parseEquative(tokens);
const neg = parseNeg(tokens);
const kidsR = parseKidsSection(tokens, []);
const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
...np,
...ph,
...neg,
...vb,
...vbp,
...eq,
...kidsR,
...(prevPh ? [] : parseAP(tokens, lookup)),
...(prevPh ? [] : parseNP(tokens, lookup)),
...(vbExists || prevPh ? [] : parsePH(tokens)),
...parseVerb(tokens, lookup),
...parsePastPart(tokens, lookup),
...parseEquative(tokens),
...parseNeg(tokens),
...parseKidsSection(tokens, []),
];
// TODO: is this necessary?
// if (!allResults.length) {

View File

@ -0,0 +1,186 @@
import * as T from "../../../types";
import { lookup, wordQuery } from "./lookup";
import { tokenizer } from "./tokenizer";
import { parsePastPart } from "./parse-past-part";
import { kawulDyn, kawulStat, kedulDyn, kedulStat } from "./irreg-verbs";
// Verb entries fetched once via wordQuery and used as the expected `verb`
// value in the test cases below.
const leedul = wordQuery("لیدل", "verb");
const akheestul = wordQuery("اخیستل", "verb");
const wahul = wordQuery("وهل", "verb");
const awuxtul = wordQuery("اوښتل", "verb");
// NOTE(review): this query is a Latin-script string while the others above are
// Pashto script — confirm wordQuery matches on it.
const tlul = wordQuery("tlul", "verb");
/**
 * Builds the expected parsed block for a past participle of `verb` with the
 * given gender and number.
 */
function expectedPPart(
  verb: T.VerbEntry,
  gender: "masc" | "fem",
  number: "singular" | "plural"
): T.ParsedVBP {
  return {
    type: "VB",
    info: {
      type: "ppart",
      genNum: { gender, number },
      verb,
    },
  };
}

// Fixture table: each group has a label and a list of input strings paired
// with the exact blocks parsePastPart is expected to produce for them.
const tests: {
  label: string;
  cases: {
    input: string;
    output: T.ParsedVBP[];
  }[];
}[] = [
  {
    label: "regular past participles",
    cases: [
      {
        input: "لیدلی",
        output: [expectedPPart(leedul, "masc", "singular")],
      },
      {
        input: "وهلي",
        output: [expectedPPart(wahul, "masc", "plural")],
      },
      {
        // this ending yields both a singular and a plural feminine reading
        input: "وهلې",
        output: [
          expectedPPart(wahul, "fem", "singular"),
          expectedPPart(wahul, "fem", "plural"),
        ],
      },
    ],
  },
  {
    label: "past participles with short forms",
    cases: [
      {
        input: "اخیستی",
        output: [expectedPPart(akheestul, "masc", "singular")],
      },
      {
        input: "اخیستلی",
        output: [expectedPPart(akheestul, "masc", "singular")],
      },
      {
        input: "اوښتی",
        output: [expectedPPart(awuxtul, "masc", "singular")],
      },
    ],
  },
  {
    label: "irregular past participles",
    cases: [
      {
        input: "تلی",
        output: [expectedPPart(tlul, "masc", "singular")],
      },
      {
        input: "کړي",
        output: [
          expectedPPart(kawulStat, "masc", "plural"),
          expectedPPart(kawulDyn, "masc", "plural"),
        ],
      },
      {
        input: "شوي",
        output: [
          expectedPPart(kedulStat, "masc", "plural"),
          expectedPPart(kedulDyn, "masc", "plural"),
        ],
      },
    ],
  },
];
// Registers one test per fixture group; every case in the group is tokenized,
// parsed, and the bodies of the results are compared with the expected output.
describe("parsing past participles", () => {
  for (const group of tests) {
    // eslint-disable-next-line jest/valid-title
    test(group.label, () => {
      for (const { input, output } of group.cases) {
        const parsed = parsePastPart(tokenizer(input), lookup);
        const bodies = parsed.map((result) => result.body);
        expect(bodies).toEqual(output);
      }
    });
  }
});

View File

@ -16,7 +16,7 @@ export function parsePastPart(
}
// TODO: ALSO HANDLE SHORT FORMS
const wOutEnd = s.slice(0, -1);
const matches = lookup(wOutEnd, "participle");
const matches = lookup(wOutEnd, "pPart");
const genNums = endingGenderNum(ending);
return matches
.flatMap<T.ParsedVBP>((verb) =>

View File

@ -31,7 +31,8 @@ const akheestul = wordQuery("اخیستل", "verb");
const alwatul = wordQuery("الوتل", "verb");
// const dartlul = wordQuery("درتلل", "verb")
// todo alwatul waalwatul akhistul azmoyul etc
// TODO: azmoyul etc
// TODO: cleaner and more thorough handling of ا separating verbs ee - wee etc
const tests: {
label: string;
@ -394,7 +395,7 @@ const tests: {
{
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["imperfective", "perfective"],
aspects: ["imperfective"],
},
verb: akheestul,
},
@ -419,7 +420,7 @@ const tests: {
{
stem: {
persons: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
aspects: ["imperfective", "perfective"],
aspects: ["imperfective"],
},
verb: alwatul,
},

View File

@ -12,9 +12,6 @@ import {
import { LookupFunction } from "./lookup";
import { shortVerbEndConsonant } from "./misc";
// big problem ما سړی یوړ crashes it !!
// BIG problem - issue with و being considered a VB for a lot of little verbs like بلل
// TODO: کول verbs!
// check that aawu stuff is working
// check oo`azmooy -
@ -102,6 +99,9 @@ function matchVerbs(
}
}
} else if (e.psp) {
if (hasBreakawayAlef(e) && startsWithAleph(base)) {
return acc;
}
if (e.separationAtP) {
const bRest = e.psp.slice(e.separationAtP);
if (bRest === base) {
@ -117,6 +117,8 @@ function matchVerbs(
return [...acc, entry];
}
}
} else if (hasBreakawayAlef(e) && startsWithAleph(base)) {
return acc;
} else if (e.c.includes("intrans.")) {
const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
const miniRootEg = miniRoot + "ېږ";
@ -169,6 +171,8 @@ function matchVerbs(
if (matchShortOrLong(base, bRest)) {
return [...acc, entry];
}
} else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
return acc;
} else {
const p = e.prp || e.p;
if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
@ -245,6 +249,9 @@ function matchVerbs(
}
}
} else if (!e.prp) {
if (hasBreakawayAlef(e) && startsWithAleph(base)) {
return acc;
}
if (oEnd) {
if ([e.p, e.p.slice(0, -1)].includes(base)) {
return [...acc, entry];
@ -395,3 +402,11 @@ function parseIrregularVerb(s: string): T.ParsedVBE[] {
}
return [];
}
/**
 * Reports whether a verb entry has no `sepOo` value set and its `p` text
 * begins with "ا" or "آ".
 *
 * @param e - the verb dictionary entry to inspect
 * @returns true only when `sepOo` is falsy and `p` starts with one of the two letters
 */
function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
  if (e.sepOo) {
    return false;
  }
  const initial = e.p.charAt(0);
  return initial === "ا" || initial === "آ";
}
/**
 * Reports whether the given text begins with "ا" or "آ".
 *
 * @param base - the text to check; an empty string yields false
 * @returns true when the first character is one of the two letters
 */
function startsWithAleph(base: string): boolean {
  return base.startsWith("ا") || base.startsWith("آ");
}

View File

@ -86,6 +86,10 @@ const tests: {
output: [],
error: true,
},
{
input: "ما وانه اخیست",
output: [],
},
],
},
{
@ -1005,129 +1009,129 @@ const tests: {
}))
),
},
// {
// input: "ودې وینم",
// output: getPeople(2, "sing").flatMap((objectPerson) =>
// getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
// blocks: [
// {
// key: 1,
// block: makeSubjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(subjectPerson),
// }),
// },
// {
// key: 2,
// block: makeObjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(objectPerson),
// }),
// },
// ],
// verb: {
// type: "verb",
// verb: leedul,
// transitivity: "transitive",
// canChangeTransitivity: false,
// canChangeStatDyn: false,
// negative: false,
// tense: "subjunctiveVerb",
// canChangeVoice: true,
// isCompound: false,
// voice: "active",
// },
// externalComplement: undefined,
// form: {
// removeKing: true,
// shrinkServant: true,
// },
// }))
// ),
// },
// {
// input: "وینم به دې",
// output: getPeople(2, "sing").flatMap((objectPerson) =>
// getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
// blocks: [
// {
// key: 1,
// block: makeSubjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(subjectPerson),
// }),
// },
// {
// key: 2,
// block: makeObjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(objectPerson),
// }),
// },
// ],
// verb: {
// type: "verb",
// verb: leedul,
// transitivity: "transitive",
// canChangeTransitivity: false,
// canChangeStatDyn: false,
// negative: false,
// tense: "imperfectiveFuture",
// canChangeVoice: true,
// isCompound: false,
// voice: "active",
// },
// externalComplement: undefined,
// form: {
// removeKing: true,
// shrinkServant: true,
// },
// }))
// ),
// },
// {
// input: "یو به مې ړلې",
// output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap(
// (objectPerson) =>
// getPeople(1, "sing").map<T.VPSelectionComplete>(
// (subjectPerson) => ({
// blocks: [
// {
// key: 1,
// block: makeSubjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(subjectPerson),
// }),
// },
// {
// key: 2,
// block: makeObjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(objectPerson),
// }),
// },
// ],
// verb: {
// type: "verb",
// verb: wurul,
// transitivity: "transitive",
// canChangeTransitivity: false,
// canChangeStatDyn: false,
// negative: false,
// tense: "habitualPerfectivePast",
// canChangeVoice: true,
// isCompound: false,
// voice: "active",
// },
// externalComplement: undefined,
// form: {
// removeKing: true,
// shrinkServant: true,
// },
// })
// )
// ),
// },
{
input: "ودې وینم",
output: getPeople(2, "sing").flatMap((objectPerson) =>
getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(subjectPerson),
}),
},
{
key: 2,
block: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(objectPerson),
}),
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "subjunctiveVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: true,
shrinkServant: true,
},
}))
),
},
{
input: "وینم به دې",
output: getPeople(2, "sing").flatMap((objectPerson) =>
getPeople(1, "sing").map<T.VPSelectionComplete>((subjectPerson) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(subjectPerson),
}),
},
{
key: 2,
block: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(objectPerson),
}),
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "imperfectiveFuture",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: true,
shrinkServant: true,
},
}))
),
},
{
input: "یو به مې ړلې",
output: [...getPeople(2, "sing"), T.Person.ThirdPlurFemale].flatMap(
(objectPerson) =>
getPeople(1, "sing").map<T.VPSelectionComplete>(
(subjectPerson) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(subjectPerson),
}),
},
{
key: 2,
block: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(objectPerson),
}),
},
],
verb: {
type: "verb",
verb: wurul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "habitualPerfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: true,
shrinkServant: true,
},
})
)
),
},
],
},
{

View File

@ -17,13 +17,6 @@ import { personToGenNum } from "../misc-helpers";
import { equals } from "rambda";
// to hide equatives type-doubling issue
// this should also conjugate to
// وامې نه خیسته
// وامې نه خیستلو
// waa-me nú kheestulo
// وامې نه اخیست
// waa-me nú akheest
// TODO: word query for kawul/kedul/stat/dyn
// TODO: learn how to yank / use plugin for JSON neovim
@ -37,6 +30,9 @@ import { equals } from "rambda";
// so we don't get something like ښځو زه خوړلې یم with a hanging
// یم not used
// TODO: way to get an error message for past participle and equative
// not matching up
export function parseVP(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction

View File

@ -1199,6 +1199,7 @@ export type ParsedBlock =
| ParsedPH
| ParsedVBE
| ParsedVBP
| APSelection
| NegativeBlock;
export type ParsedKidsSection = {