working towards parsing perfect verbs

This commit is contained in:
adueck 2023-09-01 16:35:24 +04:00
parent 28f79729a4
commit 56770b5041
6 changed files with 1111 additions and 491 deletions

View File

@ -1,8 +1,10 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup"; import { LookupFunction } from "./lookup";
import { parseEquative } from "./parse-equative";
import { parseKidsSection } from "./parse-kids-section"; import { parseKidsSection } from "./parse-kids-section";
import { parseNeg } from "./parse-negative"; import { parseNeg } from "./parse-negative";
import { parseNP } from "./parse-np"; import { parseNP } from "./parse-np";
import { parsePastPart } from "./parse-past-part";
import { parsePH } from "./parse-ph"; import { parsePH } from "./parse-ph";
import { parseVerb } from "./parse-verb"; import { parseVerb } from "./parse-verb";
import { bindParseResult, returnParseResult } from "./utils"; import { bindParseResult, returnParseResult } from "./utils";
@ -26,6 +28,8 @@ export function parseBlocks(
const np = prevPh ? [] : parseNP(tokens, lookup); const np = prevPh ? [] : parseNP(tokens, lookup);
const ph = vbExists || prevPh ? [] : parsePH(tokens); const ph = vbExists || prevPh ? [] : parsePH(tokens);
const vb = parseVerb(tokens, lookup); const vb = parseVerb(tokens, lookup);
const vbp = parsePastPart(tokens, lookup);
const eq = parseEquative(tokens);
const neg = parseNeg(tokens); const neg = parseNeg(tokens);
const kidsR = parseKidsSection(tokens, []); const kidsR = parseKidsSection(tokens, []);
const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [ const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
@ -33,6 +37,8 @@ export function parseBlocks(
...ph, ...ph,
...neg, ...neg,
...vb, ...vb,
...vbp,
...eq,
...kidsR, ...kidsR,
]; ];
// TODO: is this necessary? // TODO: is this necessary?
@ -76,7 +82,10 @@ export function parseBlocks(
}); });
} }
function phMatches(ph: T.ParsedPH | undefined, vb: T.ParsedVBE | undefined) { function phMatches(
ph: T.ParsedPH | undefined,
vb: T.ParsedVBE | T.ParsedVBP | undefined
) {
if (!ph) { if (!ph) {
return true; return true;
} }

View File

@ -0,0 +1,106 @@
import * as T from "../../../types";
/**
 * Tries to read the first token as an equative form.
 *
 * The token's `s` is searched for in the `ps` lists of `table`; only the
 * first row containing it is used. For that row, one parse result is
 * produced for every person/tense combination (people outermost, tenses
 * innermost), each consuming exactly the first token.
 *
 * @param tokens - the tokens still to be parsed
 * @returns every reading found for the first token, or an empty array when
 * there are no tokens or the first token is not in `table`; the results
 * never carry errors
 */
export function parseEquative(
  tokens: Readonly<T.Token[]>
): T.ParseResult<T.ParsedVBE>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...remaining] = tokens;
  const word = first.s;
  const row = table.find((entry) => entry.ps.includes(word));
  if (row === undefined) {
    return [];
  }
  const results: T.ParseResult<T.ParsedVBE>[] = [];
  for (const person of row.people) {
    for (const tense of row.tenses) {
      results.push({
        // every result shares the same array of leftover tokens
        tokens: remaining,
        body: {
          type: "VB",
          info: {
            type: "equative",
            tense,
          },
          person,
        },
        errors: [],
      });
    }
  }
  return results;
}
// Lookup table consulted by parseEquative. Each row lists the written forms
// (`ps`) it covers, together with the tenses and the persons that any of
// those forms is read as. parseEquative takes the first row whose `ps`
// contains the token, so a form that is listed in more than one row would
// only ever yield the readings of the earliest row.
// TODO: NOT COMPLETE / CORRECT
const table: {
ps: string[];
tenses: T.EquativeTenseWithoutBa[];
people: T.Person[];
}[] = [
// first and second person forms read as present or habitual
{
ps: ["یم"],
tenses: ["present", "habitual"],
people: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
},
{
ps: ["یې"],
tenses: ["present", "habitual"],
people: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
},
{
ps: ["یو"],
tenses: ["present", "habitual"],
people: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
},
{
ps: ["یئ"],
tenses: ["present", "habitual"],
people: [T.Person.SecondPlurMale, T.Person.SecondPlurFemale],
},
// first and second person forms read as subjunctive or past
{
ps: ["وم"],
tenses: ["subjunctive", "past"],
people: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
},
{
ps: ["وې"],
tenses: ["subjunctive", "past"],
people: [T.Person.SecondSingMale, T.Person.SecondSingFemale],
},
{
ps: ["وو"],
tenses: ["subjunctive", "past"],
people: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
},
{
ps: ["وئ"],
tenses: ["subjunctive", "past"],
people: [T.Person.SecondPlurMale, T.Person.SecondPlurFemale],
},
// third person present forms
{
ps: ["دی"],
tenses: ["present"],
people: [T.Person.ThirdSingMale],
},
{
ps: ["ده"],
tenses: ["present"],
people: [T.Person.ThirdSingFemale],
},
{
ps: ["دي"],
tenses: ["present"],
people: [T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale],
},
// third person habitual form, shared by every gender and number
{
ps: ["وي"],
tenses: ["habitual"],
people: [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
],
},
// past subjunctive: two spellings, one row
{
ps: ["وای", "وی"],
tenses: ["pastSubjunctive"],
// NOTE(review): numeric literals instead of T.Person members; presumably
// these are all twelve T.Person values (0-11) - confirm against the enum
people: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
},
];

View File

@ -0,0 +1,64 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { returnParseResult } from "./utils";
/**
 * Tries to read the first token as a past participle.
 *
 * The token must end in one of "ی", "ي" or "ې". The token without that
 * final character is passed to `lookup` with the "participle" type, and
 * every verb returned is paired with each gender/number that
 * `endingGenderNum` gives for the ending.
 *
 * @param tokens - the tokens still to be parsed
 * @param lookup - dictionary lookup used to find verbs for the participle
 * @returns the parse results built by `returnParseResult` from the
 * remaining tokens and each verb/gender-number pairing; empty when there
 * are no tokens or the first token does not have one of the endings
 */
export function parsePastPart(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.ParsedVBP>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...remaining] = tokens;
  const word = first.s;
  const validEndings = ["ی", "ي", "ې"];
  const ending = word.at(-1) as "ی" | "ي" | "ې";
  // an empty word has no last character at all
  if (!(ending && validEndings.includes(ending))) {
    return [];
  }
  // TODO: ALSO HANDLE SHORT FORMS
  const stem = word.slice(0, -1);
  const verbs = lookup(stem, "participle");
  const genNums = endingGenderNum(ending);
  const blocks: T.ParsedVBP[] = [];
  for (const verb of verbs) {
    for (const genNum of genNums) {
      blocks.push({
        type: "VB",
        info: {
          type: "ppart",
          verb,
          genNum,
        },
      });
    }
  }
  return blocks.flatMap((block) => returnParseResult(remaining, block));
}
/**
 * Gives the gender/number combinations that go with a participle ending.
 *
 * @param ending - the final character of the participle
 * @returns masculine singular for "ی", masculine plural for "ي", and both
 * feminine singular and feminine plural for anything else (i.e. "ې");
 * a fresh array is returned on every call
 */
function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
  switch (ending) {
    case "ی":
      return [{ gender: "masc", number: "singular" }];
    case "ي":
      return [{ gender: "masc", number: "plural" }];
    default:
      // "ې" does not distinguish singular from plural
      return [
        { gender: "fem", number: "singular" },
        { gender: "fem", number: "plural" },
      ];
  }
}

View File

@ -22,6 +22,7 @@ const leedul = wordQuery("لیدل", "verb");
const kenaastul = wordQuery("کېناستل", "verb"); const kenaastul = wordQuery("کېناستل", "verb");
const wurul = wordQuery("وړل", "verb"); const wurul = wordQuery("وړل", "verb");
const akheestul = wordQuery("اخیستل", "verb"); const akheestul = wordQuery("اخیستل", "verb");
const khandul = wordQuery("خندل", "verb");
const tests: { const tests: {
label: string; label: string;
@ -51,6 +52,11 @@ const tests: {
output: [], output: [],
error: true, error: true,
}, },
{
input: "تلم مې",
output: [],
error: true,
},
{ {
input: "سړی زه ویني", input: "سړی زه ویني",
output: [], output: [],
@ -1381,6 +1387,258 @@ const tests: {
}, },
], ],
}, },
{
label: "grammatically transitive",
cases: [
{
input: "زه خاندم",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: khandul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
},
{
input: "خاندم",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: khandul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: true,
shrinkServant: false,
},
})),
},
{
input: "ما خندل",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: khandul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "imperfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
},
{
input: "خندل مې",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: khandul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "imperfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: true,
},
})),
},
{
input: "خندل",
output: [],
},
{
input: "خاندم مې",
output: [],
error: true,
},
{
input: "زه وینم",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
},
{
input: "ما ولیدل",
output: getPeople(1, "sing").flatMap<T.VPSelectionComplete>((person) =>
(
["transitive", "grammatically transitive"] as const
).map<T.VPSelectionComplete>((transitivity) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block:
transitivity === "grammatically transitive"
? {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
}
: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(T.Person.ThirdPlurMale),
}),
},
],
verb: {
type: "verb",
verb: leedul,
transitivity,
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "perfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: transitivity === "transitive",
shrinkServant: false,
},
}))
),
},
],
},
]; ];
tests.forEach(({ label, cases }) => { tests.forEach(({ label, cases }) => {
@ -1394,6 +1652,7 @@ tests.forEach(({ label, cases }) => {
expect(parsed.map((p) => removeKeys(p.body))).toIncludeSameMembers( expect(parsed.map((p) => removeKeys(p.body))).toIncludeSameMembers(
removeKeys(output) removeKeys(output)
); );
expect(parsed.every((p) => p.errors.length === 0)).toBe(true);
} }
}); });
}); });

File diff suppressed because it is too large Load Diff

View File

@ -726,9 +726,7 @@ export type EquativeTenseWithoutBa =
| "subjunctive" | "subjunctive"
| "habitual" | "habitual"
| "past" | "past"
| "wouldBe" | "pastSubjunctive";
| "pastSubjunctive"
| "wouldHaveBeen";
export type PerfectTense = `${EquativeTense}Perfect`; export type PerfectTense = `${EquativeTense}Perfect`;
export type AbilityTense = `${VerbTense}Modal`; export type AbilityTense = `${VerbTense}Modal`;
export type ImperativeTense = `${Aspect}Imperative`; export type ImperativeTense = `${Aspect}Imperative`;
@ -1196,7 +1194,12 @@ export type Block = {
| VHead; | VHead;
}; };
export type ParsedBlock = ParsedNP | ParsedPH | ParsedVBE | NegativeBlock; export type ParsedBlock =
| ParsedNP
| ParsedPH
| ParsedVBE
| ParsedVBP
| NegativeBlock;
export type ParsedKidsSection = { export type ParsedKidsSection = {
type: "kids"; type: "kids";
@ -1213,6 +1216,7 @@ export type ParsedPH = {
s: string; s: string;
}; };
export type ParsedVBE = Omit<VBE, "ps">; export type ParsedVBE = Omit<VBE, "ps">;
export type ParsedVBP = Omit<VBP, "ps">;
export type Kid = { export type Kid = {
key: number; key: number;