Big improvement by removing the redundant PH parsing; added tests for negatives, and also added checking for subject/object (S/O) conflicts in VP parsing

This commit is contained in:
adueck 2023-08-22 19:33:53 +04:00
parent a7709c4299
commit a3ac5e2cb3
13 changed files with 604 additions and 1250 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 609 KiB

After

Width:  |  Height:  |  Size: 146 KiB

View File

@ -15,7 +15,10 @@ import {
} from "./src/verb-info";
import { makeVPSelectionState } from "./src/phrase-building/verb-selection";
import { vpsReducer } from "./src/phrase-building/vps-reducer";
import { isPastTense } from "./src/phrase-building/vp-tools";
import {
isPastTense,
isInvalidSubjObjCombo,
} from "./src/phrase-building/vp-tools";
import { getInflectionPattern } from "./src/inflection-pattern";
import { makePsString, removeFVarients } from "./src/accent-and-ps-utils";
@ -45,12 +48,7 @@ import {
standardizePhonetics,
} from "./src/standardize-pashto";
import { phoneticsToDiacritics } from "./src/phonetics-to-diacritics";
import {
randomPerson,
isInvalidSubjObjCombo,
randomSubjObj,
getEnglishVerb,
} from "./src/np-tools";
import { randomPerson, randomSubjObj, getEnglishVerb } from "./src/np-tools";
import {
getEnglishFromRendered,
getPashtoFromRendered,

View File

@ -1,80 +1,79 @@
import * as T from "../../types";
import { isFirstPerson, parseEc, isSecondPerson } from "./misc-helpers";
import { parseEc } from "./misc-helpers";
import { isInvalidSubjObjCombo } from "./phrase-building/vp-tools";
function getRandPers(): T.Person {
return Math.floor(Math.random() * 12);
return Math.floor(Math.random() * 12);
}
export function randomPerson(a?: { prev?: T.Person, counterPart?: T.VerbObject | T.NPSelection }) {
// no restrictions, just get any person
if (!a) {
return getRandPers();
}
if (a.counterPart !== undefined && typeof a.counterPart === "object" && a.counterPart.selection.type === "pronoun") {
// with counterpart pronoun
let newP = 0;
do {
newP = getRandPers();
} while (
isInvalidSubjObjCombo(a.counterPart.selection.person, newP)
||
(newP === a.prev)
);
return newP;
}
// without counterpart pronoun, just previous
export function randomPerson(a?: {
prev?: T.Person;
counterPart?: T.VerbObject | T.NPSelection;
}) {
// no restrictions, just get any person
if (!a) {
return getRandPers();
}
if (
a.counterPart !== undefined &&
typeof a.counterPart === "object" &&
a.counterPart.selection.type === "pronoun"
) {
// with counterpart pronoun
let newP = 0;
do {
newP = getRandPers();
} while (newP === a.prev);
return newP;
}
export function isInvalidSubjObjCombo(subj: T.Person, obj: T.Person): boolean {
return (
(isFirstPerson(subj) && isFirstPerson(obj))
||
(isSecondPerson(subj) && isSecondPerson(obj))
);
}
export function randomSubjObj(old?: { subj: T.Person, obj?: T.Person }): { subj: T.Person, obj: T.Person } {
let subj = 0;
let obj = 0;
do {
subj = getRandPers();
obj = getRandPers();
newP = getRandPers();
} while (
(old && ((old.subj === subj) || (old.obj === obj)))
||
isInvalidSubjObjCombo(subj, obj)
isInvalidSubjObjCombo(a.counterPart.selection.person, newP) ||
newP === a.prev
);
return { subj, obj };
return newP;
}
// without counterpart pronoun, just previous
let newP = 0;
do {
newP = getRandPers();
} while (newP === a.prev);
return newP;
}
export function randomSubjObj(old?: { subj: T.Person; obj?: T.Person }): {
subj: T.Person;
obj: T.Person;
} {
let subj = 0;
let obj = 0;
do {
subj = getRandPers();
obj = getRandPers();
} while (
(old && (old.subj === subj || old.obj === obj)) ||
isInvalidSubjObjCombo(subj, obj)
);
return { subj, obj };
}
export function getEnglishVerb(entry: T.DictionaryEntry): string {
if (!entry.ec) {
console.error("errored verb");
console.error(entry);
throw new Error("no english information for verb");
}
if (entry.ep) {
const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec;
return `to ${ec} ${entry.ep}`;
}
const ec = parseEc(entry.ec);
return `to ${ec[0]}`;
if (!entry.ec) {
console.error("errored verb");
console.error(entry);
throw new Error("no english information for verb");
}
if (entry.ep) {
const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec;
return `to ${ec} ${entry.ep}`;
}
const ec = parseEc(entry.ec);
return `to ${ec[0]}`;
}
export function getEnglishParticiple(entry: T.DictionaryEntry): string {
if (!entry.ec) {
throw new Error("no english information for participle");
}
const ec = parseEc(entry.ec);
if (entry.ep && ec[0] === "am") {
return `to be/being ${entry.ep}`;
}
const participle = `${ec[2]} / to ${ec[0]}`;
return (entry.ep)
? `${participle} ${entry.ep}`
: participle;
if (!entry.ec) {
throw new Error("no english information for participle");
}
const ec = parseEc(entry.ec);
if (entry.ep && ec[0] === "am") {
return `to be/being ${entry.ep}`;
}
const participle = `${ec[2]} / to ${ec[0]}`;
return entry.ep ? `${participle} ${entry.ep}` : participle;
}

View File

@ -49,7 +49,7 @@ export function verbLookup(input: string): T.VerbEntry[] {
// IMPORTANT TODO FOR EFFECIANCY!
// check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING
// if theres no legit verb ending and no tpp possibilities, just return an empty array
const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
// const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
const checkTpp = shouldCheckTpp(input);
const fromAawu = checkTpp && undoAaXuPattern(input);
const inputWoutOo =
@ -61,86 +61,45 @@ export function verbLookup(input: string): T.VerbEntry[] {
// TODO: don't do the slice of and checking for useless things when you have a NON verb ending (like with the tpp)
if (s.endsWith("ېږ")) {
return verbs.filter(
sWoutOo
? ({ entry }) =>
[s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
[
s.slice(0, -1) + "دل",
sWoutOo.slice(0, -1) + "دل",
sAddedAa.slice(0, -1) + "دل",
].includes(entry.p) ||
[s, sWoutOo, sAddedAa].includes(entry.p) ||
(entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
entry.prp === s ||
entry.ssp === s
: ({ entry }) =>
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
[s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
entry.p
) ||
[s, sAddedAa].includes(entry.p) ||
[s, sAddedAa].includes(entry.psp || "") ||
[s, sAddedAa].includes(entry.prp || "") ||
[s, sAddedAa].includes(entry.ssp || "")
({ entry }) =>
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
[s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
entry.p
) ||
[s, sAddedAa].includes(entry.p) ||
[s, sAddedAa].includes(entry.psp || "") ||
[s, sAddedAa].includes(entry.prp || "") ||
[s, sAddedAa].includes(entry.ssp || "")
);
}
return verbs.filter(
sWoutOo
? ({ entry }) =>
[s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
// for short intransitive forms
[s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -3)) ||
[s, sWoutOo, sAddedAa].includes(entry.p) ||
(entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
(checkTpp &&
({ entry }) =>
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
// for short intransitive forms
[s, sAddedAa].includes(entry.p.slice(0, -3)) ||
[s, sAddedAa].includes(entry.p) ||
(checkTpp &&
[input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) ||
(entry.tppp &&
arraysHaveCommon(
[input, inputWoutOo, sAddedAa, inputAddedAa],
splitVarients(entry.tppp)
)) ||
[s, sAddedAa].includes(entry.psp || "") ||
arraysHaveCommon([entry.prp, entry.prp?.slice(0, -1)], [s, sAddedAa]) ||
[s, sAddedAa].includes(entry.ssp || "") ||
(entry.separationAtP &&
// TODO this is super ugly, do check of short and long function
(entry.p.slice(entry.separationAtP) === s ||
entry.p.slice(entry.separationAtP, -1) === s ||
(checkTpp && entry.p.slice(entry.separationAtP, -1) === input) ||
entry.psp?.slice(entry.separationAtP) === s ||
(entry.prp &&
[
input.slice(1),
fromAawu && fromAawu.slice(-1),
inputAddedAa,
].includes(entry.p.slice(0, -1))) ||
(entry.tppp &&
arraysHaveCommon(
[input, inputWoutOo, sAddedAa],
splitVarients(entry.tppp)
)) ||
arraysHaveCommon(
[s, sAddedAa, "و" + s],
[entry.prp, entry.prp?.slice(0, -1)]
) ||
[s, sAddedAa].includes(entry.ssp || "") ||
(entry.separationAtP &&
(entry.p.slice(entry.separationAtP) === s ||
entry.psp?.slice(entry.separationAtP) === s))
: ({ entry }) =>
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
// for short intransitive forms
[s, sAddedAa].includes(entry.p.slice(0, -3)) ||
[s, sAddedAa].includes(entry.p) ||
(checkTpp &&
[input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) ||
(entry.tppp &&
arraysHaveCommon(
[input, inputWoutOo, sAddedAa, inputAddedAa],
splitVarients(entry.tppp)
)) ||
[s, sAddedAa].includes(entry.psp || "") ||
arraysHaveCommon(
[entry.prp, entry.prp?.slice(0, -1)],
[s, sAddedAa, "و" + s]
) ||
[s, sAddedAa, "و" + s].includes(entry.ssp || "") ||
(entry.separationAtP &&
// TODO this is super ugly, do check of short and long function
(entry.p.slice(entry.separationAtP) === s ||
entry.p.slice(entry.separationAtP, -1) === s ||
(checkTpp && entry.p.slice(entry.separationAtP, -1) === input) ||
entry.psp?.slice(entry.separationAtP) === s ||
(entry.prp &&
[
entry.prp.slice(entry.separationAtP),
entry.prp.slice(entry.separationAtP).slice(0, -1),
].includes(s)) ||
(entry.ssp && entry.ssp.slice(entry.separationAtP) === s)))
entry.prp.slice(entry.separationAtP),
entry.prp.slice(entry.separationAtP).slice(0, -1),
].includes(s)) ||
(entry.ssp && entry.ssp.slice(entry.separationAtP) === s)))
);
}

View File

@ -1,5 +1,4 @@
import * as T from "../../../types";
import { fmapParseResult } from "../fp-ps";
import { parseKidsSection } from "./parse-kids-section";
import { parseNeg } from "./parse-negative";
import { parseNP } from "./parse-np";
@ -21,22 +20,22 @@ export function parseBlocks(
return returnParseResult(tokens, { blocks, kids });
}
const prevPh: T.ParsedPH | undefined = blocks.find(
(b): b is T.ParsedPH => "type" in b && b.type === "PH"
(b): b is T.ParsedPH => b.type === "PH"
);
const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
const np = prevPh ? [] : fmapParseResult((x) => [x], parseNP(tokens, lookup));
const np = prevPh ? [] : parseNP(tokens, lookup);
// UHOH... This could cause double paths ... maybe don't parse the PH in the parse VB!
const ph =
vbExists || prevPh ? [] : fmapParseResult((x) => [x], parsePH(tokens));
const vb = fmapParseResult(
([ph, v]) => (ph ? [ph, v] : [v]),
parseVerb(tokens, verbLookup)
);
const neg = fmapParseResult((x) => [x], parseNeg(tokens));
const ph = vbExists || prevPh ? [] : parsePH(tokens);
const vb = parseVerb(tokens, verbLookup);
const neg = parseNeg(tokens);
const kidsR = parseKidsSection(tokens, []);
const allResults = [...np, ...ph, ...neg, ...vb, ...kidsR] as T.ParseResult<
T.ParsedBlock[] | { kids: T.ParsedKid[] }
>[];
const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
...np,
...ph,
...neg,
...vb,
...kidsR,
];
// TODO: is this necessary?
// if (!allResults.length) {
// return [
@ -47,10 +46,9 @@ export function parseBlocks(
// },
// ];
// }
console.log({ allResults });
return bindParseResult(allResults, (tokens, r) => {
const errors: T.ParseError[] = [];
if ("kids" in r) {
if (r.type === "kids") {
return {
next: parseBlocks(tokens, lookup, verbLookup, blocks, [
...kids,
@ -62,23 +60,21 @@ export function parseBlocks(
: [],
};
}
if (prevPh && r.some((x) => "type" in x && x.type === "PH")) {
if (prevPh && r.type === "PH") {
return [];
}
const vb = r.find((x): x is T.ParsedVBE => "type" in x && x.type === "VB");
if (!phMatches(prevPh, vb)) {
return [];
// TODO: will have to handle welded
if (r.type === "VB") {
if (!phMatches(prevPh, r)) {
return [];
}
}
// don't allow two negatives
if (
"type" in r[0] &&
r[0].type === "negative" &&
blocks.some((b) => "type" in b && b.type === "negative")
) {
if (r.type === "negative" && blocks.some((b) => b.type === "negative")) {
return [];
}
return {
next: parseBlocks(tokens, lookup, verbLookup, [...blocks, ...r], kids),
next: parseBlocks(tokens, lookup, verbLookup, [...blocks, r], kids),
errors,
};
});

View File

@ -5,14 +5,18 @@ import { bindParseResult, returnParseResult } from "./utils";
export function parseKidsSection(
tokens: Readonly<T.Token[]>,
prevKids: T.ParsedKid[]
): T.ParseResult<{ kids: T.ParsedKid[] }>[] {
): T.ParseResult<T.ParsedKidsSection>[] {
if (tokens.length === 0) {
return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
return prevKids.length
? returnParseResult(tokens, { type: "kids", kids: prevKids })
: [];
}
const parsedKid = parseKid(tokens);
// TODO: is this even necessary ??
if (!parsedKid.length) {
return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
return prevKids.length
? returnParseResult(tokens, { type: "kids", kids: prevKids })
: [];
}
return bindParseResult(parsedKid, (tokens, r) => {
// return parseKidsSection(tokens, [...prevKids, r]);

View File

@ -21,11 +21,9 @@ export function parseNP(
inflected: boolean;
selection: T.NounSelection;
}
): {
inflected: boolean;
selection: T.NPSelection;
} {
): T.ParsedNP {
return {
type: "NP",
inflected: a.inflected,
selection: {
type: "NP",

View File

@ -18,7 +18,7 @@ const phs = [
export function parsePH(
tokens: Readonly<T.Token[]>
): T.ParseResult<{ type: "PH"; s: string }>[] {
): T.ParseResult<T.ParsedPH>[] {
if (tokens.length === 0) {
return [];
}

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,7 @@ import {
export function parseVerb(
tokens: Readonly<T.Token[]>,
verbLookup: (s: string) => T.VerbEntry[]
): T.ParseResult<[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]>[] {
): T.ParseResult<T.ParsedVBE>[] {
if (tokens.length === 0) {
return [];
}
@ -57,8 +57,8 @@ function matchVerbs(
root: T.Person[];
stem: T.Person[];
}
): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
const w: ReturnType<typeof matchVerbs> = [];
): T.ParsedVBE[] {
const w: T.ParsedVBE[] = [];
const lEnding = s.endsWith("ل");
const base = s.endsWith("ل") ? s : s.slice(0, -1);
const matchShortOrLong = (b: string, x: string) => {
@ -80,167 +80,52 @@ function matchVerbs(
return e.p.slice(0, -1) === base;
}
}),
perfective: entries.reduce<
{ ph: string | undefined; entry: T.VerbEntry }[]
>((acc, entry) => {
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
const e = entry.entry;
const baseWAa = "ا" + base;
if (e.c.includes("comp")) {
return acc;
}
if (e.ssp) {
const bRest = e.separationAtP ? e.ssp.slice(e.separationAtP) : "";
if (bRest === base) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
if (e.ssp === base) {
return [
...acc,
{
ph: e.separationAtF
? e.ssp.slice(0, e.separationAtP)
: undefined,
entry,
},
];
if (e.separationAtP) {
const bRest = e.ssp.slice(e.separationAtP);
if (bRest === base) {
return [...acc, entry];
}
} else {
if (e.ssp === base) {
return [...acc, entry];
}
}
} else if (e.psp) {
const bRest = e.separationAtP ? e.psp.slice(e.separationAtP) : "";
if (bRest === base) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
if (e.psp === base && e.separationAtP) {
return [
...acc,
{
ph: e.psp.slice(0, e.separationAtP),
entry,
},
];
}
if (!e.sepOo) {
if (base.startsWith("وا") && base.slice(1) === e.psp) {
return [
...acc,
{
ph: "وا",
entry,
},
];
if (e.separationAtP) {
const bRest = e.psp.slice(e.separationAtP);
if (bRest === base) {
return [...acc, entry];
}
if ((base.startsWith("و") && base.slice(1)) === e.psp) {
return [
...acc,
{
ph: "و",
entry,
},
];
} else {
if (!e.sepOo) {
if (baseWAa === e.psp) {
return [...acc, entry];
}
}
if (baseWAa === e.psp) {
return [
...acc,
{
ph: undefined,
entry,
},
];
if (base === e.psp) {
return [...acc, entry];
}
}
if (base === e.psp) {
return [
...acc,
{
ph: undefined,
entry,
},
];
}
} else if (e.c.includes("intrans.")) {
const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
const miniRootEg = miniRoot + "ېږ";
if ([miniRoot, miniRootEg].includes(base)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
} else if (!e.sepOo) {
if (
base.startsWith("وا") &&
[miniRoot, miniRootEg].includes(base.slice(1))
) {
return [
...acc,
{
ph: "وا",
entry,
},
];
} else if (
base.startsWith("و") &&
[miniRoot, miniRootEg].includes(base.slice(1))
) {
return [
...acc,
{
ph: "و",
entry,
},
];
}
return [...acc, entry];
}
} else {
const eb = e.p.slice(0, -1);
if (eb === base) {
return [
...acc,
{
ph: undefined,
entry,
},
];
return [...acc, entry];
} else if (!e.sepOo) {
if (base.startsWith("وا") && eb === base.slice(1)) {
return [
...acc,
{
ph: "وا",
entry,
},
];
}
if (base.startsWith("و") && eb === base.slice(1)) {
return [
...acc,
{
ph: "و",
entry,
},
];
}
if (baseWAa === base.slice(1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
return [...acc, entry];
}
}
}
@ -250,19 +135,16 @@ function matchVerbs(
Object.entries(stemMatches).forEach(([aspect, entries]) => {
entries.forEach((verb) => {
people.stem.forEach((person) => {
w.push([
"ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
{
type: "VB",
person,
info: {
type: "verb",
aspect: aspect as T.Aspect,
base: "stem",
verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
},
w.push({
type: "VB",
person,
info: {
type: "verb",
aspect: aspect as T.Aspect,
base: "stem",
verb: removeFVarientsFromVerb(verb),
},
]);
});
});
});
});
@ -272,56 +154,21 @@ function matchVerbs(
imperfective: entries.filter(
({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
),
perfective: entries.reduce<
{ ph: string | undefined; entry: T.VerbEntry }[]
>((acc, entry) => {
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
const e = entry.entry;
if (e.c.includes("comp")) {
return acc;
}
if (e.separationAtP) {
const b = e.prp || e.p;
const bHead = b.slice(0, e.separationAtP);
const bRest = b.slice(e.separationAtP);
if (matchShortOrLong(base, b)) {
return [
...acc,
{
ph: bHead,
entry,
},
];
} else if (matchShortOrLong(base, bRest)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
if (matchShortOrLong(base, bRest)) {
return [...acc, entry];
}
} else {
const baseNoOo = base.startsWith("و") && base.slice(1);
const p = e.prp || e.p;
if (baseNoOo && matchShortOrLong(baseNoOo, p)) {
return [
...acc,
{
ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
entry,
},
];
} else if (
matchShortOrLong(base, p) ||
matchShortOrLong("ا" + base, p)
) {
return [
...acc,
{
ph: undefined,
entry,
},
];
if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
return [...acc, entry];
}
}
return acc;
@ -331,19 +178,16 @@ function matchVerbs(
Object.entries(rootMatches).forEach(([aspect, entries]) => {
entries.forEach((verb) => {
people.root.forEach((person) => {
w.push([
"ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
{
type: "VB",
person,
info: {
type: "verb",
aspect: aspect as T.Aspect,
base: "root",
verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
},
w.push({
type: "VB",
person,
info: {
type: "verb",
aspect: aspect as T.Aspect,
base: "root",
verb: removeFVarientsFromVerb(verb),
},
]);
});
});
});
});
@ -351,8 +195,6 @@ function matchVerbs(
const hamzaEnd = s.at(-1) === "ه";
const oEnd = s.at(-1) === "و";
const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1));
const b = hamzaEnd || oEnd ? base : s;
const bNoOo = b.startsWith("و") && b.slice(1);
const tppMatches = {
imperfective: entries.filter(
({ entry: e }) =>
@ -363,163 +205,63 @@ function matchVerbs(
(hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1)))
// TODO: if check for modified aaXu thing!
),
perfective: entries.reduce<
{ ph: string | undefined; entry: T.VerbEntry }[]
>((acc, entry) => {
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
const e = entry.entry;
if (e.c.includes("comp")) {
return acc;
}
if (e.separationAtP) {
const b = e.prp || e.p;
const bHead = b.slice(0, e.separationAtP);
const bRest = b.slice(e.separationAtP);
if (bRest === "شول") {
return acc;
}
if (abruptEnd) {
if (s === b.slice(0, -1)) {
return [
...acc,
{
ph: bHead,
entry,
},
];
}
if (s === bRest.slice(0, -1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
return [...acc, entry];
}
} else if (hamzaEnd) {
if (base === b.slice(0, -1)) {
return [
...acc,
{
ph: bHead,
entry,
},
];
}
if (base === bRest.slice(0, -1)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
return [...acc, entry];
}
} else if (oEnd) {
if ([b, b.slice(0, -1)].includes(base)) {
return [
...acc,
{
ph: bHead,
entry,
},
];
}
if ([bRest, bRest.slice(0, -1)].includes(base)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
return [...acc, entry];
}
}
} else if (!e.prp) {
if (oEnd) {
if (bNoOo && [e.p, e.p.slice(0, -1).includes(bNoOo)]) {
return [
...acc,
{
ph: "و",
entry,
},
];
} else if ([e.p, e.p.slice(0, -1)].includes(base)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
if ([e.p, e.p.slice(0, -1)].includes(base)) {
return [...acc, entry];
}
} else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
const b = hamzaEnd ? base : s;
const p = e.p.slice(0, -1);
if (bNoOo && bNoOo === p) {
return [
...acc,
{
ph: "و",
entry,
},
];
} else if (b === p) {
return [
...acc,
{
ph: undefined,
entry,
},
];
if (b === p) {
return [...acc, entry];
}
}
}
const sNoOo = s.startsWith("و") && s.slice(1);
if (isInVarients(e.tppp, sNoOo)) {
return [
...acc,
{
ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
entry,
},
];
} else if (isInVarients(e.tppp, s)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
if (isInVarients(e.tppp, s)) {
return [...acc, entry];
} else if (isInVarients(e.tppp, "ا" + s)) {
return [
...acc,
{
ph: undefined,
entry,
},
];
return [...acc, entry];
}
return acc;
}, []),
};
Object.entries(tppMatches).forEach(([aspect, entries]) => {
entries.forEach((verb) => {
w.push([
"ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
{
type: "VB",
person: T.Person.ThirdSingMale,
info: {
type: "verb",
aspect: aspect as T.Aspect,
base: "root",
verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
},
w.push({
type: "VB",
person: T.Person.ThirdSingMale,
info: {
type: "verb",
aspect: aspect as T.Aspect,
base: "root",
verb: removeFVarientsFromVerb(verb),
},
]);
});
});
});
return w;
@ -580,66 +322,26 @@ function getVerbEnding(p: string): {
};
}
// const [ph, rest]: [T.PH | undefined, T.PsString] = v.entry.noOo
// ? [undefined, base]
// : v.entry.sepOo
// ? [{ type: "PH", ps: { p: "و ", f: "óo`" } }, base]
// : ["آ", "ا"].includes(base.p.charAt(0)) && base.f.charAt(0) === "a"
// ? [{ type: "PH", ps: { p: "وا", f: "wáa" } }, removeAStart(base)]
// : ["óo", "oo"].includes(base.f.slice(0, 2))
// ? [{ type: "PH", ps: { p: "و", f: "wÚ" } }, base]
// : ["ée", "ee"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "ای"
// ? [
// { type: "PH", ps: { p: "وي", f: "wée" } },
// {
// p: base.p.slice(2),
// f: base.f.slice(2),
// },
// ]
// : ["é", "e"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "اې"
// ? [
// { type: "PH", ps: { p: "وي", f: "wé" } },
// {
// p: base.p.slice(2),
// f: base.f.slice(1),
// },
// ]
// : ["ó", "o"].includes(base.f[0]) && base.p.slice(0, 2) === "او"
// ? [{ type: "PH", ps: { p: "و", f: "óo`" } }, base]
// : [{ type: "PH", ps: { p: "و", f: "óo" } }, base];
// return [ph, removeAccents(rest)];
// function removeAStart(ps: T.PsString) {
// return {
// p: ps.p.slice(1),
// f: ps.f.slice(ps.f[1] === "a" ? 2 : 1),
// };
// }
// TODO: could handle all sh- verbs for efficiencies sake
function parseIrregularVerb(
s: string
): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
function parseIrregularVerb(s: string): T.ParsedVBE[] {
if (["ته", "راته", "ورته", "درته"].includes(s)) {
return [
[
undefined,
{
type: "VB",
info: {
aspect: "imperfective",
base: "root",
type: "verb",
verb: s.startsWith("را")
? raatlul
: s.startsWith("ور")
? wartlul
: s.startsWith("در")
? dartlul
: tlul,
},
person: T.Person.ThirdSingMale,
{
type: "VB",
info: {
aspect: "imperfective",
base: "root",
type: "verb",
verb: s.startsWith("را")
? raatlul
: s.startsWith("ور")
? wartlul
: s.startsWith("در")
? dartlul
: tlul,
},
],
person: T.Person.ThirdSingMale,
},
];
}
if (s === "شو") {
@ -649,38 +351,28 @@ function parseIrregularVerb(
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
].flatMap((person) =>
[kedulStat, kedulDyn].map<
[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
>((verb) => [
undefined,
{
type: "VB",
info: {
aspect: "perfective",
base: "root",
type: "verb",
verb,
},
person,
[kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
type: "VB",
info: {
aspect: "perfective",
base: "root",
type: "verb",
verb,
},
])
person,
}))
),
...[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].flatMap((person) =>
[kedulStat, kedulDyn].map<
[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
>((verb) => [
undefined,
{
type: "VB",
info: {
aspect: "perfective",
base: "stem",
type: "verb",
verb,
},
person,
[kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
type: "VB",
info: {
aspect: "perfective",
base: "stem",
type: "verb",
verb,
},
])
person,
}))
),
];
}

View File

@ -21,6 +21,7 @@ const maashoom = wordQuery("ماشوم", "noun");
const leedul = wordQuery("لیدل", "verb");
const kenaastul = wordQuery("کېناستل", "verb");
const wurul = wordQuery("وړل", "verb");
const akheestul = wordQuery("اخیستل", "verb");
const tests: {
label: string;
@ -65,6 +66,20 @@ const tests: {
output: [],
error: true,
},
{
input: "زه سړی کور",
output: [],
},
{
input: "زه دې مې وینم",
output: [],
error: true,
},
{
input: "وامې دې خیست",
output: [],
error: true,
},
],
},
{
@ -1104,6 +1119,263 @@ const tests: {
},
],
},
{
label: "negatives and ordering",
cases: [
{
input: "سړی تا نه ویني",
output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makeNounSelection(sarey, undefined),
}),
},
{
key: 2,
block: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(objectPerson),
}),
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: true,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
},
{
input: "سړی نه تا ویني",
output: [],
},
{
input: "سړی تا ونه ویني",
output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makeNounSelection(sarey, undefined),
}),
},
{
key: 2,
block: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(objectPerson),
}),
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: true,
tense: "subjunctiveVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
},
// with regular و or وا perfective heads, the negative needs to be behind the perfective head
{
input: "سړی تا نه وویني",
output: [],
},
{
input: "سړي وانه خیستله",
output: [
{
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makeNounSelection(sarey, undefined),
}),
},
{
key: 2,
block: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(T.Person.ThirdSingFemale),
}),
},
],
verb: {
type: "verb",
verb: akheestul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: true,
tense: "perfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: true,
shrinkServant: false,
},
},
],
},
{
input: "سړي نه واخیستله",
output: [],
},
// but for other perfective heads, the negative can go before or after
{
input: "زه نه کېنم",
output: getPeople(1, "sing").flatMap((subjectPerson) =>
(
["presentVerb", "subjunctiveVerb"] as const
).map<T.VPSelectionComplete>((tense) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(subjectPerson),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: "none",
},
},
],
verb: {
type: "verb",
verb: kenaastul,
transitivity: "intransitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: true,
tense,
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
}))
),
},
{
input: "زه کېنه نم",
output: getPeople(1, "sing").map<T.VPSelectionComplete>(
(subjectPerson) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(subjectPerson),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: "none",
},
},
],
verb: {
type: "verb",
verb: kenaastul,
transitivity: "intransitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: true,
tense: "subjunctiveVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})
),
},
],
},
{
label: "should check for subject / object conflicts",
cases: [
{
input: "زه ما وینم",
output: [],
error: true,
},
{
input: "ما زه ولیدلم",
output: [],
error: true,
},
{
input: "تاسو تا ولیدئ",
output: [],
error: true,
},
{
input: "زه مې وینم",
output: [],
error: true,
},
{
input: "زه مې ولیدم",
output: [],
error: true,
},
{
input: "ومې لیدم",
output: [],
error: true,
},
{
input: "وینم مې",
output: [],
error: true,
},
],
},
];
tests.forEach(({ label, cases }) => {

View File

@ -4,30 +4,16 @@ import {
makeObjectSelectionComplete,
makeSubjectSelectionComplete,
} from "../phrase-building/blocks-utils";
import { getPersonFromNP, isPastTense } from "../phrase-building/vp-tools";
import {
getPersonFromNP,
isInvalidSubjObjCombo,
isPastTense,
} from "../phrase-building/vp-tools";
import { parseBlocks } from "./parse-blocks";
import { makePronounSelection } from "../phrase-building/make-selections";
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
// to hide equatives type-doubling issue
// demo
// ماشوم
// ماشومان
// خوږ
// masc plur
// past tense
// ماشومانو ښځه ولیدله
// ماشومانو ښځه ولیدله
// cool examples:
// زه ماشوم وهم
// وهلم // خواږه
// ومې لیدې
// ویې وهم
// this should also conjugate to
// وامې نه خیسته
// وامې نه خیستلو
@ -35,19 +21,11 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
// وامې نه اخیست
// waa-me nú akheest
// TODO: add tests for negatives and negative order
// TODO: imperfective past should also be "was going to / would have"
// map over transitivities, to give transitive / gramm. transitive options
// make impossible subjects like I saw me, error
// TODO: learn how to yank / use plugin for JSON neovim
// learn to use jq to edit selected json in vim ?? COOOL
// TODO: transitivity options
// TODO: the و is really making it slow down... why?
export function parseVP(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
@ -58,11 +36,11 @@ export function parseVP(
}
const blocks = parseBlocks(tokens, lookup, verbLookup, [], []);
return bindParseResult(blocks, (tokens, { blocks, kids }) => {
const phIndex = blocks.findIndex((x) => "type" in x && x.type === "PH");
const vbeIndex = blocks.findIndex((x) => "type" in x && x.type === "VB");
const phIndex = blocks.findIndex((x) => x.type === "PH");
const vbeIndex = blocks.findIndex((x) => x.type === "VB");
const ba = !!kids.find((k) => k === "ba");
const negIndex = blocks.findIndex(
(x) => "type" in x && x.type === "negative" && !x.imperative
(x) => x.type === "negative" && !x.imperative
);
const ph = phIndex !== -1 ? (blocks[phIndex] as T.ParsedPH) : undefined;
const verb =
@ -110,10 +88,7 @@ export function parseVP(
voice: "active",
};
const nps = blocks.filter(
(x): x is { inflected: boolean; selection: T.NPSelection } =>
"inflected" in x
);
const nps = blocks.filter((x): x is T.ParsedNP => x.type === "NP");
// TODO: check that verb and PH match
if (verb.info.verb.entry.c.includes("intrans")) {
const errors: T.ParseError[] = [];
@ -258,7 +233,9 @@ export function parseVP(
shrinkServant: true,
},
} as T.VPSelectionComplete,
errors
pronounConflictInBlocks(blocks)
? [...errors, { message: "invalid subject/object combo" }]
: errors
)
);
}
@ -357,7 +334,9 @@ export function parseVP(
externalComplement: undefined,
form,
} as T.VPSelectionComplete,
errors,
errors: pronounConflictInBlocks(blocks)
? [...errors, { message: "invalid subject/object combo" }]
: errors,
}));
});
} else {
@ -369,6 +348,16 @@ export function parseVP(
] as const
).flatMap(([s, o, flip]) => {
const errors: T.ParseError[] = [];
if (
isInvalidSubjObjCombo(
getPersonFromNP(s.selection),
getPersonFromNP(o.selection)
)
) {
errors.push({
message: "invalid subject/object combo",
});
}
if (!s.inflected) {
errors.push({
message:
@ -422,6 +411,16 @@ export function parseVP(
] as const
).flatMap(([s, o, flip]) => {
const errors: T.ParseError[] = [];
if (
isInvalidSubjObjCombo(
getPersonFromNP(s.selection),
getPersonFromNP(o.selection)
)
) {
errors.push({
message: "invalid subject/object combo",
});
}
if (isFirstOrSecondPersPronoun(o.selection)) {
if (!o.inflected) {
errors.push({
@ -563,3 +562,16 @@ function negativeInPlace({
}
return true;
}
/**
 * Reports whether the subject and object held in a set of completed VP blocks
 * form a pairing that `isInvalidSubjObjCombo` rejects.
 *
 * @param blocks - the blocks of a completed VP selection; the first block
 *   whose `block.type` is "subjectSelection" / "objectSelection" is used
 * @returns `true` only when both a subject and an object block are present,
 *   a person can be determined for the object, and `isInvalidSubjObjCombo`
 *   flags the pair; `false` in every other case
 */
function pronounConflictInBlocks(blocks: T.VPSBlockComplete[]): boolean {
  // `find` can come back empty, so the casts admit `undefined` instead of
  // asserting the block is always there.
  const subj = blocks.find((b) => b.block.type === "subjectSelection")
    ?.block as T.SubjectSelectionComplete | undefined;
  const obj = blocks.find((b) => b.block.type === "objectSelection")
    ?.block as T.ObjectSelectionComplete | undefined;
  // Without both a subject and an object there is nothing to conflict;
  // bail out rather than dereferencing `undefined`.
  if (subj === undefined || obj === undefined) {
    return false;
  }
  const subjPerson = getPersonFromNP(subj.selection);
  const objPerson = getPersonFromNP(obj.selection);
  // No person could be determined for the object, so no clash is possible.
  if (objPerson === undefined) {
    return false;
  }
  return isInvalidSubjObjCombo(subjPerson, objPerson);
}

View File

@ -1196,7 +1196,13 @@ export type Block = {
export type ParsedBlock = ParsedNP | ParsedPH | ParsedVBE | NegativeBlock;
/**
 * A run of parsed kids, tagged so it can sit in the same result list as the
 * `ParsedBlock` variants and be told apart by checking `type === "kids"`.
 */
export type ParsedKidsSection = {
type: "kids";
kids: ParsedKid[];
};
/**
 * A parsed noun-phrase block: one of the `ParsedBlock` variants, identified
 * by `type === "NP"`.
 */
export type ParsedNP = {
type: "NP";
// flag carried alongside the selection; presumably whether the NP was found
// in its inflected form — confirm against the NP parser
inflected: boolean;
selection: NPSelection;
};