Big improvement: removed the redundant PH parsing, added tests for negatives, and added checking for S/O conflicts in VP parsing
This commit is contained in:
parent
a7709c4299
commit
a3ac5e2cb3
Binary file not shown.
Before Width: | Height: | Size: 609 KiB After Width: | Height: | Size: 146 KiB |
|
@ -15,7 +15,10 @@ import {
|
|||
} from "./src/verb-info";
|
||||
import { makeVPSelectionState } from "./src/phrase-building/verb-selection";
|
||||
import { vpsReducer } from "./src/phrase-building/vps-reducer";
|
||||
import { isPastTense } from "./src/phrase-building/vp-tools";
|
||||
import {
|
||||
isPastTense,
|
||||
isInvalidSubjObjCombo,
|
||||
} from "./src/phrase-building/vp-tools";
|
||||
import { getInflectionPattern } from "./src/inflection-pattern";
|
||||
import { makePsString, removeFVarients } from "./src/accent-and-ps-utils";
|
||||
|
||||
|
@ -45,12 +48,7 @@ import {
|
|||
standardizePhonetics,
|
||||
} from "./src/standardize-pashto";
|
||||
import { phoneticsToDiacritics } from "./src/phonetics-to-diacritics";
|
||||
import {
|
||||
randomPerson,
|
||||
isInvalidSubjObjCombo,
|
||||
randomSubjObj,
|
||||
getEnglishVerb,
|
||||
} from "./src/np-tools";
|
||||
import { randomPerson, randomSubjObj, getEnglishVerb } from "./src/np-tools";
|
||||
import {
|
||||
getEnglishFromRendered,
|
||||
getPashtoFromRendered,
|
||||
|
|
|
@ -1,80 +1,79 @@
|
|||
import * as T from "../../types";
|
||||
import { isFirstPerson, parseEc, isSecondPerson } from "./misc-helpers";
|
||||
import { parseEc } from "./misc-helpers";
|
||||
import { isInvalidSubjObjCombo } from "./phrase-building/vp-tools";
|
||||
|
||||
function getRandPers(): T.Person {
|
||||
return Math.floor(Math.random() * 12);
|
||||
return Math.floor(Math.random() * 12);
|
||||
}
|
||||
|
||||
export function randomPerson(a?: { prev?: T.Person, counterPart?: T.VerbObject | T.NPSelection }) {
|
||||
// no restrictions, just get any person
|
||||
if (!a) {
|
||||
return getRandPers();
|
||||
}
|
||||
if (a.counterPart !== undefined && typeof a.counterPart === "object" && a.counterPart.selection.type === "pronoun") {
|
||||
// with counterpart pronoun
|
||||
let newP = 0;
|
||||
do {
|
||||
newP = getRandPers();
|
||||
} while (
|
||||
isInvalidSubjObjCombo(a.counterPart.selection.person, newP)
|
||||
||
|
||||
(newP === a.prev)
|
||||
);
|
||||
return newP;
|
||||
}
|
||||
// without counterpart pronoun, just previous
|
||||
export function randomPerson(a?: {
|
||||
prev?: T.Person;
|
||||
counterPart?: T.VerbObject | T.NPSelection;
|
||||
}) {
|
||||
// no restrictions, just get any person
|
||||
if (!a) {
|
||||
return getRandPers();
|
||||
}
|
||||
if (
|
||||
a.counterPart !== undefined &&
|
||||
typeof a.counterPart === "object" &&
|
||||
a.counterPart.selection.type === "pronoun"
|
||||
) {
|
||||
// with counterpart pronoun
|
||||
let newP = 0;
|
||||
do {
|
||||
newP = getRandPers();
|
||||
} while (newP === a.prev);
|
||||
return newP;
|
||||
}
|
||||
|
||||
export function isInvalidSubjObjCombo(subj: T.Person, obj: T.Person): boolean {
|
||||
return (
|
||||
(isFirstPerson(subj) && isFirstPerson(obj))
|
||||
||
|
||||
(isSecondPerson(subj) && isSecondPerson(obj))
|
||||
);
|
||||
}
|
||||
|
||||
export function randomSubjObj(old?: { subj: T.Person, obj?: T.Person }): { subj: T.Person, obj: T.Person } {
|
||||
let subj = 0;
|
||||
let obj = 0;
|
||||
do {
|
||||
subj = getRandPers();
|
||||
obj = getRandPers();
|
||||
newP = getRandPers();
|
||||
} while (
|
||||
(old && ((old.subj === subj) || (old.obj === obj)))
|
||||
||
|
||||
isInvalidSubjObjCombo(subj, obj)
|
||||
isInvalidSubjObjCombo(a.counterPart.selection.person, newP) ||
|
||||
newP === a.prev
|
||||
);
|
||||
return { subj, obj };
|
||||
return newP;
|
||||
}
|
||||
// without counterpart pronoun, just previous
|
||||
let newP = 0;
|
||||
do {
|
||||
newP = getRandPers();
|
||||
} while (newP === a.prev);
|
||||
return newP;
|
||||
}
|
||||
|
||||
export function randomSubjObj(old?: { subj: T.Person; obj?: T.Person }): {
|
||||
subj: T.Person;
|
||||
obj: T.Person;
|
||||
} {
|
||||
let subj = 0;
|
||||
let obj = 0;
|
||||
do {
|
||||
subj = getRandPers();
|
||||
obj = getRandPers();
|
||||
} while (
|
||||
(old && (old.subj === subj || old.obj === obj)) ||
|
||||
isInvalidSubjObjCombo(subj, obj)
|
||||
);
|
||||
return { subj, obj };
|
||||
}
|
||||
export function getEnglishVerb(entry: T.DictionaryEntry): string {
|
||||
if (!entry.ec) {
|
||||
console.error("errored verb");
|
||||
console.error(entry);
|
||||
throw new Error("no english information for verb");
|
||||
}
|
||||
if (entry.ep) {
|
||||
const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec;
|
||||
return `to ${ec} ${entry.ep}`;
|
||||
}
|
||||
const ec = parseEc(entry.ec);
|
||||
return `to ${ec[0]}`;
|
||||
if (!entry.ec) {
|
||||
console.error("errored verb");
|
||||
console.error(entry);
|
||||
throw new Error("no english information for verb");
|
||||
}
|
||||
if (entry.ep) {
|
||||
const ec = entry.ec.includes(",") ? parseEc(entry.ec)[0] : entry.ec;
|
||||
return `to ${ec} ${entry.ep}`;
|
||||
}
|
||||
const ec = parseEc(entry.ec);
|
||||
return `to ${ec[0]}`;
|
||||
}
|
||||
|
||||
export function getEnglishParticiple(entry: T.DictionaryEntry): string {
|
||||
if (!entry.ec) {
|
||||
throw new Error("no english information for participle");
|
||||
}
|
||||
const ec = parseEc(entry.ec);
|
||||
if (entry.ep && ec[0] === "am") {
|
||||
return `to be/being ${entry.ep}`;
|
||||
}
|
||||
const participle = `${ec[2]} / to ${ec[0]}`;
|
||||
return (entry.ep)
|
||||
? `${participle} ${entry.ep}`
|
||||
: participle;
|
||||
}
|
||||
if (!entry.ec) {
|
||||
throw new Error("no english information for participle");
|
||||
}
|
||||
const ec = parseEc(entry.ec);
|
||||
if (entry.ep && ec[0] === "am") {
|
||||
return `to be/being ${entry.ep}`;
|
||||
}
|
||||
const participle = `${ec[2]} / to ${ec[0]}`;
|
||||
return entry.ep ? `${participle} ${entry.ep}` : participle;
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ export function verbLookup(input: string): T.VerbEntry[] {
|
|||
// IMPORTANT TODO FOR EFFICIENCY!
|
||||
// check endings TODO: ONLY LOOKUP THE VERB POSSIBILITIES IF IT HAS A LEGITIMATE ENDING
|
||||
// if there's no legit verb ending and no tpp possibilities, just return an empty array
|
||||
const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
|
||||
// const sWoutOo = s.startsWith("و") ? s.slice(1) : undefined;
|
||||
const checkTpp = shouldCheckTpp(input);
|
||||
const fromAawu = checkTpp && undoAaXuPattern(input);
|
||||
const inputWoutOo =
|
||||
|
@ -61,86 +61,45 @@ export function verbLookup(input: string): T.VerbEntry[] {
|
|||
// TODO: don't do the slicing off and checking for useless things when you have a NON verb ending (like with the tpp)
|
||||
if (s.endsWith("ېږ")) {
|
||||
return verbs.filter(
|
||||
sWoutOo
|
||||
? ({ entry }) =>
|
||||
[s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
|
||||
[
|
||||
s.slice(0, -1) + "دل",
|
||||
sWoutOo.slice(0, -1) + "دل",
|
||||
sAddedAa.slice(0, -1) + "دل",
|
||||
].includes(entry.p) ||
|
||||
[s, sWoutOo, sAddedAa].includes(entry.p) ||
|
||||
(entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
|
||||
entry.prp === s ||
|
||||
entry.ssp === s
|
||||
: ({ entry }) =>
|
||||
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
|
||||
[s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
|
||||
entry.p
|
||||
) ||
|
||||
[s, sAddedAa].includes(entry.p) ||
|
||||
[s, sAddedAa].includes(entry.psp || "") ||
|
||||
[s, sAddedAa].includes(entry.prp || "") ||
|
||||
[s, sAddedAa].includes(entry.ssp || "")
|
||||
({ entry }) =>
|
||||
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
|
||||
[s.slice(0, -1) + "دل", sAddedAa.slice(0, -1) + "دل"].includes(
|
||||
entry.p
|
||||
) ||
|
||||
[s, sAddedAa].includes(entry.p) ||
|
||||
[s, sAddedAa].includes(entry.psp || "") ||
|
||||
[s, sAddedAa].includes(entry.prp || "") ||
|
||||
[s, sAddedAa].includes(entry.ssp || "")
|
||||
);
|
||||
}
|
||||
return verbs.filter(
|
||||
sWoutOo
|
||||
? ({ entry }) =>
|
||||
[s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -1)) ||
|
||||
// for short intransitive forms
|
||||
[s, sWoutOo, sAddedAa].includes(entry.p.slice(0, -3)) ||
|
||||
[s, sWoutOo, sAddedAa].includes(entry.p) ||
|
||||
(entry.psp && [s, sWoutOo, sAddedAa].includes(entry.psp)) ||
|
||||
(checkTpp &&
|
||||
({ entry }) =>
|
||||
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
|
||||
// for short intransitive forms
|
||||
[s, sAddedAa].includes(entry.p.slice(0, -3)) ||
|
||||
[s, sAddedAa].includes(entry.p) ||
|
||||
(checkTpp &&
|
||||
[input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) ||
|
||||
(entry.tppp &&
|
||||
arraysHaveCommon(
|
||||
[input, inputWoutOo, sAddedAa, inputAddedAa],
|
||||
splitVarients(entry.tppp)
|
||||
)) ||
|
||||
[s, sAddedAa].includes(entry.psp || "") ||
|
||||
arraysHaveCommon([entry.prp, entry.prp?.slice(0, -1)], [s, sAddedAa]) ||
|
||||
[s, sAddedAa].includes(entry.ssp || "") ||
|
||||
(entry.separationAtP &&
|
||||
// TODO this is super ugly, do check of short and long function
|
||||
(entry.p.slice(entry.separationAtP) === s ||
|
||||
entry.p.slice(entry.separationAtP, -1) === s ||
|
||||
(checkTpp && entry.p.slice(entry.separationAtP, -1) === input) ||
|
||||
entry.psp?.slice(entry.separationAtP) === s ||
|
||||
(entry.prp &&
|
||||
[
|
||||
input.slice(1),
|
||||
fromAawu && fromAawu.slice(-1),
|
||||
inputAddedAa,
|
||||
].includes(entry.p.slice(0, -1))) ||
|
||||
(entry.tppp &&
|
||||
arraysHaveCommon(
|
||||
[input, inputWoutOo, sAddedAa],
|
||||
splitVarients(entry.tppp)
|
||||
)) ||
|
||||
arraysHaveCommon(
|
||||
[s, sAddedAa, "و" + s],
|
||||
[entry.prp, entry.prp?.slice(0, -1)]
|
||||
) ||
|
||||
[s, sAddedAa].includes(entry.ssp || "") ||
|
||||
(entry.separationAtP &&
|
||||
(entry.p.slice(entry.separationAtP) === s ||
|
||||
entry.psp?.slice(entry.separationAtP) === s))
|
||||
: ({ entry }) =>
|
||||
[s, sAddedAa].includes(entry.p.slice(0, -1)) ||
|
||||
// for short intransitive forms
|
||||
[s, sAddedAa].includes(entry.p.slice(0, -3)) ||
|
||||
[s, sAddedAa].includes(entry.p) ||
|
||||
(checkTpp &&
|
||||
[input, fromAawu, sAddedAa].includes(entry.p.slice(0, -1))) ||
|
||||
(entry.tppp &&
|
||||
arraysHaveCommon(
|
||||
[input, inputWoutOo, sAddedAa, inputAddedAa],
|
||||
splitVarients(entry.tppp)
|
||||
)) ||
|
||||
[s, sAddedAa].includes(entry.psp || "") ||
|
||||
arraysHaveCommon(
|
||||
[entry.prp, entry.prp?.slice(0, -1)],
|
||||
[s, sAddedAa, "و" + s]
|
||||
) ||
|
||||
[s, sAddedAa, "و" + s].includes(entry.ssp || "") ||
|
||||
(entry.separationAtP &&
|
||||
// TODO this is super ugly, do check of short and long function
|
||||
(entry.p.slice(entry.separationAtP) === s ||
|
||||
entry.p.slice(entry.separationAtP, -1) === s ||
|
||||
(checkTpp && entry.p.slice(entry.separationAtP, -1) === input) ||
|
||||
entry.psp?.slice(entry.separationAtP) === s ||
|
||||
(entry.prp &&
|
||||
[
|
||||
entry.prp.slice(entry.separationAtP),
|
||||
entry.prp.slice(entry.separationAtP).slice(0, -1),
|
||||
].includes(s)) ||
|
||||
(entry.ssp && entry.ssp.slice(entry.separationAtP) === s)))
|
||||
entry.prp.slice(entry.separationAtP),
|
||||
entry.prp.slice(entry.separationAtP).slice(0, -1),
|
||||
].includes(s)) ||
|
||||
(entry.ssp && entry.ssp.slice(entry.separationAtP) === s)))
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import * as T from "../../../types";
|
||||
import { fmapParseResult } from "../fp-ps";
|
||||
import { parseKidsSection } from "./parse-kids-section";
|
||||
import { parseNeg } from "./parse-negative";
|
||||
import { parseNP } from "./parse-np";
|
||||
|
@ -21,22 +20,22 @@ export function parseBlocks(
|
|||
return returnParseResult(tokens, { blocks, kids });
|
||||
}
|
||||
const prevPh: T.ParsedPH | undefined = blocks.find(
|
||||
(b): b is T.ParsedPH => "type" in b && b.type === "PH"
|
||||
(b): b is T.ParsedPH => b.type === "PH"
|
||||
);
|
||||
const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
|
||||
const np = prevPh ? [] : fmapParseResult((x) => [x], parseNP(tokens, lookup));
|
||||
const np = prevPh ? [] : parseNP(tokens, lookup);
|
||||
// UHOH... This could cause double paths ... maybe don't parse the PH in the parse VB!
|
||||
const ph =
|
||||
vbExists || prevPh ? [] : fmapParseResult((x) => [x], parsePH(tokens));
|
||||
const vb = fmapParseResult(
|
||||
([ph, v]) => (ph ? [ph, v] : [v]),
|
||||
parseVerb(tokens, verbLookup)
|
||||
);
|
||||
const neg = fmapParseResult((x) => [x], parseNeg(tokens));
|
||||
const ph = vbExists || prevPh ? [] : parsePH(tokens);
|
||||
const vb = parseVerb(tokens, verbLookup);
|
||||
const neg = parseNeg(tokens);
|
||||
const kidsR = parseKidsSection(tokens, []);
|
||||
const allResults = [...np, ...ph, ...neg, ...vb, ...kidsR] as T.ParseResult<
|
||||
T.ParsedBlock[] | { kids: T.ParsedKid[] }
|
||||
>[];
|
||||
const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
|
||||
...np,
|
||||
...ph,
|
||||
...neg,
|
||||
...vb,
|
||||
...kidsR,
|
||||
];
|
||||
// TODO: is this necessary?
|
||||
// if (!allResults.length) {
|
||||
// return [
|
||||
|
@ -47,10 +46,9 @@ export function parseBlocks(
|
|||
// },
|
||||
// ];
|
||||
// }
|
||||
console.log({ allResults });
|
||||
return bindParseResult(allResults, (tokens, r) => {
|
||||
const errors: T.ParseError[] = [];
|
||||
if ("kids" in r) {
|
||||
if (r.type === "kids") {
|
||||
return {
|
||||
next: parseBlocks(tokens, lookup, verbLookup, blocks, [
|
||||
...kids,
|
||||
|
@ -62,23 +60,21 @@ export function parseBlocks(
|
|||
: [],
|
||||
};
|
||||
}
|
||||
if (prevPh && r.some((x) => "type" in x && x.type === "PH")) {
|
||||
if (prevPh && r.type === "PH") {
|
||||
return [];
|
||||
}
|
||||
const vb = r.find((x): x is T.ParsedVBE => "type" in x && x.type === "VB");
|
||||
if (!phMatches(prevPh, vb)) {
|
||||
return [];
|
||||
// TODO: will have to handle welded
|
||||
if (r.type === "VB") {
|
||||
if (!phMatches(prevPh, r)) {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
// don't allow two negatives
|
||||
if (
|
||||
"type" in r[0] &&
|
||||
r[0].type === "negative" &&
|
||||
blocks.some((b) => "type" in b && b.type === "negative")
|
||||
) {
|
||||
if (r.type === "negative" && blocks.some((b) => b.type === "negative")) {
|
||||
return [];
|
||||
}
|
||||
return {
|
||||
next: parseBlocks(tokens, lookup, verbLookup, [...blocks, ...r], kids),
|
||||
next: parseBlocks(tokens, lookup, verbLookup, [...blocks, r], kids),
|
||||
errors,
|
||||
};
|
||||
});
|
||||
|
|
|
@ -5,14 +5,18 @@ import { bindParseResult, returnParseResult } from "./utils";
|
|||
export function parseKidsSection(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
prevKids: T.ParsedKid[]
|
||||
): T.ParseResult<{ kids: T.ParsedKid[] }>[] {
|
||||
): T.ParseResult<T.ParsedKidsSection>[] {
|
||||
if (tokens.length === 0) {
|
||||
return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
|
||||
return prevKids.length
|
||||
? returnParseResult(tokens, { type: "kids", kids: prevKids })
|
||||
: [];
|
||||
}
|
||||
const parsedKid = parseKid(tokens);
|
||||
// TODO: is this even necessary ??
|
||||
if (!parsedKid.length) {
|
||||
return prevKids.length ? returnParseResult(tokens, { kids: prevKids }) : [];
|
||||
return prevKids.length
|
||||
? returnParseResult(tokens, { type: "kids", kids: prevKids })
|
||||
: [];
|
||||
}
|
||||
return bindParseResult(parsedKid, (tokens, r) => {
|
||||
// return parseKidsSection(tokens, [...prevKids, r]);
|
||||
|
|
|
@ -21,11 +21,9 @@ export function parseNP(
|
|||
inflected: boolean;
|
||||
selection: T.NounSelection;
|
||||
}
|
||||
): {
|
||||
inflected: boolean;
|
||||
selection: T.NPSelection;
|
||||
} {
|
||||
): T.ParsedNP {
|
||||
return {
|
||||
type: "NP",
|
||||
inflected: a.inflected,
|
||||
selection: {
|
||||
type: "NP",
|
||||
|
|
|
@ -18,7 +18,7 @@ const phs = [
|
|||
|
||||
export function parsePH(
|
||||
tokens: Readonly<T.Token[]>
|
||||
): T.ParseResult<{ type: "PH"; s: string }>[] {
|
||||
): T.ParseResult<T.ParsedPH>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -22,7 +22,7 @@ import {
|
|||
export function parseVerb(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
verbLookup: (s: string) => T.VerbEntry[]
|
||||
): T.ParseResult<[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]>[] {
|
||||
): T.ParseResult<T.ParsedVBE>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
@ -57,8 +57,8 @@ function matchVerbs(
|
|||
root: T.Person[];
|
||||
stem: T.Person[];
|
||||
}
|
||||
): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
|
||||
const w: ReturnType<typeof matchVerbs> = [];
|
||||
): T.ParsedVBE[] {
|
||||
const w: T.ParsedVBE[] = [];
|
||||
const lEnding = s.endsWith("ل");
|
||||
const base = s.endsWith("ل") ? s : s.slice(0, -1);
|
||||
const matchShortOrLong = (b: string, x: string) => {
|
||||
|
@ -80,167 +80,52 @@ function matchVerbs(
|
|||
return e.p.slice(0, -1) === base;
|
||||
}
|
||||
}),
|
||||
perfective: entries.reduce<
|
||||
{ ph: string | undefined; entry: T.VerbEntry }[]
|
||||
>((acc, entry) => {
|
||||
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
||||
const e = entry.entry;
|
||||
const baseWAa = "ا" + base;
|
||||
if (e.c.includes("comp")) {
|
||||
return acc;
|
||||
}
|
||||
if (e.ssp) {
|
||||
const bRest = e.separationAtP ? e.ssp.slice(e.separationAtP) : "";
|
||||
if (bRest === base) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (e.ssp === base) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: e.separationAtF
|
||||
? e.ssp.slice(0, e.separationAtP)
|
||||
: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if (e.separationAtP) {
|
||||
const bRest = e.ssp.slice(e.separationAtP);
|
||||
if (bRest === base) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
} else {
|
||||
if (e.ssp === base) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
}
|
||||
} else if (e.psp) {
|
||||
const bRest = e.separationAtP ? e.psp.slice(e.separationAtP) : "";
|
||||
if (bRest === base) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (e.psp === base && e.separationAtP) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: e.psp.slice(0, e.separationAtP),
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (!e.sepOo) {
|
||||
if (base.startsWith("وا") && base.slice(1) === e.psp) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "وا",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if (e.separationAtP) {
|
||||
const bRest = e.psp.slice(e.separationAtP);
|
||||
if (bRest === base) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
if ((base.startsWith("و") && base.slice(1)) === e.psp) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "و",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else {
|
||||
if (!e.sepOo) {
|
||||
if (baseWAa === e.psp) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
}
|
||||
if (baseWAa === e.psp) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if (base === e.psp) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
}
|
||||
if (base === e.psp) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
} else if (e.c.includes("intrans.")) {
|
||||
const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
|
||||
const miniRootEg = miniRoot + "ېږ";
|
||||
if ([miniRoot, miniRootEg].includes(base)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else if (!e.sepOo) {
|
||||
if (
|
||||
base.startsWith("وا") &&
|
||||
[miniRoot, miniRootEg].includes(base.slice(1))
|
||||
) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "وا",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else if (
|
||||
base.startsWith("و") &&
|
||||
[miniRoot, miniRootEg].includes(base.slice(1))
|
||||
) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "و",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
return [...acc, entry];
|
||||
}
|
||||
} else {
|
||||
const eb = e.p.slice(0, -1);
|
||||
if (eb === base) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
return [...acc, entry];
|
||||
} else if (!e.sepOo) {
|
||||
if (base.startsWith("وا") && eb === base.slice(1)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "وا",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (base.startsWith("و") && eb === base.slice(1)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "و",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (baseWAa === base.slice(1)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
return [...acc, entry];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -250,19 +135,16 @@ function matchVerbs(
|
|||
Object.entries(stemMatches).forEach(([aspect, entries]) => {
|
||||
entries.forEach((verb) => {
|
||||
people.stem.forEach((person) => {
|
||||
w.push([
|
||||
"ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
|
||||
{
|
||||
type: "VB",
|
||||
person,
|
||||
info: {
|
||||
type: "verb",
|
||||
aspect: aspect as T.Aspect,
|
||||
base: "stem",
|
||||
verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
|
||||
},
|
||||
w.push({
|
||||
type: "VB",
|
||||
person,
|
||||
info: {
|
||||
type: "verb",
|
||||
aspect: aspect as T.Aspect,
|
||||
base: "stem",
|
||||
verb: removeFVarientsFromVerb(verb),
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
@ -272,56 +154,21 @@ function matchVerbs(
|
|||
imperfective: entries.filter(
|
||||
({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
|
||||
),
|
||||
perfective: entries.reduce<
|
||||
{ ph: string | undefined; entry: T.VerbEntry }[]
|
||||
>((acc, entry) => {
|
||||
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
||||
const e = entry.entry;
|
||||
if (e.c.includes("comp")) {
|
||||
return acc;
|
||||
}
|
||||
if (e.separationAtP) {
|
||||
const b = e.prp || e.p;
|
||||
const bHead = b.slice(0, e.separationAtP);
|
||||
const bRest = b.slice(e.separationAtP);
|
||||
if (matchShortOrLong(base, b)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: bHead,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else if (matchShortOrLong(base, bRest)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if (matchShortOrLong(base, bRest)) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
} else {
|
||||
const baseNoOo = base.startsWith("و") && base.slice(1);
|
||||
const p = e.prp || e.p;
|
||||
if (baseNoOo && matchShortOrLong(baseNoOo, p)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else if (
|
||||
matchShortOrLong(base, p) ||
|
||||
matchShortOrLong("ا" + base, p)
|
||||
) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
}
|
||||
return acc;
|
||||
|
@ -331,19 +178,16 @@ function matchVerbs(
|
|||
Object.entries(rootMatches).forEach(([aspect, entries]) => {
|
||||
entries.forEach((verb) => {
|
||||
people.root.forEach((person) => {
|
||||
w.push([
|
||||
"ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
|
||||
{
|
||||
type: "VB",
|
||||
person,
|
||||
info: {
|
||||
type: "verb",
|
||||
aspect: aspect as T.Aspect,
|
||||
base: "root",
|
||||
verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
|
||||
},
|
||||
w.push({
|
||||
type: "VB",
|
||||
person,
|
||||
info: {
|
||||
type: "verb",
|
||||
aspect: aspect as T.Aspect,
|
||||
base: "root",
|
||||
verb: removeFVarientsFromVerb(verb),
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
@ -351,8 +195,6 @@ function matchVerbs(
|
|||
const hamzaEnd = s.at(-1) === "ه";
|
||||
const oEnd = s.at(-1) === "و";
|
||||
const abruptEnd = ["د", "ت", "ړ"].includes(s.slice(-1));
|
||||
const b = hamzaEnd || oEnd ? base : s;
|
||||
const bNoOo = b.startsWith("و") && b.slice(1);
|
||||
const tppMatches = {
|
||||
imperfective: entries.filter(
|
||||
({ entry: e }) =>
|
||||
|
@ -363,163 +205,63 @@ function matchVerbs(
|
|||
(hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1)))
|
||||
// TODO: if check for modified aaXu thing!
|
||||
),
|
||||
perfective: entries.reduce<
|
||||
{ ph: string | undefined; entry: T.VerbEntry }[]
|
||||
>((acc, entry) => {
|
||||
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
||||
const e = entry.entry;
|
||||
if (e.c.includes("comp")) {
|
||||
return acc;
|
||||
}
|
||||
if (e.separationAtP) {
|
||||
const b = e.prp || e.p;
|
||||
const bHead = b.slice(0, e.separationAtP);
|
||||
const bRest = b.slice(e.separationAtP);
|
||||
if (bRest === "شول") {
|
||||
return acc;
|
||||
}
|
||||
if (abruptEnd) {
|
||||
if (s === b.slice(0, -1)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: bHead,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (s === bRest.slice(0, -1)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
return [...acc, entry];
|
||||
}
|
||||
} else if (hamzaEnd) {
|
||||
if (base === b.slice(0, -1)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: bHead,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (base === bRest.slice(0, -1)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
return [...acc, entry];
|
||||
}
|
||||
} else if (oEnd) {
|
||||
if ([b, b.slice(0, -1)].includes(base)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: bHead,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
}
|
||||
if ([bRest, bRest.slice(0, -1)].includes(base)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
return [...acc, entry];
|
||||
}
|
||||
}
|
||||
} else if (!e.prp) {
|
||||
if (oEnd) {
|
||||
if (bNoOo && [e.p, e.p.slice(0, -1).includes(bNoOo)]) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "و",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else if ([e.p, e.p.slice(0, -1)].includes(base)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if ([e.p, e.p.slice(0, -1)].includes(base)) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
} else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
|
||||
const b = hamzaEnd ? base : s;
|
||||
const p = e.p.slice(0, -1);
|
||||
if (bNoOo && bNoOo === p) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: "و",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else if (b === p) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if (b === p) {
|
||||
return [...acc, entry];
|
||||
}
|
||||
}
|
||||
}
|
||||
const sNoOo = s.startsWith("و") && s.slice(1);
|
||||
if (isInVarients(e.tppp, sNoOo)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: !e.sepOo && e.p.at(0) === "ا" ? "وا" : "و",
|
||||
entry,
|
||||
},
|
||||
];
|
||||
} else if (isInVarients(e.tppp, s)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
if (isInVarients(e.tppp, s)) {
|
||||
return [...acc, entry];
|
||||
} else if (isInVarients(e.tppp, "ا" + s)) {
|
||||
return [
|
||||
...acc,
|
||||
{
|
||||
ph: undefined,
|
||||
entry,
|
||||
},
|
||||
];
|
||||
return [...acc, entry];
|
||||
}
|
||||
return acc;
|
||||
}, []),
|
||||
};
|
||||
Object.entries(tppMatches).forEach(([aspect, entries]) => {
|
||||
entries.forEach((verb) => {
|
||||
w.push([
|
||||
"ph" in verb && verb.ph ? { type: "PH", s: verb.ph } : undefined,
|
||||
{
|
||||
type: "VB",
|
||||
person: T.Person.ThirdSingMale,
|
||||
info: {
|
||||
type: "verb",
|
||||
aspect: aspect as T.Aspect,
|
||||
base: "root",
|
||||
verb: "ph" in verb ? removeFVarientsFromVerb(verb.entry) : verb,
|
||||
},
|
||||
w.push({
|
||||
type: "VB",
|
||||
person: T.Person.ThirdSingMale,
|
||||
info: {
|
||||
type: "verb",
|
||||
aspect: aspect as T.Aspect,
|
||||
base: "root",
|
||||
verb: removeFVarientsFromVerb(verb),
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
return w;
|
||||
|
@ -580,66 +322,26 @@ function getVerbEnding(p: string): {
|
|||
};
|
||||
}
|
||||
|
||||
// const [ph, rest]: [T.PH | undefined, T.PsString] = v.entry.noOo
|
||||
// ? [undefined, base]
|
||||
// : v.entry.sepOo
|
||||
// ? [{ type: "PH", ps: { p: "و ", f: "óo`" } }, base]
|
||||
// : ["آ", "ا"].includes(base.p.charAt(0)) && base.f.charAt(0) === "a"
|
||||
// ? [{ type: "PH", ps: { p: "وا", f: "wáa" } }, removeAStart(base)]
|
||||
// : ["óo", "oo"].includes(base.f.slice(0, 2))
|
||||
// ? [{ type: "PH", ps: { p: "و", f: "wÚ" } }, base]
|
||||
// : ["ée", "ee"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "ای"
|
||||
// ? [
|
||||
// { type: "PH", ps: { p: "وي", f: "wée" } },
|
||||
// {
|
||||
// p: base.p.slice(2),
|
||||
// f: base.f.slice(2),
|
||||
// },
|
||||
// ]
|
||||
// : ["é", "e"].includes(base.f.slice(0, 2)) && base.p.slice(0, 2) === "اې"
|
||||
// ? [
|
||||
// { type: "PH", ps: { p: "وي", f: "wé" } },
|
||||
// {
|
||||
// p: base.p.slice(2),
|
||||
// f: base.f.slice(1),
|
||||
// },
|
||||
// ]
|
||||
// : ["ó", "o"].includes(base.f[0]) && base.p.slice(0, 2) === "او"
|
||||
// ? [{ type: "PH", ps: { p: "و", f: "óo`" } }, base]
|
||||
// : [{ type: "PH", ps: { p: "و", f: "óo" } }, base];
|
||||
// return [ph, removeAccents(rest)];
|
||||
// function removeAStart(ps: T.PsString) {
|
||||
// return {
|
||||
// p: ps.p.slice(1),
|
||||
// f: ps.f.slice(ps.f[1] === "a" ? 2 : 1),
|
||||
// };
|
||||
// }
|
||||
|
||||
// TODO: could handle all sh- verbs for efficiency's sake
|
||||
function parseIrregularVerb(
|
||||
s: string
|
||||
): [{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">][] {
|
||||
function parseIrregularVerb(s: string): T.ParsedVBE[] {
|
||||
if (["ته", "راته", "ورته", "درته"].includes(s)) {
|
||||
return [
|
||||
[
|
||||
undefined,
|
||||
{
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "imperfective",
|
||||
base: "root",
|
||||
type: "verb",
|
||||
verb: s.startsWith("را")
|
||||
? raatlul
|
||||
: s.startsWith("ور")
|
||||
? wartlul
|
||||
: s.startsWith("در")
|
||||
? dartlul
|
||||
: tlul,
|
||||
},
|
||||
person: T.Person.ThirdSingMale,
|
||||
{
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "imperfective",
|
||||
base: "root",
|
||||
type: "verb",
|
||||
verb: s.startsWith("را")
|
||||
? raatlul
|
||||
: s.startsWith("ور")
|
||||
? wartlul
|
||||
: s.startsWith("در")
|
||||
? dartlul
|
||||
: tlul,
|
||||
},
|
||||
],
|
||||
person: T.Person.ThirdSingMale,
|
||||
},
|
||||
];
|
||||
}
|
||||
if (s === "شو") {
|
||||
|
@ -649,38 +351,28 @@ function parseIrregularVerb(
|
|||
T.Person.FirstPlurMale,
|
||||
T.Person.FirstPlurFemale,
|
||||
].flatMap((person) =>
|
||||
[kedulStat, kedulDyn].map<
|
||||
[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
|
||||
>((verb) => [
|
||||
undefined,
|
||||
{
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "perfective",
|
||||
base: "root",
|
||||
type: "verb",
|
||||
verb,
|
||||
},
|
||||
person,
|
||||
[kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "perfective",
|
||||
base: "root",
|
||||
type: "verb",
|
||||
verb,
|
||||
},
|
||||
])
|
||||
person,
|
||||
}))
|
||||
),
|
||||
...[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].flatMap((person) =>
|
||||
[kedulStat, kedulDyn].map<
|
||||
[{ type: "PH"; s: string } | undefined, Omit<T.VBE, "ps">]
|
||||
>((verb) => [
|
||||
undefined,
|
||||
{
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "perfective",
|
||||
base: "stem",
|
||||
type: "verb",
|
||||
verb,
|
||||
},
|
||||
person,
|
||||
[kedulStat, kedulDyn].map<T.ParsedVBE>((verb) => ({
|
||||
type: "VB",
|
||||
info: {
|
||||
aspect: "perfective",
|
||||
base: "stem",
|
||||
type: "verb",
|
||||
verb,
|
||||
},
|
||||
])
|
||||
person,
|
||||
}))
|
||||
),
|
||||
];
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ const maashoom = wordQuery("ماشوم", "noun");
|
|||
const leedul = wordQuery("لیدل", "verb");
|
||||
const kenaastul = wordQuery("کېناستل", "verb");
|
||||
const wurul = wordQuery("وړل", "verb");
|
||||
const akheestul = wordQuery("اخیستل", "verb");
|
||||
|
||||
const tests: {
|
||||
label: string;
|
||||
|
@ -65,6 +66,20 @@ const tests: {
|
|||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "زه سړی کور",
|
||||
output: [],
|
||||
},
|
||||
{
|
||||
input: "زه دې مې وینم",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "وامې دې خیست",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -1104,6 +1119,263 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
label: "negatives and ordering",
|
||||
cases: [
|
||||
{
|
||||
input: "سړی تا نه ویني",
|
||||
output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
|
||||
blocks: [
|
||||
{
|
||||
key: 1,
|
||||
block: makeSubjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makeNounSelection(sarey, undefined),
|
||||
}),
|
||||
},
|
||||
{
|
||||
key: 2,
|
||||
block: makeObjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makePronounSelection(objectPerson),
|
||||
}),
|
||||
},
|
||||
],
|
||||
verb: {
|
||||
type: "verb",
|
||||
verb: leedul,
|
||||
transitivity: "transitive",
|
||||
canChangeTransitivity: false,
|
||||
canChangeStatDyn: false,
|
||||
negative: true,
|
||||
tense: "presentVerb",
|
||||
canChangeVoice: true,
|
||||
isCompound: false,
|
||||
voice: "active",
|
||||
},
|
||||
externalComplement: undefined,
|
||||
form: {
|
||||
removeKing: false,
|
||||
shrinkServant: false,
|
||||
},
|
||||
})),
|
||||
},
|
||||
{
|
||||
input: "سړی نه تا ویني",
|
||||
output: [],
|
||||
},
|
||||
{
|
||||
input: "سړی تا ونه ویني",
|
||||
output: [...getPeople(2, "sing")].flatMap((objectPerson) => ({
|
||||
blocks: [
|
||||
{
|
||||
key: 1,
|
||||
block: makeSubjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makeNounSelection(sarey, undefined),
|
||||
}),
|
||||
},
|
||||
{
|
||||
key: 2,
|
||||
block: makeObjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makePronounSelection(objectPerson),
|
||||
}),
|
||||
},
|
||||
],
|
||||
verb: {
|
||||
type: "verb",
|
||||
verb: leedul,
|
||||
transitivity: "transitive",
|
||||
canChangeTransitivity: false,
|
||||
canChangeStatDyn: false,
|
||||
negative: true,
|
||||
tense: "subjunctiveVerb",
|
||||
canChangeVoice: true,
|
||||
isCompound: false,
|
||||
voice: "active",
|
||||
},
|
||||
externalComplement: undefined,
|
||||
form: {
|
||||
removeKing: false,
|
||||
shrinkServant: false,
|
||||
},
|
||||
})),
|
||||
},
|
||||
// with regular و or وا perfective heads, the negative needs to be behind the perfective head
|
||||
{
|
||||
input: "سړی تا نه وویني",
|
||||
output: [],
|
||||
},
|
||||
{
|
||||
input: "سړي وانه خیستله",
|
||||
output: [
|
||||
{
|
||||
blocks: [
|
||||
{
|
||||
key: 1,
|
||||
block: makeSubjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makeNounSelection(sarey, undefined),
|
||||
}),
|
||||
},
|
||||
{
|
||||
key: 2,
|
||||
block: makeObjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makePronounSelection(T.Person.ThirdSingFemale),
|
||||
}),
|
||||
},
|
||||
],
|
||||
verb: {
|
||||
type: "verb",
|
||||
verb: akheestul,
|
||||
transitivity: "transitive",
|
||||
canChangeTransitivity: false,
|
||||
canChangeStatDyn: false,
|
||||
negative: true,
|
||||
tense: "perfectivePast",
|
||||
canChangeVoice: true,
|
||||
isCompound: false,
|
||||
voice: "active",
|
||||
},
|
||||
externalComplement: undefined,
|
||||
form: {
|
||||
removeKing: true,
|
||||
shrinkServant: false,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "سړي نه واخیستله",
|
||||
output: [],
|
||||
},
|
||||
// but for other perfective heads, the negative can go before or after
|
||||
{
|
||||
input: "زه نه کېنم",
|
||||
output: getPeople(1, "sing").flatMap((subjectPerson) =>
|
||||
(
|
||||
["presentVerb", "subjunctiveVerb"] as const
|
||||
).map<T.VPSelectionComplete>((tense) => ({
|
||||
blocks: [
|
||||
{
|
||||
key: 1,
|
||||
block: makeSubjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makePronounSelection(subjectPerson),
|
||||
}),
|
||||
},
|
||||
{
|
||||
key: 2,
|
||||
block: {
|
||||
type: "objectSelection",
|
||||
selection: "none",
|
||||
},
|
||||
},
|
||||
],
|
||||
verb: {
|
||||
type: "verb",
|
||||
verb: kenaastul,
|
||||
transitivity: "intransitive",
|
||||
canChangeTransitivity: false,
|
||||
canChangeStatDyn: false,
|
||||
negative: true,
|
||||
tense,
|
||||
canChangeVoice: true,
|
||||
isCompound: false,
|
||||
voice: "active",
|
||||
},
|
||||
externalComplement: undefined,
|
||||
form: {
|
||||
removeKing: false,
|
||||
shrinkServant: false,
|
||||
},
|
||||
}))
|
||||
),
|
||||
},
|
||||
{
|
||||
input: "زه کېنه نم",
|
||||
output: getPeople(1, "sing").map<T.VPSelectionComplete>(
|
||||
(subjectPerson) => ({
|
||||
blocks: [
|
||||
{
|
||||
key: 1,
|
||||
block: makeSubjectSelectionComplete({
|
||||
type: "NP",
|
||||
selection: makePronounSelection(subjectPerson),
|
||||
}),
|
||||
},
|
||||
{
|
||||
key: 2,
|
||||
block: {
|
||||
type: "objectSelection",
|
||||
selection: "none",
|
||||
},
|
||||
},
|
||||
],
|
||||
verb: {
|
||||
type: "verb",
|
||||
verb: kenaastul,
|
||||
transitivity: "intransitive",
|
||||
canChangeTransitivity: false,
|
||||
canChangeStatDyn: false,
|
||||
negative: true,
|
||||
tense: "subjunctiveVerb",
|
||||
canChangeVoice: true,
|
||||
isCompound: false,
|
||||
voice: "active",
|
||||
},
|
||||
externalComplement: undefined,
|
||||
form: {
|
||||
removeKing: false,
|
||||
shrinkServant: false,
|
||||
},
|
||||
})
|
||||
),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
label: "should check for subject / object conflicts",
|
||||
cases: [
|
||||
{
|
||||
input: "زه ما وینم",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "ما زه ولیدلم",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "تاسو تا ولیدئ",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "زه مې وینم",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "زه مې ولیدم",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "ومې لیدم",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
{
|
||||
input: "وینم مې",
|
||||
output: [],
|
||||
error: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
tests.forEach(({ label, cases }) => {
|
||||
|
|
|
@ -4,30 +4,16 @@ import {
|
|||
makeObjectSelectionComplete,
|
||||
makeSubjectSelectionComplete,
|
||||
} from "../phrase-building/blocks-utils";
|
||||
import { getPersonFromNP, isPastTense } from "../phrase-building/vp-tools";
|
||||
import {
|
||||
getPersonFromNP,
|
||||
isInvalidSubjObjCombo,
|
||||
isPastTense,
|
||||
} from "../phrase-building/vp-tools";
|
||||
import { parseBlocks } from "./parse-blocks";
|
||||
import { makePronounSelection } from "../phrase-building/make-selections";
|
||||
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
||||
// to hide equatives type-doubling issue
|
||||
|
||||
// demo
|
||||
|
||||
// ماشوم
|
||||
// ماشومان
|
||||
// خوږ
|
||||
// masc plur
|
||||
|
||||
// past tense
|
||||
// ماشومانو ښځه ولیدله
|
||||
// ماشومانو ښځه ولیدله
|
||||
|
||||
// cool examples:
|
||||
// زه ماشوم وهم
|
||||
// وهلم // خواږه
|
||||
|
||||
// ومې لیدې
|
||||
// ویې وهم
|
||||
|
||||
// this should also conjugate to
|
||||
// وامې نه خیسته
|
||||
// وامې نه خیستلو
|
||||
|
@ -35,19 +21,11 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
|||
// وامې نه اخیست
|
||||
// waa-me nú akheest
|
||||
|
||||
// TODO: add tests for negatives and negative order
|
||||
// TODO: imperfective past should also be "was going to / would have"
|
||||
// map over transitivities, to give transitive / gramm. transitive options
|
||||
|
||||
// make impossible subjects like I saw me, error
|
||||
|
||||
// TODO: learn how to yank / use plugin for JSON neovim
|
||||
// learn to use jq to edit selected json in vim ?? COOOL
|
||||
|
||||
// TODO: transitivity options
|
||||
|
||||
// TODO: the و is really making it slow down... why?
|
||||
|
||||
export function parseVP(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||
|
@ -58,11 +36,11 @@ export function parseVP(
|
|||
}
|
||||
const blocks = parseBlocks(tokens, lookup, verbLookup, [], []);
|
||||
return bindParseResult(blocks, (tokens, { blocks, kids }) => {
|
||||
const phIndex = blocks.findIndex((x) => "type" in x && x.type === "PH");
|
||||
const vbeIndex = blocks.findIndex((x) => "type" in x && x.type === "VB");
|
||||
const phIndex = blocks.findIndex((x) => x.type === "PH");
|
||||
const vbeIndex = blocks.findIndex((x) => x.type === "VB");
|
||||
const ba = !!kids.find((k) => k === "ba");
|
||||
const negIndex = blocks.findIndex(
|
||||
(x) => "type" in x && x.type === "negative" && !x.imperative
|
||||
(x) => x.type === "negative" && !x.imperative
|
||||
);
|
||||
const ph = phIndex !== -1 ? (blocks[phIndex] as T.ParsedPH) : undefined;
|
||||
const verb =
|
||||
|
@ -110,10 +88,7 @@ export function parseVP(
|
|||
voice: "active",
|
||||
};
|
||||
|
||||
const nps = blocks.filter(
|
||||
(x): x is { inflected: boolean; selection: T.NPSelection } =>
|
||||
"inflected" in x
|
||||
);
|
||||
const nps = blocks.filter((x): x is T.ParsedNP => x.type === "NP");
|
||||
// TODO: check that verb and PH match
|
||||
if (verb.info.verb.entry.c.includes("intrans")) {
|
||||
const errors: T.ParseError[] = [];
|
||||
|
@ -258,7 +233,9 @@ export function parseVP(
|
|||
shrinkServant: true,
|
||||
},
|
||||
} as T.VPSelectionComplete,
|
||||
errors
|
||||
pronounConflictInBlocks(blocks)
|
||||
? [...errors, { message: "invalid subject/object combo" }]
|
||||
: errors
|
||||
)
|
||||
);
|
||||
}
|
||||
|
@ -357,7 +334,9 @@ export function parseVP(
|
|||
externalComplement: undefined,
|
||||
form,
|
||||
} as T.VPSelectionComplete,
|
||||
errors,
|
||||
errors: pronounConflictInBlocks(blocks)
|
||||
? [...errors, { message: "invalid subject/object combo" }]
|
||||
: errors,
|
||||
}));
|
||||
});
|
||||
} else {
|
||||
|
@ -369,6 +348,16 @@ export function parseVP(
|
|||
] as const
|
||||
).flatMap(([s, o, flip]) => {
|
||||
const errors: T.ParseError[] = [];
|
||||
if (
|
||||
isInvalidSubjObjCombo(
|
||||
getPersonFromNP(s.selection),
|
||||
getPersonFromNP(o.selection)
|
||||
)
|
||||
) {
|
||||
errors.push({
|
||||
message: "invalid subject/object combo",
|
||||
});
|
||||
}
|
||||
if (!s.inflected) {
|
||||
errors.push({
|
||||
message:
|
||||
|
@ -422,6 +411,16 @@ export function parseVP(
|
|||
] as const
|
||||
).flatMap(([s, o, flip]) => {
|
||||
const errors: T.ParseError[] = [];
|
||||
if (
|
||||
isInvalidSubjObjCombo(
|
||||
getPersonFromNP(s.selection),
|
||||
getPersonFromNP(o.selection)
|
||||
)
|
||||
) {
|
||||
errors.push({
|
||||
message: "invalid subject/object combo",
|
||||
});
|
||||
}
|
||||
if (isFirstOrSecondPersPronoun(o.selection)) {
|
||||
if (!o.inflected) {
|
||||
errors.push({
|
||||
|
@ -563,3 +562,16 @@ function negativeInPlace({
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Checks whether the subject and object found in a list of complete VP
 * blocks form a combination rejected by `isInvalidSubjObjCombo`
 * (presumably impossible pairings such as "I see me" — see that helper for
 * the exact rule).
 *
 * @param blocks - the VP blocks to search for a `subjectSelection` and an
 *   `objectSelection` block
 * @returns `true` when both a subject and an object person are available and
 *   `isInvalidSubjObjCombo` reports them as conflicting; `false` otherwise,
 *   including when either block is missing or the object has no person
 */
function pronounConflictInBlocks(blocks: T.VPSBlockComplete[]): boolean {
  // `find` may come back empty, so keep `undefined` in the type instead of
  // asserting it away and crashing on `.selection` below
  const subj = blocks.find((b) => b.block.type === "subjectSelection")
    ?.block as T.SubjectSelectionComplete | undefined;
  const obj = blocks.find((b) => b.block.type === "objectSelection")
    ?.block as T.ObjectSelectionComplete | undefined;
  if (subj === undefined || obj === undefined) {
    // without both a subject and an object there is nothing to conflict
    return false;
  }
  const subjPerson = getPersonFromNP(subj.selection);
  const objPerson = getPersonFromNP(obj.selection);
  // no person could be derived from the object selection, so no conflict
  if (objPerson === undefined) {
    return false;
  }
  return isInvalidSubjObjCombo(subjPerson, objPerson);
}
|
||||
|
|
|
@ -1196,7 +1196,13 @@ export type Block = {
|
|||
|
||||
export type ParsedBlock = ParsedNP | ParsedPH | ParsedVBE | NegativeBlock;
|
||||
|
||||
export type ParsedKidsSection = {
|
||||
type: "kids";
|
||||
kids: ParsedKid[];
|
||||
};
|
||||
|
||||
export type ParsedNP = {
|
||||
type: "NP";
|
||||
inflected: boolean;
|
||||
selection: NPSelection;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue