just trying out parsing very, very basic VPs

This commit is contained in:
adueck 2023-08-05 20:35:15 +04:00
parent 4cc81c8b10
commit e910de719f
32 changed files with 3106 additions and 2328 deletions

View File

@ -146,7 +146,7 @@ function VBBlock({
  script: "p" | "f";
  block:
    | T.VBBasic
-   | T.VBGenNum
+   | (T.VBBasic & (T.VBPartInfo | T.VBAbilityInfo))
    | (T.VBBasic & {
        person: T.Person;
      });
@ -167,8 +167,8 @@ function VBBlock({
  );
}
  const infInfo =
-   "gender" in block
-     ? getEnglishGenNumInfo(block.gender, block.number)
+   "info" in block && block.info.type === "ppart"
+     ? getEnglishGenNumInfo(block.info.genNum.gender, block.info.genNum.number)
      : "person" in block
      ? getEnglishPersonInfo(block.person, "short")
      : "";

View File

@ -132,7 +132,7 @@ function grabLength(
  if (vb.type === "welded") {
    return {
      ...vb,
-     right: grabVBLength(vb.right) as T.VBBasic | T.VBGenNum,
+     right: grabVBLength(vb.right) as T.VBBasic | T.VBP,
    };
  }
  if (!(length in vb.ps)) {

View File

@ -3,12 +3,17 @@ import * as T from "../types";
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { lookup } from "../lib/src/parsing/lookup";
import { tokenizer } from "../lib/src/parsing/tokenizer";
- import { NPDisplay } from "../components/library";
+ import {
+   CompiledPTextDisplay,
+   NPDisplay,
+   compileVP,
+   renderVP,
+ } from "../components/library";
function ParserDemo({ opts }: { opts: T.TextOptions }) {
  const [text, setText] = useState<string>("");
  const [result, setResult] = useState<
-   { inflected: boolean; selection: T.NPSelection }[]
+   ReturnType<typeof parsePhrase>["success"]
  >([]);
  const [errors, setErrors] = useState<string[]>([]);
  function handleChange(e: React.ChangeEvent<HTMLInputElement>) {
@ -26,7 +31,7 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
  }
  return (
    <div className="mt-3" style={{ marginBottom: "1000px" }}>
-     <p>Type an adjective or noun (w or without adjs) to parse it</p>
+     <p>Type a NP</p>
      <div className="form-group mb-2">
        <input
          dir="rtl"
@ -45,17 +50,49 @@
      {errors.length > 0 && (
        <>
          <div className="alert alert-danger" role="alert">
-           {errors.map((e) => (
-             <div>{e}</div>
-           ))}
+           {errors.length > 0 ? (
+             <>
+               <div>possible errors:</div>
+               <ul>
+                 {errors.map((e) => (
+                   <li>{e}</li>
+                 ))}
+               </ul>
+             </>
+           ) : (
+             <div>{errors[0]}</div>
+           )}
          </div>
          <div className="text-center">Did you mean:</div>
        </>
      )}
-     {result.map((np) => (
-       <NPDisplay NP={np.selection} inflected={np.inflected} opts={opts} />
-     ))}
+     {result.map((res) =>
+       "inflected" in res ? (
+         <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
+       ) : "verb" in res ? (
+         (() => {
+           const rendered = renderVP(res);
+           const compiled = compileVP(rendered, res.form);
+           return (
+             <div>
+               <CompiledPTextDisplay compiled={compiled} opts={opts} />
+               {compiled.e && (
+                 <div className={`text-muted mt-2 text-center`}>
+                   {compiled.e.map((e, i) => (
+                     <div key={i}>{e}</div>
+                   ))}
+                 </div>
+               )}
+             </div>
+           );
+         })()
+       ) : (
+         <samp>
+           <pre>{JSON.stringify(res, null, " ")}</pre>
+         </samp>
+       )
+     )}
      <details>
        <summary>AST</summary>
        <samp>

View File

@ -92,7 +92,7 @@ export function mapVerbRenderedOutput(
  f: (a: T.PsString) => T.PsString,
  [a, b]: T.VerbRenderedOutput
): T.VerbRenderedOutput {
- return [fmapVHead(a), fmapV(b)];
+ return [fmapVHead(a), fmapVE(b)];
  function fmapVHead([v]: [T.VHead] | []): [T.VHead] | [] {
    if (v === undefined) {
      return [];
@ -118,10 +118,10 @@ export function mapVerbRenderedOutput(
      ps: f(comp.ps),
    };
  }
- function fmapV(v: [T.VB, T.VBE] | [T.VBE]): [T.VB, T.VBE] | [T.VBE] {
-   return v.map(fmapVB) as [T.VB, T.VBE] | [T.VBE];
+ function fmapVE(v: [T.VBP, T.VBE] | [T.VBE]): [T.VBP, T.VBE] | [T.VBE] {
+   return v.map(fmapVB) as [T.VBP, T.VBE] | [T.VBE];
  }
- function fmapVB<V extends T.VB | T.VBE>(v: V): V {
+ function fmapVB<V extends T.VB | T.VBE | T.VBP>(v: V): V {
    if (v.type === "welded") {
      return {
        ...v,

View File

@ -1031,11 +1031,11 @@ export const persons = [
    person: 9,
  },
  {
-   label: { subject: "thay (m. pl.)", object: "them (m. pl.)" },
+   label: { subject: "they (m. pl.)", object: "them (m. pl.)" },
    person: 10,
  },
  {
-   label: { subject: "thay (f. pl.)", object: "them (f. pl.)" },
+   label: { subject: "they (f. pl.)", object: "them (f. pl.)" },
    person: 11,
  },
];

File diff suppressed because it is too large

View File

@ -26,6 +26,7 @@ import { getPastParticiple, getRootStem } from "./roots-and-stems";
import {
  isKedul,
  perfectTenseToEquative,
+ vEntry,
  verbEndingConcat,
} from "./rs-helpers";
import {
@ -33,7 +34,24 @@ import {
  accentPsSyllable,
  removeAccents,
} from "../accent-helpers";
+ const kedulStat = vEntry({
+   ts: 1581086654898,
+   i: 11100,
+   p: "کېدل",
+   f: "kedul",
+   g: "kedul",
+   e: "to become _____",
+   r: 2,
+   c: "v. intrans.",
+   ssp: "ش",
+   ssf: "sh",
+   prp: "شول",
+   prf: "shwul",
+   pprtp: "شوی",
+   pprtf: "shúway",
+   noOo: true,
+   ec: "become",
+ });
const formulas: Record<
  T.VerbTense | T.ImperativeTense,
  {
@ -123,11 +141,12 @@ export function renderVerb({
  const type = isAbilityTense(tense) ? "ability" : "basic";
  const transitive = object !== undefined;
  const king = transitive && isPast ? object : subject;
+ const base = isPast ? "root" : "stem";
  // #1 get the appropriate root / stem
  const [vHead, rest] = getRootStem({
    verb,
-   rs: isPast ? "root" : "stem",
+   rs: base,
    aspect: negative && isImperativeTense(tense) ? "imperfective" : aspect,
    voice,
    type,
@ -148,6 +167,8 @@
      pastThird: isPast && king === T.Person.ThirdSingMale,
      aspect,
      basicForm: type === "basic" && voice === "active",
+     base,
+     ability: type === "ability",
    }),
  ],
};
@ -165,7 +186,7 @@ function renderPerfectVerb({
  voice: T.Voice;
}): {
  hasBa: boolean;
- vbs: [[], [T.VB, T.VBE]];
+ vbs: [[], [T.VBP, T.VBE]];
  objComp: T.Rendered<T.NPSelection> | undefined;
} {
  const hasBa = perfectTenseHasBa(tense);
@ -178,6 +199,10 @@ function renderPerfectVerb({
    type: "VB",
    person,
    ps: fmapSingleOrLengthOpts((x) => x[row][col], equative),
+   info: {
+     type: "equative",
+     tense: perfectTenseToEquative(tense),
+   },
  };
  return {
    hasBa,
@ -194,32 +219,46 @@ function addEnding({
  pastThird,
  aspect,
  basicForm,
+ base,
+ ability,
}: {
- rs: [T.VB, T.VBA] | [T.VBA];
+ rs: [T.VBP, T.VB] | [T.VB];
  ending: T.SingleOrLengthOpts<T.PsString[]>;
  person: T.Person;
  verb: T.VerbEntry;
  pastThird: boolean;
  aspect: T.Aspect;
  basicForm: boolean;
- }): [T.VB, T.VBE] | [T.VBE] {
+ base: "stem" | "root";
+ ability: boolean;
+ }): [T.VBP, T.VBE] | [T.VBE] {
  return rs.length === 2
    ? [rs[0], addEnd(rs[1], ending)]
    : [addEnd(rs[0], ending)];
- function addEnd(
-   vba: T.VBA,
-   ending: T.SingleOrLengthOpts<T.PsString[]>
- ): T.VBE {
-   if (vba.type === "welded") {
+ function addEnd(vb: T.VB, ending: T.SingleOrLengthOpts<T.PsString[]>): T.VBE {
+   const info = {
+     type: "verb" as const,
+     aspect: ability ? "perfective" : aspect,
+     base,
+     verb: ability ? kedulStat : verb,
+     ...(ability
+       ? {
+           abilityAux: true,
+         }
+       : {}),
+   };
+   if (vb.type === "welded") {
      return {
-       ...vba,
-       right: addToVBBasicEnd(vba.right, ending),
+       ...vb,
+       right: addToVBBasicEnd(vb.right, ending),
        person,
+       info,
      };
    }
    return {
-     ...addToVBBasicEnd(vba, ending),
+     ...addToVBBasicEnd(vb, ending),
      person,
+     info,
    };
  }
  function addToVBBasicEnd(

File diff suppressed because it is too large

View File

@ -15,7 +15,7 @@ import {
  countSyllables,
  removeAccents,
} from "../accent-helpers";
- import { isKawulVerb, isTlulVerb } from "../type-predicates";
+ import { isKawulVerb } from "../type-predicates";
import {
  vEntry,
  addAbilityEnding,
@ -123,42 +123,60 @@ function getAbilityRs(
  rs: "root" | "stem",
  voice: T.Voice,
  genderNum: T.GenderNumber
- ): [[] | [T.VHead], [T.VB, T.VBA]] {
+ ): [[] | [T.VHead], [T.VBP, T.VB]] {
+ // https://grammar.lingdocs.com/verbs/ability/#exceptions
  const losesAspect =
-   isTlulVerb(verb) ||
+   (verb.entry.prp && verb.entry.p !== "کول") ||
    (isStatComp(verb) && vTransitivity(verb) === "intransitive");
+ const asp = losesAspect ? "imperfective" : aspect;
  const [vhead, [basicroot]] =
    voice === "passive"
      ? getPassiveRs(verb, "imperfective", "root", genderNum)
-     : getRoot(verb, genderNum, losesAspect ? "imperfective" : aspect);
- return [vhead, [addAbilityEnding(basicroot), rs === "root" ? shwulVB : shVB]];
+     : getRoot(verb, genderNum, asp);
+ return [
+   vhead,
+   [addAbilityEnding(basicroot, verb, asp), rs === "root" ? shwulVB : shVB],
+ ];
}
export function getPastParticiple(
  verb: T.VerbEntry,
  voice: T.Voice,
  { gender, number }: { gender: T.Gender; number: T.NounNumber }
- ): T.VBGenNum | T.WeldedGN {
+ ): T.VBP {
  const v = removeFVarientsFromVerb(verb);
  if (voice === "passive") {
    return getPassivePp(v, { gender, number });
  }
  if (isStatComp(v) && v.complement) {
-   return weld(
-     makeComplement(v.complement, { gender, number }),
-     getPastParticiple(statVerb[vTransitivity(verb)], voice, {
-       gender,
-       number,
-     }) as T.VBGenNum
-   );
+   return {
+     ...weld(
+       makeComplement(v.complement, { gender, number }),
+       getPastParticiple(statVerb[vTransitivity(verb)], voice, {
+         gender,
+         number,
+       })
+     ),
+     info: {
+       type: "ppart",
+       genNum: { gender, number },
+       verb,
+     },
+   };
  }
  if (verb.entry.pprtp && verb.entry.pprtf) {
    const base = makePsString(verb.entry.pprtp, verb.entry.pprtf);
    return {
      type: "VB",
      ps: inflectPattern3(base, { gender, number }),
-     gender,
-     number,
+     info: {
+       type: "ppart",
+       verb,
+       genNum: {
+         gender,
+         number,
+       },
+     },
    };
  }
  const basicRoot = getRoot(
@ -166,7 +184,7 @@ export function getPastParticiple(
    { gender, number },
    "imperfective"
  )[1][0];
- const longRoot = getLongVB(basicRoot);
+ const longRoot = getLongVB(basicRoot) as T.VBNoLenghts<T.VB>;
  const rootWLengths = possiblePPartLengths(longRoot);
  /* istanbul ignore next */
  if ("right" in rootWLengths) {
@ -175,8 +193,14 @@ export function getPastParticiple(
  return {
    ...rootWLengths,
    ps: addTail(rootWLengths.ps),
-   gender,
-   number,
+   info: {
+     type: "ppart",
+     verb,
+     genNum: {
+       gender,
+       number,
+     },
+   },
  };
  function addTail(
@ -192,12 +216,19 @@ export function getPastParticiple(
function getPassivePp(
  verb: T.VerbEntryNoFVars,
  genderNumber: T.GenderNumber
- ): T.WeldedGN {
+ ): T.VBP {
  if (isStatComp(verb) && verb.complement) {
-   return weld(
-     makeComplement(verb.complement, genderNumber),
-     getPassivePp(statVerb.transitive, genderNumber)
-   );
+   return {
+     ...weld(
+       makeComplement(verb.complement, genderNumber),
+       getPassivePp(statVerb.transitive, genderNumber)
+     ),
+     info: {
+       type: "ppart",
+       verb,
+       genNum: genderNumber,
+     },
+   };
  }
  const basicRoot = getRoot(
    verb,
@ -205,38 +236,26 @@ function getPassivePp(
    isKawulVerb(verb) ? "perfective" : "imperfective"
  )[1][0];
  const longRoot = getLongVB(basicRoot);
- const kedulVb: T.VBGenNum = getPastParticiple(
+ const kedulVb = getPastParticiple(
    statVerb.intransitive,
    "active",
    genderNumber
- ) as T.VBGenNum;
- return weld(longRoot, kedulVb);
- }
- function getPassiveRs(
-   verb: T.VerbEntryNoFVars,
-   aspect: T.Aspect,
-   rs: "root" | "stem",
-   genderNumber: T.GenderNumber
- ): [[] | [T.VHead], [T.VBA]] {
-   const [vHead, [basicRoot]] = getRoot(verb, genderNumber, aspect);
-   const longRoot = getLongVB(basicRoot);
-   const kedulVba = getRootStem({
-     verb: statVerb.intransitive,
-     aspect,
-     rs,
-     type: "basic",
-     voice: "active",
-     genderNumber: { gender: "masc", number: "singular" },
-   })[1][0] as T.VBBasic;
-   return [vHead, [weld(longRoot, kedulVba)]];
+ );
+ return {
+   ...weld(longRoot, kedulVb),
+   info: {
+     type: "ppart",
+     verb,
+     genNum: genderNumber,
+   },
+ };
}
function getRoot(
  verb: T.VerbEntryNoFVars,
  genderNum: T.GenderNumber,
  aspect: T.Aspect
- ): [[T.VHead] | [], [T.VBA]] {
+ ): [[T.VHead] | [], [T.VB]] {
  if (
    verb.complement &&
    isStatComp(verb) &&
@ -430,6 +449,25 @@ function getStem(
  }
}
+ function getPassiveRs(
+   verb: T.VerbEntryNoFVars,
+   aspect: T.Aspect,
+   rs: "root" | "stem",
+   genderNumber: T.GenderNumber
+ ): [[] | [T.VHead], [T.VB]] {
+   const [vHead, [basicRoot]] = getRoot(verb, genderNumber, aspect);
+   const longRoot = getLongVB(basicRoot);
+   const kedulVba = getRootStem({
+     verb: statVerb.intransitive,
+     aspect,
+     rs,
+     type: "basic",
+     voice: "active",
+     genderNumber: { gender: "masc", number: "singular" },
+   })[1][0] as T.VBBasic;
+   return [vHead, [weld(longRoot, kedulVba)]];
+ }
// TODO: This is a nasty and messy way to do it with the length options included
function getPerfectiveHead(
  base: T.PsString,

View File

@ -123,19 +123,10 @@ export function verbEndingConcat(
  );
}
- // TODO: THIS IS UGGGGLY NEED TO THINK THROUGH THE TYPING ON THE WELDING
export function weld(
  left: T.Welded["left"],
- right: T.VBGenNum | T.WeldedGN
- ): T.WeldedGN;
- export function weld(
-   left: T.Welded["left"],
-   right: T.VBBasic | T.NComp | T.Welded
- ): T.Welded;
- export function weld(
-   left: T.Welded["left"],
-   right: T.VBBasic | T.VBGenNum | T.Welded | T.NComp | T.WeldedGN
- ): T.Welded | T.WeldedGN {
+ right: T.VB | T.VBP | T.NComp
+ ): T.Welded {
  if (right.type === "welded") {
    return weld(weld(left, right.left), right.right);
  }
@ -218,7 +209,11 @@ export function tlulPerfectiveStem(person: {
  ];
}
- export function addAbilityEnding(vb: T.VBA): T.VBA {
+ export function addAbilityEnding(
+   vb: T.VB,
+   verb: T.VerbEntry,
+   aspect: T.Aspect
+ ): T.VBP {
  const abilityEnding: T.PsString[] = [
    { p: "ی", f: "ay" },
    { p: "ای", f: "aay" },
@ -227,9 +222,21 @@ export function addAbilityEnding(vb: T.VBA): T.VBA {
    return {
      ...vb,
      right: addToEnd(vb.right, abilityEnding),
+     info: {
+       type: "ability",
+       verb,
+       aspect,
+     },
    };
  }
- return addToEnd(vb, abilityEnding);
+ return {
+   ...addToEnd(vb, abilityEnding),
+   info: {
+     type: "ability",
+     verb,
+     aspect,
+   },
+ };
  function addToEnd(vb: T.VBBasic, end: T.PsString[]): T.VBBasic {
    /* istanbul ignore next */
    if (!("long" in vb.ps)) {
@ -248,8 +255,8 @@
}
export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBBasic>): T.VBBasic;
- export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA;
- export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA {
+ export function possiblePPartLengths(vba: T.VBNoLenghts<T.VB>): T.VB;
+ export function possiblePPartLengths(vba: T.VBNoLenghts<T.VB>): T.VB {
  const shortenableEndings = ["ښتل", "ستل", "وتل"];
  const wrul = ["وړل", "راوړل", "وروړل", "دروړل"];
  // can't find a case where this is used - type safety
@ -294,12 +301,11 @@ export function possiblePPartLengths(vba: T.VBNoLenghts<T.VBA>): T.VBA {
  return vba;
}
- export function getLongVB(vb: T.VBBasic): T.VBNoLenghts<T.VBBasic>;
- export function getLongVB(vb: T.VBA): T.VBNoLenghts<T.VBA>;
- export function getLongVB(vb: T.VBA): T.VBNoLenghts<T.VBA> {
+ export function getLongVB(vb: T.VB): T.VBNoLenghts<T.VB> {
  if (vb.type === "welded") {
    return {
      ...vb,
+     // @ts-ignore
      right: getLongVB(vb.right),
    };
  }

View File

@ -1,4 +1,5 @@
import * as T from "../../../types";
+ import { endsInConsonant } from "../p-text-helpers";
import {
  isPattern1Entry,
  isPattern2Entry,
@ -50,6 +51,7 @@ export function getInflectionQueries(
    },
  });
  if (noun) {
+   // TODO: could merge these queries for more efficiency ??
    queries.push({
      search: { ppp: s },
      details: {
@ -59,7 +61,17 @@ export function getInflectionQueries(
        predicate: isNounEntry,
      },
    });
+   queries.push({
+     search: { app: s },
+     details: {
+       inflection: [0],
+       gender: ["masc", "fem"],
+       plural: true,
+       predicate: isNounEntry,
+     },
+   });
+   // TODO: what about short vowel ending nouns with وو etc
-   if (s.endsWith("و")) {
+   if (s.endsWith("و") && !["ا", "و"].includes(s.charAt(s.length - 2))) {
      queries.push({
        search: { ppp: s.slice(0, -1) },
        details: {
@ -69,6 +81,15 @@ export function getInflectionQueries(
          predicate: isMascNounEntry,
        },
      });
+     queries.push({
+       search: { app: s.slice(0, -1) },
+       details: {
+         inflection: [1],
+         gender: ["masc"],
+         plural: true,
+         predicate: isMascNounEntry,
+       },
+     });
      queries.push({
        search: { ppp: s.slice(0, -1) + "ې" },
        details: {
@ -218,6 +239,15 @@ export function getInflectionQueries(
            !isPattern4Entry(e),
        },
      });
+     queries.push({
+       search: { app: s.slice(0, -2) },
+       details: {
+         inflection: [1],
+         gender: ["masc"],
+         plural: true,
+         predicate: (e) => isNounEntry(e),
+       },
+     });
    }
    if (
      s.endsWith("ګانو") &&
@ -364,6 +394,18 @@ export function getInflectionQueries(
        predicate: isPattern1Entry,
      },
    });
+   if (noun) {
+     // bundled plural
+     queries.push({
+       search: { p: s.slice(0, -1) },
+       details: {
+         inflection: [0],
+         plural: true,
+         gender: ["masc"],
+         predicate: (e) => !isPattern5Entry(e) && endsInConsonant(e),
+       },
+     });
+   }
    queries.push({
      search: { infbp: s.slice(0, -1) },
      details: {

View File

@ -1,9 +1,12 @@
import nounsAdjs from "../../../nouns-adjs";
+ import verbs from "../../../verbs";
import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
+ import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
  const [key, value] = Object.entries(s)[0];
+ // TODO: could make this more efficient - merging ppp and app queries?
  if (key === "ppp") {
    return nounsAdjs.filter(
      (e) =>
@ -14,16 +17,42 @@ export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
        .includes(value as string)
    );
  }
+ if (key === "ppp") {
+   return nounsAdjs.filter(
+     (e) =>
+       e.app &&
+       e.app
+         .split(",")
+         .map((w) => w.trim())
+         .includes(value as string)
+   );
+ }
  // @ts-ignore
  return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
}
+ export function verbLookup(
+   s: (e: T.VerbDictionaryEntry) => boolean
+ ): T.VerbEntry[] {
+   return verbs.filter(({ entry }) => s(entry));
+ }
export function wordQuery(word: string, type: "adj"): T.AdjectiveEntry;
export function wordQuery(word: string, type: "noun"): T.NounEntry;
+ export function wordQuery(word: string, type: "verb"): T.VerbEntryNoFVars;
export function wordQuery(
  word: string,
- type: "noun" | "adj"
- ): T.NounEntry | T.AdjectiveEntry {
+ type: "noun" | "adj" | "verb"
+ ): T.NounEntry | T.AdjectiveEntry | T.VerbEntryNoFVars {
+ if (type === "verb") {
+   const verb = verbs.find(
+     (x) => x.entry.p === word || x.entry.f === word || x.entry.g === word
+   );
+   if (!verb) {
+     throw new Error(`missing ${word} in word query`);
+   }
+   return removeFVarientsFromVerb(verb);
+ }
  const entry = nounsAdjs.find(
    (x) => x.p === word || x.f === word || x.g === word
  );

View File

@ -1,67 +1,14 @@
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
- import { lookup } from "./lookup";
+ import { lookup, wordQuery } from "./lookup";
import { parseAdjective } from "./parse-adjective";
import { tokenizer } from "./tokenizer";
- const ghut = {
-   ts: 1527812625,
-   i: 9561,
-   p: "غټ",
-   f: "ghuT, ghaT",
-   g: "ghuT,ghaT",
-   e: "big, fat",
-   r: 4,
-   c: "adj.",
- } as T.AdjectiveEntry;
- const sturey = {
-   ts: 1527815306,
-   i: 7933,
-   p: "ستړی",
-   f: "stúRay",
-   g: "stuRay",
-   e: "tired",
-   r: 4,
-   c: "adj. / adv.",
- } as T.AdjectiveEntry;
- const narey = {
-   ts: 1527819320,
-   i: 14027,
-   p: "نری",
-   f: "naráy",
-   g: "naray",
-   e: "thin; mild; high (pitch)",
-   r: 4,
-   c: "adj.",
- } as T.AdjectiveEntry;
- const zor = {
-   ts: 1527815451,
-   i: 7570,
-   p: "زوړ",
-   f: "zoR",
-   g: "zoR",
-   e: "old",
-   r: 4,
-   c: "adj.",
-   infap: "زاړه",
-   infaf: "zaaRu",
-   infbp: "زړ",
-   infbf: "zaR",
- } as T.AdjectiveEntry;
- const sheen = {
-   ts: 1527815265,
-   i: 8979,
-   p: "شین",
-   f: "sheen",
-   g: "sheen",
-   e: "green, blue; unripe, immature; bright, sunny",
-   r: 4,
-   c: "adj.",
-   infap: "شنه",
-   infaf: "shnu",
-   infbp: "شن",
-   infbf: "shn",
- } as T.AdjectiveEntry;
+ const ghut = wordQuery("غټ", "adj");
+ const sturey = wordQuery("ستړی", "adj");
+ const narey = wordQuery("نری", "adj");
+ const zor = wordQuery("زوړ", "adj");
+ const sheen = wordQuery("شین", "adj");
const tests: {
  category: string;
@ -312,7 +259,7 @@ describe("parsing adjectives", () => {
    test(category, () => {
      cases.forEach(({ input, output }) => {
        const tokens = tokenizer(input);
-       const possibilities = parseAdjective(tokens, lookup).map((x) => x[1]);
+       const possibilities = parseAdjective(tokens, lookup).map((x) => x.body);
        expect(
          possibilities.map((x) => {
            const { given, ...rest } = x;

View File

@ -6,6 +6,7 @@ import * as T from "../../../types";
import { lookup, wordQuery } from "./lookup";
import { parseNoun } from "./parse-noun";
import { tokenizer } from "./tokenizer";
+ import { isCompleteResult } from "./utils";
const sor = wordQuery("سوړ", "adj");
const zor = wordQuery("زوړ", "adj");
@ -36,6 +37,12 @@ const maamaa = wordQuery("ماما", "noun");
const peesho = wordQuery("پیشو", "noun");
const duaa = wordQuery("دعا", "noun");
const zooy = wordQuery("زوی", "noun");
+ const nabee = wordQuery("نبي", "noun");
+ const lafz = wordQuery("لفظ", "noun");
+ // TODO: test for adjective errors etc
+ // bundled plural
const tests: {
  category: string;
@ -123,6 +130,13 @@ const tests: {
          gender: "fem",
        },
      },
+     {
+       inflected: false,
+       selection: {
+         ...makeNounSelection(daktar, undefined),
+         number: "plural",
+       },
+     },
    ],
  },
  {
@ -1290,18 +1304,98 @@ const tests: {
      },
    ],
  },
{
category: "arabic plurals",
cases: [
{
input: "الفاظ",
output: [
{
inflected: false,
selection: {
...makeNounSelection(lafz, undefined),
number: "plural",
},
},
],
},
{
input: "الفاظو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(lafz, undefined),
number: "plural",
},
},
],
},
{
input: "نبي",
output: [
{
inflected: false,
selection: makeNounSelection(nabee, undefined),
},
{
inflected: true,
selection: makeNounSelection(nabee, undefined),
},
],
},
{
input: "انبیا",
output: [
{
inflected: false,
selection: {
...makeNounSelection(nabee, undefined),
number: "plural",
},
},
],
},
{
input: "انبیاوو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(nabee, undefined),
number: "plural",
},
},
],
},
],
},
{
category: "bundled plurals",
cases: [
{
input: "کوره",
output: [
{
inflected: false,
selection: {
...makeNounSelection(kor, undefined),
number: "plural",
},
},
],
},
],
},
];
- // PROBLEM WITH غټې وریژې
- // ];
describe("parsing nouns", () => {
  tests.forEach(({ category, cases }) => {
    // eslint-disable-next-line jest/valid-title
    test(category, () => {
      cases.forEach(({ input, output }) => {
        const tokens = tokenizer(input);
-       const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
+       const res = parseNoun(tokens, lookup).map(({ body }) => body);
        expect(res).toEqual(output);
      });
    });
@ -1407,10 +1501,8 @@ const adjsTests: {
      },
    ],
  },
- // TODO: testing issue with the parser returning multiple options needs
- // to be worked out to test double adjectives
  {
-   input: "غټو کورونو",
+   input: "غټو زړو کورونو",
    output: [
      {
        inflected: true,
@ -1419,7 +1511,7 @@ const adjsTests: {
          number: "plural",
          adjectives: [
            makeAdjectiveSelection(ghut),
-           // makeAdjectiveSelection(zor),
+           makeAdjectiveSelection(zor),
          ],
        },
      },
@ -1429,15 +1521,17 @@ const adjsTests: {
  },
];
- // describe("parsing nouns with adjectives", () => {
-   // adjsTests.forEach(({ category, cases }) => {
-     // // eslint-disable-next-line jest/valid-title
-     // test(category, () => {
-       // cases.forEach(({ input, output }) => {
-         // const tokens = tokenizer(input);
-         // const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
-         // expect(res).toEqual(output);
-       // });
-     // });
-   // });
- // });
+ describe("parsing nouns with adjectives", () => {
+   adjsTests.forEach(({ category, cases }) => {
+     // eslint-disable-next-line jest/valid-title
+     test(category, () => {
+       cases.forEach(({ input, output }) => {
+         const tokens = tokenizer(input);
+         const res = parseNoun(tokens, lookup)
+           .filter(isCompleteResult)
+           .map(({ body }) => body);
+         expect(res).toEqual(output);
+       });
+     });
+   });
+ });

View File

@ -9,102 +9,31 @@ import {
} from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { parseAdjective } from "./parse-adjective";
- import { groupWith, equals } from "rambda";
- // TODO:
- // - cleanup the workflow and make sure all nouns are covered and test
- // - add possesive parsing
+ import { parsePossesor } from "./parse-possesor";
+ import { bindParseResult } from "./utils";
type NounResult = { inflected: boolean; selection: T.NounSelection };
export function parseNoun(
  tokens: Readonly<T.Token[]>,
- lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
- prevPossesor: { inflected: boolean; selection: T.NounSelection } | undefined
+ lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): T.ParseResult<NounResult>[] {
  if (tokens.length === 0) {
    return [];
  }
- const [first, ...rest] = tokens;
- const possesor =
-   first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
- if (possesor) {
-   const runsAfterPossesor: T.ParseResult<NounResult | undefined>[] = possesor
-     ? possesor
-     : [{ tokens, body: undefined, errors: [] }];
-   // could be a case for a monad ??
-   return removeUnneccesaryFailing(
-     runsAfterPossesor.flatMap(
-       ({ tokens, body: possesor, errors }) =>
-         parseNoun(
-           tokens,
-           lookup,
-           possesor
-             ? {
-                 inflected: possesor.inflected,
-                 selection: {
-                   ...possesor.selection,
-                   possesor: prevPossesor
-                     ? {
-                         shrunken: false,
-                         np: {
-                           type: "NP",
-                           selection: prevPossesor.selection,
-                         },
-                       }
-                     : undefined,
-                 },
-               }
-             : undefined
-         )
-       // .map<T.ParseResult<NounResult>>(([t, r, errs]) => [
-       //   t,
-       //   r,
-       //   // TODO: should the errors from the runsAfterPossesor be thrown out?
-       //   // or ...errors should be kept?
-       //   // to show an error like د غتو ماشومان نومونه
-       //   // adj error غټ should be first inflection (seems confusing)
-       //   [...errs, ...errors],
-       // ])
-     )
-   );
- } else {
-   return removeUnneccesaryFailing(
-     parseNounAfterPossesor(tokens, lookup, prevPossesor, [])
-   );
+ const possesor = parsePossesor(tokens, lookup, undefined);
+ if (possesor.length) {
+   return bindParseResult(possesor, (tokens, p) => {
+     return parseNounAfterPossesor(tokens, lookup, p, []);
+   });
  }
+ return parseNounAfterPossesor(tokens, lookup, undefined, []);
}
- function removeUnneccesaryFailing(
-   results: T.ParseResult<NounResult>[]
- ): T.ParseResult<NounResult>[] {
-   // group by identical results
-   const groups = groupWith(
-     (a, b) => equals(a.body.selection, b.body.selection),
-     results
-   );
-   // if there's a group of identical results with some success in it
-   // remove any erroneous results
-   const stage1 = groups.flatMap((group) => {
-     if (group.find((x) => x.errors.length === 0)) {
-       return group.filter((x) => x.errors.length === 0);
-     }
-     return group;
-   });
-   // finally, if there's any success anywhere, remove any of the errors
-   if (stage1.find((x) => x.errors.length === 0)) {
-     return stage1.filter((x) => x.errors.length === 0);
-   } else {
-     return stage1;
-   }
- }
- // create NP parsing function for that
- // TODO with possesor, parse an NP not a noun
function parseNounAfterPossesor(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
- possesor: { inflected: boolean; selection: T.NounSelection } | undefined,
+ possesor: T.PossesorSelection | undefined,
  adjectives: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
@ -117,14 +46,13 @@ function parseNounAfterPossesor(
  }
  // TODO: add recognition of او between adjectives
  const adjRes = parseAdjective(tokens, lookup);
- const withAdj = adjRes.flatMap(({ tokens: tkns, body: adj }) =>
+ const withAdj = bindParseResult(adjRes, (tkns, adj) =>
    parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
  );
  const [first, ...rest] = tokens;
- const w: ReturnType<typeof parseNoun> = [];
  const searches = getInflectionQueries(first.s, true);
+ const w: ReturnType<typeof parseNoun> = [];
  searches.forEach(({ search, details }) => {
    const nounEntries = lookup(search).filter(isNounEntry);
    details.forEach((deets) => {
@ -147,6 +75,11 @@ function parseNounAfterPossesor(
        convertInflection(inf, entry, gender, deets.plural).forEach(
          ({ inflected, number }) => {
            const selection = makeNounSelection(entry, undefined);
+           const errors = [
+             ...adjErrors.map((message) => ({
+               message,
+             })),
+           ];
            w.push({
              tokens: rest,
              body: {
@ -162,25 +95,10 @@ function parseNounAfterPossesor(
                  adjectives: adjectives.map((a) => a.selection),
                  // TODO: could be nicer to validate that the possesor is inflected before
                  // and just pass in the selection
-                 possesor: possesor
-                   ? {
-                       shrunken: false,
-                       np: {
-                         type: "NP",
-                         selection: possesor.selection,
-                       },
-                     }
-                   : undefined,
+                 possesor,
                },
              },
-             errors: [
-               ...(possesor?.inflected === false
-                 ? [{ message: "possesor should be inflected" }]
-                 : []),
-               ...adjErrors.map((message) => ({
-                 message,
-               })),
-             ],
+             errors,
            });
          }
        );

View File

@ -4,9 +4,13 @@ import { parseNoun } from "./parse-noun";
import { fmapParseResult } from "../fp-ps";
export function parseNP(
- s: T.Token[],
+ s: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): T.ParseResult<{ inflected: boolean; selection: T.NPSelection }>[] {
+ if (s.length === 0) {
+   return [];
+ }
  function makeNPSl(
    a:
      | {
@ -33,6 +37,6 @@ export function parseNP(
  // @ts-ignore grrr webpack is having trouble with this
  return fmapParseResult(makeNPSl, [
    ...parsePronoun(s),
-   ...parseNoun(s, lookup, undefined),
+   ...parseNoun(s, lookup),
  ]);
}

View File

@ -1,20 +1,34 @@
import * as T from "../../../types";
+ import { verbLookup } from "./lookup";
import { parseNP } from "./parse-np";
+ import { parseVerb } from "./parse-verb";
+ import { parseVP } from "./parse-vp";
export function parsePhrase(
  s: T.Token[],
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): {
- success: { inflected: boolean; selection: T.NPSelection }[];
+ success: (
+   | {
+       inflected: boolean;
+       selection: T.NPSelection;
+     }
+   | Omit<T.VBE, "ps">
+   | T.VPSelectionComplete
+ )[];
  errors: string[];
} {
- const nps = parseNP(s, lookup).filter(({ tokens }) => !tokens.length);
- const success = nps.map((x) => x.body);
+ const res = [
+   ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
+   ...parseVerb(s, verbLookup),
+   ...parseVP(s, lookup, verbLookup),
+ ];
+ const success = res.map((x) => x.body);
  return {
    success,
    errors: [
-     ...new Set(nps.flatMap(({ errors }) => errors.map((e) => e.message))),
+     ...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
    ],
  };
}
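A rough usage sketch (not part of this commit) of how the widened parsePhrase result can be consumed, mirroring the `"inflected" in res` / `"verb" in res` checks used in the ParserDemo component above; the sample input string and logging are hypothetical:

```ts
import { tokenizer } from "./tokenizer";
import { lookup } from "./lookup";
import { parsePhrase } from "./parse-phrase";

// hypothetical input; any short Pashto phrase works here
const { success, errors } = parsePhrase(tokenizer("سړی وینم"), lookup);

success.forEach((res) => {
  if ("inflected" in res) {
    // NP result: { inflected: boolean; selection: T.NPSelection }
    console.log("NP:", res.selection.selection.type, "inflected:", res.inflected);
  } else if ("verb" in res) {
    // a full T.VPSelectionComplete, ready for renderVP / compileVP
    console.log("VP with tense:", res.verb.tense);
  } else {
    // a bare verb block: Omit<T.VBE, "ps">
    console.log("verb block for person:", res.person);
  }
});

// parse errors come back deduplicated as plain strings
console.log(errors);
```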

View File

@ -0,0 +1,124 @@
/* eslint-disable jest/no-conditional-expect */
import * as T from "../../../types";
import {
makeAdjectiveSelection,
makeNounSelection,
makePronounSelection,
} from "../phrase-building/make-selections";
import { lookup, wordQuery } from "./lookup";
import { parsePossesor } from "./parse-possesor";
import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils";
const sturey = wordQuery("ستړی", "adj");
const sarey = wordQuery("سړی", "noun");
const maashoom = wordQuery("ماشوم", "noun");
const malguray = wordQuery("ملګری", "noun");
const plaar = wordQuery("پلار", "noun");
const tests: {
input: string;
output: T.NPSelection["selection"][] | "error";
}[] = [
{
input: "د سړي",
output: [makeNounSelection(sarey, undefined)],
},
{
input: "د ماشومې",
output: [
{
...makeNounSelection(maashoom, undefined),
gender: "fem",
},
],
},
{
input: "د ستړي پلار د ملګري",
output: [
{
...makeNounSelection(malguray, undefined),
possesor: {
shrunken: false,
np: {
type: "NP",
selection: {
...makeNounSelection(plaar, undefined),
adjectives: [makeAdjectiveSelection(sturey)],
},
},
},
},
],
},
{
input: "د سړی نوم",
output: "error",
},
{
input: "د ښځې د ماشومه",
output: "error",
},
{
input: "زما",
output: [
makePronounSelection(T.Person.FirstSingMale),
makePronounSelection(T.Person.FirstSingFemale),
],
},
{
input: "ستا",
output: [
makePronounSelection(T.Person.SecondSingMale),
makePronounSelection(T.Person.SecondSingFemale),
],
},
{
input: "زمونږ",
output: [
makePronounSelection(T.Person.FirstPlurMale),
makePronounSelection(T.Person.FirstPlurFemale),
],
},
{
input: "زموږ",
output: [
makePronounSelection(T.Person.FirstPlurMale),
makePronounSelection(T.Person.FirstPlurFemale),
],
},
{
input: "ستاسو",
output: [
makePronounSelection(T.Person.SecondPlurMale),
makePronounSelection(T.Person.SecondPlurFemale),
],
},
{
input: "ستاسې",
output: [
makePronounSelection(T.Person.SecondPlurMale),
makePronounSelection(T.Person.SecondPlurFemale),
],
},
{
input: "د پلار ستا",
output: "error",
},
];
test("parse possesor", () => {
tests.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const parsed = parsePossesor(tokens, lookup, undefined);
if (output === "error") {
expect(parsed.some((x) => x.errors.length)).toBe(true);
} else {
expect(
parsePossesor(tokens, lookup, undefined)
.filter(isCompleteResult)
.map((x) => x.body.np.selection)
).toEqual(output);
}
});
});

View File

@ -0,0 +1,136 @@
import * as T from "../../../types";
import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils";
// TODO: maybe contractions should just be male to cut down on the
// alternative sentences
const contractions: [string[], T.Person[]][] = [
[["زما"], [T.Person.FirstSingMale, T.Person.FirstSingFemale]],
[["ستا"], [T.Person.SecondSingMale, T.Person.SecondSingFemale]],
[
["زمونږ", "زموږ"],
[T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
],
[
["ستاسو", "ستاسې"],
[T.Person.SecondPlurMale, T.Person.SecondPlurFemale],
],
];
export function parsePossesor(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
prevPossesor: T.PossesorSelection | undefined
): T.ParseResult<T.PossesorSelection>[] {
if (tokens.length === 0) {
if (prevPossesor) {
return [
{
tokens,
body: prevPossesor,
errors: [],
},
];
}
return [];
}
const [first, ...rest] = tokens;
// parse contraction
// then later (if possessor || contractions)
const contractions = parseContractions(first);
if (contractions.length) {
const errors = prevPossesor
? [{ message: "a pronoun cannot have a possesor" }]
: [];
return contractions
.flatMap((p) => parsePossesor(rest, lookup, p))
.map((x) => ({
...x,
errors: [...errors, ...x.errors],
}));
}
if (first.s === "د") {
const np = parseNP(rest, lookup);
return bindParseResult(np, (tokens, body) => {
const possesor: T.PossesorSelection = {
shrunken: false,
np: body.selection,
};
return {
errors: !body.inflected
? // TODO: get ps to say which possesor
// TODO: track the position coming from the parseNP etc for highlighting
[{ message: `possesor should be inflected` }]
: [],
// add and check error - can't add possesor to pronoun
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
};
});
}
if (first.s === "زما") {
return [
{
tokens: rest,
body: {
shrunken: false,
np: {
type: "NP",
selection: {
type: "pronoun",
distance: "far",
person: T.Person.FirstSingMale,
},
},
},
errors: [],
},
];
}
if (prevPossesor) {
return [
{
tokens,
body: prevPossesor,
errors: [],
},
];
}
return [];
}
function addPoss(
possesor: T.PossesorSelection | undefined,
possesorOf: T.PossesorSelection
): T.PossesorSelection {
return {
...possesorOf,
...(possesorOf.np.selection.type !== "pronoun"
? {
np: {
...possesorOf.np,
selection: {
...possesorOf.np.selection,
possesor,
},
},
}
: {}),
};
}
function parseContractions({ s }: T.Token): T.PossesorSelection[] {
const c = contractions.find(([ps]) => ps.includes(s));
if (!c) {
return [];
}
return c[1].map((person) => ({
shrunken: false,
np: {
type: "NP",
selection: {
type: "pronoun",
distance: "far",
person,
},
},
}));
}
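A hedged sketch (not part of the commit) of what the contraction handling above produces: a token like زما yields one T.PossesorSelection per person in its pair, so later stages fork into masculine and feminine readings while the remaining tokens are passed along untouched:

```ts
import { tokenizer } from "./tokenizer";
import { lookup } from "./lookup";
import { parsePossesor } from "./parse-possesor";

// "زما پلار" – the contraction زما covers 1st person singular masc and fem,
// so we expect two results, each still holding the unconsumed token پلار
const results = parsePossesor(tokenizer("زما پلار"), lookup, undefined);
results.forEach(({ body, tokens }) => {
  console.log(body.np.selection, "remaining tokens:", tokens.length);
});
```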

View File

@ -2,7 +2,7 @@ import * as T from "../../../types";
type Result = ReturnType<typeof parsePronoun>[number];
- // TODO: map for doubling true, false, and masc fem
+ // TODO: add chaa
export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
  inflected: boolean;
  selection: T.PronounSelection;
@ -21,6 +21,19 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
      },
      errors: [],
    }));
+ } else if (s === "ما") {
+   return [0, 1].map((person) => ({
+     tokens: rest,
+     body: {
+       inflected: true,
+       selection: {
+         type: "pronoun",
+         person,
+         distance: "far",
+       },
+     },
+     errors: [],
+   }));
  } else if (s === "ته") {
    return [2, 3].map((person) => ({
      tokens: rest,
@ -34,6 +47,19 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
      },
      errors: [],
    }));
+ } else if (s === "تا") {
+   return [2, 3].map((person) => ({
+     tokens: rest,
+     body: {
+       inflected: true,
+       selection: {
+         type: "pronoun",
+         person,
+         distance: "far",
+       },
+     },
+     errors: [],
+   }));
  } else if (s === "هغه") {
    return [
      ...[false, true].map<Result>((inflected) => ({
@ -42,7 +68,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
        inflected,
        selection: {
          type: "pronoun",
-         person: 5,
+         person: 4,
          distance: "far",
        },
      },
@ -54,7 +80,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
        inflected: false,
        selection: {
          type: "pronoun",
-         person: 5,
+         person: 4,
          distance: "far",
        },
      },

View File

@ -0,0 +1,64 @@
import * as T from "../../../types";
export function parseVerb(
tokens: Readonly<T.Token[]>,
verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.ParseResult<Omit<T.VBE, "ps">>[] {
if (tokens.length === 0) {
return [];
}
const [first, ...rest] = tokens;
const people = getVerbEnding(first.s);
if (people.length === 0) {
return [];
}
const verbs = findByStem(first.s.slice(0, -1), verbLookup);
return people.flatMap((person) =>
verbs.map((verb) => ({
tokens: rest,
body: {
type: "VB",
person,
info: {
type: "verb",
aspect: "imperfective",
base: "stem",
verb,
},
},
errors: [],
}))
);
}
function getVerbEnding(p: string): T.Person[] {
if (p.endsWith("م")) {
return [T.Person.FirstSingMale, T.Person.FirstSingFemale];
} else if (p.endsWith("ې")) {
return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
} else if (p.endsWith("ي")) {
return [
T.Person.ThirdSingMale,
T.Person.ThirdSingFemale,
T.Person.ThirdPlurMale,
T.Person.ThirdPlurFemale,
];
} else if (p.endsWith("و")) {
return [T.Person.FirstPlurMale, T.Person.FirstPlurFemale];
} else if (p.endsWith("ئ")) {
return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
}
return [];
}
function findByStem(
stem: string,
verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.VerbEntry[] {
return verbLookup(
(e) =>
e.psp === stem ||
(!e.psp && !e.c.includes("comp") && e.p.slice(0, -1) === stem)
);
}
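A hedged usage sketch (not in the commit) pairing parseVerb with the verbLookup from ./lookup; the example form وینم is assumed to resolve to a dictionary entry whose present stem (psp) is وین, with the م ending mapping to first person singular:

```ts
import { tokenizer } from "./tokenizer";
import { verbLookup } from "./lookup";
import { parseVerb } from "./parse-verb";

// assumed form: stem "وین" + 1st person singular ending "م"
const parses = parseVerb(tokenizer("وینم"), verbLookup);
parses.forEach(({ body }) => {
  if (body.info.type === "verb") {
    console.log(body.person, body.info.verb.entry.p);
  }
});
```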

View File

@ -0,0 +1,117 @@
import * as T from "../../../types";
import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils";
import { parseVerb } from "./parse-verb";
import {
makeObjectSelectionComplete,
makeSubjectSelectionComplete,
} from "../phrase-building/blocks-utils";
import { vEntry } from "../new-verb-engine/rs-helpers";
import { getPersonFromNP, isThirdPerson } from "../phrase-building/vp-tools";
// to hide equatives type-doubling issue
const kedulStat = vEntry({
ts: 1581086654898,
i: 11100,
p: "کېدل",
f: "kedul",
g: "kedul",
e: "to become _____",
r: 2,
c: "v. intrans.",
ssp: "ش",
ssf: "sh",
prp: "شول",
prf: "shwul",
pprtp: "شوی",
pprtf: "shúway",
noOo: true,
ec: "become",
});
export function parseVP(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
verbLookup: (s: (e: T.VerbDictionaryEntry) => boolean) => T.VerbEntry[]
): T.ParseResult<T.VPSelectionComplete>[] {
if (tokens.length === 0) {
return [];
}
// how to make this into a nice pipeline... 🤔
const NP1 = parseNP(tokens, lookup);
const NP2 = bindParseResult(NP1, (tokens) => parseNP(tokens, lookup), true);
const vb = bindParseResult(
NP2,
(tokens) => parseVerb(tokens, verbLookup),
true
);
// TODO: be able to bind multiple vals
return bindParseResult<Omit<T.VBE, "ps">, T.VPSelectionComplete>(
vb,
(tokens, v) => {
const w: T.ParseResult<T.VPSelectionComplete>[] = [];
NP1.forEach(({ body: np1 }) => {
NP2.forEach(({ body: np2 }) => {
[
[np1, np2],
[np2, np1],
].forEach(([s, o]) => {
const errors: T.ParseError[] = [];
const subjPerson = getPersonFromNP(s.selection);
if (s.inflected) {
errors.push({ message: "subject should not be inflected" });
}
if (o.selection.selection.type === "pronoun") {
if (!isThirdPerson(subjPerson) && !o.inflected) {
errors.push({
message:
"1st or 2nd person object pronoun should be inflected",
});
}
} else if (o.inflected) {
errors.push({ message: "object should not be inflected" });
}
if (getPersonFromNP(s.selection) !== v.person) {
errors.push({ message: "verb does not match subject" });
}
const blocks: T.VPSBlockComplete[] = [
{
key: 1,
block: makeSubjectSelectionComplete(s.selection),
},
{
key: 2,
block: makeObjectSelectionComplete(o.selection),
},
];
const verb: T.VerbSelectionComplete = {
type: "verb",
verb: v.info.type === "verb" ? v.info.verb : kedulStat,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
};
w.push({
tokens,
body: {
blocks,
verb,
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
},
errors,
});
});
});
});
return w;
}
);
}

View File

@ -1,7 +1,8 @@
import { Token } from "../../../types";
+ import { standardizePashto } from "../standardize-pashto";
export function tokenizer(s: string): Token[] {
- const words = s.trim().split(/ +/);
+ const words = standardizePashto(s).trim().split(/ +/);
  const indexed: { i: number; s: string }[] = [];
  for (let i = 0; i < words.length; i++) {
    indexed.push({ i, s: words[i] });

View File

@ -0,0 +1,81 @@
import * as T from "../../../types";
/**
* Monadic binding for ParseResult[]
*
* Takes a given array of parse results
* and a function to take the tokens and body of each parse result
* and do something further with them
*
* all the results are flatMapped into a new ParseResult[] monad
* and the errors are passed on and pruned
*
* @param previous - the set of results (monad) to start with
* @param f - a function that takes a remaining list of tokens and one body of the previous result
* and returns the next set of possible results, optionally with an object containing any errors
* @param getCritical - if needed, a function that returns with *part* of the result body to compare
* for identical results while pruning out the unnecessary errors
* @param ignorePrevious - pass in true if you don't need the previous ParseResult to calculate
* the next one. This will improve efficiency by only caring about how many tokens are available
* from the different previous results
* @returns
*/
export function bindParseResult<C extends object, D extends object>(
previous: T.ParseResult<C>[],
f: (
tokens: Readonly<T.Token[]>,
r: C
) =>
| T.ParseResult<D>[]
| {
errors: T.ParseError[];
next: T.ParseResult<D>[];
},
ignorePrevious?: boolean
): T.ParseResult<D>[] {
// const prev = ignorePrevious
// ? (() => {
// const resArr: T.ParseResult<C>[] = [];
// previous.filter((item) => {
// var i = resArr.findIndex(
// (x) => x.tokens.length === item.tokens.length
// );
// if (i <= -1) {
// resArr.push(item);
// }
// return null;
// });
// return resArr;
// })()
// : previous;
const prev = previous;
const nextPossibilities = prev.flatMap(({ tokens, body, errors }) => {
const res = f(tokens, body);
const { errors: errsPassed, next } = Array.isArray(res)
? { errors: [], next: res }
: res;
return next.map((x) => ({
tokens: x.tokens,
body: x.body,
errors: [...errsPassed, ...x.errors, ...errors],
}));
});
return cleanOutFails(nextPossibilities);
}
export function cleanOutFails<C extends object>(
results: T.ParseResult<C>[]
): T.ParseResult<C>[] {
// if there's any success anywhere, remove any of the errors
const errorsGone = results.find((x) => x.errors.length === 0)
? results.filter((x) => x.errors.length === 0)
: results;
// @ts-ignore
return Array.from(new Set(errorsGone.map(JSON.stringify))).map(JSON.parse);
}
export function isCompleteResult<C extends object>(
r: T.ParseResult<C>
): boolean {
return !r.tokens.length && !r.errors.length;
}
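A minimal sketch (not part of the commit) of chaining two of the existing parsers with bindParseResult, in the same style as parse-noun.ts and parse-vp.ts above; parseAdjNoun is a hypothetical combinator used only for illustration:

```ts
import * as T from "../../../types";
import { bindParseResult } from "./utils";
import { parseAdjective } from "./parse-adjective";
import { parseNoun } from "./parse-noun";

function parseAdjNoun(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
) {
  const adjs = parseAdjective(tokens, lookup);
  // each successful adjective parse is continued with a noun parse on the
  // tokens it left over; errors from both stages are merged and then pruned
  // (a fuller combinator would also fold `adj` into the noun selection)
  return bindParseResult(adjs, (rest, adj) => parseNoun(rest, lookup));
}
```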

View File

@ -157,6 +157,15 @@ export function makeSubjectSelection(
  };
}
+ export function makeSubjectSelectionComplete(
+   selection: T.NPSelection
+ ): T.SubjectSelectionComplete {
+   return {
+     type: "subjectSelection",
+     selection,
+   };
+ }
export function makeObjectSelection(
  selection:
    | T.ObjectSelection
@ -195,6 +204,15 @@ export function makeObjectSelection(
  };
}
+ export function makeObjectSelectionComplete(
+   selection: T.NPSelection
+ ): T.ObjectSelectionComplete {
+   return {
+     type: "objectSelection",
+     selection,
+   };
+ }
export function EPSBlocksAreComplete(
  blocks: T.EPSBlock[]
): blocks is T.EPSBlockComplete[] {

View File

@ -344,9 +344,7 @@ function getPsFromPiece(
}
function getPsFromWelded(v: T.Welded): T.PsString[] {
- function getPsFromSide(
-   v: T.VBBasic | T.Welded | T.NComp | T.VBGenNum
- ): T.PsString[] {
+ function getPsFromSide(v: T.VB | T.NComp): T.PsString[] {
    if (v.type === "VB") {
      return flattenLengths(v.ps);
    }

View File

@ -31,6 +31,29 @@ export function makeAdjectiveSelection(
  };
}
+ export function makePossesorSelection(
+   np: T.NPSelection["selection"]
+ ): T.PossesorSelection {
+   return {
+     shrunken: false,
+     np: {
+       type: "NP",
+       selection: np,
+     },
+   };
+ }
+ export function makePronounSelection(
+   person: T.Person,
+   distance?: "near" | "far"
+ ): T.PronounSelection {
+   return {
+     type: "pronoun",
+     distance: distance || "far",
+     person,
+   };
+ }
export function makeParticipleSelection(
  verb: T.VerbEntry
): T.ParticipleSelection {

View File

@ -1,239 +1,311 @@
import { import { isFirstPerson, isSecondPerson } from "../misc-helpers";
isFirstPerson,
isSecondPerson,
} from "../misc-helpers";
import * as T from "../../../types"; import * as T from "../../../types";
import { concatPsString } from "../p-text-helpers"; import { concatPsString } from "../p-text-helpers";
function getBaseAndAdjectives({ selection }: T.Rendered<T.NPSelection | T.ComplementSelection | T.APSelection>): T.PsString[] { function getBaseAndAdjectives({
if (selection.type === "sandwich") { selection,
return getSandwichPsBaseAndAdjectives(selection); }: T.Rendered<
} T.NPSelection | T.ComplementSelection | T.APSelection
const adjs = "adjectives" in selection && selection.adjectives; >): T.PsString[] {
if (!adjs) { if (selection.type === "sandwich") {
return selection.ps; return getSandwichPsBaseAndAdjectives(selection);
} }
return selection.ps.map(p => ( const adjs = "adjectives" in selection && selection.adjectives;
concatPsString( if (!adjs) {
adjs.reduce((accum, curr) => ( return selection.ps;
// TODO: with variations of adjs? }
concatPsString(accum, (accum.p === "" && accum.f === "") ? "" : " ", curr.ps[0]) return selection.ps.map((p) =>
), { p: "", f: "" }), concatPsString(
" ", adjs.reduce(
p, (accum, curr) =>
) // TODO: with variations of adjs?
)); concatPsString(
accum,
accum.p === "" && accum.f === "" ? "" : " ",
curr.ps[0]
),
{ p: "", f: "" }
),
" ",
p
)
);
} }
function getSandwichPsBaseAndAdjectives(
  s: T.Rendered<T.SandwichSelection<T.Sandwich>>
): T.PsString[] {
  const insideBase = getBaseAndAdjectives(s.inside);
  const willContractWithPronoun =
    s.before &&
    s.before.p === "د" &&
    s.inside.selection.type === "pronoun" &&
    (isFirstPerson(s.inside.selection.person) ||
      isSecondPerson(s.inside.selection.person));
  const contracted =
    willContractWithPronoun && s.inside.selection.type === "pronoun"
      ? contractPronoun(s.inside.selection)
      : undefined;
  return insideBase.map((inside) =>
    concatPsString(
      s.before && !willContractWithPronoun ? s.before : "",
      s.before ? " " : "",
      contracted ? contracted : inside,
      s.after ? " " : "",
      s.after ? s.after : ""
    )
  );
}
function contractPronoun(
  n: T.Rendered<T.PronounSelection>
): T.PsString | undefined {
  return isFirstPerson(n.person)
    ? concatPsString({ p: "ز", f: "z" }, n.ps[0])
    : isSecondPerson(n.person)
    ? concatPsString({ p: "س", f: "s" }, n.ps[0])
    : undefined;
}
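// Editor's sketch (not from this commit): the contractions this enables, shown
// with the bare concatenation (the full Rendered<PronounSelection> shape is
// omitted; the pronoun ps values are assumed to be ما / تا):
const exampleZmaa = concatPsString({ p: "ز", f: "z" }, { p: "ما", f: "maa" });
// -> { p: "زما", f: "zmaa" }  i.e. د + ما contracts to "my"
const exampleStaa = concatPsString({ p: "س", f: "s" }, { p: "تا", f: "taa" });
// -> { p: "ستا", f: "staa" }  i.e. د + تا contracts to "your"
// contractPronoun returns undefined for 3rd person, so the د sandwich stays.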
function trimOffShrunkenPossesive(
  p: T.Rendered<T.NPSelection>
): T.Rendered<T.NPSelection> {
  if (!("possesor" in p.selection)) {
    return p;
  }
  if (!p.selection.possesor) {
    return p;
  }
  if (p.selection.possesor.shrunken) {
    return {
      type: "NP",
      selection: {
        ...p.selection,
        possesor: undefined,
      },
    };
  }
  return {
    type: "NP",
    selection: {
      ...p.selection,
      possesor: {
        ...p.selection.possesor,
        np: trimOffShrunkenPossesive(p.selection.possesor.np),
      },
    },
  };
}
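// Editor's note (not from this commit): this walks the possessor chain and
// drops a possessor marked as shrunken, so no د ... phrase is rendered for it
// here; a shrunken possessor is presumably expressed elsewhere as a
// mini-pronoun.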
export function getPashtoFromRendered(
  b:
    | T.Rendered<T.NPSelection>
    | T.Rendered<T.ComplementSelection>
    | T.Rendered<T.APSelection>,
  subjectsPerson: false | T.Person
): T.PsString[] {
  const base = getBaseAndAdjectives(b);
  if (b.selection.type === "loc. adv." || b.selection.type === "adverb") {
    return base;
  }
  if (b.selection.type === "adjective") {
    if (!b.selection.sandwich) {
      return base;
    }
    // TODO: Kinda cheating
    const sandwichPs = getPashtoFromRendered(
      { type: "AP", selection: b.selection.sandwich },
      false
    );
    return base.flatMap((p) =>
      sandwichPs.flatMap((s) => concatPsString(s, " ", p))
    );
  }
  const trimmed =
    b.selection.type === "sandwich"
      ? {
          type: b.type,
          selection: {
            ...b.selection,
            inside: trimOffShrunkenPossesive(b.selection.inside),
          },
        }
      : trimOffShrunkenPossesive({ type: "NP", selection: b.selection });
  if (trimmed.selection.type === "sandwich") {
    return trimmed.selection.inside.selection.possesor
      ? addPossesor(
          trimmed.selection.inside.selection.possesor.np,
          base,
          subjectsPerson
        )
      : base;
  }
  if (trimmed.selection.possesor) {
    return addPossesor(trimmed.selection.possesor.np, base, subjectsPerson);
  }
  return base;
}
function addPossesor(
  owner: T.Rendered<T.NPSelection>,
  existing: T.PsString[],
  subjectsPerson: false | T.Person
): T.PsString[] {
  function willBeReflexive(subj: T.Person, obj: T.Person): boolean {
    return (
      ([0, 1].includes(subj) && [0, 1].includes(obj)) ||
      ([2, 3].includes(subj) && [8, 9].includes(obj))
    );
  }
  const wPossesor = existing.flatMap((ps) =>
    getBaseAndAdjectives(owner).map((v) =>
      owner.selection.type === "pronoun" &&
      subjectsPerson !== false &&
      willBeReflexive(subjectsPerson, owner.selection.person)
        ? concatPsString({ p: "خپل", f: "khpul" }, " ", ps)
        : owner.selection.type === "pronoun" &&
          isFirstPerson(owner.selection.person)
        ? concatPsString({ p: "ز", f: "z" }, v, " ", ps)
        : owner.selection.type === "pronoun" &&
          isSecondPerson(owner.selection.person)
        ? concatPsString({ p: "س", f: "s" }, v, " ", ps)
        : concatPsString({ p: "د", f: "du" }, " ", v, " ", ps)
    )
  );
  if (!owner.selection.possesor) {
    return wPossesor;
  }
  return addPossesor(owner.selection.possesor.np, wPossesor, subjectsPerson);
}
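// Editor's sketch (not from this commit): for a non-pronoun owner the last
// branch just prefixes د, e.g. (phonetics approximate)
const exampleDuPhrase = concatPsString(
  { p: "د", f: "du" },
  " ",
  { p: "سړي", f: "saRee" }, // "man" (oblique)
  " ",
  { p: "کور", f: "kor" } // "house"
);
// -> { p: "د سړي کور", f: "du saRee kor" } "the man's house"; because the
// function recurses on owner.selection.possesor, further owners stack on the
// left: د پلار د ملګري کور "the father's friend's house".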
function addArticlesAndAdjs(
  np: T.Rendered<T.NounSelection>
): string | undefined {
  if (!np.e) return undefined;
  try {
    // split out the articles so adjectives can be stuck in between them and the word
    const chunks = np.e.split("the)");
    const [articles, word] =
      chunks.length === 1 ? ["", np.e] : [chunks[0] + "the) ", chunks[1]];
    const adjs = !np.adjectives
      ? ""
      : np.adjectives.reduce((accum, curr): string => {
          if (!curr.e) throw new Error("no english for adjective");
          return accum + curr.e + " ";
        }, "");
    const genderTag = np.genderCanChange
      ? np.gender === "fem"
        ? " (f.)"
        : " (m.)"
      : "";
    return `${articles}${adjs}${word}${genderTag}`;
  } catch (e) {
    return undefined;
  }
}
function addPossesors(
  possesor: T.Rendered<T.NPSelection> | undefined,
  base: string | undefined,
  type: "noun" | "participle"
): string | undefined {
  function removeArticles(s: string): string {
    return s.replace("(the) ", "").replace("(a/the) ", "");
  }
  if (!base) return undefined;
  if (!possesor) return base;
  if (possesor.selection.type === "pronoun") {
    return type === "noun"
      ? `${pronounPossEng(possesor.selection.person)} ${removeArticles(base)}`
      : `(${pronounPossEng(possesor.selection.person)}) ${removeArticles(
          base
        )} (${possesor.selection.e})`;
  }
  const possesorE = getEnglishFromRendered(possesor);
  if (!possesorE) return undefined;
  const withApostrophe = `${possesorE}'${possesorE.endsWith("s") ? "" : "s"}`;
  return type === "noun"
    ? `${withApostrophe} ${removeArticles(base)}`
    : `(${withApostrophe}) ${removeArticles(base)} (${possesorE})`;
}
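// Editor's note (not from this commit): sample outputs, assuming a base of
// "(the) house" — a 1st pers. masc. sing. pronoun possessor with type "noun"
// gives "my (m.) house"; a possessor whose English renders as "Ahmad" gives
// "Ahmad's house"; with type "participle" both possessor forms are
// parenthesized, e.g. "(Ahmad's) house (Ahmad)".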
function pronounPossEng(p: T.Person): string {
  function gend(x: T.Person): string {
    return `${x % 2 === 0 ? "m." : "f."}`;
  }
  if (p === T.Person.FirstSingMale || p === T.Person.FirstSingFemale) {
    return `my (${gend(p)})`;
  }
  if (p === T.Person.FirstPlurMale || p === T.Person.FirstPlurFemale) {
    return `our (${gend(p)})`;
  }
  if (p === T.Person.SecondSingMale || p === T.Person.SecondSingFemale) {
    return `your (${gend(p)})`;
  }
  if (p === T.Person.SecondPlurMale || p === T.Person.SecondPlurFemale) {
    return `your (${gend(p)} pl.)`;
  }
  if (p === T.Person.ThirdSingMale) {
    return "his/its";
  }
  if (p === T.Person.ThirdSingFemale) {
    return "her/its";
  }
  return `their ${gend(p)}`;
}
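// Editor's sketch (not from this commit): resulting strings, assuming the
// project's convention that masculine Person values are even:
const examplePossEng = [
  pronounPossEng(T.Person.FirstSingMale), // "my (m.)"
  pronounPossEng(T.Person.SecondPlurFemale), // "your (f. pl.)"
  pronounPossEng(T.Person.ThirdSingFemale), // "her/its"
];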
export function getEnglishFromRendered(
  r: T.Rendered<
    | T.NPSelection
    | T.ComplementSelection
    | T.APSelection
    | T.SandwichSelection<T.Sandwich>
  >
): string | undefined {
  if (r.type === "sandwich") {
    return getEnglishFromRenderedSandwich(r);
  }
  if (r.selection.type === "sandwich") {
    return getEnglishFromRenderedSandwich(r.selection);
  }
  if (!r.selection.e) return undefined;
  if (r.selection.type === "loc. adv." || r.selection.type === "adverb") {
    return r.selection.e;
  }
  if (r.selection.type === "adjective") {
    return getEnglishFromRenderedAdjective(r.selection);
  }
  if (r.selection.type === "pronoun") {
    return r.selection.e;
  }
  if (r.selection.type === "participle") {
    return addPossesors(
      r.selection.possesor?.np,
      r.selection.e,
      r.selection.type
    );
  }
  return addPossesors(
    r.selection.possesor?.np,
    addArticlesAndAdjs(r.selection),
    r.selection.type
  );
}
function getEnglishFromRenderedSandwich(
  r: T.Rendered<T.SandwichSelection<T.Sandwich>>
): string | undefined {
  const insideE = getEnglishFromRendered(r.inside);
  if (!insideE) return undefined;
  return `${r.e} ${insideE}`;
}
function getEnglishFromRenderedAdjective(
  a: T.Rendered<T.AdjectiveSelection>
): string | undefined {
  if (!a.sandwich) {
    return a.e;
  }
  if (!a.e) return undefined;
  return `${a.e} ${getEnglishFromRenderedSandwich(a.sandwich)}`;
}

View File

@@ -721,6 +721,14 @@ export type EquativeTense =
  | "wouldBe"
  | "pastSubjunctive"
  | "wouldHaveBeen";
export type EquativeTenseWithoutBa =
  | "present"
  | "subjunctive"
  | "habitual"
  | "past"
  | "wouldBe"
  | "pastSubjunctive"
  | "wouldHaveBeen";
export type PerfectTense = `${EquativeTense}Perfect`;
export type AbilityTense = `${VerbTense}Modal`;
export type ImperativeTense = `${Aspect}Imperative`;
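// Editor's sketch (not part of this commit): these template-literal tenses
// expand mechanically, e.g. assuming "present" is an EquativeTense and that
// Aspect is "perfective" | "imperfective":
const _examplePerfectTense: PerfectTense = "presentPerfect";
const _exampleImperativeTense: ImperativeTense = "perfectiveImperative";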
@@ -1201,16 +1209,50 @@ export type RenderVerbOutput = {
  hasBa: boolean;
  vbs: VerbRenderedOutput;
};
export type VerbRenderedOutput = [[VHead] | [], [VBP, VBE] | [VBE]];
export type RootsStemsOutput = [[VHead] | [], [VBP, VB] | [VB]]; // or perfect / equative
export type VB = VBBasic | Welded;
/** A VB block that can have endings attached to it */
export type VBA = Exclude<VB, VBGenNum | WeldedGN>;
/** A VB block that has had a person verb ending attached */
export type VBE = VB & {
  person: Person;
  info:
    | {
        type: "equative";
        tense: EquativeTenseWithoutBa;
      }
    | {
        type: "verb";
        aspect: Aspect;
        base: "stem" | "root";
        verb: VerbEntry;
        abilityAux?: boolean;
      };
};
/** A VB block used for ability verbs or perfect (past participle)
 * get optionally swapped in order with the VBE when used with negative
 */
export type VBP = VB & (VBPartInfo | VBAbilityInfo);
export type VBPartInfo = {
  info: {
    type: "ppart";
    genNum: GenderNumber;
    verb: VerbEntry;
  };
};
export type VBAbilityInfo = {
  info: {
    type: "ability";
    verb: VerbEntry;
    aspect: Aspect;
  };
};
// in VB OR VBE - add root / stem and entry for parsing info
// but how would that work with perfect and ability verbs ...
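// Editor's sketch (not part of this commit): the shape of a VBP carrying
// past-participle info under the new types. The verb entry is stubbed, the
// participle form/phonetics are approximate placeholders, and the gender/number
// string values ("masc", "singular") are assumed.
declare const exampleVerbEntry: VerbEntry;
const examplePastParticiple: VBP = {
  type: "VB",
  ps: [{ p: "لیدلی", f: "leedulay" }], // e.g. masc. sing. past participle of لیدل
  info: {
    type: "ppart",
    genNum: { gender: "masc", number: "singular" },
    verb: exampleVerbEntry,
  },
};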
export type VBNoLenghts<V extends VB> = V extends VBBasic
  ? Omit<VBBasic, "ps"> & { ps: PsString[] }
@@ -1221,10 +1263,6 @@ export type VBBasic = {
  ps: SingleOrLengthOpts<PsString[]>;
};
// TODO: might be a better design decision to keep the GenderNuber stuff
// in the RIGHT side of the weld
export type VBGenNum = VBBasic & GenderNumber;
export type GenderNumber = {
  gender: Gender;
  number: NounNumber;
@@ -1233,11 +1271,9 @@ export type GenderNumber = {
export type Welded = {
  type: "welded";
  left: NComp | VBBasic | Welded;
  right: VBBasic | (VBBasic & (VBPartInfo | VBAbilityInfo));
};
export type WeldedGN = Omit<Welded, "right"> & { right: VBGenNum };
export type VHead = PH | NComp;
/** perfective head block */

View File

@@ -27,4 +27,12 @@ module.exports = [
    ts: 1527815450,
    e: "son", // زوی
  },
  {
    ts: 1527823093,
    e: "prophet", // نبي
  },
  {
    ts: 1527822456,
    e: "word", // لفظ
  },
];

View File

@@ -7,6 +7,7 @@
 */
module.exports = [
  1527815139, // osedul
  1585228579997, // ورتلل
  1527815216, // راتلل - to come
  1527813473, // الوتل - to fly

View File

@@ -7,6 +7,8 @@
 */
module.exports = [
  1527817457, // درکول
  1659037345120, // بیانېدل
  1608137130992, // چیغه کول
  1658537998960, // لېونی کول
  1527812403, // بچ کول - to save, protect, guard, spare, rescue, economize