Compare commits

...

5 Commits

Author SHA1 Message Date
adueck b5e33b1db9 mini-pronouns parsing 2024-02-10 06:50:43 -05:00
adueck ae4ff6f27b cleanup 2024-02-08 23:07:29 +01:00
adueck 221249271c added test 2024-02-08 22:55:04 +01:00
adueck 411f5b2d04 fixed issue with intransitive verbs not checking for subject agreement 2024-02-08 22:51:27 +01:00
adueck db3bc8303b possesive mini pronouns 2024-02-08 22:33:24 +01:00
5 changed files with 431 additions and 77 deletions

View File

@ -11,7 +11,7 @@ const working = [
"phrases with simple verbs", "phrases with simple verbs",
"basic verb tenses", "basic verb tenses",
"noun phrases", "noun phrases",
"mini-pronouns for shrunken servants", "mini-pronouns",
"grammar error correction", "grammar error correction",
"negatives", "negatives",
"imperative verbs", "imperative verbs",
@ -21,14 +21,12 @@ const working = [
const todo = [ const todo = [
"compound verbs", "compound verbs",
"adjectival participles", "adjectival participles",
"adverbial phrases",
"relative clauses", "relative clauses",
"equative verbs", "equative verbs",
"ability verbs", "ability verbs",
"passive verbs", "passive verbs",
"quantifiers", "quantifiers",
"demonstrative pronouns", "demonstrative pronouns",
"mini-pronouns for possesives",
"approximate spelling", "approximate spelling",
]; ];
@ -38,7 +36,7 @@ const examples = [
"یو به مې ړلې", "یو به مې ړلې",
"د غټې ماشومې زاړه پلار ولیدم", "د غټې ماشومې زاړه پلار ولیدم",
"ستا پخواني ملګري مې ولیدل", "ستا پخواني ملګري مې ولیدل",
"پرون مې ولیدې", "پرون مې دې ملګرې ولیده",
"ما ډوډۍ خوړله", "ما ډوډۍ خوړله",
"وامې نه خیست", "وامې نه خیست",
"وبه مې وینې", "وبه مې وینې",

View File

@ -13,7 +13,7 @@ import { lookup, wordQuery } from "./lookup";
import { parseVP } from "./parse-vp"; import { parseVP } from "./parse-vp";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { tlul } from "./irreg-verbs"; import { tlul } from "./irreg-verbs";
import { getPeople, removeKeys } from "./utils"; import { addShrunkenPossesor, getPeople, removeKeys } from "./utils";
const sarey = wordQuery("سړی", "noun"); const sarey = wordQuery("سړی", "noun");
const rasedul = wordQuery("رسېدل", "verb"); const rasedul = wordQuery("رسېدل", "verb");
@ -259,6 +259,11 @@ const tests: {
}, },
], ],
}, },
{
input: "ماشوم ځې",
output: [],
error: true,
},
], ],
}, },
{ {
@ -890,47 +895,95 @@ const tests: {
], ],
}, },
{ {
input: "سړی یې وویني", input: "سړی یې ووهي",
output: getPeople(3, "both").map((person) => ({ output: [
blocks: [ ...getPeople(3, "both").map<T.VPSelectionComplete>((person) => ({
{ blocks: [
key: 1, {
block: makeSubjectSelectionComplete({ key: 1,
type: "NP", block: makeSubjectSelectionComplete({
selection: makeNounSelection(sarey, undefined), type: "NP",
}), selection: makeNounSelection(sarey, undefined),
}),
},
{
key: 1,
block: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
],
verb: {
type: "verb",
verb: wahul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "subjunctiveVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
}, },
{ externalComplement: undefined,
key: 1, form: {
block: makeObjectSelectionComplete({ removeKing: false,
type: "NP", shrinkServant: true,
selection: makePronounSelection(person),
}),
}, },
], })),
verb: { ...getPeople(3, "both").flatMap<T.VPSelectionComplete>((person) =>
type: "verb", getPeople(3, "both").map<T.VPSelectionComplete>((person2) => ({
verb: leedul, blocks: [
transitivity: "transitive", {
canChangeTransitivity: false, key: 1,
canChangeStatDyn: false, block: makeSubjectSelectionComplete({
negative: false, type: "NP",
tense: "subjunctiveVerb", selection: makePronounSelection(person),
canChangeVoice: true, }),
isCompound: false, },
voice: "active", {
}, key: 1,
externalComplement: undefined, block: makeObjectSelectionComplete(
form: { addShrunkenPossesor(
removeKing: false, {
shrinkServant: true, type: "NP",
}, selection: makeNounSelection(sarey, undefined),
})), },
person2
)
),
},
],
verb: {
type: "verb",
verb: wahul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "subjunctiveVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: true,
shrinkServant: false,
},
}))
),
],
}, },
{ {
input: "سړی مو ویني", input: "سړی مو ویني",
output: [...getPeople(1, "pl"), ...getPeople(2, "pl")].map( output: [
(person) => ({ // the man sees you/us
...[
...getPeople(1, "pl"),
...getPeople(2, "pl"),
].map<T.VPSelectionComplete>((person) => ({
blocks: [ blocks: [
{ {
key: 1, key: 1,
@ -964,13 +1017,98 @@ const tests: {
removeKing: false, removeKing: false,
shrinkServant: true, shrinkServant: true,
}, },
}) })),
), // your/our man sees
}, ...[
{ ...getPeople(1, "pl"),
input: "سړي مې واهه", ...getPeople(2, "pl"),
output: [], ].map<T.VPSelectionComplete>((person) => ({
error: true, blocks: [
{
key: 1,
block: makeSubjectSelectionComplete(
addShrunkenPossesor(
{
type: "NP",
selection: makeNounSelection(sarey, undefined),
},
person
)
),
},
{
key: 1,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
// they/he sees your/our man
...[
...getPeople(1, "pl"),
...getPeople(2, "pl"),
].flatMap<T.VPSelectionComplete>((possPers) =>
getPeople(3, "both").map<T.VPSelectionComplete>((subjPers) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(subjPers),
}),
},
{
key: 1,
block: makeObjectSelectionComplete(
addShrunkenPossesor(
{
type: "NP",
selection: makeNounSelection(sarey, undefined),
},
possPers
)
),
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: true,
shrinkServant: false,
},
}))
),
],
}, },
], ],
}, },

View File

@ -1,6 +1,8 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { import {
addShrunkenPossesor,
bindParseResult, bindParseResult,
canTakeShrunkenPossesor,
isNeg, isNeg,
isNonOoPh, isNonOoPh,
isPH, isPH,
@ -28,6 +30,8 @@ import { equals, zip } from "rambda";
import { isImperativeTense } from "../type-predicates"; import { isImperativeTense } from "../type-predicates";
// to hide equatives type-doubling issue // to hide equatives type-doubling issue
// TODO: problem with 3rd pers sing verb endings اواز مې دې واورېده
// TODO: word query for kawul/kedul/stat/dyn // TODO: word query for kawul/kedul/stat/dyn
// TODO: test all types with pronouns // TODO: test all types with pronouns
@ -43,33 +47,38 @@ export function parseVP(
return []; return [];
} }
const blocks = parseBlocks(tokens, lookup, [], []); const blocks = parseBlocks(tokens, lookup, [], []);
return bindParseResult(blocks, (tokens, { blocks, kids }) => { return bindParseResult(
const ba = kids.some((k) => k === "ba"); createPossesivePossibilities(blocks),
const miniPronouns = getMiniPronouns(kids); (tokens, { blocks, kids }) => {
const npsAndAps = blocks.filter( const ba = kids.some((k) => k === "ba");
(x): x is T.ParsedNP | T.APSelection => x.type === "NP" || x.type === "AP" const miniPronouns = getMiniPronouns(kids);
); const npsAndAps = blocks.filter(
const verbSection = blocks.findIndex(startsVerbSection); (x): x is T.ParsedNP | T.APSelection =>
// TODO: would be nice if this could pass error messages about the x.type === "NP" || x.type === "AP"
// negative being out of place etc );
if (!verbSectionOK(blocks.slice(verbSection))) { const verbSection = blocks.findIndex(startsVerbSection);
return []; // TODO: would be nice if this could pass error messages about the
// negative being out of place etc
if (!verbSectionOK(blocks.slice(verbSection))) {
return [];
}
const tenses = getTenses(blocks, ba);
// TODO get errors from the get tenses (perfect verbs not agreeing)
return tenses.flatMap(
({ tense, person, transitivities, negative, verb }) =>
finishPossibleVPSs({
tense,
transitivities,
npsAndAps,
miniPronouns,
tokens,
negative,
verb,
person,
})
);
} }
const tenses = getTenses(blocks, ba); );
// TODO get errors from the get tenses (perfect verbs not agreeing)
return tenses.flatMap(({ tense, person, transitivities, negative, verb }) =>
finishPossibleVPSs({
tense,
transitivities,
npsAndAps,
miniPronouns,
tokens,
negative,
verb,
person,
})
);
});
} }
function getTenses( function getTenses(
@ -311,6 +320,11 @@ function finishIntransitive({
message: "subject of intransitive verb must not be inflected", message: "subject of intransitive verb must not be inflected",
}); });
} }
if (subjectPerson !== person) {
errors.push({
message: "subject and verb must agree for intransitive verb",
});
}
const blocks: T.VPSBlockComplete[] = [ const blocks: T.VPSBlockComplete[] = [
...mapOutnpsAndAps(["S"], npsAndAps), ...mapOutnpsAndAps(["S"], npsAndAps),
{ {
@ -487,6 +501,9 @@ function finishTransitive({
} }
} }
} else { } else {
if (miniPronouns.length > 1) {
errors.push({ message: "unknown mini-pronoun" });
}
if (np.inflected) { if (np.inflected) {
errors.push({ errors.push({
message: !isPast message: !isPast
@ -553,6 +570,9 @@ function finishTransitive({
})); }));
}); });
} else { } else {
const miniPronErrors: T.ParseError[] = miniPronouns.length
? [{ message: "unknown mini-pronoun" }]
: [];
if (isPast) { if (isPast) {
return ( return (
[ [
@ -598,7 +618,7 @@ function finishTransitive({
shrinkServant: false, shrinkServant: false,
}, },
} as T.VPSelectionComplete, } as T.VPSelectionComplete,
errors [...miniPronErrors, ...errors]
); );
}); });
} else { } else {
@ -657,7 +677,7 @@ function finishTransitive({
shrinkServant: false, shrinkServant: false,
}, },
} as T.VPSelectionComplete, } as T.VPSelectionComplete,
errors [...miniPronErrors, ...errors]
); );
}); });
} }
@ -1033,3 +1053,143 @@ function mapOutnpsAndAps(
} }
}); });
} }
/**
 * Expands every parse result into all the readings in which its mini-pronouns
 * are (or are not) consumed as shrunken possessives on the NP / AP blocks.
 *
 * - No mini-pronouns: the result is passed through untouched.
 * - One mini-pronoun: the untouched reading, plus one reading per
 *   (eligible block, person) pair with the mini-pronoun removed from the kids.
 * - Two or more mini-pronouns (only the first two are considered):
 *   1. none used as a possessive
 *   2. the first used as a possessive
 *   3. the second used as a possessive
 *   4. both used as possessives
 *
 * Tokens and errors of the incoming result are copied onto every reading.
 *
 * @param blocks - parse results holding the parsed blocks and kids
 * @returns the expanded list of parse results, untouched reading first
 * @throws Error if a mini-pronoun that should exist cannot be found, or if a
 *   possessor is being attached to a block that cannot take one
 */
function createPossesivePossibilities(
  blocks: T.ParseResult<{
    kids: T.ParsedKid[];
    blocks: T.ParsedBlock[];
  }>[]
): T.ParseResult<{
  kids: T.ParsedKid[];
  blocks: T.ParsedBlock[];
}>[] {
  type Body = {
    kids: T.ParsedKid[];
    blocks: T.ParsedBlock[];
  };

  /**
   * For each person, builds a copy of `allBlocks` in which the block at
   * `target` carries that person as a shrunken possessor.
   */
  function addPossesiveAtIndex(
    people: T.Person[],
    allBlocks: T.ParsedBlock[],
    target: number
  ): T.ParsedBlock[][] {
    return people.map((person) =>
      allBlocks.map((block, idx) => {
        if (idx !== target) {
          return block;
        }
        // Callers only pass eligible indexes; re-checked here so the
        // compiler can narrow the block type before it is rebuilt.
        if (block.type === "NP" && canTakeShrunkenPossesor(block.selection)) {
          return {
            ...block,
            selection: addShrunkenPossesor(block.selection, person),
          };
        }
        if (block.type === "AP" && canTakeShrunkenPossesor(block)) {
          return addShrunkenPossesor(block, person);
        }
        throw new Error(
          "improper index for adding possesor - addPossesiveAtIndex"
        );
      })
    );
  }

  /**
   * Takes the mini-pronoun at position `pos` out of the kids and returns one
   * body per (eligible block, person) pair with it attached as a possessor.
   */
  // TODO: allow possessives for sandwiches
  function spreadOutPoss(body: Body, pos: 0 | 1): Body[] {
    const miniPronoun = getMiniPronouns(body.kids)[pos];
    if (!miniPronoun) {
      throw new Error("tried to pull out non-existent mini-pronoun");
    }
    // identity comparison: drops exactly the entry that was picked above
    const remainingKids = body.kids.filter((kid) => kid !== miniPronoun);
    const people = getPeopleFromMiniPronouns([miniPronoun]);
    const readings: Body[] = [];
    body.blocks.forEach((block, i) => {
      const eligible =
        (block.type === "NP" && canTakeShrunkenPossesor(block.selection)) ||
        (block.type === "AP" && canTakeShrunkenPossesor(block));
      if (!eligible) {
        return;
      }
      addPossesiveAtIndex(people, body.blocks, i).forEach((arrangement) => {
        readings.push({ kids: remainingKids, blocks: arrangement });
      });
    });
    return readings;
  }

  return blocks.flatMap((b) => {
    const miniPronounCount = getMiniPronouns(b.body.kids).length;
    if (miniPronounCount === 0) {
      return [b];
    }
    const withFirstAsPossesive = spreadOutPoss(b.body, 0);
    const bodies: Body[] =
      miniPronounCount === 1
        ? [b.body, ...withFirstAsPossesive]
        : [
            // using none of the mini-pronouns as possessives
            b.body,
            // using the first mini-pronoun as a possessive
            ...withFirstAsPossesive,
            // using the second mini-pronoun as a possessive
            ...spreadOutPoss(b.body, 1),
            // using both: once the first is consumed, the former second
            // mini-pronoun sits at position 0
            ...withFirstAsPossesive.flatMap((x) => spreadOutPoss(x, 0)),
          ];
    return bodies.map((body) => ({
      tokens: b.tokens,
      body,
      errors: b.errors,
    }));
  });
}

View File

@ -174,3 +174,61 @@ export function startsVerbSection(b: T.ParsedBlock): boolean {
b.type === "negative" b.type === "negative"
); );
} }
/**
 * Tells whether a shrunken possessor may be attached to the given selection.
 *
 * - NP: yes, unless its selection is a pronoun or already has a possessor.
 * - AP: only when it is a sandwich whose inside NP can take one.
 *
 * @param block - the NP or AP selection to inspect
 * @returns true when a shrunken possessor can be added
 */
export function canTakeShrunkenPossesor(
  block: T.NPSelection | T.APSelection
): boolean {
  if (block.type !== "NP") {
    // anything that is not an NP is only eligible through a sandwich's inner NP
    return block.selection.type === "sandwich"
      ? canTakeShrunkenPossesor(block.selection.inside)
      : false;
  }
  return !(block.selection.type === "pronoun" || block.selection.possesor);
}
/**
 * Returns a copy of the selection with a shrunken pronoun possessor for
 * `person` attached. The input is not mutated.
 *
 * For an AP sandwich the possessor is attached to the NP inside the sandwich.
 *
 * @param b - the NP or AP selection to extend
 * @param person - the person of the possessing pronoun
 * @returns a new selection of the same kind carrying the possessor
 * @throws Error when the selection is an adverb or a pronoun, or when the
 *   target NP already has a possessor
 */
export function addShrunkenPossesor(
  b: T.NPSelection,
  person: T.Person
): T.NPSelection;
export function addShrunkenPossesor(
  b: T.APSelection,
  person: T.Person
): T.APSelection;
export function addShrunkenPossesor(
  b: T.NPSelection | T.APSelection,
  person: T.Person
): T.NPSelection | T.APSelection {
  if (b.selection.type === "pronoun" || b.selection.type === "adverb") {
    throw new Error("cannot add shrunken possesor");
  }
  if (b.type === "AP") {
    // sandwich: delegate to the NP wrapped inside it
    const inside = addShrunkenPossesor(b.selection.inside, person);
    return {
      ...b,
      selection: { ...b.selection, inside },
    };
  }
  if (b.selection.possesor) {
    throw new Error("cannot add another possesor");
  }
  return {
    ...b,
    selection: {
      ...b.selection,
      possesor: {
        shrunken: true,
        np: {
          type: "NP",
          selection: {
            type: "pronoun",
            distance: "far",
            person,
          },
        },
      },
    },
  };
}

View File

@ -252,7 +252,7 @@ function pronounPossEng(p: T.Person): string {
if (p === T.Person.ThirdSingFemale) { if (p === T.Person.ThirdSingFemale) {
return "her/its"; return "her/its";
} }
return `their ${gend(p)}`; return `their (${gend(p)})`;
} }
export function getEnglishFromRendered( export function getEnglishFromRendered(