really beta sandwich parsing

This commit is contained in:
adueck 2023-10-01 16:18:29 -07:00
parent 3575c1da4f
commit d4db23142e
14 changed files with 318 additions and 271 deletions

View File

@ -1 +0,0 @@

View File

@ -1,13 +1,26 @@
import * as T from "../../../types";
import { fmapParseResult } from "../fp-ps";
import { LookupFunction } from "./lookup";
import { parseAdverb } from "./parse-adverb";
import { parseSandwich } from "./parse-sandwich";
export function parseAP(
s: Readonly<T.Token[]>,
lookup: LookupFunction
lookup: LookupFunction,
possesor: T.PossesorSelection | undefined
): T.ParseResult<T.APSelection>[] {
if (s.length === 0) {
return [];
}
return parseAdverb(s, lookup);
return [
...(!possesor ? parseAdverb(s, lookup) : []),
...fmapParseResult(
(selection) =>
({
type: "AP",
selection,
} as const),
parseSandwich(s, lookup, possesor)
),
];
}

View File

@ -1,10 +1,9 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseAP } from "./parse-ap";
import { parseEquative } from "./parse-equative";
import { parseKidsSection } from "./parse-kids-section";
import { parseNeg } from "./parse-negative";
import { parseNP } from "./parse-np";
import { parseNPAP } from "./parse-npap";
import { parsePastPart } from "./parse-past-part";
import { parsePH } from "./parse-ph";
import { parseVerb } from "./parse-verb";
@ -34,9 +33,7 @@ export function parseBlocks(
);
const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
...(!inVerbSection
? [...parseAP(tokens, lookup), ...parseNP(tokens, lookup)]
: []),
...(!inVerbSection ? parseNPAP(tokens, lookup) : []),
// ensure at most one of each PH, VBE, VBP
...(prevPh ? [] : parsePH(tokens)),
...(blocks.some(isParsedVBE)

View File

@ -1371,7 +1371,9 @@ describe("parsing nouns", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup).map(({ body }) => body);
const res = parseNoun(tokens, lookup, undefined, []).map(
({ body }) => body
);
expect(res).toEqual(output);
});
});
@ -1503,7 +1505,7 @@ describe("parsing nouns with adjectives", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup)
const res = parseNoun(tokens, lookup, undefined, [])
.filter(isCompleteResult)
.map(({ body }) => body);
expect(res).toEqual(output);

View File

@ -10,28 +10,11 @@ import {
import { getInflectionQueries } from "./inflection-query";
import { LookupFunction } from "./lookup";
import { parseAdjective } from "./parse-adjective";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
type NounResult = { inflected: boolean; selection: T.NounSelection };
export function parseNoun(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return [];
}
const possesor = parsePossesor(tokens, lookup, undefined);
if (possesor.length) {
return bindParseResult(possesor, (tokens, p) => {
return parseNounAfterPossesor(tokens, lookup, p, []);
});
}
return parseNounAfterPossesor(tokens, lookup, undefined, []);
}
function parseNounAfterPossesor(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction,
possesor: T.PossesorSelection | undefined,
@ -48,7 +31,7 @@ function parseNounAfterPossesor(
// TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup);
const withAdj = bindParseResult(adjRes, (tkns, adj) =>
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
parseNoun(tkns, lookup, possesor, [...adjectives, adj])
);
const [first, ...rest] = tokens;
const searches = getInflectionQueries(first.s, true);
@ -113,7 +96,7 @@ function parseNounAfterPossesor(
}
function adjsMatch(
adjectives: Parameters<typeof parseNounAfterPossesor>[3],
adjectives: Parameters<typeof parseNoun>[3],
gender: T.Gender,
inf: 0 | 1 | 2,
plural: boolean | undefined

View File

@ -7,7 +7,8 @@ import { LookupFunction } from "./lookup";
export function parseNP(
s: Readonly<T.Token[]>,
lookup: LookupFunction
lookup: LookupFunction,
possesor: T.PossesorSelection | undefined
): T.ParseResult<T.ParsedNP>[] {
if (s.length === 0) {
return [];
@ -39,8 +40,8 @@ export function parseNP(
}
return fmapParseResult(makeNPSl, [
...parsePronoun(s),
...parseNoun(s, lookup),
...parseParticiple(s, lookup),
...(!possesor ? parsePronoun(s) : []),
...parseNoun(s, lookup, possesor, []),
...parseParticiple(s, lookup, possesor),
]);
}

View File

@ -0,0 +1,25 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseAP } from "./parse-ap";
import { parseNP } from "./parse-np";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
/**
 * Parses either an NP or an AP from the front of the token stream,
 * first attempting to read a leading possesor that is then handed to
 * both sub-parsers.
 *
 * @param s - the tokens to parse
 * @param lookup - dictionary lookup function passed through to the sub-parsers
 * @returns every NP and AP parse result found; empty when there are no tokens
 */
export function parseNPAP(
  s: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.APSelection | T.ParsedNP>[] {
  type NPorAP = T.APSelection | T.ParsedNP;
  // NP results always come before AP results, with or without a possesor
  const parseEither = (
    tkns: Readonly<T.Token[]>,
    owner: T.PossesorSelection | undefined
  ): T.ParseResult<NPorAP>[] => [
    ...parseNP(tkns, lookup, owner),
    ...parseAP(tkns, lookup, owner),
  ];
  if (!s.length) {
    return [];
  }
  const possesorResults = parsePossesor(s, lookup, undefined);
  return possesorResults.length > 0
    ? // continue from whatever tokens are left after each possesor parse
      bindParseResult<T.PossesorSelection, NPorAP>(
        possesorResults,
        (remaining, owner) => parseEither(remaining, owner)
      )
    : // no possesor found: parse the original tokens as they are
      parseEither(s, undefined);
}

View File

@ -6,7 +6,7 @@ import {
import * as T from "../../../types";
import { lookup, wordQuery } from "./lookup";
import { tokenizer } from "./tokenizer";
import { parseParticiple } from "./parse-participle";
import { parseNPAP } from "./parse-npap";
const leedul = wordQuery("لیدل", "verb");
const akheestul = wordQuery("اخیستل", "verb");
@ -113,6 +113,20 @@ const tests: {
},
],
},
{
input: "د سړي لیدو",
output: [
{
inflected: true,
selection: {
...makeParticipleSelection(leedul),
possesor: makePossesorSelection(
makeNounSelection(saray, undefined)
),
},
},
],
},
],
},
];
@ -123,8 +137,19 @@ describe("parsing participles", () => {
test(label, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseParticiple(tokens, lookup).map(({ body }) => body);
expect(res).toEqual(output);
const res = parseNPAP(tokens, lookup).map(({ body }) => body);
expect(res).toEqual(
output.map(
(x): T.ParsedNP => ({
type: "NP",
inflected: x.inflected,
selection: {
type: "NP",
selection: x.selection,
},
})
)
);
});
});
});

View File

@ -1,31 +1,13 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
type ParticipleResult = {
inflected: boolean;
selection: T.ParticipleSelection;
};
export function parseParticiple(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction
): T.ParseResult<ParticipleResult>[] {
if (tokens.length === 0) {
return [];
}
const possesor = parsePossesor(tokens, lookup, undefined);
if (possesor.length) {
return bindParseResult(possesor, (tokens, p) => {
return parseParticipleAfterPossesor(tokens, lookup, p);
});
}
return parseParticipleAfterPossesor(tokens, lookup, undefined);
}
// TODO: should have adverbs with participle
function parseParticipleAfterPossesor(
export function parseParticiple(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction,
possesor: T.PossesorSelection | undefined

View File

@ -1,6 +1,5 @@
import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseNP } from "./parse-np";
import { parseVP } from "./parse-vp";
// شو should not be sheyaano !!
@ -17,7 +16,7 @@ export function parsePhrase(s: T.Token[]): {
errors: string[];
} {
const res = [
...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
// ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
// ...parseVerb(s, verbLookup),
...parseVP(s, lookup),
];

View File

@ -50,7 +50,7 @@ export function parsePossesor(
}));
}
if (first.s === "د") {
const np = parseNP(rest, lookup);
const np = parseNP(rest, lookup, undefined);
return bindParseResult(np, (tokens, body) => {
const possesor: T.PossesorSelection = {
shrunken: false,

View File

@ -0,0 +1,39 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { sandwiches } from "../sandwiches";
import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils";
/**
 * Parses a sandwich: an opening word, an NP, and a closing word.
 *
 * The first token is compared with the `before.p` of each known sandwich.
 * An NP is then parsed from the remaining tokens, and the token following
 * the NP is compared with the `after.p` of the sandwiches that matched.
 *
 * @param s - the tokens to parse
 * @param lookup - dictionary lookup function passed through to `parseNP`
 * @param possesor - an already-parsed possesor passed through to `parseNP`
 * @returns one result per matching sandwich, with the NP as `inside`; each
 *   result carries an error when the NP is not inflected
 */
export function parseSandwich(
  s: Readonly<T.Token[]>,
  lookup: LookupFunction,
  possesor: T.PossesorSelection | undefined
): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
  if (s.length === 0) {
    return [];
  }
  const [first, ...rest] = s;
  const startMatches = sandwiches.filter((x) => x.before?.p === first.s);
  // filter() always returns an array, and an empty array is still truthy,
  // so the length has to be checked to bail out before parsing any NPs
  if (startMatches.length === 0) {
    return [];
  }
  // TODO: parse without possessive!
  const nps = parseNP(rest, lookup, possesor);
  return bindParseResult(nps, (tokens, np) => {
    // NOTE(review): when no tokens are left this is undefined, which also
    // equals `x.after?.p` for a sandwich without an `after` - confirm intended
    const closingWord = tokens[0]?.s;
    const sandMatches = startMatches.filter(
      (x) => x.after?.p === closingWord
    );
    // TODO: allow pattern #1 not inflected
    const errors: T.ParseError[] = np.inflected
      ? []
      : [{ message: "NP inside sandwich must be inflected" }];
    return sandMatches.map((sandwich) => ({
      // drop the closing word of the sandwich
      tokens: tokens.slice(1),
      body: {
        ...sandwich,
        inside: np.selection,
      },
      errors,
    }));
  });
}

View File

@ -1474,177 +1474,177 @@ const tests: {
},
})),
},
// {
// input: "ما خندل",
// output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
// blocks: [
// {
// key: 1,
// block: makeSubjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(person),
// }),
// },
// {
// key: 2,
// block: {
// type: "objectSelection",
// selection: T.Person.ThirdPlurMale,
// },
// },
// ],
// verb: {
// type: "verb",
// verb: khandul,
// transitivity: "grammatically transitive",
// canChangeTransitivity: false,
// canChangeStatDyn: false,
// negative: false,
// tense: "imperfectivePast",
// canChangeVoice: true,
// isCompound: false,
// voice: "active",
// },
// externalComplement: undefined,
// form: {
// removeKing: false,
// shrinkServant: false,
// },
// })),
// },
// {
// input: "خندل مې",
// output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
// blocks: [
// {
// key: 1,
// block: makeSubjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(person),
// }),
// },
// {
// key: 2,
// block: {
// type: "objectSelection",
// selection: T.Person.ThirdPlurMale,
// },
// },
// ],
// verb: {
// type: "verb",
// verb: khandul,
// transitivity: "grammatically transitive",
// canChangeTransitivity: false,
// canChangeStatDyn: false,
// negative: false,
// tense: "imperfectivePast",
// canChangeVoice: true,
// isCompound: false,
// voice: "active",
// },
// externalComplement: undefined,
// form: {
// removeKing: false,
// shrinkServant: true,
// },
// })),
// },
// {
// input: "خندل",
// output: [],
// },
// {
// input: "خاندم مې",
// output: [],
// error: true,
// },
// {
// input: "زه وینم",
// output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
// blocks: [
// {
// key: 1,
// block: makeSubjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(person),
// }),
// },
// {
// key: 2,
// block: {
// type: "objectSelection",
// selection: T.Person.ThirdPlurMale,
// },
// },
// ],
// verb: {
// type: "verb",
// verb: leedul,
// transitivity: "grammatically transitive",
// canChangeTransitivity: false,
// canChangeStatDyn: false,
// negative: false,
// tense: "presentVerb",
// canChangeVoice: true,
// isCompound: false,
// voice: "active",
// },
// externalComplement: undefined,
// form: {
// removeKing: false,
// shrinkServant: false,
// },
// })),
// },
// {
// input: "ما ولیدل",
// output: getPeople(1, "sing").flatMap<T.VPSelectionComplete>((person) =>
// (
// ["transitive", "grammatically transitive"] as const
// ).map<T.VPSelectionComplete>((transitivity) => ({
// blocks: [
// {
// key: 1,
// block: makeSubjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(person),
// }),
// },
// {
// key: 2,
// block:
// transitivity === "grammatically transitive"
// ? {
// type: "objectSelection",
// selection: T.Person.ThirdPlurMale,
// }
// : makeObjectSelectionComplete({
// type: "NP",
// selection: makePronounSelection(T.Person.ThirdPlurMale),
// }),
// },
// ],
// verb: {
// type: "verb",
// verb: leedul,
// transitivity,
// canChangeTransitivity: false,
// canChangeStatDyn: false,
// negative: false,
// tense: "perfectivePast",
// canChangeVoice: true,
// isCompound: false,
// voice: "active",
// },
// externalComplement: undefined,
// form: {
// removeKing: transitivity === "transitive",
// shrinkServant: false,
// },
// }))
// ),
// },
{
input: "ما خندل",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: khandul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "imperfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
},
{
input: "خندل مې",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: khandul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "imperfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: true,
},
})),
},
{
input: "خندل",
output: [],
},
{
input: "خاندم مې",
output: [],
error: true,
},
{
input: "زه وینم",
output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block: {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
},
},
],
verb: {
type: "verb",
verb: leedul,
transitivity: "grammatically transitive",
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "presentVerb",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: false,
shrinkServant: false,
},
})),
},
{
input: "ما ولیدل",
output: getPeople(1, "sing").flatMap<T.VPSelectionComplete>((person) =>
(
["transitive", "grammatically transitive"] as const
).map<T.VPSelectionComplete>((transitivity) => ({
blocks: [
{
key: 1,
block: makeSubjectSelectionComplete({
type: "NP",
selection: makePronounSelection(person),
}),
},
{
key: 2,
block:
transitivity === "grammatically transitive"
? {
type: "objectSelection",
selection: T.Person.ThirdPlurMale,
}
: makeObjectSelectionComplete({
type: "NP",
selection: makePronounSelection(T.Person.ThirdPlurMale),
}),
},
],
verb: {
type: "verb",
verb: leedul,
transitivity,
canChangeTransitivity: false,
canChangeStatDyn: false,
negative: false,
tense: "perfectivePast",
canChangeVoice: true,
isCompound: false,
voice: "active",
},
externalComplement: undefined,
form: {
removeKing: transitivity === "transitive",
shrinkServant: false,
},
}))
),
},
],
},
];

View File

@ -28,14 +28,11 @@ import { equals, zip } from "rambda";
// TODO: word query for kawul/kedul/stat/dyn
// TODO: test grammatically transitive stuff
// test raaba ye wree
// TODO: test all types with pronouns
// TODO: way to get an error message for past participle and equative
// not matching up
// TODO: negative with perfect forms
export function parseVP(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction
@ -400,7 +397,6 @@ function finishTransitive({
)
);
}
// TODO: allow APs for this
if (nps.length === 1) {
const np = nps[0];
// possibilities
@ -476,27 +472,39 @@ function finishTransitive({
});
}
}
const blocksOps: T.VPSBlockComplete[][] = servants.map((servant) =>
!isPast
const blocksOps: T.VPSBlockComplete[][] = servants.map<
T.VPSBlockComplete[]
>((servant) =>
!isPast && form.removeKing
? [
{
key: 1,
key: 2345,
block: makeSubjectSelectionComplete(king),
},
...mapOutnpsAndAps(["O"], npsAndAps),
]
: !isPast && form.shrinkServant
? [
...mapOutnpsAndAps(["S"], npsAndAps),
{
key: 2,
key: 2345,
block: makeObjectSelectionComplete(servant),
},
]
: isPast && form.removeKing
? [
...mapOutnpsAndAps(["S"], npsAndAps),
{
key: 2345,
block: makeObjectSelectionComplete(king),
},
]
: [
{
key: 1,
key: 2345,
block: makeSubjectSelectionComplete(servant),
},
{
key: 2,
block: makeObjectSelectionComplete(king),
},
...mapOutnpsAndAps(["O"], npsAndAps),
]
);
return blocksOps.map((blocks) => ({
@ -547,23 +555,10 @@ function finishTransitive({
message: "past tense transitive verb must agree with the object",
});
}
let blocks: T.VPSBlockComplete[] = [
{
key: 1,
block: makeSubjectSelectionComplete(s.selection),
},
{
key: 2,
block: makeObjectSelectionComplete(o.selection),
},
];
if (flip) {
blocks = blocks.reverse();
}
return returnParseResult(
tokens,
{
blocks,
blocks: mapOutnpsAndAps(!flip ? ["S", "O"] : ["O", "S"], npsAndAps),
verb: v,
externalComplement: undefined,
form: {
@ -619,23 +614,10 @@ function finishTransitive({
"non-past tense transitive verb must agree with the subject",
});
}
let blocks: T.VPSBlockComplete[] = [
{
key: 1,
block: makeSubjectSelectionComplete(s.selection),
},
{
key: 2,
block: makeObjectSelectionComplete(o.selection),
},
];
if (flip) {
blocks = blocks.reverse();
}
return returnParseResult(
tokens,
{
blocks,
blocks: mapOutnpsAndAps(!flip ? ["S", "O"] : ["O", "S"], npsAndAps),
verb: v,
externalComplement: undefined,
form: {