really beta sandwich parsing

This commit is contained in:
adueck 2023-10-01 16:18:29 -07:00
parent 3575c1da4f
commit d4db23142e
14 changed files with 318 additions and 271 deletions

View File

@ -1 +0,0 @@

View File

@ -1,13 +1,26 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { fmapParseResult } from "../fp-ps";
import { LookupFunction } from "./lookup"; import { LookupFunction } from "./lookup";
import { parseAdverb } from "./parse-adverb"; import { parseAdverb } from "./parse-adverb";
import { parseSandwich } from "./parse-sandwich";
export function parseAP( export function parseAP(
s: Readonly<T.Token[]>, s: Readonly<T.Token[]>,
lookup: LookupFunction lookup: LookupFunction,
possesor: T.PossesorSelection | undefined
): T.ParseResult<T.APSelection>[] { ): T.ParseResult<T.APSelection>[] {
if (s.length === 0) { if (s.length === 0) {
return []; return [];
} }
return parseAdverb(s, lookup); return [
...(!possesor ? parseAdverb(s, lookup) : []),
...fmapParseResult(
(selection) =>
({
type: "AP",
selection,
} as const),
parseSandwich(s, lookup, possesor)
),
];
} }

View File

@ -1,10 +1,9 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup"; import { LookupFunction } from "./lookup";
import { parseAP } from "./parse-ap";
import { parseEquative } from "./parse-equative"; import { parseEquative } from "./parse-equative";
import { parseKidsSection } from "./parse-kids-section"; import { parseKidsSection } from "./parse-kids-section";
import { parseNeg } from "./parse-negative"; import { parseNeg } from "./parse-negative";
import { parseNP } from "./parse-np"; import { parseNPAP } from "./parse-npap";
import { parsePastPart } from "./parse-past-part"; import { parsePastPart } from "./parse-past-part";
import { parsePH } from "./parse-ph"; import { parsePH } from "./parse-ph";
import { parseVerb } from "./parse-verb"; import { parseVerb } from "./parse-verb";
@ -34,9 +33,7 @@ export function parseBlocks(
); );
const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [ const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
...(!inVerbSection ...(!inVerbSection ? parseNPAP(tokens, lookup) : []),
? [...parseAP(tokens, lookup), ...parseNP(tokens, lookup)]
: []),
// ensure at most one of each PH, VBE, VBP // ensure at most one of each PH, VBE, VBP
...(prevPh ? [] : parsePH(tokens)), ...(prevPh ? [] : parsePH(tokens)),
...(blocks.some(isParsedVBE) ...(blocks.some(isParsedVBE)

View File

@ -1371,7 +1371,9 @@ describe("parsing nouns", () => {
test(category, () => { test(category, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup).map(({ body }) => body); const res = parseNoun(tokens, lookup, undefined, []).map(
({ body }) => body
);
expect(res).toEqual(output); expect(res).toEqual(output);
}); });
}); });
@ -1503,7 +1505,7 @@ describe("parsing nouns with adjectives", () => {
test(category, () => { test(category, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup) const res = parseNoun(tokens, lookup, undefined, [])
.filter(isCompleteResult) .filter(isCompleteResult)
.map(({ body }) => body); .map(({ body }) => body);
expect(res).toEqual(output); expect(res).toEqual(output);

View File

@ -10,28 +10,11 @@ import {
import { getInflectionQueries } from "./inflection-query"; import { getInflectionQueries } from "./inflection-query";
import { LookupFunction } from "./lookup"; import { LookupFunction } from "./lookup";
import { parseAdjective } from "./parse-adjective"; import { parseAdjective } from "./parse-adjective";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils"; import { bindParseResult } from "./utils";
type NounResult = { inflected: boolean; selection: T.NounSelection }; type NounResult = { inflected: boolean; selection: T.NounSelection };
export function parseNoun( export function parseNoun(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return [];
}
const possesor = parsePossesor(tokens, lookup, undefined);
if (possesor.length) {
return bindParseResult(possesor, (tokens, p) => {
return parseNounAfterPossesor(tokens, lookup, p, []);
});
}
return parseNounAfterPossesor(tokens, lookup, undefined, []);
}
function parseNounAfterPossesor(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction, lookup: LookupFunction,
possesor: T.PossesorSelection | undefined, possesor: T.PossesorSelection | undefined,
@ -48,7 +31,7 @@ function parseNounAfterPossesor(
// TODO: add recognition of او between adjectives // TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup); const adjRes = parseAdjective(tokens, lookup);
const withAdj = bindParseResult(adjRes, (tkns, adj) => const withAdj = bindParseResult(adjRes, (tkns, adj) =>
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj]) parseNoun(tkns, lookup, possesor, [...adjectives, adj])
); );
const [first, ...rest] = tokens; const [first, ...rest] = tokens;
const searches = getInflectionQueries(first.s, true); const searches = getInflectionQueries(first.s, true);
@ -113,7 +96,7 @@ function parseNounAfterPossesor(
} }
function adjsMatch( function adjsMatch(
adjectives: Parameters<typeof parseNounAfterPossesor>[3], adjectives: Parameters<typeof parseNoun>[3],
gender: T.Gender, gender: T.Gender,
inf: 0 | 1 | 2, inf: 0 | 1 | 2,
plural: boolean | undefined plural: boolean | undefined

View File

@ -7,7 +7,8 @@ import { LookupFunction } from "./lookup";
export function parseNP( export function parseNP(
s: Readonly<T.Token[]>, s: Readonly<T.Token[]>,
lookup: LookupFunction lookup: LookupFunction,
possesor: T.PossesorSelection | undefined
): T.ParseResult<T.ParsedNP>[] { ): T.ParseResult<T.ParsedNP>[] {
if (s.length === 0) { if (s.length === 0) {
return []; return [];
@ -39,8 +40,8 @@ export function parseNP(
} }
return fmapParseResult(makeNPSl, [ return fmapParseResult(makeNPSl, [
...parsePronoun(s), ...(!possesor ? parsePronoun(s) : []),
...parseNoun(s, lookup), ...parseNoun(s, lookup, possesor, []),
...parseParticiple(s, lookup), ...parseParticiple(s, lookup, possesor),
]); ]);
} }

View File

@ -0,0 +1,25 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseAP } from "./parse-ap";
import { parseNP } from "./parse-np";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
/**
 * Parses either an NP or an AP from the front of `s`.
 *
 * A possesor is looked for first via `parsePossesor`. If none is found, the
 * NP and AP parsers are run directly on `s` with no possesor. Otherwise each
 * possesor result is bound forward: the NP and AP parsers run on the tokens
 * remaining after that possesor, and receive it as their `possesor` argument.
 *
 * @param s - the tokens to parse
 * @param lookup - the dictionary lookup function handed to the sub-parsers
 * @returns every NP / AP parse result found; empty when `s` is empty
 */
export function parseNPAP(
  s: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.APSelection | T.ParsedNP>[] {
  if (!s.length) {
    return [];
  }
  // NP results are always listed before AP results
  const npThenAp = (
    tkns: Readonly<T.Token[]>,
    owner: T.PossesorSelection | undefined
  ): T.ParseResult<T.APSelection | T.ParsedNP>[] => [
    ...parseNP(tkns, lookup, owner),
    ...parseAP(tkns, lookup, owner),
  ];
  const possesorResults = parsePossesor(s, lookup, undefined);
  return possesorResults.length === 0
    ? npThenAp(s, undefined)
    : bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>(
        possesorResults,
        (remaining, found) => npThenAp(remaining, found)
      );
}

View File

@ -6,7 +6,7 @@ import {
import * as T from "../../../types"; import * as T from "../../../types";
import { lookup, wordQuery } from "./lookup"; import { lookup, wordQuery } from "./lookup";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { parseParticiple } from "./parse-participle"; import { parseNPAP } from "./parse-npap";
const leedul = wordQuery("لیدل", "verb"); const leedul = wordQuery("لیدل", "verb");
const akheestul = wordQuery("اخیستل", "verb"); const akheestul = wordQuery("اخیستل", "verb");
@ -113,6 +113,20 @@ const tests: {
}, },
], ],
}, },
{
input: "د سړي لیدو",
output: [
{
inflected: true,
selection: {
...makeParticipleSelection(leedul),
possesor: makePossesorSelection(
makeNounSelection(saray, undefined)
),
},
},
],
},
], ],
}, },
]; ];
@ -123,8 +137,19 @@ describe("parsing participles", () => {
test(label, () => { test(label, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const res = parseParticiple(tokens, lookup).map(({ body }) => body); const res = parseNPAP(tokens, lookup).map(({ body }) => body);
expect(res).toEqual(output); expect(res).toEqual(
output.map(
(x): T.ParsedNP => ({
type: "NP",
inflected: x.inflected,
selection: {
type: "NP",
selection: x.selection,
},
})
)
);
}); });
}); });
}); });

View File

@ -1,31 +1,13 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup"; import { LookupFunction } from "./lookup";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
type ParticipleResult = { type ParticipleResult = {
inflected: boolean; inflected: boolean;
selection: T.ParticipleSelection; selection: T.ParticipleSelection;
}; };
export function parseParticiple(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction
): T.ParseResult<ParticipleResult>[] {
if (tokens.length === 0) {
return [];
}
const possesor = parsePossesor(tokens, lookup, undefined);
if (possesor.length) {
return bindParseResult(possesor, (tokens, p) => {
return parseParticipleAfterPossesor(tokens, lookup, p);
});
}
return parseParticipleAfterPossesor(tokens, lookup, undefined);
}
// TODO: should have adverbs with participle // TODO: should have adverbs with participle
function parseParticipleAfterPossesor( export function parseParticiple(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction, lookup: LookupFunction,
possesor: T.PossesorSelection | undefined possesor: T.PossesorSelection | undefined

View File

@ -1,6 +1,5 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { lookup } from "./lookup"; import { lookup } from "./lookup";
import { parseNP } from "./parse-np";
import { parseVP } from "./parse-vp"; import { parseVP } from "./parse-vp";
// شو should not be sheyaano !! // شو should not be sheyaano !!
@ -17,7 +16,7 @@ export function parsePhrase(s: T.Token[]): {
errors: string[]; errors: string[];
} { } {
const res = [ const res = [
...parseNP(s, lookup).filter(({ tokens }) => !tokens.length), // ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
// ...parseVerb(s, verbLookup), // ...parseVerb(s, verbLookup),
...parseVP(s, lookup), ...parseVP(s, lookup),
]; ];

View File

@ -50,7 +50,7 @@ export function parsePossesor(
})); }));
} }
if (first.s === "د") { if (first.s === "د") {
const np = parseNP(rest, lookup); const np = parseNP(rest, lookup, undefined);
return bindParseResult(np, (tokens, body) => { return bindParseResult(np, (tokens, body) => {
const possesor: T.PossesorSelection = { const possesor: T.PossesorSelection = {
shrunken: false, shrunken: false,

View File

@ -0,0 +1,39 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { sandwiches } from "../sandwiches";
import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils";
/**
 * Tries to parse a sandwich from the front of `s`: a first token equal to the
 * `before.p` of an entry in `sandwiches`, then an NP, then a token equal to
 * that entry's `after.p`.
 *
 * @param s - the tokens to parse
 * @param lookup - the dictionary lookup function handed to `parseNP`
 * @param possesor - a previously parsed possesor, passed through to `parseNP`
 *   for the NP inside the sandwich
 * @returns one result per sandwich entry matching both the opening and the
 *   closing token; each result consumes the closing token. Results whose
 *   inner NP is not inflected carry an error.
 */
export function parseSandwich(
  s: Readonly<T.Token[]>,
  lookup: LookupFunction,
  possesor: T.PossesorSelection | undefined
): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
  if (s.length === 0) {
    return [];
  }
  const [first, ...rest] = s;
  const startMatches = sandwiches.filter((x) => x.before?.p === first.s);
  // `filter` always returns an array (truthy even when empty), so the length
  // has to be checked to bail out before doing the NP parse for nothing
  if (startMatches.length === 0) {
    return [];
  }
  // TODO: parse without possesive!
  const nps = parseNP(rest, lookup, possesor);
  return bindParseResult(nps, (tokens, np) => {
    // keep only the sandwiches whose closing word is the token after the NP
    const sandMatches = startMatches.filter(
      (x) => x.after?.p === tokens[0]?.s
    );
    // TODO: allow pattern #1 not inflected
    const errors: T.ParseError[] = np.inflected
      ? []
      : [{ message: "NP inside sandwich must be inflected" }];
    return sandMatches.map((sandwich) => ({
      // drop the closing token of the sandwich
      tokens: tokens.slice(1),
      body: {
        ...sandwich,
        inside: np.selection,
      },
      errors,
    }));
  });
}

View File

@ -1474,177 +1474,177 @@ const tests: {
}, },
})), })),
}, },
// { {
// input: "ما خندل", input: "ما خندل",
// output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({ output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
// blocks: [ blocks: [
// { {
// key: 1, key: 1,
// block: makeSubjectSelectionComplete({ block: makeSubjectSelectionComplete({
// type: "NP", type: "NP",
// selection: makePronounSelection(person), selection: makePronounSelection(person),
// }), }),
// }, },
// { {
// key: 2, key: 2,
// block: { block: {
// type: "objectSelection", type: "objectSelection",
// selection: T.Person.ThirdPlurMale, selection: T.Person.ThirdPlurMale,
// }, },
// }, },
// ], ],
// verb: { verb: {
// type: "verb", type: "verb",
// verb: khandul, verb: khandul,
// transitivity: "grammatically transitive", transitivity: "grammatically transitive",
// canChangeTransitivity: false, canChangeTransitivity: false,
// canChangeStatDyn: false, canChangeStatDyn: false,
// negative: false, negative: false,
// tense: "imperfectivePast", tense: "imperfectivePast",
// canChangeVoice: true, canChangeVoice: true,
// isCompound: false, isCompound: false,
// voice: "active", voice: "active",
// }, },
// externalComplement: undefined, externalComplement: undefined,
// form: { form: {
// removeKing: false, removeKing: false,
// shrinkServant: false, shrinkServant: false,
// }, },
// })), })),
// }, },
// { {
// input: "خندل مې", input: "خندل مې",
// output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({ output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
// blocks: [ blocks: [
// { {
// key: 1, key: 1,
// block: makeSubjectSelectionComplete({ block: makeSubjectSelectionComplete({
// type: "NP", type: "NP",
// selection: makePronounSelection(person), selection: makePronounSelection(person),
// }), }),
// }, },
// { {
// key: 2, key: 2,
// block: { block: {
// type: "objectSelection", type: "objectSelection",
// selection: T.Person.ThirdPlurMale, selection: T.Person.ThirdPlurMale,
// }, },
// }, },
// ], ],
// verb: { verb: {
// type: "verb", type: "verb",
// verb: khandul, verb: khandul,
// transitivity: "grammatically transitive", transitivity: "grammatically transitive",
// canChangeTransitivity: false, canChangeTransitivity: false,
// canChangeStatDyn: false, canChangeStatDyn: false,
// negative: false, negative: false,
// tense: "imperfectivePast", tense: "imperfectivePast",
// canChangeVoice: true, canChangeVoice: true,
// isCompound: false, isCompound: false,
// voice: "active", voice: "active",
// }, },
// externalComplement: undefined, externalComplement: undefined,
// form: { form: {
// removeKing: false, removeKing: false,
// shrinkServant: true, shrinkServant: true,
// }, },
// })), })),
// }, },
// { {
// input: "خندل", input: "خندل",
// output: [], output: [],
// }, },
// { {
// input: "خاندم مې", input: "خاندم مې",
// output: [], output: [],
// error: true, error: true,
// }, },
// { {
// input: "زه وینم", input: "زه وینم",
// output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({ output: getPeople(1, "sing").map<T.VPSelectionComplete>((person) => ({
// blocks: [ blocks: [
// { {
// key: 1, key: 1,
// block: makeSubjectSelectionComplete({ block: makeSubjectSelectionComplete({
// type: "NP", type: "NP",
// selection: makePronounSelection(person), selection: makePronounSelection(person),
// }), }),
// }, },
// { {
// key: 2, key: 2,
// block: { block: {
// type: "objectSelection", type: "objectSelection",
// selection: T.Person.ThirdPlurMale, selection: T.Person.ThirdPlurMale,
// }, },
// }, },
// ], ],
// verb: { verb: {
// type: "verb", type: "verb",
// verb: leedul, verb: leedul,
// transitivity: "grammatically transitive", transitivity: "grammatically transitive",
// canChangeTransitivity: false, canChangeTransitivity: false,
// canChangeStatDyn: false, canChangeStatDyn: false,
// negative: false, negative: false,
// tense: "presentVerb", tense: "presentVerb",
// canChangeVoice: true, canChangeVoice: true,
// isCompound: false, isCompound: false,
// voice: "active", voice: "active",
// }, },
// externalComplement: undefined, externalComplement: undefined,
// form: { form: {
// removeKing: false, removeKing: false,
// shrinkServant: false, shrinkServant: false,
// }, },
// })), })),
// }, },
// { {
// input: "ما ولیدل", input: "ما ولیدل",
// output: getPeople(1, "sing").flatMap<T.VPSelectionComplete>((person) => output: getPeople(1, "sing").flatMap<T.VPSelectionComplete>((person) =>
// ( (
// ["transitive", "grammatically transitive"] as const ["transitive", "grammatically transitive"] as const
// ).map<T.VPSelectionComplete>((transitivity) => ({ ).map<T.VPSelectionComplete>((transitivity) => ({
// blocks: [ blocks: [
// { {
// key: 1, key: 1,
// block: makeSubjectSelectionComplete({ block: makeSubjectSelectionComplete({
// type: "NP", type: "NP",
// selection: makePronounSelection(person), selection: makePronounSelection(person),
// }), }),
// }, },
// { {
// key: 2, key: 2,
// block: block:
// transitivity === "grammatically transitive" transitivity === "grammatically transitive"
// ? { ? {
// type: "objectSelection", type: "objectSelection",
// selection: T.Person.ThirdPlurMale, selection: T.Person.ThirdPlurMale,
// } }
// : makeObjectSelectionComplete({ : makeObjectSelectionComplete({
// type: "NP", type: "NP",
// selection: makePronounSelection(T.Person.ThirdPlurMale), selection: makePronounSelection(T.Person.ThirdPlurMale),
// }), }),
// }, },
// ], ],
// verb: { verb: {
// type: "verb", type: "verb",
// verb: leedul, verb: leedul,
// transitivity, transitivity,
// canChangeTransitivity: false, canChangeTransitivity: false,
// canChangeStatDyn: false, canChangeStatDyn: false,
// negative: false, negative: false,
// tense: "perfectivePast", tense: "perfectivePast",
// canChangeVoice: true, canChangeVoice: true,
// isCompound: false, isCompound: false,
// voice: "active", voice: "active",
// }, },
// externalComplement: undefined, externalComplement: undefined,
// form: { form: {
// removeKing: transitivity === "transitive", removeKing: transitivity === "transitive",
// shrinkServant: false, shrinkServant: false,
// }, },
// })) }))
// ), ),
// }, },
], ],
}, },
]; ];

View File

@ -28,14 +28,11 @@ import { equals, zip } from "rambda";
// TODO: word query for kawul/kedul/stat/dyn // TODO: word query for kawul/kedul/stat/dyn
// TODO: test grammatically transitive stuff // TODO: test all types with pronouns
// test raaba ye wree
// TODO: way to get an error message for past participle and equative // TODO: way to get an error message for past participle and equative
// not matching up // not matching up
// TODO: negative with perfect forms
export function parseVP( export function parseVP(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction lookup: LookupFunction
@ -400,7 +397,6 @@ function finishTransitive({
) )
); );
} }
// TODO: allow APs for this
if (nps.length === 1) { if (nps.length === 1) {
const np = nps[0]; const np = nps[0];
// possibilities // possibilities
@ -476,27 +472,39 @@ function finishTransitive({
}); });
} }
} }
const blocksOps: T.VPSBlockComplete[][] = servants.map((servant) => const blocksOps: T.VPSBlockComplete[][] = servants.map<
!isPast T.VPSBlockComplete[]
>((servant) =>
!isPast && form.removeKing
? [ ? [
{ {
key: 1, key: 2345,
block: makeSubjectSelectionComplete(king), block: makeSubjectSelectionComplete(king),
}, },
...mapOutnpsAndAps(["O"], npsAndAps),
]
: !isPast && form.shrinkServant
? [
...mapOutnpsAndAps(["S"], npsAndAps),
{ {
key: 2, key: 2345,
block: makeObjectSelectionComplete(servant), block: makeObjectSelectionComplete(servant),
}, },
] ]
: isPast && form.removeKing
? [
...mapOutnpsAndAps(["S"], npsAndAps),
{
key: 2345,
block: makeObjectSelectionComplete(king),
},
]
: [ : [
{ {
key: 1, key: 2345,
block: makeSubjectSelectionComplete(servant), block: makeSubjectSelectionComplete(servant),
}, },
{ ...mapOutnpsAndAps(["O"], npsAndAps),
key: 2,
block: makeObjectSelectionComplete(king),
},
] ]
); );
return blocksOps.map((blocks) => ({ return blocksOps.map((blocks) => ({
@ -547,23 +555,10 @@ function finishTransitive({
message: "past tense transitive verb must agree with the object", message: "past tense transitive verb must agree with the object",
}); });
} }
let blocks: T.VPSBlockComplete[] = [
{
key: 1,
block: makeSubjectSelectionComplete(s.selection),
},
{
key: 2,
block: makeObjectSelectionComplete(o.selection),
},
];
if (flip) {
blocks = blocks.reverse();
}
return returnParseResult( return returnParseResult(
tokens, tokens,
{ {
blocks, blocks: mapOutnpsAndAps(!flip ? ["S", "O"] : ["O", "S"], npsAndAps),
verb: v, verb: v,
externalComplement: undefined, externalComplement: undefined,
form: { form: {
@ -619,23 +614,10 @@ function finishTransitive({
"non-past tense transitive verb must agree with the subject", "non-past tense transitive verb must agree with the subject",
}); });
} }
let blocks: T.VPSBlockComplete[] = [
{
key: 1,
block: makeSubjectSelectionComplete(s.selection),
},
{
key: 2,
block: makeObjectSelectionComplete(o.selection),
},
];
if (flip) {
blocks = blocks.reverse();
}
return returnParseResult( return returnParseResult(
tokens, tokens,
{ {
blocks, blocks: mapOutnpsAndAps(!flip ? ["S", "O"] : ["O", "S"], npsAndAps),
verb: v, verb: v,
externalComplement: undefined, externalComplement: undefined,
form: { form: {