more on parser

This commit is contained in:
adueck 2023-08-17 18:12:09 +04:00
parent b384771db5
commit 288718f69a
12 changed files with 1064 additions and 283 deletions

View File

@ -1,21 +0,0 @@
import * as T from "../../../types";
/**
 * Tries to read the particle "به" from the front of the token list.
 *
 * @param tokens - the tokens still waiting to be parsed
 * @returns a single result with body `{ type: "ba" }` and the remaining
 * tokens when the first token's `s` is "به"; otherwise an empty array
 * (also for an empty token list)
 */
export function parseBa(
  tokens: Readonly<T.Token[]>
): T.ParseResult<{ type: "ba" }>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [head, ...remaining] = tokens;
  return head.s === "به"
    ? [
        {
          body: {
            type: "ba",
          },
          errors: [],
          tokens: remaining,
        },
      ]
    : [];
}

View File

@ -0,0 +1,61 @@
import * as T from "../../../types";
import { fmapParseResult } from "../fp-ps";
import { parseNP } from "./parse-np";
import { parseVerb } from "./parse-verb";
/**
 * Parses one block from the front of `tokens`, trying it both as an NP
 * (via `parseNP`) and as a verb (via `parseVerb`) and returning every
 * possibility from both.
 *
 * @param tokens - the tokens still waiting to be parsed
 * @param lookup - dictionary lookup handed to `parseNP`
 * @param verbLookup - verb lookup handed to `parseVerb`
 * @returns for an empty token list, one result with an empty body; otherwise
 * the NP results (each wrapped in a one-element tuple) followed by the verb
 * results
 */
export function parseBlock(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  verbLookup: (s: string) => T.VerbEntry[]
): T.ParseResult<
  | [
      {
        inflected: boolean;
        selection: T.NPSelection;
      }
    ]
  | [
      (
        | {
            type: "PH";
            s: string;
          }
        | undefined
      ),
      Omit<T.VBE, "ps">
    ]
  | []
>[] {
  // local shorthands for the two non-empty body shapes
  type NPBody = [
    {
      inflected: boolean;
      selection: T.NPSelection;
    }
  ];
  type VerbBody = [
    (
      | {
          type: "PH";
          s: string;
        }
      | undefined
    ),
    Omit<T.VBE, "ps">
  ];
  if (!tokens.length) {
    return [
      {
        tokens: [],
        body: [],
        errors: [],
      },
    ];
  }
  // NP results get wrapped so that every body is a tuple of block pieces
  const npResults = fmapParseResult(
    (np) => [np],
    parseNP(tokens, lookup)
  ) as T.ParseResult<NPBody>[];
  const verbResults = parseVerb(
    tokens,
    verbLookup
  ) as T.ParseResult<VerbBody>[];
  return [...npResults, ...verbResults];
}

View File

@ -0,0 +1,96 @@
import * as T from "../../../types";
import { parseBlock } from "./parse-block";
import { parseKidsSection } from "./parse-kids-section";
import { bindParseResult, returnParseResult } from "./utils";
/**
 * Recursively works through `tokens`, collecting blocks and kids as it goes.
 *
 * At each step the front of the token list is tried both as a block
 * (`parseBlock`) and as a kids' section (`parseKidsSection`), and the
 * function recurses on whatever tokens each possibility leaves over.
 *
 * @param tokens - the tokens still waiting to be parsed
 * @param lookup - dictionary lookup passed through to `parseBlock`
 * @param verbLookup - verb lookup passed through to `parseBlock`
 * @param prevBlocks - blocks gathered so far
 * @param kids - kids gathered so far
 * @returns the possible parses, each with the accumulated blocks and kids
 */
export function parseBlocks(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  verbLookup: (s: string) => T.VerbEntry[],
  prevBlocks: (
    | {
        inflected: boolean;
        selection: T.NPSelection;
      }
    | {
        type: "PH";
        s: string;
      }
    | Omit<T.VBE, "ps">
  )[],
  kids: T.ParsedKid[]
): T.ParseResult<{
  kids: T.ParsedKid[];
  blocks: (
    | {
        inflected: boolean;
        selection: T.NPSelection;
      }
    | {
        type: "PH";
        s: string;
      }
    | Omit<T.VBE, "ps">
  )[];
}>[] {
  // nothing left to read: hand back what has been gathered
  if (!tokens.length) {
    return returnParseResult(tokens, { blocks: prevBlocks, kids });
  }
  const candidates = [
    ...parseBlock(tokens, lookup, verbLookup),
    ...parseKidsSection(tokens, []),
  ] as T.ParseResult<
    | [
        {
          inflected: boolean;
          selection: T.NPSelection;
        }
      ]
    | [
        (
          | {
              type: "PH";
              s: string;
            }
          | undefined
        ),
        Omit<T.VBE, "ps">
      ]
    | []
    | { kids: T.ParsedKid[] }
  >[];
  // dead end: neither a block nor a kids' section starts here, so the
  // result carries what was gathered and reports no tokens remaining
  if (candidates.length === 0) {
    return returnParseResult([], { blocks: prevBlocks, kids });
  }
  return bindParseResult(candidates, (remaining, parsed) => {
    if ("kids" in parsed) {
      return {
        next: parseBlocks(remaining, lookup, verbLookup, prevBlocks, [
          ...kids,
          ...parsed.kids,
        ]),
        // an error is recorded unless exactly one block precedes the kids
        errors:
          prevBlocks.length === 1
            ? []
            : [{ message: "kids' section out of place" }],
      };
    }
    // drop the undefined PH slot; flatMap is used rather than filter
    // (the original author notes TypeScript rejected filter here)
    const addedBlocks = parsed.flatMap((piece) => (piece ? [piece] : []));
    return parseBlocks(
      remaining,
      lookup,
      verbLookup,
      [...prevBlocks, ...addedBlocks],
      kids
    );
  });
}

View File

@ -0,0 +1,27 @@
import * as T from "../../../types";
import { returnParseResult } from "./utils";
/**
 * Tries to read a single kid from the front of the token list.
 *
 * Recognised first tokens and the kid each one produces:
 * "به" -> "ba", "یې" -> "ye", "مې" -> "me", "دې" -> "de", "مو" -> "mU".
 *
 * @param tokens - the tokens still waiting to be parsed
 * @returns one result holding the kid and the remaining tokens, or an empty
 * array when the list is empty or the first token is not a recognised kid
 */
export function parseKid(
  tokens: Readonly<T.Token[]>
): T.ParseResult<T.ParsedKid>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...rest] = tokens;
  let kid: T.ParsedKid | undefined;
  switch (head.s) {
    case "به":
      kid = "ba";
      break;
    case "یې":
      kid = "ye";
      break;
    case "مې":
      kid = "me";
      break;
    case "دې":
      kid = "de";
      break;
    case "مو":
      kid = "mU";
      break;
    default:
      kid = undefined;
  }
  return kid === undefined ? [] : returnParseResult(rest, kid);
}

View File

@ -0,0 +1,83 @@
/* eslint-disable jest/no-conditional-expect */
/* eslint-disable jest/valid-title */
import * as T from "../../../types";
import { parseKidsSection } from "./parse-kids-section";
import { tokenizer } from "./tokenizer";
// Table of test groups for parseKidsSection.
// - label:  name handed to the test runner for the group
// - input:  raw text that gets tokenized before parsing
// - output: the kids expected in the parse result, in order
// - error:  when true the parse result is expected to carry at least one
//           error; when omitted it is expected to carry none
const tests: {
label: string;
cases: {
input: string;
output: T.ParsedKid[];
error?: boolean;
}[];
}[] = [
{
label: "basic kids section",
cases: [
{
input: "به",
output: ["ba"],
},
{
input: "به دې",
output: ["ba", "de"],
},
// empty input: no kids expected
{
input: "",
output: [],
},
{
input: "مې دې یې",
output: ["me", "de", "ye"],
},
// kids are still all reported, but the result is expected to be flagged
{
input: "دې به مې",
output: ["de", "ba", "me"],
error: true,
},
{
input: "مې یې",
output: ["me", "ye"],
},
// kids are still all reported, but the result is expected to be flagged
{
input: "دې مې",
output: ["de", "me"],
error: true,
},
],
},
{
// only the leading kids are expected in the output; the trailing word is not
label: "can parse kids section when tokens come after",
cases: [
{
input: "به سړی",
output: ["ba"],
},
{
input: "مې دې واخیسته",
output: ["me", "de"],
},
],
},
];
// Runs every case in the table through the tokenizer and parseKidsSection.
tests.forEach(({ label, cases }) => {
  test(label, () => {
    cases.forEach(({ input, output, error }) => {
      const tokens = tokenizer(input);
      const parsed = parseKidsSection(tokens, []);
      // Checked for every case, including the empty-output one: when no
      // kids are expected there must be no parse result at all, otherwise
      // exactly one result holding the expected kids. (Previously the whole
      // body was guarded by `if (output.length)`, so the empty case
      // asserted nothing.)
      expect(parsed.length).toBe(output.length ? 1 : 0);
      expect(parsed.map((x) => x.body.kids)).toEqual(
        output.length ? [output] : []
      );
      // the error expectations only make sense when there is a result
      if (output.length) {
        if (error) {
          expect(parsed[0].errors.length).toBeTruthy();
        } else {
          expect(parsed[0].errors.length).toBe(0);
        }
      }
    });
  });
});

View File

@ -0,0 +1,50 @@
import * as T from "../../../types";
import { parseKid } from "./parse-kid";
import { bindParseResult, returnParseResult } from "./utils";
/**
 * Reads as many consecutive kids as possible from the front of `tokens`.
 *
 * Each kid is appended to `prevKids` and the function recurses on the
 * remaining tokens. A kid that repeats an earlier one, or that ranks lower
 * than the kid directly before it, is still accepted but adds an error.
 *
 * @param tokens - the tokens still waiting to be parsed
 * @param prevKids - kids already read in this section
 * @returns an empty array when no kid could be read and `prevKids` is empty;
 * otherwise results whose body holds the collected kids
 */
export function parseKidsSection(
  tokens: Readonly<T.Token[]>,
  prevKids: T.ParsedKid[]
): T.ParseResult<{ kids: T.ParsedKid[] }>[] {
  // parseKid is only consulted when there is something to look at
  const kidResults = tokens.length === 0 ? [] : parseKid(tokens);
  if (kidResults.length === 0) {
    // the section ends here; it only counts if at least one kid was read
    if (prevKids.length) {
      return returnParseResult(tokens, { kids: prevKids });
    }
    return [];
  }
  return bindParseResult(kidResults, (remaining, kid) => {
    const errors: T.ParseError[] = [];
    if (kidDoubled(kid, prevKids)) {
      errors.push({ message: `double '${kid}' in kids section` });
    } else if (!kidComesBehind(kid, prevKids.at(-1))) {
      errors.push({ message: "kids section out of order" });
    }
    return {
      errors,
      next: parseKidsSection(remaining, [...prevKids, kid]),
    };
  });
}
/**
 * Tells whether the kid `k` already appears among the previous kids.
 *
 * @param k - the kid being added
 * @param prev - the kids read before it
 * @returns true when some element of `prev` is strictly equal to `k`
 */
function kidDoubled(k: T.ParsedKid, prev: T.ParsedKid[]): boolean {
  return prev.some((earlier) => earlier === k);
}
// position in this list is a kid's rank; the list runs from lowest to highest
const kidsOrder: T.ParsedKid[] = ["ba", "me", "de", "ye"];
/**
 * Gives the rank of a kid, i.e. its index in `kidsOrder`.
 * "mU" is not in the list and gets rank 1, the same rank as "me".
 *
 * @param k - the kid to rank
 * @returns the rank as a number
 */
function getKidRank(k: T.ParsedKid): number {
  return k === "mU" ? 1 : kidsOrder.indexOf(k);
}
/**
 * Checks that the kid `k` may follow the kid `prev`.
 *
 * @param k - the kid being added
 * @param prev - the kid directly before it, or undefined when there is none
 * @returns true when there is no previous kid, or when the rank of `k` is
 * greater than or equal to the rank of `prev`
 */
function kidComesBehind(
  k: T.ParsedKid,
  prev: T.ParsedKid | undefined
): boolean {
  return !prev || getKidRank(k) >= getKidRank(prev);
}

View File

@ -80,7 +80,7 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
inflected: false,
selection: {
type: "pronoun",
person: 4,
person: 5,
distance: "far",
},
},

View File

@ -12,6 +12,11 @@ import {
// big problem ما سړی یوړ crashes it !!
// TODO: کول verbs!
// check that aawu stuff is working
// check oo`azmooy -
// check څاته
export function parseVerb(
tokens: Readonly<T.Token[]>,
verbLookup: (s: string) => T.VerbEntry[]

File diff suppressed because it is too large Load Diff

View File

@ -20,7 +20,7 @@ import * as T from "../../../types";
* from the different previous results
* @returns
*/
export function bindParseResult<C extends object, D extends object>(
export function bindParseResult<C, D>(
previous: T.ParseResult<C>[],
f: (
tokens: Readonly<T.Token[]>,
@ -59,18 +59,42 @@ export function bindParseResult<C extends object, D extends object>(
errors: [...errsPassed, ...x.errors, ...errors],
}));
});
return cleanOutFails(nextPossibilities);
return cleanOutResults(nextPossibilities);
}
export function cleanOutFails<C extends object>(
/**
 * Wraps a body in a single-element list of parse results.
 *
 * @param tokens - the tokens left over after this parse
 * @param body - the parsed value
 * @param errors - errors to attach; an empty array is used when omitted
 * @returns an array containing exactly one parse result
 */
export function returnParseResult<D>(
  tokens: Readonly<T.Token[]>,
  body: D,
  errors?: T.ParseError[]
): T.ParseResult<D>[] {
  const result: T.ParseResult<D> = {
    tokens,
    body,
    errors: errors ?? [],
  };
  return [result];
}
/**
* finds the most successful path(s) and culls out any other more erroneous
* or redundant paths
*/
export function cleanOutResults<C>(
results: T.ParseResult<C>[]
): T.ParseResult<C>[] {
// if there's any success anywhere, remove any of the errors
const errorsGone = results.find((x) => x.errors.length === 0)
? results.filter((x) => x.errors.length === 0)
: results;
if (results.length === 0) {
return results;
}
let min = Infinity;
for (let a of results) {
if (a.errors.length < min) {
min = a.errors.length;
}
}
const errorsCulled = results.filter((x) => x.errors.length === min);
// @ts-ignore
return Array.from(new Set(errorsGone.map(JSON.stringify))).map(JSON.parse);
return Array.from(new Set(errorsCulled.map(JSON.stringify))).map(JSON.parse);
}
export function isCompleteResult<C extends object>(

View File

@ -352,7 +352,7 @@ export function getKingAndServant(
};
}
function isFirstOrSecondPersPronoun(
export function isFirstOrSecondPersPronoun(
o: "none" | T.NPSelection | T.Person.ThirdPlurMale
): boolean {
if (typeof o !== "object") return false;

View File

@ -1197,6 +1197,8 @@ export type Kid = {
kid: { type: "ba" } | MiniPronoun;
};
export type ParsedKid = "ba" | "me" | "de" | "ye" | "mU";
export type MiniPronoun = {
type: "mini-pronoun";
person: Person;