This commit is contained in:
adueck 2023-08-02 14:55:22 +04:00
parent 6eb5e081f0
commit 4cc81c8b10
7 changed files with 135 additions and 121 deletions

View File

@ -31,7 +31,11 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
<input <input
dir="rtl" dir="rtl"
className={`form-control ${ className={`form-control ${
text && errors.length ? "is-invalid" : text ? "is-valid" : "" text && (errors.length || !result.length)
? "is-invalid"
: result.length
? "is-valid"
: ""
}`} }`}
type="text" type="text"
value={text} value={text}
@ -41,7 +45,9 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
{errors.length > 0 && ( {errors.length > 0 && (
<> <>
<div className="alert alert-danger" role="alert"> <div className="alert alert-danger" role="alert">
<div>{errors[0]}</div> {errors.map((e) => (
<div>{e}</div>
))}
</div> </div>
<div className="text-center">Did you mean:</div> <div className="text-center">Did you mean:</div>
</> </>

View File

@ -46,11 +46,11 @@ export function fmapParseResult<A extends object, B extends object>(
f: (x: A) => B, f: (x: A) => B,
x: T.ParseResult<A>[] x: T.ParseResult<A>[]
): T.ParseResult<B>[] { ): T.ParseResult<B>[] {
return x.map<T.ParseResult<B>>(([tokens, result, errors]) => [ return x.map<T.ParseResult<B>>((xi) => ({
tokens, tokens: xi.tokens,
f(result), body: f(xi.body),
errors, errors: xi.errors,
]); }));
} }
export function fmapSingleOrLengthOpts<A extends object, B extends object>( export function fmapSingleOrLengthOpts<A extends object, B extends object>(

View File

@ -24,16 +24,16 @@ export function parseAdjective(
const matches = wideMatches.filter(deets.predicate); const matches = wideMatches.filter(deets.predicate);
matches.forEach((m) => { matches.forEach((m) => {
const selection = makeAdjectiveSelection(m); const selection = makeAdjectiveSelection(m);
w.push([ w.push({
rest, tokens: rest,
{ body: {
selection, selection,
inflection: deets.inflection, inflection: deets.inflection,
gender: deets.gender, gender: deets.gender,
given: first.s, given: first.s,
}, },
[], errors: [],
]); });
}); });
}); });
}); });

View File

@ -30,35 +30,41 @@ export function parseNoun(
if (possesor) { if (possesor) {
const runsAfterPossesor: T.ParseResult<NounResult | undefined>[] = possesor const runsAfterPossesor: T.ParseResult<NounResult | undefined>[] = possesor
? possesor ? possesor
: [[tokens, undefined, []]]; : [{ tokens, body: undefined, errors: [] }];
// could be a case for a monad ?? // could be a case for a monad ??
return removeUnneccesaryFailing( return removeUnneccesaryFailing(
runsAfterPossesor.flatMap(([tokens, possesor, errors]) => runsAfterPossesor.flatMap(
parseNoun( ({ tokens, body: possesor, errors }) =>
tokens, parseNoun(
lookup, tokens,
possesor lookup,
? { possesor
inflected: possesor.inflected, ? {
selection: { inflected: possesor.inflected,
...possesor.selection, selection: {
possesor: prevPossesor ...possesor.selection,
? { possesor: prevPossesor
shrunken: false, ? {
np: { shrunken: false,
type: "NP", np: {
selection: prevPossesor.selection, type: "NP",
}, selection: prevPossesor.selection,
} },
: undefined, }
}, : undefined,
} },
: undefined }
).map<T.ParseResult<NounResult>>(([t, r, errs]) => [ : undefined
t, )
r, // .map<T.ParseResult<NounResult>>(([t, r, errs]) => [
[...errs, ...errors], // t,
]) // r,
// // TODO: should the errors from the runsAfterPossesor be thrown out?
// // or ...errors should be kept?
// // to show an error like د غتو ماشومان نومونه
// // adj error غټ should be first inflection (seems confusing)
// [...errs, ...errors],
// ])
) )
); );
} else { } else {
@ -73,20 +79,20 @@ function removeUnneccesaryFailing(
): T.ParseResult<NounResult>[] { ): T.ParseResult<NounResult>[] {
// group by identical results // group by identical results
const groups = groupWith( const groups = groupWith(
(a, b) => equals(a[1].selection, b[1].selection), (a, b) => equals(a.body.selection, b.body.selection),
results results
); );
// if there's a group of identical results with some success in it // if there's a group of identical results with some success in it
// remove any erroneous results // remove any erroneous results
const stage1 = groups.flatMap((group) => { const stage1 = groups.flatMap((group) => {
if (group.find((x) => x[2].length === 0)) { if (group.find((x) => x.errors.length === 0)) {
return group.filter((x) => x[2].length === 0); return group.filter((x) => x.errors.length === 0);
} }
return group; return group;
}); });
// finally, if there's any success anywhere, remove any of the errors // finally, if there's any success anywhere, remove any of the errors
if (stage1.find((x) => x[2].length === 0)) { if (stage1.find((x) => x.errors.length === 0)) {
return stage1.filter((x) => x[2].length === 0); return stage1.filter((x) => x.errors.length === 0);
} else { } else {
return stage1; return stage1;
} }
@ -111,7 +117,7 @@ function parseNounAfterPossesor(
} }
// TODO: add recognition of او between adjectives // TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup); const adjRes = parseAdjective(tokens, lookup);
const withAdj = adjRes.flatMap(([tkns, adj]) => const withAdj = adjRes.flatMap(({ tokens: tkns, body: adj }) =>
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj]) parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
); );
const [first, ...rest] = tokens; const [first, ...rest] = tokens;
@ -141,9 +147,9 @@ function parseNounAfterPossesor(
convertInflection(inf, entry, gender, deets.plural).forEach( convertInflection(inf, entry, gender, deets.plural).forEach(
({ inflected, number }) => { ({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined); const selection = makeNounSelection(entry, undefined);
w.push([ w.push({
rest, tokens: rest,
{ body: {
inflected, inflected,
selection: { selection: {
...selection, ...selection,
@ -167,7 +173,7 @@ function parseNounAfterPossesor(
: undefined, : undefined,
}, },
}, },
[ errors: [
...(possesor?.inflected === false ...(possesor?.inflected === false
? [{ message: "possesor should be inflected" }] ? [{ message: "possesor should be inflected" }]
: []), : []),
@ -175,7 +181,7 @@ function parseNounAfterPossesor(
message, message,
})), })),
], ],
] as T.ParseResult<NounResult>); });
} }
); );
}); });

View File

@ -8,15 +8,13 @@ export function parsePhrase(
success: { inflected: boolean; selection: T.NPSelection }[]; success: { inflected: boolean; selection: T.NPSelection }[];
errors: string[]; errors: string[];
} { } {
const nps = parseNP(s, lookup).filter(([tkns]) => !tkns.length); const nps = parseNP(s, lookup).filter(({ tokens }) => !tokens.length);
const success = nps.map((x) => x[1]); const success = nps.map((x) => x.body);
return { return {
success, success,
errors: [ errors: [
...new Set( ...new Set(nps.flatMap(({ errors }) => errors.map((e) => e.message))),
nps.flatMap(([tkns, r, errors]) => errors.map((e) => e.message))
),
], ],
}; };
} }

View File

@ -9,9 +9,9 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
}>[] { }>[] {
const [{ s }, ...rest] = tokens; const [{ s }, ...rest] = tokens;
if (s === "زه") { if (s === "زه") {
return [0, 1].map((person) => [ return [0, 1].map((person) => ({
rest, tokens: rest,
{ body: {
inflected: false, inflected: false,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -19,12 +19,12 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
]); }));
} else if (s === "ته") { } else if (s === "ته") {
return [2, 3].map((person) => [ return [2, 3].map((person) => ({
rest, tokens: rest,
{ body: {
inflected: false, inflected: false,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -32,13 +32,13 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
]); }));
} else if (s === "هغه") { } else if (s === "هغه") {
return [ return [
...[false, true].map<Result>((inflected) => [ ...[false, true].map<Result>((inflected) => ({
rest, tokens: rest,
{ body: {
inflected, inflected,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -46,11 +46,11 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
]), })),
[ {
rest, tokens: rest,
{ body: {
inflected: false, inflected: false,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -58,14 +58,14 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
], },
]; ];
} else if (s === "هغې") { } else if (s === "هغې") {
return [ return [
[ {
rest, tokens: rest,
{ body: {
inflected: true, inflected: true,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -73,14 +73,14 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
], },
]; ];
} else if (s === "دی") { } else if (s === "دی") {
return [ return [
[ {
rest, tokens: rest,
{ body: {
inflected: false, inflected: false,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -88,14 +88,14 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "near", distance: "near",
}, },
}, },
[], errors: [],
], },
]; ];
} else if (s === "ده") { } else if (s === "ده") {
return [ return [
[ {
rest, tokens: rest,
{ body: {
inflected: true, inflected: true,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -103,14 +103,14 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "near", distance: "near",
}, },
}, },
[], errors: [],
], },
]; ];
} else if (s === "دا") { } else if (s === "دا") {
return [ return [
[ {
rest, tokens: rest,
{ body: {
inflected: false, inflected: false,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -118,14 +118,14 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "near", distance: "near",
}, },
}, },
[], errors: [],
], },
]; ];
} else if (s === "دې") { } else if (s === "دې") {
return [ return [
[ {
rest, tokens: rest,
{ body: {
inflected: true, inflected: true,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -133,15 +133,15 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "near", distance: "near",
}, },
}, },
[], errors: [],
], },
]; ];
} else if (["مونږ", "موږ"].includes(s)) { } else if (["مونږ", "موږ"].includes(s)) {
return [false, true].flatMap<Result>((inflected) => return [false, true].flatMap<Result>((inflected) =>
[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].map<Result>( [T.Person.FirstPlurMale, T.Person.FirstPlurFemale].map<Result>(
(person) => [ (person) => ({
rest, tokens: rest,
{ body: {
inflected, inflected,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -149,16 +149,16 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
] })
) )
); );
} else if (["تاسو", "تاسې"].includes(s)) { } else if (["تاسو", "تاسې"].includes(s)) {
return [false, true].flatMap<Result>((inflected) => return [false, true].flatMap<Result>((inflected) =>
[T.Person.SecondPlurMale, T.Person.SecondPlurFemale].map<Result>( [T.Person.SecondPlurMale, T.Person.SecondPlurFemale].map<Result>(
(person) => [ (person) => ({
rest, tokens: rest,
{ body: {
inflected, inflected,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -166,16 +166,16 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
] })
) )
); );
} else if (["هغوي", "هغوی"].includes(s)) { } else if (["هغوي", "هغوی"].includes(s)) {
return [false, true].flatMap<Result>((inflected) => return [false, true].flatMap<Result>((inflected) =>
[T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale].map<Result>( [T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale].map<Result>(
(person) => [ (person) => ({
rest, tokens: rest,
{ body: {
inflected, inflected,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -183,16 +183,16 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "far", distance: "far",
}, },
}, },
[], errors: [],
] })
) )
); );
} else if (["دوي", "دوی"].includes(s)) { } else if (["دوي", "دوی"].includes(s)) {
return [false, true].flatMap<Result>((inflected) => return [false, true].flatMap<Result>((inflected) =>
[T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale].map<Result>( [T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale].map<Result>(
(person) => [ (person) => ({
rest, tokens: rest,
{ body: {
inflected, inflected,
selection: { selection: {
type: "pronoun", type: "pronoun",
@ -200,8 +200,8 @@ export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
distance: "near", distance: "near",
}, },
}, },
[], errors: [],
] })
) )
); );
} }

View File

@ -1281,4 +1281,8 @@ export type ParseError = {
}; };
/** a tuple containing the [left over tokens, parse result, errors associated with the result] */ /** a tuple containing the [left over tokens, parse result, errors associated with the result] */
export type ParseResult<P> = [Readonly<Token[]>, P, ParseError[]]; export type ParseResult<P> = {
tokens: Readonly<Token[]>;
body: P;
errors: ParseError[];
};