wohoo getting better with NPs

This commit is contained in:
adueck 2023-08-02 14:24:35 +04:00
parent a084433064
commit 6eb5e081f0
12 changed files with 493 additions and 387 deletions

View File

@ -7,58 +7,119 @@ import {
getEnglishFromRendered,
getPashtoFromRendered,
} from "../../../lib/src/phrase-building/np-tools";
import {
renderNPSelection,
} from "../../../lib/src/phrase-building/render-np";
import { renderNPSelection } from "../../../lib/src/phrase-building/render-np";
import { NPBlock } from "../blocks/Block";
function NPDisplay({ NP, inflected, opts, justify, onlyOne, mode: preferredMode, script: preferredScript }: {
NP: T.NPSelection,
opts: T.TextOptions,
justify?: "left" | "right" | "center",
onlyOne?: boolean | "concat",
mode?: Mode,
script?: "p" | "f",
inflected: boolean,
function NPDisplay({
NP,
inflected,
opts,
justify,
onlyOne,
mode: preferredMode,
script: preferredScript,
}: {
NP: T.NPSelection;
opts: T.TextOptions;
justify?: "left" | "right" | "center";
onlyOne?: boolean | "concat";
mode?: Mode;
script?: "p" | "f";
inflected: boolean;
}) {
const [mode, setMode] = useState<Mode>(preferredMode || "text");
const [script, setScript] = useStickyState<"p" | "f">(preferredScript || "f", "blockScriptChoice");
const rendered = renderNPSelection(NP, inflected, false, "subject", "none", false);
const [script, setScript] = useStickyState<"p" | "f">(
preferredScript || "f",
"blockScriptChoice"
);
const rendered = renderNPSelection(
NP,
inflected,
false,
"subject",
"none",
false
);
const english = getEnglishFromRendered(rendered);
const pashto = getPashtoFromRendered(rendered, false);
const result = {
ps: pashto,
e: [english || ""],
};
return <div className={`text-${justify ? justify : "center"} mt-1`}>
return (
<div className={`text-${justify ? justify : "center"} mt-1`}>
<div className="d-flex flex-row mb-2 align-items-center">
<ModeSelect value={mode} onChange={setMode} />
{mode === "blocks" && <ScriptSelect value={script} onChange={setScript} />}
{mode === "blocks" && (
<ScriptSelect value={script} onChange={setScript} />
)}
</div>
{mode === "text"
? <CompiledPTextDisplay opts={opts} compiled={result} justify={justify} onlyOne={!!onlyOne} />
: <NPBlockDisplay opts={opts} np={rendered} justify={justify} script={script} />}
{result.e && <div className={`text-muted mt-2 text-${justify === "left" ? "left" : justify === "right" ? "right" : "center"}`}>
{inflected && <samp>INFLECTED</samp>}
{mode === "text" ? (
<CompiledPTextDisplay
opts={opts}
compiled={result}
justify={justify}
onlyOne={!!onlyOne}
/>
) : (
<NPBlockDisplay
opts={opts}
np={rendered}
justify={justify}
script={script}
/>
)}
{result.e && (
<div
className={`text-muted mt-2 text-${
justify === "left"
? "left"
: justify === "right"
? "right"
: "center"
}`}
>
{onlyOne === "concat"
? result.e.join(" • ")
: onlyOne
? [result.e[0]]
: result.e.map((e, i) => <div key={i}>{e}</div>)}
</div>}
</div>
)}
</div>
);
}
function NPBlockDisplay({ opts, np, justify, script }: {
script: "p" | "f",
opts: T.TextOptions,
np: T.Rendered<T.NPSelection>,
justify?: "left" | "right" | "center",
function NPBlockDisplay({
opts,
np,
justify,
script,
}: {
script: "p" | "f";
opts: T.TextOptions;
np: T.Rendered<T.NPSelection>;
justify?: "left" | "right" | "center";
}) {
return <div className={`d-flex flex-row justify-content-${justify ? justify : "center"}`}>
<div className={`d-flex flex-row${script === "p" ? "-reverse" : ""} justify-content-left align-items-end mt-3 pb-2`} style={{ overflowX: "auto" }}>
<NPBlock opts={opts} script={script}>{np}</NPBlock>
return (
<div
className={`d-flex flex-row justify-content-${
justify ? justify : "center"
}`}
>
<div
className={`d-flex flex-row${
script === "p" ? "-reverse" : ""
} justify-content-left align-items-end mt-3 pb-2`}
style={{ overflowX: "auto" }}
>
<NPBlock opts={opts} script={script}>
{np}
</NPBlock>
</div>
</div>
);
}
export default NPDisplay;

View File

@ -3,23 +3,26 @@ import * as T from "../types";
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { lookup } from "../lib/src/parsing/lookup";
import { tokenizer } from "../lib/src/parsing/tokenizer";
import { NPDisplay } from "../components/library";
function ParserDemo({ opts }: { opts: T.TextOptions }) {
const [text, setText] = useState<string>("");
const [result, setResult] = useState<string>("");
const [result, setResult] = useState<
{ inflected: boolean; selection: T.NPSelection }[]
>([]);
const [errors, setErrors] = useState<string[]>([]);
function handleChange(e: React.ChangeEvent<HTMLInputElement>) {
const value = e.target.value;
if (!value) {
setText("");
setResult("");
setResult([]);
setErrors([]);
return;
}
const { success, errors } = parsePhrase(tokenizer(value), lookup);
setText(value);
setErrors(errors);
setResult(JSON.stringify(success, null, " "));
setResult(success);
}
return (
<div className="mt-3" style={{ marginBottom: "1000px" }}>
@ -28,23 +31,31 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
<input
dir="rtl"
className={`form-control ${
text && result === "[]" ? "is-invalid" : text ? "is-valid" : ""
text && errors.length ? "is-invalid" : text ? "is-valid" : ""
}`}
type="text"
value={text}
onChange={handleChange}
/>
</div>
{result === "[]" && errors.length > 0 && (
{errors.length > 0 && (
<>
<div className="alert alert-danger" role="alert">
{errors.map((e) => (
<div key={Math.random()}>{e}</div>
))}
<div>{errors[0]}</div>
</div>
<div className="text-center">Did you mean:</div>
</>
)}
{result.map((np) => (
<NPDisplay NP={np.selection} inflected={np.inflected} opts={opts} />
))}
<details>
<summary>AST</summary>
<samp>
<pre>{result}</pre>
<pre>{JSON.stringify(result, null, " ")}</pre>
</samp>
</details>
</div>
);
}

View File

@ -42,6 +42,17 @@ export const monoidPsStringWVars: Monoid<T.PsString[]> = {
empty: [monoidPsString.empty],
};
/**
 * Maps a function over the parsed value of every parse result.
 *
 * Only the middle slot of each `[leftover tokens, result, errors]` tuple is
 * transformed; the leftover tokens and the errors are carried over untouched.
 *
 * @param f - transformation applied to each parsed value
 * @param x - the parse results to transform
 * @returns a new array of parse results holding the transformed values
 */
export function fmapParseResult<A extends object, B extends object>(
  f: (x: A) => B,
  x: T.ParseResult<A>[]
): T.ParseResult<B>[] {
  return x.map((entry): T.ParseResult<B> => {
    const [remaining, payload, issues] = entry;
    return [remaining, f(payload), issues];
  });
}
export function fmapSingleOrLengthOpts<A extends object, B extends object>(
f: (x: A) => B,
x: T.SingleOrLengthOpts<A>

View File

@ -321,7 +321,7 @@ export function getInflectionQueries(
gender: ["masc"],
predicate: (e) =>
!(isNounEntry(e) && isPluralNounEntry(e)) &&
(isPattern1Entry(e) || isPattern(0)(e)),
(isPattern1Entry(e) || (isPattern(0)(e) && !isAdjectiveEntry(e))),
},
});
queries.push({

View File

@ -6,15 +6,12 @@ import { getInflectionQueries } from "./inflection-query";
export function parseAdjective(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): [
T.Token[],
{
): T.ParseResult<{
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}
][] {
}>[] {
const w: ReturnType<typeof parseAdjective> = [];
if (tokens.length === 0) {
return [];
@ -35,10 +32,10 @@ export function parseAdjective(
gender: deets.gender,
given: first.s,
},
[],
]);
});
});
});
return w;
}

View File

@ -1301,8 +1301,7 @@ describe("parsing nouns", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const { success } = parseNoun(tokens, lookup, undefined);
const res = success.map(([tkns, r]) => r);
const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
expect(res).toEqual(output);
});
});
@ -1430,16 +1429,15 @@ const adjsTests: {
},
];
describe("parsing nouns with adjectives", () => {
adjsTests.forEach(({ category, cases }) => {
// eslint-disable-next-line jest/valid-title
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
expect(
parseNoun(tokens, lookup, undefined).success.map((x) => x[1])
).toEqual(output);
});
});
});
});
// describe("parsing nouns with adjectives", () => {
// adjsTests.forEach(({ category, cases }) => {
// // eslint-disable-next-line jest/valid-title
// test(category, () => {
// cases.forEach(({ input, output }) => {
// const tokens = tokenizer(input);
// const res = parseNoun(tokens, lookup, undefined).map(([t, res]) => res);
// expect(res).toEqual(output);
// });
// });
// });
// });

View File

@ -9,69 +9,86 @@ import {
} from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { parseAdjective } from "./parse-adjective";
import { groupWith, equals } from "rambda";
// TODO:
// - cleanup the workflow and make sure all nouns are covered and test
// - add possesive parsing
type NounResult = { inflected: boolean; selection: T.NounSelection };
export function parseNoun(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
prevPossesor: T.NounSelection | undefined
): {
success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
errors: string[];
} {
prevPossesor: { inflected: boolean; selection: T.NounSelection } | undefined
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return {
success: [],
errors: [],
};
return [];
}
const [first, ...rest] = tokens;
const possesor =
first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
if (possesor) {
const runsAfterPossesor: [
Readonly<T.Token[]>,
{ inflected: boolean; selection: T.NounSelection } | undefined
][] = possesor ? [...possesor.success] : [[tokens, undefined]];
const runsAfterPossesor: T.ParseResult<NounResult | undefined>[] = possesor
? possesor
: [[tokens, undefined, []]];
// could be a case for a monad ??
return runsAfterPossesor.reduce<ReturnType<typeof parseNoun>>(
(acc, [tokens, possesor]) => {
if (possesor?.inflected === false) {
return {
success: [...acc.success],
errors: [...acc.errors, "possesor should be inflected"],
};
}
const { success, errors } = parseNoun(
return removeUnneccesaryFailing(
runsAfterPossesor.flatMap(([tokens, possesor, errors]) =>
parseNoun(
tokens,
lookup,
possesor
? {
inflected: possesor.inflected,
selection: {
...possesor.selection,
possesor: prevPossesor
? {
shrunken: false,
np: {
type: "NP",
selection: prevPossesor,
selection: prevPossesor.selection,
},
}
: undefined,
},
}
: undefined
);
return {
success: [...acc.success, ...success],
errors: [...acc.errors, ...errors],
};
},
{ success: [], errors: [] }
).map<T.ParseResult<NounResult>>(([t, r, errs]) => [
t,
r,
[...errs, ...errors],
])
)
);
} else {
return parseNounAfterPossesor(tokens, lookup, prevPossesor, []);
return removeUnneccesaryFailing(
parseNounAfterPossesor(tokens, lookup, prevPossesor, [])
);
}
}
/**
 * Drops parse results that carry errors whenever an error-free alternative
 * is available.
 *
 * Two passes are made:
 *  1. results are bucketed by equal `selection`; inside a bucket that holds
 *     at least one error-free result, the erroneous ones are discarded
 *  2. if any error-free result is left at all, every erroneous result is
 *     discarded
 *
 * When no result is error-free, everything is returned as is so that the
 * caller still has the errors to report.
 *
 * NOTE(review): rambda's `groupWith` compares neighbouring elements only, so
 * equal selections that are not adjacent end up in separate buckets. Pass 2
 * already removes every erroneous result once any error-free one exists, so
 * pass 1 does not appear to affect the final output — confirm before relying
 * on the per-bucket behaviour.
 *
 * @param results - candidate noun parse results
 * @returns the pruned list of results, in their original order
 */
function removeUnneccesaryFailing(
  results: T.ParseResult<NounResult>[]
): T.ParseResult<NounResult>[] {
  const isClean = (r: T.ParseResult<NounResult>): boolean =>
    r[2].length === 0;
  const sameSelection = (
    a: T.ParseResult<NounResult>,
    b: T.ParseResult<NounResult>
  ) => equals(a[1].selection, b[1].selection);

  // pass 1: prune inside each bucket of identical selections
  const pruned: T.ParseResult<NounResult>[] = [];
  for (const bucket of groupWith(sameSelection, results)) {
    pruned.push(...(bucket.some(isClean) ? bucket.filter(isClean) : bucket));
  }

  // pass 2: one clean result anywhere wins over all erroneous ones
  return pruned.some(isClean) ? pruned.filter(isClean) : pruned;
}
@ -81,31 +98,24 @@ export function parseNoun(
function parseNounAfterPossesor(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
possesor: T.NounSelection | undefined,
possesor: { inflected: boolean; selection: T.NounSelection } | undefined,
adjectives: {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}[]
): {
success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
errors: string[];
} {
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return {
success: [],
errors: [],
};
return [];
}
// TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup);
const withAdj = adjRes.map(([tkns, adj]) =>
const withAdj = adjRes.flatMap(([tkns, adj]) =>
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
);
const [first, ...rest] = tokens;
const success: ReturnType<typeof parseNoun>["success"] = [];
const errors: string[] = [];
const w: ReturnType<typeof parseNoun> = [];
const searches = getInflectionQueries(first.s, true);
@ -122,17 +132,16 @@ function parseNounAfterPossesor(
deets.gender.forEach((gender) => {
if (genders.includes(gender)) {
deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
const { error: adjErrors } = adjsMatch(
adjectives,
gender,
inf,
deets.plural
);
if (ok) {
convertInflection(inf, entry, gender, deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([
w.push([
rest,
{
inflected,
@ -152,30 +161,30 @@ function parseNounAfterPossesor(
shrunken: false,
np: {
type: "NP",
selection: possesor,
selection: possesor.selection,
},
}
: undefined,
},
},
]);
[
...(possesor?.inflected === false
? [{ message: "possesor should be inflected" }]
: []),
...adjErrors.map((message) => ({
message,
})),
],
] as T.ParseResult<NounResult>);
}
);
} else {
error.forEach((e) => {
errors.push(e);
});
}
});
}
});
});
});
});
return {
success: [...withAdj.map((x) => x.success).flat(), ...success],
errors: [...withAdj.map((x) => x.errors).flat(), ...errors],
};
return [...withAdj, ...w];
}
function adjsMatch(

View File

@ -0,0 +1,38 @@
import * as T from "../../../types";
import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun";
import { fmapParseResult } from "../fp-ps";
/**
 * Parses a noun phrase from the start of a token list.
 *
 * Both pronoun and noun readings are attempted, and every reading found is
 * returned wrapped as an NP selection.
 *
 * @param s - the tokens to parse
 * @param lookup - dictionary lookup handed on to the noun parser
 * @returns one parse result per possible reading; empty when `s` is empty or
 * nothing matched
 */
export function parseNP(
  s: T.Token[],
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): T.ParseResult<{ inflected: boolean; selection: T.NPSelection }>[] {
  // Nothing to parse. parseNoun yields [] for an empty token list, but
  // parsePronoun destructures its first token and would throw instead.
  if (s.length === 0) {
    return [];
  }
  /** wraps a pronoun or noun selection (keeping its inflection flag) in an NP */
  function makeNPSl(
    a:
      | {
          inflected: boolean;
          selection: T.PronounSelection;
        }
      | {
          inflected: boolean;
          selection: T.NounSelection;
        }
  ): {
    inflected: boolean;
    selection: T.NPSelection;
  } {
    return {
      inflected: a.inflected,
      selection: {
        type: "NP",
        selection: a.selection,
      } as T.NPSelection,
    };
  }
  // @ts-ignore grrr webpack is having trouble with this
  return fmapParseResult(makeNPSl, [
    ...parsePronoun(s),
    ...parseNoun(s, lookup, undefined),
  ]);
}

View File

@ -1,24 +1,22 @@
import { parseAdjective } from "./parse-adjective";
import * as T from "../../../types";
import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun";
import { parseNP } from "./parse-np";
export function parsePhrase(
s: T.Token[],
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): {
success: any[];
success: { inflected: boolean; selection: T.NPSelection }[];
errors: string[];
} {
const adjsRes = parseAdjective(s, lookup);
const prnsRes = parsePronoun(s);
const nounsRes = parseNoun(s, lookup, undefined);
const nps = parseNP(s, lookup).filter(([tkns]) => !tkns.length);
const correct = [...adjsRes, ...prnsRes, ...nounsRes.success]
.filter(([tkns]) => tkns.length === 0)
.map((x) => x[1]);
const success = nps.map((x) => x[1]);
return {
success: correct,
errors: nounsRes.errors,
success,
errors: [
...new Set(
nps.flatMap(([tkns, r, errors]) => errors.map((e) => e.message))
),
],
};
}

View File

@ -1,235 +1,210 @@
import * as T from "../../../types";
export function parsePronoun(tokens: Readonly<T.Token[]>): [
T.Token[],
{
inflected: boolean[];
type Result = ReturnType<typeof parsePronoun>[number];
// TODO: map for doubling true, false, and masc fem
export function parsePronoun(tokens: Readonly<T.Token[]>): T.ParseResult<{
inflected: boolean;
selection: T.PronounSelection;
}
][] {
}>[] {
const [{ s }, ...rest] = tokens;
const w: ReturnType<typeof parsePronoun> = [];
if (s === "زه") {
w.push([
return [0, 1].map((person) => [
rest,
{
inflected: [false],
inflected: false,
selection: {
type: "pronoun",
person: 0,
distance: "far",
},
},
]);
w.push([
rest,
{
inflected: [false],
selection: {
type: "pronoun",
person: 1,
person,
distance: "far",
},
},
[],
]);
} else if (s === "ته") {
w.push([
return [2, 3].map((person) => [
rest,
{
inflected: [false],
inflected: false,
selection: {
type: "pronoun",
person: 2,
distance: "far",
},
},
]);
w.push([
rest,
{
inflected: [false],
selection: {
type: "pronoun",
person: 3,
person,
distance: "far",
},
},
[],
]);
} else if (s === "هغه") {
w.push([
return [
...[false, true].map<Result>((inflected) => [
rest,
{
inflected: [false, true],
selection: {
type: "pronoun",
person: 4,
distance: "far",
},
},
]);
w.push([
rest,
{
inflected: [false],
inflected,
selection: {
type: "pronoun",
person: 5,
distance: "far",
},
},
]);
[],
]),
[
rest,
{
inflected: false,
selection: {
type: "pronoun",
person: 5,
distance: "far",
},
},
[],
],
];
} else if (s === "هغې") {
w.push([
return [
[
rest,
{
inflected: [true],
inflected: true,
selection: {
type: "pronoun",
person: T.Person.ThirdSingFemale,
distance: "far",
},
},
]);
[],
],
];
} else if (s === "دی") {
w.push([
return [
[
rest,
{
inflected: [false],
inflected: false,
selection: {
type: "pronoun",
person: T.Person.ThirdSingMale,
distance: "near",
},
},
]);
[],
],
];
} else if (s === "ده") {
w.push([
return [
[
rest,
{
inflected: [true],
inflected: true,
selection: {
type: "pronoun",
person: T.Person.ThirdSingMale,
distance: "near",
},
},
]);
[],
],
];
} else if (s === "دا") {
w.push([
return [
[
rest,
{
inflected: [false],
inflected: false,
selection: {
type: "pronoun",
person: T.Person.ThirdSingFemale,
distance: "near",
},
},
]);
[],
],
];
} else if (s === "دې") {
w.push([
return [
[
rest,
{
inflected: [true],
inflected: true,
selection: {
type: "pronoun",
person: T.Person.ThirdSingFemale,
distance: "near",
},
},
]);
[],
],
];
} else if (["مونږ", "موږ"].includes(s)) {
w.push([
return [false, true].flatMap<Result>((inflected) =>
[T.Person.FirstPlurMale, T.Person.FirstPlurFemale].map<Result>(
(person) => [
rest,
{
inflected: [false, true],
inflected,
selection: {
type: "pronoun",
person: T.Person.FirstPlurMale,
person,
distance: "far",
},
},
]);
w.push([
rest,
{
inflected: [false, true],
selection: {
type: "pronoun",
person: T.Person.FirstPlurFemale,
distance: "far",
},
},
]);
[],
]
)
);
} else if (["تاسو", "تاسې"].includes(s)) {
w.push([
return [false, true].flatMap<Result>((inflected) =>
[T.Person.SecondPlurMale, T.Person.SecondPlurFemale].map<Result>(
(person) => [
rest,
{
inflected: [false, true],
inflected,
selection: {
type: "pronoun",
person: T.Person.SecondPlurMale,
person,
distance: "far",
},
},
]);
w.push([
rest,
{
inflected: [false, true],
selection: {
type: "pronoun",
person: T.Person.SecondPlurFemale,
distance: "far",
},
},
]);
[],
]
)
);
} else if (["هغوي", "هغوی"].includes(s)) {
w.push([
return [false, true].flatMap<Result>((inflected) =>
[T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale].map<Result>(
(person) => [
rest,
{
inflected: [false, true],
inflected,
selection: {
type: "pronoun",
person: T.Person.ThirdPlurMale,
person,
distance: "far",
},
},
]);
w.push([
rest,
{
inflected: [false, true],
selection: {
type: "pronoun",
person: T.Person.ThirdPlurFemale,
distance: "far",
},
},
]);
[],
]
)
);
} else if (["دوي", "دوی"].includes(s)) {
w.push([
return [false, true].flatMap<Result>((inflected) =>
[T.Person.ThirdPlurMale, T.Person.ThirdPlurFemale].map<Result>(
(person) => [
rest,
{
inflected: [false, true],
inflected,
selection: {
type: "pronoun",
person: T.Person.ThirdPlurMale,
person,
distance: "near",
},
},
]);
w.push([
rest,
{
inflected: [false, true],
selection: {
type: "pronoun",
person: T.Person.ThirdPlurFemale,
distance: "near",
},
},
]);
[],
]
)
);
}
return w;
return [];
}

View File

@ -1,7 +1,7 @@
import { Token } from "../../../types";
export function tokenizer(s: string): Token[] {
const words = s.trim().split(" ");
const words = s.trim().split(/ +/);
const indexed: { i: number; s: string }[] = [];
for (let i = 0; i < words.length; i++) {
indexed.push({ i, s: words[i] });

View File

@ -1274,3 +1274,11 @@ export type Token = {
i: number;
s: string;
};
/** a problem attached to a parse result */
export type ParseError = {
  /** description of what is wrong */
  message: string;
  /** NOTE(review): presumably the token the error relates to — confirm against the code that produces these errors */
  token?: Token;
};
/** a tuple containing the [left over tokens, parse result, errors associated with the result] */
export type ParseResult<P> = [Readonly<Token[]>, P, ParseError[]];