more on noun parser

This commit is contained in:
adueck 2024-08-23 19:28:58 -04:00
parent 1abc83ee01
commit 191abc5778
15 changed files with 411 additions and 199 deletions

View File

@ -5,9 +5,11 @@ import { tokenizer } from "../lib/src/parsing/tokenizer";
// import { NPDisplay } from "../components/library";
// import EditableVP from "../components/src/vp-explorer/EditableVP";
// import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
import { DictionaryAPI } from "../lib/src/dictionary/dictionary";
import { parseNoun } from "../lib/src/parsing/parse-noun-new";
import { JsonEditor } from "json-edit-react";
import { renderNounSelection } from "../lib/src/phrase-building/render-np";
import { NPBlock } from "../components/src/blocks/Block";
import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
const working = [
"limited demo vocab",
@ -47,16 +49,18 @@ const examples = [
];
function ParserDemo({
// opts,
opts,
// entryFeeder,
dictionary,
}: {
opts: T.TextOptions;
entryFeeder: T.EntryFeeder;
dictionary: DictionaryAPI;
dictionary: T.DictionaryAPI;
}) {
const [text, setText] = useState<string>("");
const [result, setResult] = useState<any[]>([]);
const [result, setResult] = useState<
ReturnType<typeof parseNoun>[number]["body"][]
>([]);
// ReturnType<typeof parsePhrase>["success"]
const [errors, setErrors] = useState<string[]>([]);
function handleInput(value: string) {
@ -66,8 +70,10 @@ function ParserDemo({
setErrors([]);
return;
}
const res = parseNoun(tokenizer(value), dictionary, undefined, []);
const success = res.filter((x) => !x.tokens.length).map((x) => x.body);
const res = parseNoun(tokenizer(value), dictionary, undefined);
const success: ReturnType<typeof parseNoun>[number]["body"][] = res
.filter((x) => !x.tokens.length)
.map((x) => x.body);
const errors = [
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
];
@ -135,6 +141,29 @@ function ParserDemo({
<div className="text-center">Did you mean:</div>
</>
)}
{result.map((r) => {
try {
const renderedNP: T.Rendered<T.NPSelection> = {
type: "NP",
selection: renderNounSelection(r.selection, r.inflected, "none"),
};
return (
<>
{r.inflected ? "INFLECTED" : "PLAIN"}
<NPBlock
opts={opts}
script="p"
english={getEnglishFromRendered(renderedNP)}
>
{renderedNP}
</NPBlock>
</>
);
} catch (e) {
console.error(e);
return <div>ERROR RENDERING</div>;
}
})}
<JsonEditor data={result} />
{/* {result.map((res) =>
"inflected" in res ? (

View File

@ -53,17 +53,7 @@ function specialPluralLookup(p: string): T.NounEntry[] {
.filter(tp.isNounEntry);
}
export type DictionaryAPI = {
initialize: () => ReturnType<typeof dictDb.initialize>;
update: () => ReturnType<typeof dictDb.updateDictionary>;
queryP: (p: string) => T.DictionaryEntry[];
adjLookup: (p: string) => T.AdjectiveEntry[];
nounLookup: (p: string) => T.NounEntry[];
otherLookup: (key: keyof T.DictionaryEntry, p: string) => T.DictionaryEntry[];
specialPluralLookup: (p: string) => T.NounEntry[];
};
export const dictionary: DictionaryAPI = {
export const dictionary: T.DictionaryAPI = {
initialize: async () => await dictDb.initialize(),
update: async () => await dictDb.updateDictionary(() => null),
queryP: memoizedQueryP,

View File

@ -1,5 +1,4 @@
import * as T from "../../../types";
import type { DictionaryAPI } from "../dictionary/dictionary";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { entries } from "../../../../vocab/mini-dict-entries";
@ -26,7 +25,7 @@ function specialPluralLookup(p: string): T.NounEntry[] {
) as T.NounEntry[];
}
export const testDictionary: DictionaryAPI = {
export const testDictionary: T.DictionaryAPI = {
// @ts-expect-error we won't mock the initialization
initialize: async () => 0,
// @ts-expect-error not perfect mocking because won't need that

View File

@ -1,5 +1,4 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { fmapParseResult } from "../fp-ps";
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as tp from "../type-predicates";
@ -7,13 +6,8 @@ import { parseInflectableWord } from "./parse-inflectable-word";
export function parseAdjective(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
): T.ParseResult<{
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}>[] {
dictionary: T.DictionaryAPI
): T.ParseResult<T.InflectableBaseParse<T.AdjectiveSelection>>[] {
if (tokens.length === 0) {
return [];
}
@ -27,7 +21,7 @@ export function parseAdjective(
inflection: r.inflection,
gender: r.gender,
given: r.given,
selection: makeAdjectiveSelection(r.entry as T.AdjectiveEntry),
selection: makeAdjectiveSelection(r.selection as T.AdjectiveEntry),
}),
adjectives
);

View File

@ -7,12 +7,7 @@ import { LookupFunction } from "./lookup";
export function parseAdjective(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction
): T.ParseResult<{
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}>[] {
): T.ParseResult<T.InflectableBaseParse<T.AdjectiveSelection>>[] {
const w: ReturnType<typeof parseAdjective> = [];
if (tokens.length === 0) {
return [];

View File

@ -0,0 +1,100 @@
import * as T from "../../../types";
import { determiners } from "../../../types";
import * as tp from "../type-predicates";
import { returnParseResult } from "./utils";
/**
 * Tries to read a single determiner from the first token.
 *
 * The token is compared against the `determiners` list both as-is and with its
 * final letter removed, so that inflected forms are recognized:
 *
 * - ending in "و": the stem is taken as the 2nd inflection (either gender) of
 *   an inflecting determiner; an exact match on the whole token is also tried
 * - ending in "ې": an exact match is accepted for every inflection and gender,
 *   and the stem is taken as the 1st inflection feminine of an inflecting
 *   determiner
 * - otherwise: an exact match is accepted (plain/1st inflection masculine when
 *   the determiner inflects, every inflection and gender when it does not),
 *   and a token ending in "ه" is also taken as the plain feminine of an
 *   inflecting determiner
 *
 * @param tokens - the tokens still to be parsed
 * @param dictionary - unused; present so the function fits `T.Parser`
 * @returns every possible reading of the first token as a determiner, each
 * paired with the remaining tokens; empty if there are no tokens or no match
 */
export const parseDeterminer: T.Parser<
  T.InflectableBaseParse<T.DeterminerSelection>
> = (
  tokens: Readonly<T.Token[]>,
  // eslint-disable-next-line
  dictionary: T.DictionaryAPI
) => {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const given = first.s;
  const lookup = (p: string) => determiners.find((d) => d.p === p);

  /** Wraps one reading of the token as a parse result over the remaining tokens */
  const found = (
    determiner: T.Determiner,
    inflection: (0 | 1 | 2)[],
    gender: T.Gender[]
  ): T.ParseResult<T.InflectableBaseParse<T.DeterminerSelection>>[] => {
    const body: T.InflectableBaseParse<T.DeterminerSelection> = {
      inflection,
      gender,
      given,
      selection: {
        type: "determiner",
        determiner,
      },
    };
    return returnParseResult(rest, body);
  };

  /** The determiner spelled like the token minus its last letter, if it can inflect */
  const inflectingStem = (): T.Determiner | undefined => {
    const determiner = lookup(given.slice(0, -1));
    return determiner && isInflectingDet(determiner) ? determiner : undefined;
  };

  /** The token read as the unmodified (dictionary) form of a determiner */
  const exact = (): T.ParseResult<
    T.InflectableBaseParse<T.DeterminerSelection>
  >[] => {
    const determiner = lookup(given);
    if (!determiner) return [];
    const canInflect = isInflectingDet(determiner);
    return found(
      determiner,
      canInflect ? [0, 1] : [0, 1, 2],
      canInflect ? ["masc"] : ["masc", "fem"]
    );
  };

  if (given.endsWith("و")) {
    const stem = inflectingStem();
    return [
      // a determiner whose own spelling ends in "و" must still be matched
      // as-is, not only as the 2nd inflection of a shorter stem
      ...exact(),
      ...(stem ? found(stem, [2], ["masc", "fem"]) : []),
    ];
  }
  if (given.endsWith("ې")) {
    const determinerExact = lookup(given);
    const stem = inflectingStem();
    return [
      ...(determinerExact
        ? found(determinerExact, [0, 1, 2], ["masc", "fem"])
        : []),
      ...(stem ? found(stem, [1], ["fem"]) : []),
    ];
  }
  const aEndingStem = given.endsWith("ه") ? inflectingStem() : undefined;
  return [...exact(), ...(aEndingStem ? found(aEndingStem, [0], ["fem"]) : [])];
};
/**
 * Tells whether a determiner takes inflectional endings.
 *
 * A determiner inflects when it passes `tp.isPattern1Entry` and is not
 * flagged with a truthy `noInf`. A `noInf` key that is present but falsy
 * does NOT stop the determiner from inflecting.
 *
 * @param d - the determiner to check
 * @returns true if the determiner can be inflected
 */
function isInflectingDet(d: T.Determiner): boolean {
  return tp.isPattern1Entry(d) && !("noInf" in d && d.noInf);
}

View File

@ -1,30 +1,29 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { andSuccTp } from "../fp-ps";
import { pashtoConsonants } from "../pashto-consonants";
import * as tp from "../type-predicates";
import { returnParseResults } from "./utils";
import { parserCombOr, returnParseResults } from "./utils";
type FemNounBaseParse = T.InflectableBaseParse<T.FemNounEntry>;
export function parseFemNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) {
return [];
}
return [
return parserCombOr([
plainPlural,
parsePattern1,
parsePattern2,
parsePattern3,
parseEeEnding,
].flatMap((f) => f(tokens, dictionary));
])(tokens, dictionary);
}
function plainPlural(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) {
return [];
@ -34,22 +33,24 @@ function plainPlural(
dictionary
.queryP(p)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPluralNounEntry));
const plain = plurLookup(first.s).map<FemNounBaseParse>((entry) => ({
const plain = plurLookup(first.s).map<FemNounBaseParse>((selection) => ({
inflection: [0],
gender: ["fem"],
entry,
selection,
given: first.s,
}));
const inflected = first.s.endsWith("و")
? (() => {
const base = first.s.slice(0, -1);
const guesses = [first.s, base + "ه", base + "ې"];
return guesses.flatMap(plurLookup).map<FemNounBaseParse>((entry) => ({
inflection: [2],
gender: ["fem"],
entry,
given: first.s,
}));
return guesses
.flatMap(plurLookup)
.map<FemNounBaseParse>((selection) => ({
inflection: [2],
gender: ["fem"],
selection,
given: first.s,
}));
})()
: [];
return returnParseResults(rest, [...plain, ...inflected]);
@ -57,7 +58,7 @@ function plainPlural(
function parsePattern1(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) {
return [];
@ -68,18 +69,18 @@ function parsePattern1(
.queryP(p)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPattern1Entry));
const plain = ["ه", "ع"].some((v) => first.s.endsWith(v))
? p1Lookup(first.s).map<FemNounBaseParse>((entry) => ({
? p1Lookup(first.s).map<FemNounBaseParse>((selection) => ({
inflection: [0],
gender: ["fem"],
entry,
selection,
given: first.s,
}))
: [];
const withoutA = pashtoConsonants.includes(first.s[first.s.length - 1])
? p1Lookup(first.s).map<FemNounBaseParse>((entry) => ({
? p1Lookup(first.s).map<FemNounBaseParse>((selection) => ({
inflection: [0],
gender: ["fem"],
entry,
selection,
given: first.s,
}))
: [];
@ -92,21 +93,23 @@ function parsePattern1(
? p1Lookup(base)
: []),
];
return lookups.map<FemNounBaseParse>((entry) => ({
return lookups.map<FemNounBaseParse>((selection) => ({
inflection: [1],
gender: ["fem"],
entry,
selection,
given: first.s,
}));
})()
: [];
const doubleInflected = first.s.endsWith("و")
? p1Lookup(first.s.slice(0, -1) + "ه").map<FemNounBaseParse>((entry) => ({
inflection: [2],
gender: ["fem"],
entry,
given: first.s,
}))
? p1Lookup(first.s.slice(0, -1) + "ه").map<FemNounBaseParse>(
(selection) => ({
inflection: [2],
gender: ["fem"],
selection,
given: first.s,
})
)
: [];
return returnParseResults(rest, [
...plain,
@ -118,7 +121,7 @@ function parsePattern1(
function parsePattern2(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) {
return [];
@ -133,12 +136,12 @@ function parsePattern2(
tp.isSingularEntry
)
)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0, 1],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -150,12 +153,12 @@ function parsePattern2(
return dictionary
.queryP(eGuess)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPattern2Entry))
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [2],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -166,7 +169,7 @@ function parsePattern2(
function parsePattern3(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) {
return [];
@ -181,12 +184,12 @@ function parsePattern3(
tp.isSingularEntry
)
)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0, 1],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -198,12 +201,12 @@ function parsePattern3(
return dictionary
.queryP(eGuess)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPattern3Entry))
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [2],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -214,7 +217,7 @@ function parsePattern3(
function parseEeEnding(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) {
return [];
@ -224,12 +227,12 @@ function parseEeEnding(
return dictionary
.queryP(first.s)
.filter(tp.isPattern6FemEntry)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -238,12 +241,12 @@ function parseEeEnding(
return dictionary
.queryP(first.s.slice(0, -1) + "ي")
.filter(tp.isPattern6FemEntry)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [1],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -255,12 +258,12 @@ function parseEeEnding(
return dictionary
.queryP(eGuess)
.filter(tp.isPattern6FemEntry)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [2],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],

View File

@ -1,11 +1,10 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { andSuccTp, orTp } from "../fp-ps";
import * as tp from "../type-predicates";
export function parseInflectableWord<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI,
dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) {
@ -21,7 +20,7 @@ export function parseInflectableWord<W extends T.InflectableEntry>(
function parseNonInflecting<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI,
dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) {
@ -31,12 +30,12 @@ function parseNonInflecting<W extends T.InflectableEntry>(
const matches = dictionary
.queryP(first.s)
.filter(andSuccTp(tpf, tp.isNonInflectingEntry));
return matches.map((entry) => ({
return matches.map((selection) => ({
tokens: rest,
body: {
inflection: tp.isNounEntry(entry) ? [0, 1] : [0, 1, 2],
inflection: tp.isNounEntry(selection) ? [0, 1] : [0, 1, 2],
gender: ["masc", "fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -45,7 +44,7 @@ function parseNonInflecting<W extends T.InflectableEntry>(
function parsePattern1<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI,
dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) {
@ -58,35 +57,35 @@ function parsePattern1<W extends T.InflectableEntry>(
.filter(
(e) => tpf(e) && tp.isPattern1Entry(e) && !e.c.includes("fam.")
) as T.Pattern1Entry<W>[];
const mascPlainOrInflected = p1Lookup(first.s).map((entry) => ({
const mascPlainOrInflected = p1Lookup(first.s).map((selection) => ({
tokens: rest,
body: {
inflection: entry.c.includes("pl.") ? [0] : [0, 1],
inflection: selection.c.includes("pl.") ? [0] : [0, 1],
gender: ["masc"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
}));
const femPlain = first.s.endsWith("ه")
? p1Lookup(first.s.slice(0, -1)).map((entry) => ({
? p1Lookup(first.s.slice(0, -1)).map((selection) => ({
tokens: rest,
body: {
inflection: [0],
gender: ["fem"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
}))
: [];
const femInflected = first.s.endsWith("ې")
? p1Lookup(first.s.slice(0, -1)).map((entry) => ({
? p1Lookup(first.s.slice(0, -1)).map((selection) => ({
tokens: rest,
body: {
inflection: [1],
gender: ["fem"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
@ -96,12 +95,12 @@ function parsePattern1<W extends T.InflectableEntry>(
? [
...p1Lookup(first.s.slice(0, -1)),
...p1Lookup(first.s.slice(0, -1) + "ه"),
].map((entry) => ({
].map((selection) => ({
tokens: rest,
body: {
inflection: [2],
gender: ["masc", "fem"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
@ -117,7 +116,7 @@ function parsePattern1<W extends T.InflectableEntry>(
function parsePattern2or3<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI,
dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) {
@ -128,12 +127,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary
.queryP(first.s)
.filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry)))
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0],
gender: ["masc"],
entry,
selection,
given: first.s,
},
errors: [],
@ -142,12 +141,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary
.queryP(first.s.slice(0, -1) + "ی")
.filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry)))
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [1],
gender: ["masc"],
entry,
selection,
given: first.s,
},
errors: [],
@ -156,12 +155,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary
.queryP(first.s.slice(0, -1) + "ی")
.filter(andSuccTp(tpf, tp.isPattern2Entry))
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0, 1],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -170,12 +169,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary
.queryP(first.s.slice(0, -1) + "ی")
.filter(andSuccTp(tpf, tp.isPattern3Entry))
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0, 1],
gender: ["fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -187,12 +186,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary
.queryP(eGuess)
.filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry)))
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [2],
gender: ["masc", "fem"],
entry,
selection,
given: first.s,
},
errors: [],
@ -203,7 +202,7 @@ function parsePattern2or3<W extends T.InflectableEntry>(
function parsePattern4or5<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI,
dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) {
@ -214,12 +213,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
const plainMasc = dictionary
.queryP(first.s)
.filter(f)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0],
gender: ["masc"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
@ -228,12 +227,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary
.otherLookup("infap", first.s)
.filter(f)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [1],
gender: ["masc"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
@ -243,12 +242,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary
.otherLookup("infbp", first.s.slice(0, -1))
.filter(f)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [0],
gender: ["fem"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
@ -258,12 +257,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary
.otherLookup("infbp", first.s.slice(0, -1))
.filter(f)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [1],
gender: ["fem"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],
@ -273,12 +272,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary
.otherLookup("infbp", first.s.slice(0, -1))
.filter(f)
.map((entry) => ({
.map((selection) => ({
tokens: rest,
body: {
inflection: [2],
gender: ["masc", "fem"],
entry,
selection,
given: first.s,
} satisfies T.InflectableBaseParse<W>,
errors: [],

View File

@ -1,12 +1,11 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { endsInConsonant } from "../p-text-helpers";
import * as tp from "../type-predicates";
import { returnParseResults } from "./utils";
export function parseIrregularPlural(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
if (tokens.length === 0) {
return [];

View File

@ -1,67 +1,96 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { makeNounSelection } from "../phrase-building/make-selections";
import { parseAdjective } from "./parse-adjective-new";
import { parseDeterminer } from "./parse-determiner";
import { parseNounWord } from "./parse-noun-word";
import { bindParseResult } from "./utils";
import { bindParseResult, parserCombMany, toParseError } from "./utils";
type NounResult = { inflected: boolean; selection: T.NounSelection };
// ISSUES - fem nouns like ښځه کتابچه not working
// زاړه مېلمانه adjective agreement problem
export function parseNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI,
possesor: T.PossesorSelection | undefined,
adjectives: {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}[]
dictionary: T.DictionaryAPI,
possesor: T.PossesorSelection | undefined
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return [];
}
const detRes = parserCombMany(parseDeterminer)(tokens, dictionary);
// TODO: add recognition of او between adjectives
const withAdj = bindParseResult(
parseAdjective(tokens, dictionary),
(tkns, adj) => parseNoun(tkns, dictionary, possesor, [...adjectives, adj])
);
const nounWord = parseNounWord(tokens, dictionary);
// fit together with nouns
const nouns = bindParseResult(nounWord, (tkns, nr) => {
const { error: adjErrors } = adjsMatch(
adjectives,
nr.gender,
nr.inflected ? 1 : 0,
nr.plural
);
const s = makeNounSelection(nr.entry, undefined);
const body: NounResult = {
inflected: nr.inflected,
selection: {
...s,
gender: nr.gender,
number: nr.plural ? "plural" : "singular",
adjectives: adjectives.map((a) => a.selection),
possesor,
},
};
return [
{
body,
tokens: tkns,
errors: adjErrors.map((x) => ({ message: x })),
},
];
return bindParseResult(detRes, (t, determiners) => {
const adjRes = parserCombMany(parseAdjective)(t, dictionary);
return bindParseResult(adjRes, (tk, adjectives) => {
const nounWord = parseNounWord(tk, dictionary);
return bindParseResult(nounWord, (tkns, nr) => {
const { error: adjErrors } = adjDetsMatch(
adjectives,
nr.gender,
nr.inflected ? 1 : 0,
nr.plural
);
const { error: detErrors } = adjDetsMatch(
determiners,
nr.gender,
nr.inflected ? 1 : 0,
nr.plural
);
const dupErrors = checkForDeterminerDuplicates(determiners);
const s = makeNounSelection(nr.entry, undefined);
const body: NounResult = {
inflected: nr.inflected,
selection: {
...s,
gender: nr.gender,
number: nr.plural ? "plural" : "singular",
adjectives: adjectives.map((a) => a.selection),
determiners: determiners.length
? {
type: "determiners",
withNoun: true,
determiners: determiners.map((d) => d.selection),
}
: undefined,
possesor,
},
};
return [
{
body,
tokens: tkns,
errors: [
...detErrors.map(toParseError),
...dupErrors.map(toParseError),
...adjErrors.map(toParseError),
],
},
];
});
});
});
return [...nouns, ...withAdj];
}
function adjsMatch(
adjectives: Parameters<typeof parseNoun>[3],
/**
 * Produces one error message for each determiner (identified by its `p`
 * spelling) that occurs more than once in the list.
 *
 * @param determiners - the parsed determiners in front of a noun
 * @returns the messages, ordered by the first occurrence of each repeated
 * determiner; empty when nothing is repeated
 */
function checkForDeterminerDuplicates(
  determiners: T.InflectableBaseParse<T.DeterminerSelection>[]
): string[] {
  // a Map keeps its keys in insertion order, i.e. order of first occurrence
  const occurrences = new Map<string, number>();
  for (const { selection } of determiners) {
    const p = selection.determiner.p;
    occurrences.set(p, (occurrences.get(p) ?? 0) + 1);
  }
  return Array.from(occurrences.entries())
    .filter(([, count]) => count > 1)
    .map(([p]) => `duplicate ${p} determiner`);
}
function adjDetsMatch(
adjectives: T.InflectableBaseParse<
T.AdjectiveSelection | T.DeterminerSelection
>[],
gender: T.Gender,
inf: 0 | 1 | 2,
plural: boolean | undefined
@ -76,14 +105,17 @@ function adjsMatch(
return {
ok: false,
error: unmatching.map((x) => {
const adjText =
x.given === x.selection.entry.p
? x.given
: `${x.given} (${x.selection.entry.p})`;
const p =
x.selection.type === "adjective"
? x.selection.entry.p
: x.selection.determiner.p;
const adjText = x.given === p ? x.given : `${x.given} (${p})`;
const inflectionIssue = !x.inflection.some((x) => x === inflection)
? ` should be ${showInflection(inflection)}`
: ``;
return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`;
return `${
x.selection.type === "adjective" ? "Adjective" : "Determiner"
} agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`;
}),
};
} else {

View File

@ -1,5 +1,4 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { fFlatMapParseResult } from "../fp-ps";
import { getInflectionPattern } from "../inflection-pattern";
import { makeNounSelection } from "../phrase-building/make-selections";
@ -8,11 +7,12 @@ import { parseInflectableWord } from "./parse-inflectable-word";
import { parseFemNoun } from "./parse-fem-noun";
import { parsePluralEndingNoun } from "./parse-plural-ending-noun";
import { parseIrregularPlural } from "./parse-irregular-plural";
import { parserCombOr } from "./utils";
export function parseNounWord(
export const parseNounWord: T.Parser<T.ParsedNounWord<T.NounEntry>> = (
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
dictionary: T.DictionaryAPI
) => {
if (tokens.length === 0) {
return [];
}
@ -25,10 +25,12 @@ export function parseNounWord(
);
return [
...withoutPluralEndings,
...parsePluralEndingNoun(tokens, dictionary),
...parseIrregularPlural(tokens, dictionary),
...parserCombOr([parsePluralEndingNoun, parseIrregularPlural])(
tokens,
dictionary
),
];
}
};
function inflectableBaseParseToNounWordResults<N extends T.NounEntry>(
wr: T.InflectableBaseParse<N>
@ -46,17 +48,17 @@ function inflectableBaseParseToNounWordResults<N extends T.NounEntry>(
}
const possibleGenders = gendersWorkWithSelection(
wr.gender,
makeNounSelection(wr.entry, undefined)
makeNounSelection(wr.selection, undefined)
);
return possibleGenders.flatMap((gender) =>
wr.inflection.flatMap((inflection) =>
convertInflection(inflection, wr.entry, gender).flatMap(
convertInflection(inflection, wr.selection, gender).flatMap(
({ inflected, number }) => ({
inflected,
plural: number === "plural",
gender,
given: wr.given,
entry: wr.entry,
entry: wr.selection,
})
)
)

View File

@ -1,5 +1,4 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import {
endsInAaOrOo,
endsInConsonant,
@ -11,7 +10,7 @@ import { returnParseResults } from "./utils";
export function parsePluralEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -32,7 +31,7 @@ export function parsePluralEndingNoun(
// function parseSpecialPlural(
// tokens: Readonly<T.Token[]>,
// dictionary: DictionaryAPI
// dictionary: T.DictionaryAPI
// ): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
// if (tokens.length === 0) {
// return [];
@ -90,7 +89,7 @@ export function parsePluralEndingNoun(
function parseOonaEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -132,7 +131,7 @@ function parseOonaEndingNoun(
function parseAanEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -206,7 +205,7 @@ function parseAanEndingNoun(
function parseAaneEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -233,7 +232,7 @@ function parseAaneEndingNoun(
function parseGaanEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -309,7 +308,7 @@ function parseGaanEndingNoun(
function parseGaaneEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -413,7 +412,7 @@ function parseGaaneEndingNoun(
function parseWeEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -489,7 +488,7 @@ function parseWeEndingNoun(
function parseIYaanEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) {
return [];
@ -563,7 +562,7 @@ function parseIYaanEndingNoun(
function parseIYaaneEndingNoun(
tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI
dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry | T.FemNounEntry>>[] {
if (tokens.length === 0) {
return [];

View File

@ -121,6 +121,48 @@ export function cleanOutResults<C>(
return Array.from(new Set(errorsCulled.map(JSON.stringify))).map(JSON.parse);
}
/**
 * The common shape of a parser function: it receives the tokens still to be
 * read together with the dictionary, and returns a list of parse results
 * whose bodies are of type `R`.
 */
export type Parser<R> = (
tokens: Readonly<T.Token[]>,
dictionary: T.DictionaryAPI
) => T.ParseResult<R>[];
/**
 * A parser combinator that offers alternatives: every given parser is run on
 * the same tokens, and all of their results are gathered into one list, in
 * the order the parsers were given.
 *
 * @param parsers - the alternative parsers to try
 * @returns a function that runs all the parsers on the same input
 */
export function parserCombOr<R>(parsers: Parser<R>[]) {
  return function (tokens: Readonly<T.Token[]>, dictionary: T.DictionaryAPI) {
    const combined: ReturnType<Parser<R>> = [];
    for (const parse of parsers) {
      for (const result of parse(tokens, dictionary)) {
        combined.push(result);
      }
    }
    return combined;
  };
}
/**
 * A parser combinator that applies a parser over and over, gathering the
 * bodies of the consecutive successes into an array.
 *
 * For every success, a result that stops just before that success is returned
 * as well, so that the words can still be considered as something else. When
 * the parser finds nothing, what has been gathered so far is returned along
 * with the tokens that were left.
 *
 * @param parser - the parser to repeat
 * @returns a parser whose results hold the arrays of gathered bodies
 */
export function parserCombMany<R>(parser: Parser<R>): Parser<R[]> {
  return (tokens: Readonly<T.Token[]>, dictionary: T.DictionaryAPI) => {
    const collect = (
      soFar: R[],
      remaining: Readonly<T.Token[]>
    ): T.ParseResult<R[]>[] => {
      const attempts = parser(remaining, dictionary);
      // the outcome of not gathering anything further from this point
      const stopHere = () => returnParseResult(remaining, soFar);
      return attempts.length === 0
        ? stopHere()
        : bindParseResult(attempts, (afterMatch, matched) => [
            ...collect([...soFar, matched], afterMatch),
            // also offer a result where the next token is NOT
            // considered a success
            ...stopHere(),
          ]);
    };
    return collect([], tokens);
  };
}
export function isCompleteResult<C extends object>(
r: T.ParseResult<C>
): boolean {
@ -244,3 +286,7 @@ export function addShrunkenPossesor(
},
};
}
/**
 * Wraps a plain message string in a parse error object.
 *
 * @param message - the text describing the problem
 * @returns a parse error carrying that message
 */
export function toParseError(message: string): T.ParseError {
  const error: T.ParseError = { message };
  return error;
}

View File

@ -121,8 +121,10 @@ export function isMascNounEntry(
return !!e.c && e.c.includes("n. m.");
}
export function isFemNounEntry(e: T.DictionaryEntry): e is T.FemNounEntry {
return !!e.c && e.c.includes("n. f.");
export function isFemNounEntry(
e: T.DictionaryEntry | T.Determiner
): e is T.FemNounEntry {
return "c" in e && !!e.c && e.c.includes("n. f.");
}
export function isUnisexNounEntry(
@ -195,13 +197,13 @@ export function isNonInflectingEntry<T extends T.InflectableEntry>(
* @param e
* @returns
*/
export function isPattern1Entry<T extends T.InflectableEntry>(
export function isPattern1Entry<T extends T.InflectableEntry | T.Determiner>(
e: T
): e is T.Pattern1Entry<T> {
if (e.noInf) return false;
if (e.infap || e.infbp) return false;
if ("noInf" in e && e.noInf) return false;
if (("infap" in e && e.infap) || ("infbp" in e && e.infbp)) return false;
// family words like خور زوی etc with special plural don't follow pattern #1
if (e.c.includes("fam.")) {
if ("c" in e && e.c.includes("fam.")) {
return false;
}
if (isFemNounEntry(e)) {

View File

@ -1238,13 +1238,36 @@ export type EquativeBlock = { type: "equative"; equative: EquativeRendered };
export type NegativeBlock = { type: "negative"; imperative: boolean };
export type InflectableBaseParse<E extends InflectableEntry> = {
export type InflectableBaseParse<
E extends InflectableEntry | AdjectiveSelection | DeterminerSelection
> = {
inflection: (0 | 1 | 2)[];
gender: Gender[];
given: string;
entry: E;
selection: E;
};
/**
 * The set of dictionary functions that is handed to the parsers.
 *
 * Keeping this as a type lets different implementations (for instance a
 * mocked dictionary for tests) be passed in.
 */
export type DictionaryAPI = {
// resolves with how the dictionary got loaded, plus the dictionary info
initialize: () => Promise<{
response: "loaded first time" | "loaded from saved";
dictionaryInfo: DictionaryInfo;
}>;
// resolves with the outcome of the update check, plus the dictionary info
update: () => Promise<{
response: "no need for update" | "updated" | "unable to check";
dictionaryInfo: DictionaryInfo;
}>;
// the lookups below are synchronous and give back arrays of entries
// NOTE(review): presumably `p` is the Pashto-script spelling to match - confirm
queryP: (p: string) => DictionaryEntry[];
adjLookup: (p: string) => AdjectiveEntry[];
nounLookup: (p: string) => NounEntry[];
// looks up by a chosen field (`key`) of the dictionary entry
otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[];
specialPluralLookup: (p: string) => NounEntry[];
};
/**
 * The common shape of a parser function: it receives the tokens still to be
 * read together with the dictionary, and returns a list of parse results
 * whose bodies are of type `R`.
 */
export type Parser<R> = (
tokens: Readonly<Token[]>,
dictionary: DictionaryAPI
) => ParseResult<R>[];
export type ParsedNounWord<N extends NounEntry> = {
inflected: boolean;
plural: boolean;