more on noun parser

This commit is contained in:
adueck 2024-08-23 19:28:58 -04:00
parent 1abc83ee01
commit 191abc5778
15 changed files with 411 additions and 199 deletions

View File

@ -5,9 +5,11 @@ import { tokenizer } from "../lib/src/parsing/tokenizer";
// import { NPDisplay } from "../components/library"; // import { NPDisplay } from "../components/library";
// import EditableVP from "../components/src/vp-explorer/EditableVP"; // import EditableVP from "../components/src/vp-explorer/EditableVP";
// import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools"; // import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
import { DictionaryAPI } from "../lib/src/dictionary/dictionary";
import { parseNoun } from "../lib/src/parsing/parse-noun-new"; import { parseNoun } from "../lib/src/parsing/parse-noun-new";
import { JsonEditor } from "json-edit-react"; import { JsonEditor } from "json-edit-react";
import { renderNounSelection } from "../lib/src/phrase-building/render-np";
import { NPBlock } from "../components/src/blocks/Block";
import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
const working = [ const working = [
"limited demo vocab", "limited demo vocab",
@ -47,16 +49,18 @@ const examples = [
]; ];
function ParserDemo({ function ParserDemo({
// opts, opts,
// entryFeeder, // entryFeeder,
dictionary, dictionary,
}: { }: {
opts: T.TextOptions; opts: T.TextOptions;
entryFeeder: T.EntryFeeder; entryFeeder: T.EntryFeeder;
dictionary: DictionaryAPI; dictionary: T.DictionaryAPI;
}) { }) {
const [text, setText] = useState<string>(""); const [text, setText] = useState<string>("");
const [result, setResult] = useState<any[]>([]); const [result, setResult] = useState<
ReturnType<typeof parseNoun>[number]["body"][]
>([]);
// ReturnType<typeof parsePhrase>["success"] // ReturnType<typeof parsePhrase>["success"]
const [errors, setErrors] = useState<string[]>([]); const [errors, setErrors] = useState<string[]>([]);
function handleInput(value: string) { function handleInput(value: string) {
@ -66,8 +70,10 @@ function ParserDemo({
setErrors([]); setErrors([]);
return; return;
} }
const res = parseNoun(tokenizer(value), dictionary, undefined, []); const res = parseNoun(tokenizer(value), dictionary, undefined);
const success = res.filter((x) => !x.tokens.length).map((x) => x.body); const success: ReturnType<typeof parseNoun>[number]["body"][] = res
.filter((x) => !x.tokens.length)
.map((x) => x.body);
const errors = [ const errors = [
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))), ...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
]; ];
@ -135,6 +141,29 @@ function ParserDemo({
<div className="text-center">Did you mean:</div> <div className="text-center">Did you mean:</div>
</> </>
)} )}
{result.map((r) => {
try {
const renderedNP: T.Rendered<T.NPSelection> = {
type: "NP",
selection: renderNounSelection(r.selection, r.inflected, "none"),
};
return (
<>
{r.inflected ? "INFLECTED" : "PLAIN"}
<NPBlock
opts={opts}
script="p"
english={getEnglishFromRendered(renderedNP)}
>
{renderedNP}
</NPBlock>
</>
);
} catch (e) {
console.error(e);
return <div>ERROR RENDERING</div>;
}
})}
<JsonEditor data={result} /> <JsonEditor data={result} />
{/* {result.map((res) => {/* {result.map((res) =>
"inflected" in res ? ( "inflected" in res ? (

View File

@ -53,17 +53,7 @@ function specialPluralLookup(p: string): T.NounEntry[] {
.filter(tp.isNounEntry); .filter(tp.isNounEntry);
} }
export type DictionaryAPI = { export const dictionary: T.DictionaryAPI = {
initialize: () => ReturnType<typeof dictDb.initialize>;
update: () => ReturnType<typeof dictDb.updateDictionary>;
queryP: (p: string) => T.DictionaryEntry[];
adjLookup: (p: string) => T.AdjectiveEntry[];
nounLookup: (p: string) => T.NounEntry[];
otherLookup: (key: keyof T.DictionaryEntry, p: string) => T.DictionaryEntry[];
specialPluralLookup: (p: string) => T.NounEntry[];
};
export const dictionary: DictionaryAPI = {
initialize: async () => await dictDb.initialize(), initialize: async () => await dictDb.initialize(),
update: async () => await dictDb.updateDictionary(() => null), update: async () => await dictDb.updateDictionary(() => null),
queryP: memoizedQueryP, queryP: memoizedQueryP,

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import type { DictionaryAPI } from "../dictionary/dictionary";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { entries } from "../../../../vocab/mini-dict-entries"; import { entries } from "../../../../vocab/mini-dict-entries";
@ -26,7 +25,7 @@ function specialPluralLookup(p: string): T.NounEntry[] {
) as T.NounEntry[]; ) as T.NounEntry[];
} }
export const testDictionary: DictionaryAPI = { export const testDictionary: T.DictionaryAPI = {
// @ts-expect-error we won't mock the initialization // @ts-expect-error we won't mock the initialization
initialize: async () => 0, initialize: async () => 0,
// @ts-expect-error not perfect mocking because won't need that // @ts-expect-error not perfect mocking because won't need that

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { fmapParseResult } from "../fp-ps"; import { fmapParseResult } from "../fp-ps";
import { makeAdjectiveSelection } from "../phrase-building/make-selections"; import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as tp from "../type-predicates"; import * as tp from "../type-predicates";
@ -7,13 +6,8 @@ import { parseInflectableWord } from "./parse-inflectable-word";
export function parseAdjective( export function parseAdjective(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<{ ): T.ParseResult<T.InflectableBaseParse<T.AdjectiveSelection>>[] {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
@ -27,7 +21,7 @@ export function parseAdjective(
inflection: r.inflection, inflection: r.inflection,
gender: r.gender, gender: r.gender,
given: r.given, given: r.given,
selection: makeAdjectiveSelection(r.entry as T.AdjectiveEntry), selection: makeAdjectiveSelection(r.selection as T.AdjectiveEntry),
}), }),
adjectives adjectives
); );

View File

@ -7,12 +7,7 @@ import { LookupFunction } from "./lookup";
export function parseAdjective( export function parseAdjective(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction lookup: LookupFunction
): T.ParseResult<{ ): T.ParseResult<T.InflectableBaseParse<T.AdjectiveSelection>>[] {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}>[] {
const w: ReturnType<typeof parseAdjective> = []; const w: ReturnType<typeof parseAdjective> = [];
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];

View File

@ -0,0 +1,100 @@
import * as T from "../../../types";
import { determiners } from "../../../types";
import * as tp from "../type-predicates";
import { returnParseResult } from "./utils";
/**
 * Parses a single token as a (possibly inflected) Pashto determiner.
 *
 * Recognizes four surface shapes of the token:
 *  - ending in "و"  → second inflection (masc/fem) of an inflecting determiner
 *  - ending in "ې"  → either an exact dictionary form (any inflection) or the
 *                     first (fem) inflection of an inflecting determiner
 *  - exact match    → plain form; inflection possibilities depend on whether
 *                     the determiner inflects
 *  - ending in "ه"  → plain fem form of an inflecting determiner
 *
 * Returns parse results over the remaining tokens; [] when nothing matches.
 */
export const parseDeterminer: T.Parser<
  T.InflectableBaseParse<T.DeterminerSelection>
> = (
  tokens: Readonly<T.Token[]>,
  // eslint-disable-next-line
  dictionary: T.DictionaryAPI
) => {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const word = first.s;
  // look up a determiner whose dictionary (p) form is exactly `p`
  const lookup = (p: string) => determiners.find((d) => d.p === p);
  // wrap a matched determiner as a ParseResult over the remaining tokens
  const result = (
    inflection: (0 | 1 | 2)[],
    gender: T.Gender[],
    determiner: T.Determiner
  ): T.ParseResult<T.InflectableBaseParse<T.DeterminerSelection>>[] =>
    returnParseResult(rest, {
      inflection,
      gender,
      given: word,
      selection: {
        type: "determiner",
        determiner,
      },
    });
  if (word.endsWith("و")) {
    // second inflection: only possible for inflecting determiners
    const det = lookup(word.slice(0, -1));
    if (!det || !isInflectingDet(det)) {
      return [];
    }
    return result([2], ["masc", "fem"], det);
  }
  if (word.endsWith("ې")) {
    // the token may BE a dictionary form ending in ې, or a fem first
    // inflection of a determiner whose base form drops the ې — keep both
    const whole = lookup(word);
    const stem = lookup(word.slice(0, -1));
    const fromWhole = whole ? result([0, 1, 2], ["masc", "fem"], whole) : [];
    const fromStem =
      stem && isInflectingDet(stem) ? result([1], ["fem"], stem) : [];
    return [...fromWhole, ...fromStem];
  }
  // exact dictionary form: non-inflecting determiners cover all inflections
  const plain = (() => {
    const det = lookup(word);
    if (!det) {
      return [];
    }
    const inflects = isInflectingDet(det);
    return result(
      inflects ? [0, 1] : [0, 1, 2],
      inflects ? ["masc"] : ["masc", "fem"],
      det
    );
  })();
  // "ه" ending: plain feminine of an inflecting determiner
  const femPlain = (() => {
    if (!word.endsWith("ه")) {
      return [];
    }
    const det = lookup(word.slice(0, -1));
    if (!det || !isInflectingDet(det)) {
      return [];
    }
    return result([0], ["fem"], det);
  })();
  return [...plain, ...femPlain];
};
/**
 * Whether a determiner can take inflection endings.
 *
 * A determiner inflects when it is a pattern-1 word and is not explicitly
 * flagged as non-inflecting (`noInf: true`).
 */
function isInflectingDet(d: T.Determiner): boolean {
  // NOTE: previously `!("noInf" in d && !d.noInf)` — a double negation that
  // inverted the flag: entries explicitly marked `noInf: false` (which DO
  // inflect) were rejected, while `noInf: true` entries were accepted.
  return tp.isPattern1Entry(d) && !("noInf" in d && d.noInf);
}

View File

@ -1,30 +1,29 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { andSuccTp } from "../fp-ps"; import { andSuccTp } from "../fp-ps";
import { pashtoConsonants } from "../pashto-consonants"; import { pashtoConsonants } from "../pashto-consonants";
import * as tp from "../type-predicates"; import * as tp from "../type-predicates";
import { returnParseResults } from "./utils"; import { parserCombOr, returnParseResults } from "./utils";
type FemNounBaseParse = T.InflectableBaseParse<T.FemNounEntry>; type FemNounBaseParse = T.InflectableBaseParse<T.FemNounEntry>;
export function parseFemNoun( export function parseFemNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] { ): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
return [ return parserCombOr([
plainPlural, plainPlural,
parsePattern1, parsePattern1,
parsePattern2, parsePattern2,
parsePattern3, parsePattern3,
parseEeEnding, parseEeEnding,
].flatMap((f) => f(tokens, dictionary)); ])(tokens, dictionary);
} }
function plainPlural( function plainPlural(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] { ): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -34,20 +33,22 @@ function plainPlural(
dictionary dictionary
.queryP(p) .queryP(p)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPluralNounEntry)); .filter(andSuccTp(tp.isFemNounEntry, tp.isPluralNounEntry));
const plain = plurLookup(first.s).map<FemNounBaseParse>((entry) => ({ const plain = plurLookup(first.s).map<FemNounBaseParse>((selection) => ({
inflection: [0], inflection: [0],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
})); }));
const inflected = first.s.endsWith("و") const inflected = first.s.endsWith("و")
? (() => { ? (() => {
const base = first.s.slice(0, -1); const base = first.s.slice(0, -1);
const guesses = [first.s, base + "ه", base + "ې"]; const guesses = [first.s, base + "ه", base + "ې"];
return guesses.flatMap(plurLookup).map<FemNounBaseParse>((entry) => ({ return guesses
.flatMap(plurLookup)
.map<FemNounBaseParse>((selection) => ({
inflection: [2], inflection: [2],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
})); }));
})() })()
@ -57,7 +58,7 @@ function plainPlural(
function parsePattern1( function parsePattern1(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] { ): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -68,18 +69,18 @@ function parsePattern1(
.queryP(p) .queryP(p)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPattern1Entry)); .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern1Entry));
const plain = ["ه", "ع"].some((v) => first.s.endsWith(v)) const plain = ["ه", "ع"].some((v) => first.s.endsWith(v))
? p1Lookup(first.s).map<FemNounBaseParse>((entry) => ({ ? p1Lookup(first.s).map<FemNounBaseParse>((selection) => ({
inflection: [0], inflection: [0],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
})) }))
: []; : [];
const withoutA = pashtoConsonants.includes(first.s[first.s.length - 1]) const withoutA = pashtoConsonants.includes(first.s[first.s.length - 1])
? p1Lookup(first.s).map<FemNounBaseParse>((entry) => ({ ? p1Lookup(first.s).map<FemNounBaseParse>((selection) => ({
inflection: [0], inflection: [0],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
})) }))
: []; : [];
@ -92,21 +93,23 @@ function parsePattern1(
? p1Lookup(base) ? p1Lookup(base)
: []), : []),
]; ];
return lookups.map<FemNounBaseParse>((entry) => ({ return lookups.map<FemNounBaseParse>((selection) => ({
inflection: [1], inflection: [1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
})); }));
})() })()
: []; : [];
const doubleInflected = first.s.endsWith("و") const doubleInflected = first.s.endsWith("و")
? p1Lookup(first.s.slice(0, -1) + "ه").map<FemNounBaseParse>((entry) => ({ ? p1Lookup(first.s.slice(0, -1) + "ه").map<FemNounBaseParse>(
(selection) => ({
inflection: [2], inflection: [2],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
})) })
)
: []; : [];
return returnParseResults(rest, [ return returnParseResults(rest, [
...plain, ...plain,
@ -118,7 +121,7 @@ function parsePattern1(
function parsePattern2( function parsePattern2(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] { ): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -133,12 +136,12 @@ function parsePattern2(
tp.isSingularEntry tp.isSingularEntry
) )
) )
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0, 1], inflection: [0, 1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -150,12 +153,12 @@ function parsePattern2(
return dictionary return dictionary
.queryP(eGuess) .queryP(eGuess)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPattern2Entry)) .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern2Entry))
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [2], inflection: [2],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -166,7 +169,7 @@ function parsePattern2(
function parsePattern3( function parsePattern3(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] { ): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -181,12 +184,12 @@ function parsePattern3(
tp.isSingularEntry tp.isSingularEntry
) )
) )
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0, 1], inflection: [0, 1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -198,12 +201,12 @@ function parsePattern3(
return dictionary return dictionary
.queryP(eGuess) .queryP(eGuess)
.filter(andSuccTp(tp.isFemNounEntry, tp.isPattern3Entry)) .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern3Entry))
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [2], inflection: [2],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -214,7 +217,7 @@ function parsePattern3(
function parseEeEnding( function parseEeEnding(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] { ): T.ParseResult<FemNounBaseParse>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -224,12 +227,12 @@ function parseEeEnding(
return dictionary return dictionary
.queryP(first.s) .queryP(first.s)
.filter(tp.isPattern6FemEntry) .filter(tp.isPattern6FemEntry)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0], inflection: [0],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -238,12 +241,12 @@ function parseEeEnding(
return dictionary return dictionary
.queryP(first.s.slice(0, -1) + "ي") .queryP(first.s.slice(0, -1) + "ي")
.filter(tp.isPattern6FemEntry) .filter(tp.isPattern6FemEntry)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [1], inflection: [1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -255,12 +258,12 @@ function parseEeEnding(
return dictionary return dictionary
.queryP(eGuess) .queryP(eGuess)
.filter(tp.isPattern6FemEntry) .filter(tp.isPattern6FemEntry)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [2], inflection: [2],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],

View File

@ -1,11 +1,10 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { andSuccTp, orTp } from "../fp-ps"; import { andSuccTp, orTp } from "../fp-ps";
import * as tp from "../type-predicates"; import * as tp from "../type-predicates";
export function parseInflectableWord<W extends T.InflectableEntry>( export function parseInflectableWord<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI, dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] { ): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -21,7 +20,7 @@ export function parseInflectableWord<W extends T.InflectableEntry>(
function parseNonInflecting<W extends T.InflectableEntry>( function parseNonInflecting<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI, dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] { ): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -31,12 +30,12 @@ function parseNonInflecting<W extends T.InflectableEntry>(
const matches = dictionary const matches = dictionary
.queryP(first.s) .queryP(first.s)
.filter(andSuccTp(tpf, tp.isNonInflectingEntry)); .filter(andSuccTp(tpf, tp.isNonInflectingEntry));
return matches.map((entry) => ({ return matches.map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: tp.isNounEntry(entry) ? [0, 1] : [0, 1, 2], inflection: tp.isNounEntry(selection) ? [0, 1] : [0, 1, 2],
gender: ["masc", "fem"], gender: ["masc", "fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -45,7 +44,7 @@ function parseNonInflecting<W extends T.InflectableEntry>(
function parsePattern1<W extends T.InflectableEntry>( function parsePattern1<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI, dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] { ): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -58,35 +57,35 @@ function parsePattern1<W extends T.InflectableEntry>(
.filter( .filter(
(e) => tpf(e) && tp.isPattern1Entry(e) && !e.c.includes("fam.") (e) => tpf(e) && tp.isPattern1Entry(e) && !e.c.includes("fam.")
) as T.Pattern1Entry<W>[]; ) as T.Pattern1Entry<W>[];
const mascPlainOrInflected = p1Lookup(first.s).map((entry) => ({ const mascPlainOrInflected = p1Lookup(first.s).map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: entry.c.includes("pl.") ? [0] : [0, 1], inflection: selection.c.includes("pl.") ? [0] : [0, 1],
gender: ["masc"], gender: ["masc"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
})); }));
const femPlain = first.s.endsWith("ه") const femPlain = first.s.endsWith("ه")
? p1Lookup(first.s.slice(0, -1)).map((entry) => ({ ? p1Lookup(first.s.slice(0, -1)).map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0], inflection: [0],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
})) }))
: []; : [];
const femInflected = first.s.endsWith("ې") const femInflected = first.s.endsWith("ې")
? p1Lookup(first.s.slice(0, -1)).map((entry) => ({ ? p1Lookup(first.s.slice(0, -1)).map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [1], inflection: [1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
@ -96,12 +95,12 @@ function parsePattern1<W extends T.InflectableEntry>(
? [ ? [
...p1Lookup(first.s.slice(0, -1)), ...p1Lookup(first.s.slice(0, -1)),
...p1Lookup(first.s.slice(0, -1) + "ه"), ...p1Lookup(first.s.slice(0, -1) + "ه"),
].map((entry) => ({ ].map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [2], inflection: [2],
gender: ["masc", "fem"], gender: ["masc", "fem"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
@ -117,7 +116,7 @@ function parsePattern1<W extends T.InflectableEntry>(
function parsePattern2or3<W extends T.InflectableEntry>( function parsePattern2or3<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI, dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] { ): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -128,12 +127,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary return dictionary
.queryP(first.s) .queryP(first.s)
.filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry))) .filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry)))
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0], inflection: [0],
gender: ["masc"], gender: ["masc"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -142,12 +141,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary return dictionary
.queryP(first.s.slice(0, -1) + "ی") .queryP(first.s.slice(0, -1) + "ی")
.filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry))) .filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry)))
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [1], inflection: [1],
gender: ["masc"], gender: ["masc"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -156,12 +155,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary return dictionary
.queryP(first.s.slice(0, -1) + "ی") .queryP(first.s.slice(0, -1) + "ی")
.filter(andSuccTp(tpf, tp.isPattern2Entry)) .filter(andSuccTp(tpf, tp.isPattern2Entry))
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0, 1], inflection: [0, 1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -170,12 +169,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary return dictionary
.queryP(first.s.slice(0, -1) + "ی") .queryP(first.s.slice(0, -1) + "ی")
.filter(andSuccTp(tpf, tp.isPattern3Entry)) .filter(andSuccTp(tpf, tp.isPattern3Entry))
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0, 1], inflection: [0, 1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -187,12 +186,12 @@ function parsePattern2or3<W extends T.InflectableEntry>(
return dictionary return dictionary
.queryP(eGuess) .queryP(eGuess)
.filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry))) .filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry)))
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [2], inflection: [2],
gender: ["masc", "fem"], gender: ["masc", "fem"],
entry, selection,
given: first.s, given: first.s,
}, },
errors: [], errors: [],
@ -203,7 +202,7 @@ function parsePattern2or3<W extends T.InflectableEntry>(
function parsePattern4or5<W extends T.InflectableEntry>( function parsePattern4or5<W extends T.InflectableEntry>(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI, dictionary: T.DictionaryAPI,
tpf: (e: T.DictionaryEntry) => e is W tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] { ): T.ParseResult<T.InflectableBaseParse<W>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -214,12 +213,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
const plainMasc = dictionary const plainMasc = dictionary
.queryP(first.s) .queryP(first.s)
.filter(f) .filter(f)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0], inflection: [0],
gender: ["masc"], gender: ["masc"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
@ -228,12 +227,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary ? dictionary
.otherLookup("infap", first.s) .otherLookup("infap", first.s)
.filter(f) .filter(f)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [1], inflection: [1],
gender: ["masc"], gender: ["masc"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
@ -243,12 +242,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary ? dictionary
.otherLookup("infbp", first.s.slice(0, -1)) .otherLookup("infbp", first.s.slice(0, -1))
.filter(f) .filter(f)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [0], inflection: [0],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
@ -258,12 +257,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary ? dictionary
.otherLookup("infbp", first.s.slice(0, -1)) .otherLookup("infbp", first.s.slice(0, -1))
.filter(f) .filter(f)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [1], inflection: [1],
gender: ["fem"], gender: ["fem"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],
@ -273,12 +272,12 @@ function parsePattern4or5<W extends T.InflectableEntry>(
? dictionary ? dictionary
.otherLookup("infbp", first.s.slice(0, -1)) .otherLookup("infbp", first.s.slice(0, -1))
.filter(f) .filter(f)
.map((entry) => ({ .map((selection) => ({
tokens: rest, tokens: rest,
body: { body: {
inflection: [2], inflection: [2],
gender: ["masc", "fem"], gender: ["masc", "fem"],
entry, selection,
given: first.s, given: first.s,
} satisfies T.InflectableBaseParse<W>, } satisfies T.InflectableBaseParse<W>,
errors: [], errors: [],

View File

@ -1,12 +1,11 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { endsInConsonant } from "../p-text-helpers"; import { endsInConsonant } from "../p-text-helpers";
import * as tp from "../type-predicates"; import * as tp from "../type-predicates";
import { returnParseResults } from "./utils"; import { returnParseResults } from "./utils";
export function parseIrregularPlural( export function parseIrregularPlural(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];

View File

@ -1,43 +1,40 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { makeNounSelection } from "../phrase-building/make-selections"; import { makeNounSelection } from "../phrase-building/make-selections";
import { parseAdjective } from "./parse-adjective-new"; import { parseAdjective } from "./parse-adjective-new";
import { parseDeterminer } from "./parse-determiner";
import { parseNounWord } from "./parse-noun-word"; import { parseNounWord } from "./parse-noun-word";
import { bindParseResult } from "./utils"; import { bindParseResult, parserCombMany, toParseError } from "./utils";
type NounResult = { inflected: boolean; selection: T.NounSelection }; type NounResult = { inflected: boolean; selection: T.NounSelection };
// ISSUES - fem nouns like ښځه کتابچه not working
// زاړه مېلمانه adjective agreement problem
export function parseNoun( export function parseNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI, dictionary: T.DictionaryAPI,
possesor: T.PossesorSelection | undefined, possesor: T.PossesorSelection | undefined
adjectives: {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}[]
): T.ParseResult<NounResult>[] { ): T.ParseResult<NounResult>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
const detRes = parserCombMany(parseDeterminer)(tokens, dictionary);
// TODO: add recognition of او between adjectives // TODO: add recognition of او between adjectives
const withAdj = bindParseResult( return bindParseResult(detRes, (t, determiners) => {
parseAdjective(tokens, dictionary), const adjRes = parserCombMany(parseAdjective)(t, dictionary);
(tkns, adj) => parseNoun(tkns, dictionary, possesor, [...adjectives, adj]) return bindParseResult(adjRes, (tk, adjectives) => {
); const nounWord = parseNounWord(tk, dictionary);
const nounWord = parseNounWord(tokens, dictionary); return bindParseResult(nounWord, (tkns, nr) => {
// fit together with nouns const { error: adjErrors } = adjDetsMatch(
const nouns = bindParseResult(nounWord, (tkns, nr) => {
const { error: adjErrors } = adjsMatch(
adjectives, adjectives,
nr.gender, nr.gender,
nr.inflected ? 1 : 0, nr.inflected ? 1 : 0,
nr.plural nr.plural
); );
const { error: detErrors } = adjDetsMatch(
determiners,
nr.gender,
nr.inflected ? 1 : 0,
nr.plural
);
const dupErrors = checkForDeterminerDuplicates(determiners);
const s = makeNounSelection(nr.entry, undefined); const s = makeNounSelection(nr.entry, undefined);
const body: NounResult = { const body: NounResult = {
inflected: nr.inflected, inflected: nr.inflected,
@ -46,6 +43,13 @@ export function parseNoun(
gender: nr.gender, gender: nr.gender,
number: nr.plural ? "plural" : "singular", number: nr.plural ? "plural" : "singular",
adjectives: adjectives.map((a) => a.selection), adjectives: adjectives.map((a) => a.selection),
determiners: determiners.length
? {
type: "determiners",
withNoun: true,
determiners: determiners.map((d) => d.selection),
}
: undefined,
possesor, possesor,
}, },
}; };
@ -53,15 +57,40 @@ export function parseNoun(
{ {
body, body,
tokens: tkns, tokens: tkns,
errors: adjErrors.map((x) => ({ message: x })), errors: [
...detErrors.map(toParseError),
...dupErrors.map(toParseError),
...adjErrors.map(toParseError),
],
}, },
]; ];
}); });
return [...nouns, ...withAdj]; });
});
} }
function adjsMatch( function checkForDeterminerDuplicates(
adjectives: Parameters<typeof parseNoun>[3], determiners: T.InflectableBaseParse<T.DeterminerSelection>[]
): string[] {
// from https://flexiple.com/javascript/find-duplicates-javascript-array
const array = determiners.map((d) => d.selection.determiner.p);
const duplicates: string[] = [];
for (let i = 0; i < array.length; i++) {
for (let j = i + 1; j < array.length; j++) {
if (array[i] === array[j]) {
if (!duplicates.includes(array[i])) {
duplicates.push(array[i]);
}
}
}
}
return duplicates.map((x) => `duplicate ${x} determiner`);
}
function adjDetsMatch(
adjectives: T.InflectableBaseParse<
T.AdjectiveSelection | T.DeterminerSelection
>[],
gender: T.Gender, gender: T.Gender,
inf: 0 | 1 | 2, inf: 0 | 1 | 2,
plural: boolean | undefined plural: boolean | undefined
@ -76,14 +105,17 @@ function adjsMatch(
return { return {
ok: false, ok: false,
error: unmatching.map((x) => { error: unmatching.map((x) => {
const adjText = const p =
x.given === x.selection.entry.p x.selection.type === "adjective"
? x.given ? x.selection.entry.p
: `${x.given} (${x.selection.entry.p})`; : x.selection.determiner.p;
const adjText = x.given === p ? x.given : `${x.given} (${p})`;
const inflectionIssue = !x.inflection.some((x) => x === inflection) const inflectionIssue = !x.inflection.some((x) => x === inflection)
? ` should be ${showInflection(inflection)}` ? ` should be ${showInflection(inflection)}`
: ``; : ``;
return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`; return `${
x.selection.type === "adjective" ? "Adjective" : "Determiner"
} agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`;
}), }),
}; };
} else { } else {

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { fFlatMapParseResult } from "../fp-ps"; import { fFlatMapParseResult } from "../fp-ps";
import { getInflectionPattern } from "../inflection-pattern"; import { getInflectionPattern } from "../inflection-pattern";
import { makeNounSelection } from "../phrase-building/make-selections"; import { makeNounSelection } from "../phrase-building/make-selections";
@ -8,11 +7,12 @@ import { parseInflectableWord } from "./parse-inflectable-word";
import { parseFemNoun } from "./parse-fem-noun"; import { parseFemNoun } from "./parse-fem-noun";
import { parsePluralEndingNoun } from "./parse-plural-ending-noun"; import { parsePluralEndingNoun } from "./parse-plural-ending-noun";
import { parseIrregularPlural } from "./parse-irregular-plural"; import { parseIrregularPlural } from "./parse-irregular-plural";
import { parserCombOr } from "./utils";
export function parseNounWord( export const parseNounWord: T.Parser<T.ParsedNounWord<T.NounEntry>> = (
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] { ) => {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
@ -25,10 +25,12 @@ export function parseNounWord(
); );
return [ return [
...withoutPluralEndings, ...withoutPluralEndings,
...parsePluralEndingNoun(tokens, dictionary), ...parserCombOr([parsePluralEndingNoun, parseIrregularPlural])(
...parseIrregularPlural(tokens, dictionary), tokens,
dictionary
),
]; ];
} };
function inflectableBaseParseToNounWordResults<N extends T.NounEntry>( function inflectableBaseParseToNounWordResults<N extends T.NounEntry>(
wr: T.InflectableBaseParse<N> wr: T.InflectableBaseParse<N>
@ -46,17 +48,17 @@ function inflectableBaseParseToNounWordResults<N extends T.NounEntry>(
} }
const possibleGenders = gendersWorkWithSelection( const possibleGenders = gendersWorkWithSelection(
wr.gender, wr.gender,
makeNounSelection(wr.entry, undefined) makeNounSelection(wr.selection, undefined)
); );
return possibleGenders.flatMap((gender) => return possibleGenders.flatMap((gender) =>
wr.inflection.flatMap((inflection) => wr.inflection.flatMap((inflection) =>
convertInflection(inflection, wr.entry, gender).flatMap( convertInflection(inflection, wr.selection, gender).flatMap(
({ inflected, number }) => ({ ({ inflected, number }) => ({
inflected, inflected,
plural: number === "plural", plural: number === "plural",
gender, gender,
given: wr.given, given: wr.given,
entry: wr.entry, entry: wr.selection,
}) })
) )
) )

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { import {
endsInAaOrOo, endsInAaOrOo,
endsInConsonant, endsInConsonant,
@ -11,7 +10,7 @@ import { returnParseResults } from "./utils";
export function parsePluralEndingNoun( export function parsePluralEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -32,7 +31,7 @@ export function parsePluralEndingNoun(
// function parseSpecialPlural( // function parseSpecialPlural(
// tokens: Readonly<T.Token[]>, // tokens: Readonly<T.Token[]>,
// dictionary: DictionaryAPI // dictionary: T.DictionaryAPI
// ): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] { // ): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
// if (tokens.length === 0) { // if (tokens.length === 0) {
// return []; // return [];
@ -90,7 +89,7 @@ export function parsePluralEndingNoun(
function parseOonaEndingNoun( function parseOonaEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -132,7 +131,7 @@ function parseOonaEndingNoun(
function parseAanEndingNoun( function parseAanEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -206,7 +205,7 @@ function parseAanEndingNoun(
function parseAaneEndingNoun( function parseAaneEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -233,7 +232,7 @@ function parseAaneEndingNoun(
function parseGaanEndingNoun( function parseGaanEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -309,7 +308,7 @@ function parseGaanEndingNoun(
function parseGaaneEndingNoun( function parseGaaneEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -413,7 +412,7 @@ function parseGaaneEndingNoun(
function parseWeEndingNoun( function parseWeEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -489,7 +488,7 @@ function parseWeEndingNoun(
function parseIYaanEndingNoun( function parseIYaanEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
@ -563,7 +562,7 @@ function parseIYaanEndingNoun(
function parseIYaaneEndingNoun( function parseIYaaneEndingNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
dictionary: DictionaryAPI dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry | T.FemNounEntry>>[] { ): T.ParseResult<T.ParsedNounWord<T.MascNounEntry | T.FemNounEntry>>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];

View File

@ -121,6 +121,48 @@ export function cleanOutResults<C>(
return Array.from(new Set(errorsCulled.map(JSON.stringify))).map(JSON.parse); return Array.from(new Set(errorsCulled.map(JSON.stringify))).map(JSON.parse);
} }
export type Parser<R> = (
tokens: Readonly<T.Token[]>,
dictionary: T.DictionaryAPI
) => T.ParseResult<R>[];
export function parserCombOr<R>(parsers: Parser<R>[]) {
return (tokens: Readonly<T.Token[]>, dictionary: T.DictionaryAPI) =>
parsers.flatMap((p) => p(tokens, dictionary));
}
/**
* A parser combinator to take a parser and make it run as many times as possible
* for each success, it will also return an option as if it failed, to allow for
* the words to be considered something else.
*
* @param parser
* @returns
*/
export function parserCombMany<R>(parser: Parser<R>): Parser<R[]> {
const r: Parser<R[]> = (
tokens: Readonly<T.Token[]>,
dictionary: T.DictionaryAPI
) => {
function go(acc: R[], t: Readonly<T.Token[]>): T.ParseResult<R[]>[] {
const one = parser(t, dictionary);
if (one.length === 0) {
return returnParseResult(t, acc);
}
return bindParseResult(one, (tkns, o) => {
return [
...go([...acc, o], tkns),
// also have a result where the next token is NOT
// considered a success
...returnParseResult(t, acc),
];
});
}
return go([], tokens);
};
return r;
}
export function isCompleteResult<C extends object>( export function isCompleteResult<C extends object>(
r: T.ParseResult<C> r: T.ParseResult<C>
): boolean { ): boolean {
@ -244,3 +286,7 @@ export function addShrunkenPossesor(
}, },
}; };
} }
export function toParseError(message: string): T.ParseError {
return { message };
}

View File

@ -121,8 +121,10 @@ export function isMascNounEntry(
return !!e.c && e.c.includes("n. m."); return !!e.c && e.c.includes("n. m.");
} }
export function isFemNounEntry(e: T.DictionaryEntry): e is T.FemNounEntry { export function isFemNounEntry(
return !!e.c && e.c.includes("n. f."); e: T.DictionaryEntry | T.Determiner
): e is T.FemNounEntry {
return "c" in e && !!e.c && e.c.includes("n. f.");
} }
export function isUnisexNounEntry( export function isUnisexNounEntry(
@ -195,13 +197,13 @@ export function isNonInflectingEntry<T extends T.InflectableEntry>(
* @param e * @param e
* @returns * @returns
*/ */
export function isPattern1Entry<T extends T.InflectableEntry>( export function isPattern1Entry<T extends T.InflectableEntry | T.Determiner>(
e: T e: T
): e is T.Pattern1Entry<T> { ): e is T.Pattern1Entry<T> {
if (e.noInf) return false; if ("noInf" in e && e.noInf) return false;
if (e.infap || e.infbp) return false; if (("infap" in e && e.infap) || ("infbp" in e && e.infbp)) return false;
// family words like خور زوی etc with special plural don't follow pattern #1 // family words like خور زوی etc with special plural don't follow pattern #1
if (e.c.includes("fam.")) { if ("c" in e && e.c.includes("fam.")) {
return false; return false;
} }
if (isFemNounEntry(e)) { if (isFemNounEntry(e)) {

View File

@ -1238,13 +1238,36 @@ export type EquativeBlock = { type: "equative"; equative: EquativeRendered };
export type NegativeBlock = { type: "negative"; imperative: boolean }; export type NegativeBlock = { type: "negative"; imperative: boolean };
export type InflectableBaseParse<E extends InflectableEntry> = { export type InflectableBaseParse<
E extends InflectableEntry | AdjectiveSelection | DeterminerSelection
> = {
inflection: (0 | 1 | 2)[]; inflection: (0 | 1 | 2)[];
gender: Gender[]; gender: Gender[];
given: string; given: string;
entry: E; selection: E;
}; };
export type DictionaryAPI = {
initialize: () => Promise<{
response: "loaded first time" | "loaded from saved";
dictionaryInfo: DictionaryInfo;
}>;
update: () => Promise<{
response: "no need for update" | "updated" | "unable to check";
dictionaryInfo: DictionaryInfo;
}>;
queryP: (p: string) => DictionaryEntry[];
adjLookup: (p: string) => AdjectiveEntry[];
nounLookup: (p: string) => NounEntry[];
otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[];
specialPluralLookup: (p: string) => NounEntry[];
};
export type Parser<R> = (
tokens: Readonly<Token[]>,
dictionary: DictionaryAPI
) => ParseResult<R>[];
export type ParsedNounWord<N extends NounEntry> = { export type ParsedNounWord<N extends NounEntry> = {
inflected: boolean; inflected: boolean;
plural: boolean; plural: boolean;