From 191abc57786101cf44e74d663f5db45f0f2a554e Mon Sep 17 00:00:00 2001 From: adueck Date: Fri, 23 Aug 2024 19:28:58 -0400 Subject: [PATCH] more on noun parser --- src/demo-components/ParserDemo.tsx | 41 +++++- src/lib/src/dictionary/dictionary.ts | 12 +- src/lib/src/parsing/mini-test-dictionary.ts | 3 +- src/lib/src/parsing/parse-adjective-new.ts | 12 +- src/lib/src/parsing/parse-adjective.ts | 7 +- src/lib/src/parsing/parse-determiner.ts | 100 +++++++++++++ src/lib/src/parsing/parse-fem-noun.ts | 91 ++++++------ src/lib/src/parsing/parse-inflectable-word.ts | 75 +++++----- src/lib/src/parsing/parse-irregular-plural.ts | 3 +- src/lib/src/parsing/parse-noun-new.ts | 136 +++++++++++------- src/lib/src/parsing/parse-noun-word.ts | 22 +-- .../src/parsing/parse-plural-ending-noun.ts | 21 ++- src/lib/src/parsing/utils.ts | 46 ++++++ src/lib/src/type-predicates.ts | 14 +- src/types.ts | 27 +++- 15 files changed, 411 insertions(+), 199 deletions(-) create mode 100644 src/lib/src/parsing/parse-determiner.ts diff --git a/src/demo-components/ParserDemo.tsx b/src/demo-components/ParserDemo.tsx index b1da389..ab05826 100644 --- a/src/demo-components/ParserDemo.tsx +++ b/src/demo-components/ParserDemo.tsx @@ -5,9 +5,11 @@ import { tokenizer } from "../lib/src/parsing/tokenizer"; // import { NPDisplay } from "../components/library"; // import EditableVP from "../components/src/vp-explorer/EditableVP"; // import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools"; -import { DictionaryAPI } from "../lib/src/dictionary/dictionary"; import { parseNoun } from "../lib/src/parsing/parse-noun-new"; import { JsonEditor } from "json-edit-react"; +import { renderNounSelection } from "../lib/src/phrase-building/render-np"; +import { NPBlock } from "../components/src/blocks/Block"; +import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools"; const working = [ "limited demo vocab", @@ -47,16 +49,18 @@ const examples = [ ]; function ParserDemo({ - // opts, + opts, // entryFeeder, dictionary, }: { opts: T.TextOptions; entryFeeder: T.EntryFeeder; - dictionary: DictionaryAPI; + dictionary: T.DictionaryAPI; }) { const [text, setText] = useState(""); - const [result, setResult] = useState([]); + const [result, setResult] = useState< + ReturnType[number]["body"][] + >([]); // ReturnType["success"] const [errors, setErrors] = useState([]); function handleInput(value: string) { @@ -66,8 +70,10 @@ function ParserDemo({ setErrors([]); return; } - const res = parseNoun(tokenizer(value), dictionary, undefined, []); - const success = res.filter((x) => !x.tokens.length).map((x) => x.body); + const res = parseNoun(tokenizer(value), dictionary, undefined); + const success: ReturnType[number]["body"][] = res + .filter((x) => !x.tokens.length) + .map((x) => x.body); const errors = [ ...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))), ]; @@ -135,6 +141,29 @@ function ParserDemo({
Did you mean:
)} + {result.map((r) => { + try { + const renderedNP: T.Rendered = { + type: "NP", + selection: renderNounSelection(r.selection, r.inflected, "none"), + }; + return ( + <> + {r.inflected ? "INFLECTED" : "PLAIN"} + + {renderedNP} + + + ); + } catch (e) { + console.error(e); + return
ERROR RENDERING
; + } + })} {/* {result.map((res) => "inflected" in res ? ( diff --git a/src/lib/src/dictionary/dictionary.ts b/src/lib/src/dictionary/dictionary.ts index 690330e..540fa1c 100644 --- a/src/lib/src/dictionary/dictionary.ts +++ b/src/lib/src/dictionary/dictionary.ts @@ -53,17 +53,7 @@ function specialPluralLookup(p: string): T.NounEntry[] { .filter(tp.isNounEntry); } -export type DictionaryAPI = { - initialize: () => ReturnType; - update: () => ReturnType; - queryP: (p: string) => T.DictionaryEntry[]; - adjLookup: (p: string) => T.AdjectiveEntry[]; - nounLookup: (p: string) => T.NounEntry[]; - otherLookup: (key: keyof T.DictionaryEntry, p: string) => T.DictionaryEntry[]; - specialPluralLookup: (p: string) => T.NounEntry[]; -}; - -export const dictionary: DictionaryAPI = { +export const dictionary: T.DictionaryAPI = { initialize: async () => await dictDb.initialize(), update: async () => await dictDb.updateDictionary(() => null), queryP: memoizedQueryP, diff --git a/src/lib/src/parsing/mini-test-dictionary.ts b/src/lib/src/parsing/mini-test-dictionary.ts index 700f8a0..636805e 100644 --- a/src/lib/src/parsing/mini-test-dictionary.ts +++ b/src/lib/src/parsing/mini-test-dictionary.ts @@ -1,5 +1,4 @@ import * as T from "../../../types"; -import type { DictionaryAPI } from "../dictionary/dictionary"; import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import { entries } from "../../../../vocab/mini-dict-entries"; @@ -26,7 +25,7 @@ function specialPluralLookup(p: string): T.NounEntry[] { ) as T.NounEntry[]; } -export const testDictionary: DictionaryAPI = { +export const testDictionary: T.DictionaryAPI = { // @ts-expect-error we won't mock the initialization initialize: async () => 0, // @ts-expect-error not perfect mocking because won't need that diff --git a/src/lib/src/parsing/parse-adjective-new.ts b/src/lib/src/parsing/parse-adjective-new.ts index 5a8a016..f5ed0ee 100644 --- a/src/lib/src/parsing/parse-adjective-new.ts +++ b/src/lib/src/parsing/parse-adjective-new.ts @@ -1,5 +1,4 @@ import * as T from "../../../types"; -import { DictionaryAPI } from "../dictionary/dictionary"; import { fmapParseResult } from "../fp-ps"; import { makeAdjectiveSelection } from "../phrase-building/make-selections"; import * as tp from "../type-predicates"; @@ -7,13 +6,8 @@ import { parseInflectableWord } from "./parse-inflectable-word"; export function parseAdjective( tokens: Readonly, - dictionary: DictionaryAPI -): T.ParseResult<{ - inflection: (0 | 1 | 2)[]; - gender: T.Gender[]; - given: string; - selection: T.AdjectiveSelection; -}>[] { + dictionary: T.DictionaryAPI +): T.ParseResult>[] { if (tokens.length === 0) { return []; } @@ -27,7 +21,7 @@ export function parseAdjective( inflection: r.inflection, gender: r.gender, given: r.given, - selection: makeAdjectiveSelection(r.entry as T.AdjectiveEntry), + selection: makeAdjectiveSelection(r.selection as T.AdjectiveEntry), }), adjectives ); diff --git a/src/lib/src/parsing/parse-adjective.ts b/src/lib/src/parsing/parse-adjective.ts index 2685d30..d47ca92 100644 --- a/src/lib/src/parsing/parse-adjective.ts +++ b/src/lib/src/parsing/parse-adjective.ts @@ -7,12 +7,7 @@ import { LookupFunction } from "./lookup"; export function parseAdjective( tokens: Readonly, lookup: LookupFunction -): T.ParseResult<{ - inflection: (0 | 1 | 2)[]; - gender: T.Gender[]; - given: string; - selection: T.AdjectiveSelection; -}>[] { +): T.ParseResult>[] { const w: ReturnType = []; if (tokens.length === 0) { return []; diff --git a/src/lib/src/parsing/parse-determiner.ts b/src/lib/src/parsing/parse-determiner.ts new file mode 100644 index 0000000..3065800 --- /dev/null +++ b/src/lib/src/parsing/parse-determiner.ts @@ -0,0 +1,100 @@ +import * as T from "../../../types"; +import { determiners } from "../../../types"; +import * as tp from "../type-predicates"; +import { returnParseResult } from "./utils"; + +export const parseDeterminer: T.Parser< + T.InflectableBaseParse +> = ( + tokens: Readonly, + // eslint-disable-next-line + dictionary: T.DictionaryAPI +) => { + if (tokens.length === 0) { + return []; + } + const [first, ...rest] = tokens; + if (first.s.endsWith("و")) { + const determiner = determiners.find((d) => d.p === first.s.slice(0, -1)); + if (!determiner) return []; + if (!isInflectingDet(determiner)) return []; + return returnParseResult(rest, { + inflection: [2], + gender: ["masc", "fem"], + given: first.s, + selection: { + type: "determiner", + determiner, + }, + }); + } + if (first.s.endsWith("ې")) { + const determinerExact = determiners.find((d) => d.p === first.s); + const determinerInflected = determiners.find( + (d) => d.p === first.s.slice(0, -1) + ); + return [ + ...(determinerExact + ? returnParseResult(rest, { + inflection: [0, 1, 2], + gender: ["masc", "fem"], + given: first.s, + selection: { + type: "determiner", + determiner: determinerExact, + }, + } satisfies T.InflectableBaseParse) + : []), + ...(determinerInflected && isInflectingDet(determinerInflected) + ? returnParseResult(rest, { + inflection: [1] satisfies (0 | 1 | 2)[], + gender: ["fem"], + given: first.s, + selection: { + type: "determiner", + determiner: determinerInflected, + }, + } satisfies T.InflectableBaseParse) + : []), + ]; + } + const exact: T.ParseResult>[] = + (() => { + const determiner = determiners.find((d) => d.p === first.s); + if (!determiner) return []; + const canInflect = isInflectingDet(determiner); + return returnParseResult(rest, { + inflection: canInflect ? [0, 1] : [0, 1, 2], + gender: canInflect ? ["masc"] : ["masc", "fem"], + given: first.s, + selection: { + type: "determiner", + determiner, + }, + }); + })(); + const aEnding: T.ParseResult< + T.InflectableBaseParse + >[] = (() => { + if (first.s.endsWith("ه")) { + const determiner = determiners.find((d) => d.p === first.s.slice(0, -1)); + if (!determiner) return []; + if (!isInflectingDet(determiner)) return []; + return returnParseResult(rest, { + inflection: [0], + gender: ["fem"], + given: first.s, + selection: { + type: "determiner", + determiner, + }, + }); + } + return []; + })(); + return [...exact, ...aEnding]; +}; + +function isInflectingDet(d: T.Determiner): boolean { + return tp.isPattern1Entry(d) && !("noInf" in d && !d.noInf); +} diff --git a/src/lib/src/parsing/parse-fem-noun.ts b/src/lib/src/parsing/parse-fem-noun.ts index 26d47b6..1980cec 100644 --- a/src/lib/src/parsing/parse-fem-noun.ts +++ b/src/lib/src/parsing/parse-fem-noun.ts @@ -1,30 +1,29 @@ import * as T from "../../../types"; -import { DictionaryAPI } from "../dictionary/dictionary"; import { andSuccTp } from "../fp-ps"; import { pashtoConsonants } from "../pashto-consonants"; import * as tp from "../type-predicates"; -import { returnParseResults } from "./utils"; +import { parserCombOr, returnParseResults } from "./utils"; type FemNounBaseParse = T.InflectableBaseParse; export function parseFemNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult[] { if (tokens.length === 0) { return []; } - return [ + return parserCombOr([ plainPlural, parsePattern1, parsePattern2, parsePattern3, parseEeEnding, - ].flatMap((f) => f(tokens, dictionary)); + ])(tokens, dictionary); } function plainPlural( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult[] { if (tokens.length === 0) { return []; @@ -34,22 +33,24 @@ function plainPlural( dictionary .queryP(p) .filter(andSuccTp(tp.isFemNounEntry, tp.isPluralNounEntry)); - const plain = plurLookup(first.s).map((entry) => ({ + const plain = plurLookup(first.s).map((selection) => ({ inflection: [0], gender: ["fem"], - entry, + selection, given: first.s, })); const inflected = first.s.endsWith("و") ? (() => { const base = first.s.slice(0, -1); const guesses = [first.s, base + "ه", base + "ې"]; - return guesses.flatMap(plurLookup).map((entry) => ({ - inflection: [2], - gender: ["fem"], - entry, - given: first.s, - })); + return guesses + .flatMap(plurLookup) + .map((selection) => ({ + inflection: [2], + gender: ["fem"], + selection, + given: first.s, + })); })() : []; return returnParseResults(rest, [...plain, ...inflected]); @@ -57,7 +58,7 @@ function plainPlural( function parsePattern1( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult[] { if (tokens.length === 0) { return []; @@ -68,18 +69,18 @@ function parsePattern1( .queryP(p) .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern1Entry)); const plain = ["ه", "ع"].some((v) => first.s.endsWith(v)) - ? p1Lookup(first.s).map((entry) => ({ + ? p1Lookup(first.s).map((selection) => ({ inflection: [0], gender: ["fem"], - entry, + selection, given: first.s, })) : []; const withoutA = pashtoConsonants.includes(first.s[first.s.length - 1]) - ? p1Lookup(first.s).map((entry) => ({ + ? p1Lookup(first.s).map((selection) => ({ inflection: [0], gender: ["fem"], - entry, + selection, given: first.s, })) : []; @@ -92,21 +93,23 @@ function parsePattern1( ? p1Lookup(base) : []), ]; - return lookups.map((entry) => ({ + return lookups.map((selection) => ({ inflection: [1], gender: ["fem"], - entry, + selection, given: first.s, })); })() : []; const doubleInflected = first.s.endsWith("و") - ? p1Lookup(first.s.slice(0, -1) + "ه").map((entry) => ({ - inflection: [2], - gender: ["fem"], - entry, - given: first.s, - })) + ? p1Lookup(first.s.slice(0, -1) + "ه").map( + (selection) => ({ + inflection: [2], + gender: ["fem"], + selection, + given: first.s, + }) + ) : []; return returnParseResults(rest, [ ...plain, @@ -118,7 +121,7 @@ function parsePattern1( function parsePattern2( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult[] { if (tokens.length === 0) { return []; @@ -133,12 +136,12 @@ function parsePattern2( tp.isSingularEntry ) ) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0, 1], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -150,12 +153,12 @@ function parsePattern2( return dictionary .queryP(eGuess) .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern2Entry)) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [2], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -166,7 +169,7 @@ function parsePattern2( function parsePattern3( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult[] { if (tokens.length === 0) { return []; @@ -181,12 +184,12 @@ function parsePattern3( tp.isSingularEntry ) ) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0, 1], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -198,12 +201,12 @@ function parsePattern3( return dictionary .queryP(eGuess) .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern3Entry)) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [2], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -214,7 +217,7 @@ function parsePattern3( function parseEeEnding( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult[] { if (tokens.length === 0) { return []; @@ -224,12 +227,12 @@ function parseEeEnding( return dictionary .queryP(first.s) .filter(tp.isPattern6FemEntry) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -238,12 +241,12 @@ function parseEeEnding( return dictionary .queryP(first.s.slice(0, -1) + "ي") .filter(tp.isPattern6FemEntry) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [1], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -255,12 +258,12 @@ function parseEeEnding( return dictionary .queryP(eGuess) .filter(tp.isPattern6FemEntry) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [2], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], diff --git a/src/lib/src/parsing/parse-inflectable-word.ts b/src/lib/src/parsing/parse-inflectable-word.ts index 0e7f1a7..a4b61c2 100644 --- a/src/lib/src/parsing/parse-inflectable-word.ts +++ b/src/lib/src/parsing/parse-inflectable-word.ts @@ -1,11 +1,10 @@ import * as T from "../../../types"; -import { DictionaryAPI } from "../dictionary/dictionary"; import { andSuccTp, orTp } from "../fp-ps"; import * as tp from "../type-predicates"; export function parseInflectableWord( tokens: Readonly, - dictionary: DictionaryAPI, + dictionary: T.DictionaryAPI, tpf: (e: T.DictionaryEntry) => e is W ): T.ParseResult>[] { if (tokens.length === 0) { @@ -21,7 +20,7 @@ export function parseInflectableWord( function parseNonInflecting( tokens: Readonly, - dictionary: DictionaryAPI, + dictionary: T.DictionaryAPI, tpf: (e: T.DictionaryEntry) => e is W ): T.ParseResult>[] { if (tokens.length === 0) { @@ -31,12 +30,12 @@ function parseNonInflecting( const matches = dictionary .queryP(first.s) .filter(andSuccTp(tpf, tp.isNonInflectingEntry)); - return matches.map((entry) => ({ + return matches.map((selection) => ({ tokens: rest, body: { - inflection: tp.isNounEntry(entry) ? [0, 1] : [0, 1, 2], + inflection: tp.isNounEntry(selection) ? [0, 1] : [0, 1, 2], gender: ["masc", "fem"], - entry, + selection, given: first.s, }, errors: [], @@ -45,7 +44,7 @@ function parseNonInflecting( function parsePattern1( tokens: Readonly, - dictionary: DictionaryAPI, + dictionary: T.DictionaryAPI, tpf: (e: T.DictionaryEntry) => e is W ): T.ParseResult>[] { if (tokens.length === 0) { @@ -58,35 +57,35 @@ function parsePattern1( .filter( (e) => tpf(e) && tp.isPattern1Entry(e) && !e.c.includes("fam.") ) as T.Pattern1Entry[]; - const mascPlainOrInflected = p1Lookup(first.s).map((entry) => ({ + const mascPlainOrInflected = p1Lookup(first.s).map((selection) => ({ tokens: rest, body: { - inflection: entry.c.includes("pl.") ? [0] : [0, 1], + inflection: selection.c.includes("pl.") ? [0] : [0, 1], gender: ["masc"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], })); const femPlain = first.s.endsWith("ه") - ? p1Lookup(first.s.slice(0, -1)).map((entry) => ({ + ? p1Lookup(first.s.slice(0, -1)).map((selection) => ({ tokens: rest, body: { inflection: [0], gender: ["fem"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], })) : []; const femInflected = first.s.endsWith("ې") - ? p1Lookup(first.s.slice(0, -1)).map((entry) => ({ + ? p1Lookup(first.s.slice(0, -1)).map((selection) => ({ tokens: rest, body: { inflection: [1], gender: ["fem"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], @@ -96,12 +95,12 @@ function parsePattern1( ? [ ...p1Lookup(first.s.slice(0, -1)), ...p1Lookup(first.s.slice(0, -1) + "ه"), - ].map((entry) => ({ + ].map((selection) => ({ tokens: rest, body: { inflection: [2], gender: ["masc", "fem"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], @@ -117,7 +116,7 @@ function parsePattern1( function parsePattern2or3( tokens: Readonly, - dictionary: DictionaryAPI, + dictionary: T.DictionaryAPI, tpf: (e: T.DictionaryEntry) => e is W ): T.ParseResult>[] { if (tokens.length === 0) { @@ -128,12 +127,12 @@ function parsePattern2or3( return dictionary .queryP(first.s) .filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry))) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0], gender: ["masc"], - entry, + selection, given: first.s, }, errors: [], @@ -142,12 +141,12 @@ function parsePattern2or3( return dictionary .queryP(first.s.slice(0, -1) + "ی") .filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry))) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [1], gender: ["masc"], - entry, + selection, given: first.s, }, errors: [], @@ -156,12 +155,12 @@ function parsePattern2or3( return dictionary .queryP(first.s.slice(0, -1) + "ی") .filter(andSuccTp(tpf, tp.isPattern2Entry)) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0, 1], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -170,12 +169,12 @@ function parsePattern2or3( return dictionary .queryP(first.s.slice(0, -1) + "ی") .filter(andSuccTp(tpf, tp.isPattern3Entry)) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0, 1], gender: ["fem"], - entry, + selection, given: first.s, }, errors: [], @@ -187,12 +186,12 @@ function parsePattern2or3( return dictionary .queryP(eGuess) .filter(andSuccTp(tpf, orTp(tp.isPattern2Entry, tp.isPattern3Entry))) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [2], gender: ["masc", "fem"], - entry, + selection, given: first.s, }, errors: [], @@ -203,7 +202,7 @@ function parsePattern2or3( function parsePattern4or5( tokens: Readonly, - dictionary: DictionaryAPI, + dictionary: T.DictionaryAPI, tpf: (e: T.DictionaryEntry) => e is W ): T.ParseResult>[] { if (tokens.length === 0) { @@ -214,12 +213,12 @@ function parsePattern4or5( const plainMasc = dictionary .queryP(first.s) .filter(f) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0], gender: ["masc"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], @@ -228,12 +227,12 @@ function parsePattern4or5( ? dictionary .otherLookup("infap", first.s) .filter(f) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [1], gender: ["masc"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], @@ -243,12 +242,12 @@ function parsePattern4or5( ? dictionary .otherLookup("infbp", first.s.slice(0, -1)) .filter(f) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [0], gender: ["fem"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], @@ -258,12 +257,12 @@ function parsePattern4or5( ? dictionary .otherLookup("infbp", first.s.slice(0, -1)) .filter(f) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [1], gender: ["fem"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], @@ -273,12 +272,12 @@ function parsePattern4or5( ? dictionary .otherLookup("infbp", first.s.slice(0, -1)) .filter(f) - .map((entry) => ({ + .map((selection) => ({ tokens: rest, body: { inflection: [2], gender: ["masc", "fem"], - entry, + selection, given: first.s, } satisfies T.InflectableBaseParse, errors: [], diff --git a/src/lib/src/parsing/parse-irregular-plural.ts b/src/lib/src/parsing/parse-irregular-plural.ts index fa791c8..4510b09 100644 --- a/src/lib/src/parsing/parse-irregular-plural.ts +++ b/src/lib/src/parsing/parse-irregular-plural.ts @@ -1,12 +1,11 @@ import * as T from "../../../types"; -import { DictionaryAPI } from "../dictionary/dictionary"; import { endsInConsonant } from "../p-text-helpers"; import * as tp from "../type-predicates"; import { returnParseResults } from "./utils"; export function parseIrregularPlural( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; diff --git a/src/lib/src/parsing/parse-noun-new.ts b/src/lib/src/parsing/parse-noun-new.ts index f04a657..c0431ba 100644 --- a/src/lib/src/parsing/parse-noun-new.ts +++ b/src/lib/src/parsing/parse-noun-new.ts @@ -1,67 +1,96 @@ import * as T from "../../../types"; -import { DictionaryAPI } from "../dictionary/dictionary"; import { makeNounSelection } from "../phrase-building/make-selections"; import { parseAdjective } from "./parse-adjective-new"; +import { parseDeterminer } from "./parse-determiner"; import { parseNounWord } from "./parse-noun-word"; -import { bindParseResult } from "./utils"; +import { bindParseResult, parserCombMany, toParseError } from "./utils"; type NounResult = { inflected: boolean; selection: T.NounSelection }; -// ISSUES - fem nouns like ښځه کتابچه not working -// زاړه مېلمانه adjective agreement problem - export function parseNoun( tokens: Readonly, - dictionary: DictionaryAPI, - possesor: T.PossesorSelection | undefined, - adjectives: { - inflection: (0 | 1 | 2)[]; - gender: T.Gender[]; - given: string; - selection: T.AdjectiveSelection; - }[] + dictionary: T.DictionaryAPI, + possesor: T.PossesorSelection | undefined ): T.ParseResult[] { if (tokens.length === 0) { return []; } + const detRes = parserCombMany(parseDeterminer)(tokens, dictionary); // TODO: add recognition of او between adjectives - const withAdj = bindParseResult( - parseAdjective(tokens, dictionary), - (tkns, adj) => parseNoun(tkns, dictionary, possesor, [...adjectives, adj]) - ); - const nounWord = parseNounWord(tokens, dictionary); - // fit together with nouns - const nouns = bindParseResult(nounWord, (tkns, nr) => { - const { error: adjErrors } = adjsMatch( - adjectives, - nr.gender, - nr.inflected ? 1 : 0, - nr.plural - ); - const s = makeNounSelection(nr.entry, undefined); - const body: NounResult = { - inflected: nr.inflected, - selection: { - ...s, - gender: nr.gender, - number: nr.plural ? "plural" : "singular", - adjectives: adjectives.map((a) => a.selection), - possesor, - }, - }; - return [ - { - body, - tokens: tkns, - errors: adjErrors.map((x) => ({ message: x })), - }, - ]; + return bindParseResult(detRes, (t, determiners) => { + const adjRes = parserCombMany(parseAdjective)(t, dictionary); + return bindParseResult(adjRes, (tk, adjectives) => { + const nounWord = parseNounWord(tk, dictionary); + return bindParseResult(nounWord, (tkns, nr) => { + const { error: adjErrors } = adjDetsMatch( + adjectives, + nr.gender, + nr.inflected ? 1 : 0, + nr.plural + ); + const { error: detErrors } = adjDetsMatch( + determiners, + nr.gender, + nr.inflected ? 1 : 0, + nr.plural + ); + const dupErrors = checkForDeterminerDuplicates(determiners); + const s = makeNounSelection(nr.entry, undefined); + const body: NounResult = { + inflected: nr.inflected, + selection: { + ...s, + gender: nr.gender, + number: nr.plural ? "plural" : "singular", + adjectives: adjectives.map((a) => a.selection), + determiners: determiners.length + ? { + type: "determiners", + withNoun: true, + determiners: determiners.map((d) => d.selection), + } + : undefined, + possesor, + }, + }; + return [ + { + body, + tokens: tkns, + errors: [ + ...detErrors.map(toParseError), + ...dupErrors.map(toParseError), + ...adjErrors.map(toParseError), + ], + }, + ]; + }); + }); }); - return [...nouns, ...withAdj]; } -function adjsMatch( - adjectives: Parameters[3], +function checkForDeterminerDuplicates( + determiners: T.InflectableBaseParse[] +): string[] { + // from https://flexiple.com/javascript/find-duplicates-javascript-array + const array = determiners.map((d) => d.selection.determiner.p); + const duplicates: string[] = []; + for (let i = 0; i < array.length; i++) { + for (let j = i + 1; j < array.length; j++) { + if (array[i] === array[j]) { + if (!duplicates.includes(array[i])) { + duplicates.push(array[i]); + } + } + } + } + return duplicates.map((x) => `duplicate ${x} determiner`); +} + +function adjDetsMatch( + adjectives: T.InflectableBaseParse< + T.AdjectiveSelection | T.DeterminerSelection + >[], gender: T.Gender, inf: 0 | 1 | 2, plural: boolean | undefined @@ -76,14 +105,17 @@ function adjsMatch( return { ok: false, error: unmatching.map((x) => { - const adjText = - x.given === x.selection.entry.p - ? x.given - : `${x.given} (${x.selection.entry.p})`; + const p = + x.selection.type === "adjective" + ? x.selection.entry.p + : x.selection.determiner.p; + const adjText = x.given === p ? x.given : `${x.given} (${p})`; const inflectionIssue = !x.inflection.some((x) => x === inflection) ? ` should be ${showInflection(inflection)}` : ``; - return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`; + return `${ + x.selection.type === "adjective" ? "Adjective" : "Determiner" + } agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`; }), }; } else { diff --git a/src/lib/src/parsing/parse-noun-word.ts b/src/lib/src/parsing/parse-noun-word.ts index cfc1480..901bf47 100644 --- a/src/lib/src/parsing/parse-noun-word.ts +++ b/src/lib/src/parsing/parse-noun-word.ts @@ -1,5 +1,4 @@ import * as T from "../../../types"; -import { DictionaryAPI } from "../dictionary/dictionary"; import { fFlatMapParseResult } from "../fp-ps"; import { getInflectionPattern } from "../inflection-pattern"; import { makeNounSelection } from "../phrase-building/make-selections"; @@ -8,11 +7,12 @@ import { parseInflectableWord } from "./parse-inflectable-word"; import { parseFemNoun } from "./parse-fem-noun"; import { parsePluralEndingNoun } from "./parse-plural-ending-noun"; import { parseIrregularPlural } from "./parse-irregular-plural"; +import { parserCombOr } from "./utils"; -export function parseNounWord( +export const parseNounWord: T.Parser> = ( tokens: Readonly, - dictionary: DictionaryAPI -): T.ParseResult>[] { + dictionary: T.DictionaryAPI +) => { if (tokens.length === 0) { return []; } @@ -25,10 +25,12 @@ export function parseNounWord( ); return [ ...withoutPluralEndings, - ...parsePluralEndingNoun(tokens, dictionary), - ...parseIrregularPlural(tokens, dictionary), + ...parserCombOr([parsePluralEndingNoun, parseIrregularPlural])( + tokens, + dictionary + ), ]; -} +}; function inflectableBaseParseToNounWordResults( wr: T.InflectableBaseParse @@ -46,17 +48,17 @@ function inflectableBaseParseToNounWordResults( } const possibleGenders = gendersWorkWithSelection( wr.gender, - makeNounSelection(wr.entry, undefined) + makeNounSelection(wr.selection, undefined) ); return possibleGenders.flatMap((gender) => wr.inflection.flatMap((inflection) => - convertInflection(inflection, wr.entry, gender).flatMap( + convertInflection(inflection, wr.selection, gender).flatMap( ({ inflected, number }) => ({ inflected, plural: number === "plural", gender, given: wr.given, - entry: wr.entry, + entry: wr.selection, }) ) ) diff --git a/src/lib/src/parsing/parse-plural-ending-noun.ts b/src/lib/src/parsing/parse-plural-ending-noun.ts index 2dab141..f4c5004 100644 --- a/src/lib/src/parsing/parse-plural-ending-noun.ts +++ b/src/lib/src/parsing/parse-plural-ending-noun.ts @@ -1,5 +1,4 @@ import * as T from "../../../types"; -import { DictionaryAPI } from "../dictionary/dictionary"; import { endsInAaOrOo, endsInConsonant, @@ -11,7 +10,7 @@ import { returnParseResults } from "./utils"; export function parsePluralEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -32,7 +31,7 @@ export function parsePluralEndingNoun( // function parseSpecialPlural( // tokens: Readonly, -// dictionary: DictionaryAPI +// dictionary: T.DictionaryAPI // ): T.ParseResult>[] { // if (tokens.length === 0) { // return []; @@ -90,7 +89,7 @@ export function parsePluralEndingNoun( function parseOonaEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -132,7 +131,7 @@ function parseOonaEndingNoun( function parseAanEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -206,7 +205,7 @@ function parseAanEndingNoun( function parseAaneEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -233,7 +232,7 @@ function parseAaneEndingNoun( function parseGaanEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -309,7 +308,7 @@ function parseGaanEndingNoun( function parseGaaneEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -413,7 +412,7 @@ function parseGaaneEndingNoun( function parseWeEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -489,7 +488,7 @@ function parseWeEndingNoun( function parseIYaanEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; @@ -563,7 +562,7 @@ function parseIYaanEndingNoun( function parseIYaaneEndingNoun( tokens: Readonly, - dictionary: DictionaryAPI + dictionary: T.DictionaryAPI ): T.ParseResult>[] { if (tokens.length === 0) { return []; diff --git a/src/lib/src/parsing/utils.ts b/src/lib/src/parsing/utils.ts index 01f814f..a4f470a 100644 --- a/src/lib/src/parsing/utils.ts +++ b/src/lib/src/parsing/utils.ts @@ -121,6 +121,48 @@ export function cleanOutResults( return Array.from(new Set(errorsCulled.map(JSON.stringify))).map(JSON.parse); } +export type Parser = ( + tokens: Readonly, + dictionary: T.DictionaryAPI +) => T.ParseResult[]; + +export function parserCombOr(parsers: Parser[]) { + return (tokens: Readonly, dictionary: T.DictionaryAPI) => + parsers.flatMap((p) => p(tokens, dictionary)); +} + +/** + * A parser combinator to take a parser and make it run as many times as possible + * for each success, it will also return an option as if it failed, to allow for + * the words to be considered something else. + * + * @param parser + * @returns + */ +export function parserCombMany(parser: Parser): Parser { + const r: Parser = ( + tokens: Readonly, + dictionary: T.DictionaryAPI + ) => { + function go(acc: R[], t: Readonly): T.ParseResult[] { + const one = parser(t, dictionary); + if (one.length === 0) { + return returnParseResult(t, acc); + } + return bindParseResult(one, (tkns, o) => { + return [ + ...go([...acc, o], tkns), + // also have a result where the next token is NOT + // considered a success + ...returnParseResult(t, acc), + ]; + }); + } + return go([], tokens); + }; + return r; +} + export function isCompleteResult( r: T.ParseResult ): boolean { @@ -244,3 +286,7 @@ export function addShrunkenPossesor( }, }; } + +export function toParseError(message: string): T.ParseError { + return { message }; +} diff --git a/src/lib/src/type-predicates.ts b/src/lib/src/type-predicates.ts index 8c0e60d..fc88c43 100644 --- a/src/lib/src/type-predicates.ts +++ b/src/lib/src/type-predicates.ts @@ -121,8 +121,10 @@ export function isMascNounEntry( return !!e.c && e.c.includes("n. m."); } -export function isFemNounEntry(e: T.DictionaryEntry): e is T.FemNounEntry { - return !!e.c && e.c.includes("n. f."); +export function isFemNounEntry( + e: T.DictionaryEntry | T.Determiner +): e is T.FemNounEntry { + return "c" in e && !!e.c && e.c.includes("n. f."); } export function isUnisexNounEntry( @@ -195,13 +197,13 @@ export function isNonInflectingEntry( * @param e * @returns */ -export function isPattern1Entry( +export function isPattern1Entry( e: T ): e is T.Pattern1Entry { - if (e.noInf) return false; - if (e.infap || e.infbp) return false; + if ("noInf" in e && e.noInf) return false; + if (("infap" in e && e.infap) || ("infbp" in e && e.infbp)) return false; // family words like خور زوی etc with special plural don't follow pattern #1 - if (e.c.includes("fam.")) { + if ("c" in e && e.c.includes("fam.")) { return false; } if (isFemNounEntry(e)) { diff --git a/src/types.ts b/src/types.ts index 8897874..fc16856 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1238,13 +1238,36 @@ export type EquativeBlock = { type: "equative"; equative: EquativeRendered }; export type NegativeBlock = { type: "negative"; imperative: boolean }; -export type InflectableBaseParse = { +export type InflectableBaseParse< + E extends InflectableEntry | AdjectiveSelection | DeterminerSelection +> = { inflection: (0 | 1 | 2)[]; gender: Gender[]; given: string; - entry: E; + selection: E; }; +export type DictionaryAPI = { + initialize: () => Promise<{ + response: "loaded first time" | "loaded from saved"; + dictionaryInfo: DictionaryInfo; + }>; + update: () => Promise<{ + response: "no need for update" | "updated" | "unable to check"; + dictionaryInfo: DictionaryInfo; + }>; + queryP: (p: string) => DictionaryEntry[]; + adjLookup: (p: string) => AdjectiveEntry[]; + nounLookup: (p: string) => NounEntry[]; + otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[]; + specialPluralLookup: (p: string) => NounEntry[]; +}; + +export type Parser = ( + tokens: Readonly, + dictionary: DictionaryAPI +) => ParseResult[]; + export type ParsedNounWord = { inflected: boolean; plural: boolean;