more work on new noun parsing, and inflections

This commit is contained in:
adueck 2024-08-21 14:35:01 -04:00
parent d0e1a71827
commit 005d542cc9
32 changed files with 2834 additions and 384 deletions

1
.gitignore vendored
View File

@ -10,6 +10,7 @@ lerna-debug.log*
# fetched vocab # fetched vocab
src/verbs.ts src/verbs.ts
src/nouns-adjs.ts src/nouns-adjs.ts
vocab/mini-dict-entries.ts
# testing # testing
/coverage /coverage

View File

@ -15,7 +15,7 @@ type InflectionError = {
async function checkAll() { async function checkAll() {
console.log("Checking inflection functions on all dictionary words"); console.log("Checking inflection functions on all dictionary words");
const res = await fetch(process.env.LINGDOCS_DICTIONARY_URL); const res = await fetch(process.env.LINGDOCS_DICTIONARY_URL + ".json");
const { entries }: T.Dictionary = await res.json(); const { entries }: T.Dictionary = await res.json();
const errors: InflectionError[] = []; const errors: InflectionError[] = [];

22
get-mini-dict.ts Normal file
View File

@ -0,0 +1,22 @@
import * as T from "./src/types";
import fs from "fs";
import { entries as collection } from "./vocab/mini-dict-tss";
// Generates vocab/mini-dict-entries.ts: downloads the full dictionary and keeps
// only the entries whose `ts` is listed in vocab/mini-dict-tss.ts.
const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
);
// Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
if (!res.ok) {
  throw new Error(
    `could not fetch dictionary: ${res.status} ${res.statusText}`
  );
}
const dictionary = (await res.json()) as T.Dictionary;
// Set lookup keeps the filter linear in the size of the dictionary.
const wanted = new Set(collection);
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  wanted.has(x.ts)
);
// A requested id that is absent from the dictionary would otherwise go unnoticed
// until something reads an undefined entry from the generated file.
const found = new Set(entries.map((e) => e.ts));
const missing = collection.filter((ts) => !found.has(ts));
if (missing.length > 0) {
  console.warn(
    `mini-dict: ${missing.length} requested ts value(s) not found in dictionary: ${missing.join(", ")}`
  );
}
const contents = `import { DictionaryEntry } from "../src/types";
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
export const entries: DictionaryEntry[] = [
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
];
`;
fs.writeFileSync("./vocab/mini-dict-entries.ts", contents);

View File

@ -6,6 +6,7 @@ export default {
"ts-jest", "ts-jest",
{ {
tsconfig: "tsconfig.app.json", tsconfig: "tsconfig.app.json",
diagnostics: false,
}, },
], ],
}, },

30
package-lock.json generated
View File

@ -1,15 +1,16 @@
{ {
"name": "pashto-inflector-website", "name": "pashto-inflector-website",
"version": "7.6.5", "version": "7.7.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "pashto-inflector-website", "name": "pashto-inflector-website",
"version": "7.6.5", "version": "7.7.0",
"dependencies": { "dependencies": {
"@fortawesome/fontawesome-free": "^5.15.2", "@fortawesome/fontawesome-free": "^5.15.2",
"bootstrap": "4.6.1", "bootstrap": "4.6.1",
"json-edit-react": "^1.15.7",
"react": "^18.3.1", "react": "^18.3.1",
"react-bootstrap": "1.5.1", "react-bootstrap": "1.5.1",
"react-dom": "^18.3.1", "react-dom": "^18.3.1",
@ -5443,6 +5444,19 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/json-edit-react": {
"version": "1.15.7",
"resolved": "https://registry.npmjs.org/json-edit-react/-/json-edit-react-1.15.7.tgz",
"integrity": "sha512-PMw7FSTVrY23m5tdwDFxqjVL4OHyR/zSkPIu1BhKVJofW+l0I93moevs7CWTD+3ZWA/m8OtKpxYV3CQ30OlNJA==",
"license": "MIT",
"dependencies": {
"object-property-assigner": "^1.3.0",
"object-property-extractor": "^1.0.11"
},
"peerDependencies": {
"react": ">=16.0.0"
}
},
"node_modules/json-parse-even-better-errors": { "node_modules/json-parse-even-better-errors": {
"version": "2.3.1", "version": "2.3.1",
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@ -5837,6 +5851,18 @@
"node": ">=0.10.0" "node": ">=0.10.0"
} }
}, },
"node_modules/object-property-assigner": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/object-property-assigner/-/object-property-assigner-1.3.0.tgz",
"integrity": "sha512-19A0RsC9rP9klCKHDPL/MeERxeopV9wyMNfP+eD2uKOafzLjF+OUEN4FoP6RAlCFHmerBPJ4ohNv/WrgaNpeIA==",
"license": "MIT"
},
"node_modules/object-property-extractor": {
"version": "1.0.11",
"resolved": "https://registry.npmjs.org/object-property-extractor/-/object-property-extractor-1.0.11.tgz",
"integrity": "sha512-VnDQcyN0FTXZ0hMZS/CTb2QkIssZ9XKB8zlf5rnFh1HjFQX1P73EHawavSztBOiPDGqAPNXebv4agjhF9eACAw==",
"license": "MIT"
},
"node_modules/once": { "node_modules/once": {
"version": "1.4.0", "version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",

View File

@ -1,6 +1,6 @@
{ {
"name": "pashto-inflector-website", "name": "pashto-inflector-website",
"version": "7.6.5", "version": "7.7.0",
"type": "module", "type": "module",
"scripts": { "scripts": {
"patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version", "patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version",
@ -14,12 +14,13 @@
"build-website": "tsc -b && vite build", "build-website": "tsc -b && vite build",
"build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs", "build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs",
"build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib", "build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib",
"get-words": "node get-words.cjs", "get-words": "node get-words.cjs && tsx get-mini-dict.ts",
"check-all-inflections": "tsx check-all-inflections.ts" "check-all-inflections": "tsx check-all-inflections.ts"
}, },
"dependencies": { "dependencies": {
"@fortawesome/fontawesome-free": "^5.15.2", "@fortawesome/fontawesome-free": "^5.15.2",
"bootstrap": "4.6.1", "bootstrap": "4.6.1",
"json-edit-react": "^1.15.7",
"react": "^18.3.1", "react": "^18.3.1",
"react-bootstrap": "1.5.1", "react-bootstrap": "1.5.1",
"react-dom": "^18.3.1", "react-dom": "^18.3.1",

View File

@ -19,7 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
import Hider from "./components/src/Hider"; import Hider from "./components/src/Hider";
import InflectionDemo from "./demo-components/InflectionDemo"; import InflectionDemo from "./demo-components/InflectionDemo";
import SpellingDemo from "./demo-components/SpellingDemo"; import SpellingDemo from "./demo-components/SpellingDemo";
import ParserDemo from "./demo-components/ParserDemo"; // import ParserDemo from "./demo-components/ParserDemo";
// import InflectionTable from "./components/src/InflectionsTable"; // import InflectionTable from "./components/src/InflectionsTable";
function App() { function App() {
@ -31,7 +31,6 @@ function App() {
defualtTextOptions, defualtTextOptions,
"textOpts1" "textOpts1"
); );
const [dictionaryReady, setDictionaryIsReady] = useState<boolean>(false);
const [theme, setTheme] = useStickyState<"light" | "dark">("light", "theme1"); const [theme, setTheme] = useStickyState<"light" | "dark">("light", "theme1");
const [showing, setShowing] = useState<string>(""); const [showing, setShowing] = useState<string>("");
function handleHiderClick(label: string) { function handleHiderClick(label: string) {
@ -39,14 +38,14 @@ function App() {
} }
useEffect(() => { useEffect(() => {
console.log("WILL INIT");
dictionary dictionary
.initialize() .initialize()
.then(() => { .catch(console.error)
console.log("DONE INIT"); .then((res) => {
setDictionaryIsReady(true); if (res && res.response === "loaded from saved") {
}) dictionary.update();
.catch(console.error); }
});
}, []); }, []);
useEffect(() => { useEffect(() => {
@ -99,7 +98,6 @@ function App() {
<h1 className="display-4 mt-2"> <h1 className="display-4 mt-2">
<code>Pashto Inflector</code> <code>Pashto Inflector</code>
</h1> </h1>
{dictionaryReady && <div>READY</div>}
<p <p
className="lead my-3" className="lead my-3"
style={{ maxWidth: "600px", margin: "0 auto" }} style={{ maxWidth: "600px", margin: "0 auto" }}
@ -165,14 +163,18 @@ function App() {
> >
<SpellingDemo opts={textOptions} onChange={setTextOptions} /> <SpellingDemo opts={textOptions} onChange={setTextOptions} />
</Hider> </Hider>
<Hider {/* <Hider
label="Parser (🚧 IN PROGRESS 🚧)" label="Parser (🚧 IN PROGRESS 🚧)"
hLevel={3} hLevel={3}
showing={showing === "parser"} showing={showing === "parser"}
handleChange={() => handleHiderClick("parser")} handleChange={() => handleHiderClick("parser")}
> >
<ParserDemo opts={textOptions} entryFeeder={entryFeeder} /> <ParserDemo
</Hider> opts={textOptions}
entryFeeder={entryFeeder}
dictionary={dictionary}
/>
</Hider> */}
</div> </div>
</main> </main>
<Modal <Modal

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "7.6.5", "version": "7.7.0",
"description": "Pashto inflector library module with React components", "description": "Pashto inflector library module with React components",
"main": "dist/components/library.js", "main": "dist/components/library.js",
"module": "dist/components/library.js", "module": "dist/components/library.js",

View File

@ -1,10 +1,13 @@
import { useState } from "react"; import { useState } from "react";
import * as T from "../types"; import * as T from "../types";
import { parsePhrase } from "../lib/src/parsing/parse-phrase"; // import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { tokenizer } from "../lib/src/parsing/tokenizer"; import { tokenizer } from "../lib/src/parsing/tokenizer";
import { NPDisplay } from "../components/library"; // import { NPDisplay } from "../components/library";
import EditableVP from "../components/src/vp-explorer/EditableVP"; // import EditableVP from "../components/src/vp-explorer/EditableVP";
import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools"; // import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
import { DictionaryAPI } from "../lib/src/dictionary/dictionary";
import { parseNoun } from "../lib/src/parsing/parse-noun-new";
import { JsonEditor } from "json-edit-react";
const working = [ const working = [
"limited demo vocab", "limited demo vocab",
@ -44,16 +47,17 @@ const examples = [
]; ];
function ParserDemo({ function ParserDemo({
opts, // opts,
entryFeeder, // entryFeeder,
dictionary,
}: { }: {
opts: T.TextOptions; opts: T.TextOptions;
entryFeeder: T.EntryFeeder; entryFeeder: T.EntryFeeder;
dictionary: DictionaryAPI;
}) { }) {
const [text, setText] = useState<string>(""); const [text, setText] = useState<string>("");
const [result, setResult] = useState< const [result, setResult] = useState<any[]>([]);
ReturnType<typeof parsePhrase>["success"] // ReturnType<typeof parsePhrase>["success"]
>([]);
const [errors, setErrors] = useState<string[]>([]); const [errors, setErrors] = useState<string[]>([]);
function handleInput(value: string) { function handleInput(value: string) {
if (!value) { if (!value) {
@ -62,7 +66,11 @@ function ParserDemo({
setErrors([]); setErrors([]);
return; return;
} }
const { success, errors } = parsePhrase(tokenizer(value)); const res = parseNoun(tokenizer(value), dictionary, undefined, []);
const success = res.filter((x) => !x.tokens.length).map((x) => x.body);
const errors = [
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
];
setText(value); setText(value);
setErrors(errors); setErrors(errors);
setResult(success); setResult(success);
@ -127,8 +135,8 @@ function ParserDemo({
<div className="text-center">Did you mean:</div> <div className="text-center">Did you mean:</div>
</> </>
)} )}
<JsonEditor data={result} />
{result.map((res) => {/* {result.map((res) =>
"inflected" in res ? ( "inflected" in res ? (
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} /> <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
) : "verb" in res ? ( ) : "verb" in res ? (
@ -166,7 +174,7 @@ function ParserDemo({
<pre>{JSON.stringify(res, null, " ")}</pre> <pre>{JSON.stringify(res, null, " ")}</pre>
</samp> </samp>
) )
)} )} */}
<details> <details>
<summary>AST</summary> <summary>AST</summary>
<samp> <samp>

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/inflect", "name": "@lingdocs/inflect",
"version": "7.6.5", "version": "7.7.0",
"description": "Pashto inflector library", "description": "Pashto inflector library",
"main": "dist/lib/library.cjs", "main": "dist/lib/library.cjs",
"module": "dist/lib/library.js", "module": "dist/lib/library.js",

View File

@ -31,9 +31,44 @@ function nounLookup(p: string): T.NounEntry[] {
return res.filter(tp.isNounEntry); return res.filter(tp.isNounEntry);
} }
export const dictionary = { function otherLookup(
key: keyof T.DictionaryEntry,
p: string
): T.DictionaryEntry[] {
if (!dictDb.collection) {
return [];
}
return dictDb.collection.find({ [key]: p });
}
/**
 * Finds noun entries whose `ppp` or `app` field contains `p` as one item of a
 * comma-separated list.
 *
 * @param p - the plural form to search for; it is matched literally
 * @returns the matching entries that pass `tp.isNounEntry`, or an empty array
 *   when the dictionary collection is not available
 */
function specialPluralLookup(p: string): T.NounEntry[] {
  if (!dictDb.collection) {
    return [];
  }
  // Escape regex metacharacters so that arbitrary input can neither throw a
  // SyntaxError in the RegExp constructor nor act as a wildcard.
  const escaped = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp(`(^|\\s|,)${escaped}($|,)`);
  return dictDb.collection
    .find({
      $or: [{ ppp: { $regex: regex } }, { app: { $regex: regex } }],
    })
    .filter(tp.isNounEntry);
}
/**
 * The set of dictionary operations handed to the parsers, so that the
 * database-backed dictionary and a test dictionary can be used interchangeably.
 */
export type DictionaryAPI = {
// returns whatever dictDb.initialize returns
initialize: () => ReturnType<typeof dictDb.initialize>;
// returns whatever dictDb.updateDictionary returns
update: () => ReturnType<typeof dictDb.updateDictionary>;
// dictionary entries for the given `p` string
queryP: (p: string) => T.DictionaryEntry[];
// adjective entries for the given `p` string
adjLookup: (p: string) => T.AdjectiveEntry[];
// noun entries for the given `p` string
nounLookup: (p: string) => T.NounEntry[];
// entries whose `key` field equals `p`
otherLookup: (key: keyof T.DictionaryEntry, p: string) => T.DictionaryEntry[];
// noun entries whose `ppp` or `app` field lists `p`
specialPluralLookup: (p: string) => T.NounEntry[];
};
export const dictionary: DictionaryAPI = {
initialize: async () => await dictDb.initialize(), initialize: async () => await dictDb.initialize(),
update: async () => await dictDb.updateDictionary(() => null),
queryP: memoizedQueryP, queryP: memoizedQueryP,
adjLookup: memoize(adjLookup), adjLookup: memoize(adjLookup),
nounLookup: memoize(nounLookup), nounLookup: memoize(nounLookup),
otherLookup: memoize(otherLookup),
specialPluralLookup: memoize(specialPluralLookup),
}; };

View File

@ -53,6 +53,20 @@ export function fmapParseResult<A extends object, B extends object>(
})); }));
} }
/**
 * Flat-maps over the bodies of a list of parse results.
 *
 * Every body is expanded by `f` into zero or more new bodies; each new body is
 * wrapped in a parse result that carries over the `tokens` and `errors` of the
 * result it came from.
 *
 * @param f - expands one body into a list of bodies
 * @param x - the parse results to expand
 * @returns one parse result per body produced by `f`, in order
 */
export function fFlatMapParseResult<A extends object, B extends object>(
  f: (x: A) => B[],
  x: T.ParseResult<A>[]
): T.ParseResult<B>[] {
  return x.reduce<T.ParseResult<B>[]>((acc, source) => {
    f(source.body).forEach((body) => {
      acc.push({
        tokens: source.tokens,
        body,
        errors: source.errors,
      });
    });
    return acc;
  }, []);
}
export function fmapSingleOrLengthOpts<A, B>( export function fmapSingleOrLengthOpts<A, B>(
f: (x: A) => B, f: (x: A) => B,
x: T.SingleOrLengthOpts<A> x: T.SingleOrLengthOpts<A>
@ -216,3 +230,35 @@ export function mapVerbRenderedOutput(
return fmapVB(v); return fmapVB(v);
} }
} }
/**
 * Combines two type predicates with OR: the resulting predicate accepts a
 * value when either `f` or `g` accepts it (`g` is only consulted when `f`
 * rejects the value).
 */
export function orTp<A, B extends A, C extends A>(
  f: (x: A) => x is B,
  g: (x: A) => x is C
): (x: A) => x is B | C {
  return function either(x: A): x is B | C {
    const viaF = f(x);
    if (viaF) {
      return viaF;
    }
    return g(x);
  };
}
/**
 * Combines two type predicates with AND: the resulting predicate accepts a
 * value only when both `f` and `g` accept it (`g` is only consulted when `f`
 * accepts the value).
 */
export function andTp<A, B extends A, C extends A>(
  f: (x: A) => x is B,
  g: (x: A) => x is C
): (x: A) => x is B & C {
  return function both(x: A): x is B & C {
    const viaF = f(x);
    if (!viaF) {
      return viaF;
    }
    return g(x);
  };
}
/**
 * Chains two type predicates with AND, where the second one refines the
 * first: `g` only accepts values already narrowed by `f`, and is only run
 * once `f` has accepted the value.
 */
export function andSuccTp<A, B extends A, C extends B>(
  f: (x: A) => x is B,
  g: (x: B) => x is C
): (x: A) => x is B & C {
  return function refined(x: A): x is B & C {
    const viaF = f(x);
    if (!viaF) {
      return viaF;
    }
    return g(x);
  };
}

View File

@ -13,6 +13,7 @@ import {
export function getInflectionPattern( export function getInflectionPattern(
e: T.InflectableEntry e: T.InflectableEntry
): T.InflectionPattern { ): T.InflectionPattern {
if (e.noInf) return 0;
return isPattern1Entry(e) return isPattern1Entry(e)
? T.InflectionPattern.Basic ? T.InflectionPattern.Basic
: isPattern2Entry(e) : isPattern2Entry(e)

View File

@ -20,6 +20,7 @@ import { removeDuplicates } from "./phrase-building/vp-tools";
import { import {
isAdjOrUnisexNounEntry, isAdjOrUnisexNounEntry,
isAnimNounEntry, isAnimNounEntry,
isDeterminerEntry,
isFemNounEntry, isFemNounEntry,
isInflectableEntry, isInflectableEntry,
isMascNounEntry, isMascNounEntry,
@ -59,7 +60,6 @@ export function getInfsAndVocative(
if (!isInflectableEntry(entryR)) { if (!isInflectableEntry(entryR)) {
return false; return false;
} }
// @ts-ignore
const entry: T.InflectableEntry = entryR as T.InflectableEntry; const entry: T.InflectableEntry = entryR as T.InflectableEntry;
const pattern = getInflectionPattern(entry); const pattern = getInflectionPattern(entry);
if ( if (
@ -77,8 +77,15 @@ export function getInfsAndVocative(
}), }),
}; };
} }
if ("c" in entry && entry.c?.includes("fam.") && isMascNounEntry(entry)) {
return {
vocative: familialMascVocative(entry, plurals),
};
}
const gender: T.Gender | "unisex" = const gender: T.Gender | "unisex" =
isAdjOrUnisexNounEntry(entry) || isNumberEntry(entry) isAdjOrUnisexNounEntry(entry) ||
isNumberEntry(entry) ||
isDeterminerEntry(entry)
? "unisex" ? "unisex"
: isMascNounEntry(entry) : isMascNounEntry(entry)
? "masc" ? "masc"
@ -101,6 +108,20 @@ export function getInfsAndVocative(
return aggregateInfsAndVoc(masc, fem); return aggregateInfsAndVoc(masc, fem);
} }
/**
 * Builds the vocative forms for a masculine noun handled by the familial
 * ("fam.") branch of the caller.
 *
 * Only entries that end in a consonant or have a shwa ending get a result:
 * the first vocative form comes from `pattern1Masc`, the second slot is the
 * masculine plural. Every other entry yields `undefined`.
 */
function familialMascVocative(
  entry: T.MascNounEntry,
  plurals: Plurals
): T.PluralInflections | undefined {
  const eligible = endsInConsonant(entry) || hasShwaEnding(entry);
  if (!eligible) {
    return undefined;
  }
  const plr = genderPlural("masc", plurals);
  const pattern1 = pattern1Masc({ entry, plurals: plr });
  const masc = [pattern1.vocative[0], plr] as T.PluralInflectionSet;
  return { masc };
}
type PatternInput = { type PatternInput = {
entry: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry; entry: T.DictionaryEntryNoFVars | T.NounEntry | T.InflectableEntry;
plurals: T.PsString[]; plurals: T.PsString[];
@ -179,17 +200,12 @@ function vocFemAnimException({
"plural missing for feminine animate exception noun " + entry.p "plural missing for feminine animate exception noun " + entry.p
); );
} }
// TODO: HANDLE BETTER WITH PLURALS!
const plurBase = mapPsString(
(x) => x.slice(0, -1),
makePsString(entry.ppp, entry.ppf)
);
const base = const base =
countSyllables(entry) === 1 countSyllables(entry) === 1
? accentOnNFromEnd(entry, 0) ? accentOnNFromEnd(entry, 0)
: psStringFromEntry(entry); : psStringFromEntry(entry);
return { return {
fem: [[concatPs(base, e)], addPlurals([concatPs(plurBase, o)], plurals)], fem: [[concatPs(base, e)], plurals as T.ArrayOneOrMore<T.PsString>],
}; };
} }

View File

@ -183,8 +183,6 @@ export function makePlural(
}; };
} }
function addLongVowelSuffix(gender: "masc" | "fem"): T.PluralInflectionSet { function addLongVowelSuffix(gender: "masc" | "fem"): T.PluralInflectionSet {
if (pashtoPlural) {
}
const base = removeEndTick(makePsString(w.p, w.f)); const base = removeEndTick(makePsString(w.p, w.f));
const baseWOutAccents = removeAccents(base); const baseWOutAccents = removeAccents(base);
const space = const space =

View File

@ -69,7 +69,7 @@ function nounAdjLookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
.includes(value as string) .includes(value as string)
); );
} }
// @ts-expect-error because // @ts-expect-error its ok
return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[]; return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
} }

View File

@ -0,0 +1,39 @@
import * as T from "../../../types";
import type { DictionaryAPI } from "../dictionary/dictionary";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
import { entries } from "../../../../vocab/mini-dict-entries";
/** Returns every mini-dictionary entry whose `p` field is exactly `p`. */
const queryP = (p: string) => {
  return entries.filter((entry) => entry.p === p);
};
/** Returns the adjective entries of the mini dictionary whose `p` is `p`. */
function adjLookup(p: string): T.AdjectiveEntry[] {
  const matches = queryP(p);
  const adjectives = matches.filter(isAdjectiveEntry);
  return adjectives as T.AdjectiveEntry[];
}
/** Returns the noun entries of the mini dictionary whose `p` is `p`. */
function nounLookup(p: string): T.NounEntry[] {
  const matches = queryP(p);
  const nouns = matches.filter(isNounEntry);
  return nouns as T.NounEntry[];
}
/**
 * Returns the mini-dictionary entries whose field `key` is strictly equal
 * to `p`.
 */
function otherLookup(
  key: keyof T.DictionaryEntry,
  p: string
): T.DictionaryEntry[] {
  const hasValue = (entry: T.DictionaryEntry) => entry[key] === p;
  return entries.filter(hasValue);
}
/**
 * Finds the noun entries of the mini dictionary whose `ppp` or `app` field
 * contains `p` as one item of a comma-separated list.
 *
 * @param p - the plural form to search for; it is matched literally
 */
function specialPluralLookup(p: string): T.NounEntry[] {
  // Escape regex metacharacters so that arbitrary input can neither throw a
  // SyntaxError in the RegExp constructor nor act as a wildcard.
  const escaped = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp(`(^|\\s|,)${escaped}($|,)`);
  const listed = (field: string | undefined) =>
    field !== undefined && regex.test(field);
  return entries.filter(
    (e) => (listed(e.ppp) || listed(e.app)) && isNounEntry(e)
  ) as T.NounEntry[];
}
/**
 * A DictionaryAPI backed by the generated mini dictionary entries, for use in
 * tests. The lookups are the plain functions defined in this file;
 * `initialize` and `update` are stubs that do not touch any database.
 */
export const testDictionary: DictionaryAPI = {
// @ts-expect-error we won't mock the initialization
initialize: async () => 0,
// @ts-expect-error not perfect mocking because won't need that
update: async () => ({ response: "updated" }),
queryP,
adjLookup,
nounLookup,
otherLookup,
specialPluralLookup,
};

View File

@ -0,0 +1,289 @@
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { parseAdjective } from "./parse-adjective-new";
import { tokenizer } from "./tokenizer";
import { testDictionary } from "./mini-test-dictionary";
// Adjective entries used as fixtures below: the first match of each lookup
// in the test dictionary.
const [khufa] = testDictionary.adjLookup("خفه");
const [ghut] = testDictionary.adjLookup("غټ");
const [sturey] = testDictionary.adjLookup("ستړی");
const [naray] = testDictionary.adjLookup("نری");
const [zor] = testDictionary.adjLookup("زوړ");
const [sheen] = testDictionary.adjLookup("شین");
/** One expected reading of an adjective token (without the `given` field). */
type ExpectedAdjParse = {
  inflection: (0 | 1 | 2)[];
  gender: T.Gender[];
  selection: T.AdjectiveSelection;
};

/** Builds one expected reading for `entry` with the given inflections/genders. */
function adjParse(
  entry: T.AdjectiveEntry,
  inflection: (0 | 1 | 2)[],
  gender: T.Gender[]
): ExpectedAdjParse {
  return { selection: makeAdjectiveSelection(entry), inflection, gender };
}

// Input tokens and the readings the adjective parser is expected to return,
// grouped by category.
const tests: {
  category: string;
  cases: {
    input: string;
    output: ExpectedAdjParse[];
  }[];
}[] = [
  {
    category: "pattern 1",
    cases: [
      { input: "غټ", output: [adjParse(ghut, [0, 1], ["masc"])] },
      { input: "غټه", output: [adjParse(ghut, [0], ["fem"])] },
      { input: "غټې", output: [adjParse(ghut, [1], ["fem"])] },
      { input: "غټو", output: [adjParse(ghut, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 2",
    cases: [
      { input: "ستړی", output: [adjParse(sturey, [0], ["masc"])] },
      { input: "ستړې", output: [adjParse(sturey, [0, 1], ["fem"])] },
      { input: "ستړو", output: [adjParse(sturey, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 3",
    cases: [
      { input: "نری", output: [adjParse(naray, [0], ["masc"])] },
      { input: "نري", output: [adjParse(naray, [1], ["masc"])] },
      { input: "نرۍ", output: [adjParse(naray, [0, 1], ["fem"])] },
      { input: "نرو", output: [adjParse(naray, [2], ["masc", "fem"])] },
      { input: "نریو", output: [adjParse(naray, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "non-inflecting",
    cases: [
      {
        input: "خفه",
        output: [adjParse(khufa, [0, 1, 2], ["masc", "fem"])],
      },
    ],
  },
  {
    category: "pattern 4",
    cases: [
      { input: "زوړ", output: [adjParse(zor, [0], ["masc"])] },
      { input: "زاړه", output: [adjParse(zor, [1], ["masc"])] },
      { input: "زړه", output: [adjParse(zor, [0], ["fem"])] },
      { input: "زړې", output: [adjParse(zor, [1], ["fem"])] },
      { input: "زړو", output: [adjParse(zor, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 5",
    cases: [
      { input: "شین", output: [adjParse(sheen, [0], ["masc"])] },
      {
        input: "شنه",
        output: [
          adjParse(sheen, [1], ["masc"]),
          adjParse(sheen, [0], ["fem"]),
        ],
      },
      { input: "شنو", output: [adjParse(sheen, [2], ["masc", "fem"])] },
    ],
  },
];
// One jest test per category; every case checks that the bodies returned by
// parseAdjective equal the expected readings plus the `given` input string.
describe("parsing adjectives", () => {
  for (const { category, cases } of tests) {
    test(category, () => {
      for (const { input, output } of cases) {
        const parsed = parseAdjective(tokenizer(input), testDictionary);
        const possibilities = parsed.map(({ body }) => body);
        const expectation = output.map((o) => ({ ...o, given: input }));
        expect(possibilities).toEqual(expectation);
      }
    });
  }
});

View File

@ -0,0 +1,34 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { fmapParseResult } from "../fp-ps";
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as tp from "../type-predicates";
import { parseInflectableWord } from "./parse-inflectable-word";
/**
 * Parses the leading token(s) as an adjective.
 *
 * Delegates to `parseInflectableWord` restricted to adjective entries and
 * replaces the matched entry in each result with an adjective selection.
 *
 * @param tokens - the tokens still to be parsed
 * @param dictionary - the dictionary used for lookups
 * @returns every possible reading; empty when there are no tokens
 */
export function parseAdjective(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<{
  inflection: (0 | 1 | 2)[];
  gender: T.Gender[];
  given: string;
  selection: T.AdjectiveSelection;
}>[] {
  if (!tokens.length) {
    return [];
  }
  const wordParses = parseInflectableWord(
    tokens,
    dictionary,
    tp.isAdjectiveEntry
  );
  return fmapParseResult(
    ({ inflection, gender, given, entry }) => ({
      inflection,
      gender,
      given,
      selection: makeAdjectiveSelection(entry as T.AdjectiveEntry),
    }),
    wordParses
  );
}

View File

@ -0,0 +1,261 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { andSuccTp } from "../fp-ps";
import { pashtoConsonants } from "../pashto-consonants";
import * as tp from "../type-predicates";
import { returnParseResults } from "./utils";
// Body type of the parse results returned by the feminine noun parsers below:
// T.InflectableBaseParse specialised to T.FemNounEntry.
type FemNounBaseParse = T.InflectableBaseParse<T.FemNounEntry>;
/**
 * Parses the leading token as a feminine noun.
 *
 * Runs every feminine-noun sub-parser on the same tokens and concatenates
 * their results, in a fixed order.
 *
 * @returns every possible reading; empty when there are no tokens
 */
export function parseFemNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
  if (!tokens.length) {
    return [];
  }
  return [
    ...plainPlural(tokens, dictionary),
    ...parsePattern1(tokens, dictionary),
    ...parsePattern2(tokens, dictionary),
    ...parsePattern3(tokens, dictionary),
    ...parseEeEnding(tokens, dictionary),
  ];
}
/**
 * Matches the first token against feminine nouns that are plural entries.
 *
 * The token itself is looked up and reported with inflection 0. When the
 * token ends in و, the token itself and the token with its last letter
 * replaced by ه or by ې are also looked up, and reported with inflection 2.
 */
function plainPlural(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const given = first.s;
  const lookupPlurals = (p: string) =>
    dictionary
      .queryP(p)
      .filter(andSuccTp(tp.isFemNounEntry, tp.isPluralNounEntry));
  // turns a matched entry into a parse body with the given inflection numbers
  const toParse =
    (...inflection: FemNounBaseParse["inflection"]) =>
    (entry: T.FemNounEntry): FemNounBaseParse => ({
      inflection: [...inflection],
      gender: ["fem"],
      entry,
      given,
    });
  const found: FemNounBaseParse[] = [];
  for (const entry of lookupPlurals(given)) {
    found.push(toParse(0)(entry));
  }
  if (given.endsWith("و")) {
    const base = given.slice(0, -1);
    for (const guess of [given, base + "ه", base + "ې"]) {
      for (const entry of lookupPlurals(guess)) {
        found.push(toParse(2)(entry));
      }
    }
  }
  return returnParseResults(rest, found);
}
/**
 * Matches the first token against feminine pattern 1 nouns.
 *
 * - token ending in ه: looked up as is, inflection 0
 * - token ending in a Pashto consonant: looked up as is, inflection 0
 * - token ending in ې: looked up with the last letter replaced by ه,
 *   inflection 1
 * - token ending in و: looked up with the last letter replaced by ه,
 *   inflection 2
 */
function parsePattern1(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const word = first.s;
  // looks up `p` and labels every fem. pattern 1 match with `inflection`
  const readings = (
    p: string,
    inflection: 0 | 1 | 2
  ): FemNounBaseParse[] =>
    dictionary
      .queryP(p)
      .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern1Entry))
      .map<FemNounBaseParse>((entry) => ({
        inflection: [inflection],
        gender: ["fem"],
        entry,
        given: word,
      }));
  const found: FemNounBaseParse[] = [];
  if (word.endsWith("ه")) {
    found.push(...readings(word, 0));
  }
  if (pashtoConsonants.includes(word[word.length - 1])) {
    found.push(...readings(word, 0));
  }
  if (word.endsWith("ې")) {
    found.push(...readings(word.slice(0, -1) + "ه", 1));
  }
  if (word.endsWith("و")) {
    found.push(...readings(word.slice(0, -1) + "ه", 2));
  }
  return returnParseResults(rest, found);
}
/**
 * Matches the first token against feminine pattern 2 nouns.
 *
 * - token ending in ې: looked up as is among singular fem. pattern 2 nouns,
 *   inflections 0 and 1
 * - token ending in و: the dictionary form is guessed by replacing the
 *   و (or the whole یو ending) with ې, inflection 2
 *
 * @returns every possible reading; empty when nothing matches
 */
function parsePattern2(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  if (first.s.endsWith("ې")) {
    return dictionary
      .queryP(first.s)
      .filter(
        andSuccTp(
          andSuccTp(tp.isFemNounEntry, tp.isPattern2Entry),
          tp.isSingularEntry
        )
      )
      .map((entry) => ({
        tokens: rest,
        body: {
          inflection: [0, 1],
          gender: ["fem"],
          entry,
          given: first.s,
        },
        errors: [],
      }));
  } else if (first.s.endsWith("و")) {
    // Both endings are replaced by ې, in line with how parsePattern3 and
    // parseEeEnding strip the full یو ending before re-adding their own
    // final letter. Previously only the و was removed from a یو ending, so
    // the guess ended in ی.
    const eGuess = first.s.endsWith("یو")
      ? first.s.slice(0, -2) + "ې"
      : first.s.slice(0, -1) + "ې";
    return dictionary
      .queryP(eGuess)
      .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern2Entry))
      .map((entry) => ({
        tokens: rest,
        body: {
          inflection: [2],
          gender: ["fem"],
          entry,
          given: first.s,
        },
        errors: [],
      }));
  }
  return [];
}
/**
 * Matches the first token against feminine pattern 3 nouns.
 *
 * - token ending in ۍ: looked up as is among singular fem. pattern 3 nouns,
 *   inflections 0 and 1
 * - token ending in و: the dictionary form is guessed by replacing the
 *   و (or the whole یو ending) with ۍ, inflection 2
 */
function parsePattern3(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const word = first.s;
  // wraps a matched entry into a parse result with the given inflections
  const toResult =
    (...inflection: FemNounBaseParse["inflection"]) =>
    (entry: T.FemNounEntry): T.ParseResult<FemNounBaseParse> => ({
      tokens: rest,
      body: {
        inflection: [...inflection],
        gender: ["fem"],
        entry,
        given: word,
      },
      errors: [],
    });
  if (word.endsWith("ۍ")) {
    const isSingularP3 = andSuccTp(
      andSuccTp(tp.isFemNounEntry, tp.isPattern3Entry),
      tp.isSingularEntry
    );
    return dictionary
      .queryP(word)
      .filter(isSingularP3)
      .map((entry) => toResult(0, 1)(entry));
  }
  if (word.endsWith("و")) {
    const stem = word.endsWith("یو") ? word.slice(0, -2) : word.slice(0, -1);
    return dictionary
      .queryP(stem + "ۍ")
      .filter(andSuccTp(tp.isFemNounEntry, tp.isPattern3Entry))
      .map((entry) => toResult(2)(entry));
  }
  return [];
}
/**
 * Matches the first token against feminine pattern 6 nouns, whose dictionary
 * form ends in ي.
 *
 * - token ending in ي: looked up as is, inflection 0
 * - token ending in ۍ: looked up with the last letter replaced by ي,
 *   inflection 1
 * - token ending in و: looked up with the و (or the whole یو ending)
 *   replaced by ي, inflection 2
 */
function parseEeEnding(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<FemNounBaseParse>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const word = first.s;
  // looks up `p` and labels every fem. pattern 6 match with `inflection`
  const readings = (
    p: string,
    inflection: 0 | 1 | 2
  ): T.ParseResult<FemNounBaseParse>[] =>
    dictionary
      .queryP(p)
      .filter(tp.isPattern6FemEntry)
      .map((entry) => ({
        tokens: rest,
        body: {
          inflection: [inflection],
          gender: ["fem"],
          entry,
          given: word,
        },
        errors: [],
      }));
  if (word.endsWith("ي")) {
    return readings(word, 0);
  }
  if (word.endsWith("ۍ")) {
    return readings(word.slice(0, -1) + "ي", 1);
  }
  if (word.endsWith("و")) {
    const stem = word.endsWith("یو") ? word.slice(0, -2) : word.slice(0, -1);
    return readings(stem + "ي", 2);
  }
  return [];
}

View File

@ -0,0 +1,289 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { andSuccTp, orTp } from "../fp-ps";
import * as tp from "../type-predicates";
/**
 * Parses the leading token as an inflectable word of the kind accepted by
 * `tpf`.
 *
 * Runs every pattern parser on the same tokens and concatenates their
 * results, in a fixed order.
 *
 * @param tokens - the tokens still to be parsed
 * @param dictionary - the dictionary used for lookups
 * @param tpf - type predicate selecting which dictionary entries qualify
 * @returns every possible reading; empty when there are no tokens
 */
export function parseInflectableWord<W extends T.InflectableEntry>(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI,
  tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
  if (!tokens.length) {
    return [];
  }
  return [
    ...parseNonInflecting(tokens, dictionary, tpf),
    ...parsePattern1(tokens, dictionary, tpf),
    ...parsePattern2or3(tokens, dictionary, tpf),
    ...parsePattern4or5(tokens, dictionary, tpf),
  ];
}
/**
 * Matches the first token, exactly as given, against non-inflecting entries
 * accepted by `tpf`.
 *
 * A match is reported for both genders; noun entries get inflections 0 and
 * 1, every other entry gets inflections 0, 1 and 2.
 */
function parseNonInflecting<W extends T.InflectableEntry>(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI,
  tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const given = first.s;
  const isNonInflecting = andSuccTp(tpf, tp.isNonInflectingEntry);
  const results: T.ParseResult<T.InflectableBaseParse<W>>[] = [];
  for (const entry of dictionary.queryP(given).filter(isNonInflecting)) {
    results.push({
      tokens: rest,
      body: {
        inflection: tp.isNounEntry(entry) ? [0, 1] : [0, 1, 2],
        gender: ["masc", "fem"],
        entry,
        given,
      },
      errors: [],
    });
  }
  return results;
}
/**
 * Matches the first token against pattern 1 entries accepted by `tpf`,
 * leaving out entries whose `c` includes "fam.".
 *
 * - token as is: masculine, inflections 0 and 1 (only 0 when the entry's `c`
 *   includes "pl.")
 * - token ending in ه: last letter dropped, feminine, inflection 0
 * - token ending in ې: last letter dropped, feminine, inflection 1
 * - token ending in و: last letter dropped, and also last letter replaced
 *   by ه; both genders, inflection 2
 */
function parsePattern1<W extends T.InflectableEntry>(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI,
  tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
  if (tokens.length === 0) {
    return [];
  }
  type Parse = T.InflectableBaseParse<W>;
  const [first, ...rest] = tokens;
  const word = first.s;
  const stem = word.slice(0, -1);
  const lookup = (p: string) =>
    dictionary
      .queryP(p)
      .filter(
        (e) => tpf(e) && tp.isPattern1Entry(e) && !e.c.includes("fam.")
      ) as T.Pattern1Entry<W>[];
  // wraps a matched entry into a parse result
  const toResult = (
    entry: T.Pattern1Entry<W>,
    inflection: Parse["inflection"],
    gender: Parse["gender"]
  ): T.ParseResult<Parse> => ({
    tokens: rest,
    body: { inflection, gender, entry, given: word },
    errors: [],
  });
  const found: T.ParseResult<Parse>[] = [];
  for (const entry of lookup(word)) {
    found.push(
      toResult(entry, entry.c.includes("pl.") ? [0] : [0, 1], ["masc"])
    );
  }
  if (word.endsWith("ه")) {
    for (const entry of lookup(stem)) {
      found.push(toResult(entry, [0], ["fem"]));
    }
  }
  if (word.endsWith("ې")) {
    for (const entry of lookup(stem)) {
      found.push(toResult(entry, [1], ["fem"]));
    }
  }
  if (word.endsWith("و")) {
    for (const entry of [...lookup(stem), ...lookup(stem + "ه")]) {
      found.push(toResult(entry, [2], ["masc", "fem"]));
    }
  }
  return found;
}
/**
 * Reads the first token as a pattern 2 or pattern 3 word, choosing the
 * reading from the token's final letter:
 *  - ی: masc plain (looked up as written)
 *  - ي: masc first inflection (looked up with the ending swapped to ی)
 *  - ې: fem plain / first inflection of a pattern 2 entry
 *  - ۍ: fem plain / first inflection of a pattern 3 entry
 *  - و: second inflection, either gender
 * Any other ending yields no results.
 */
function parsePattern2or3<W extends T.InflectableEntry>(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI,
  tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const withoutLast = word.slice(0, -1);
  const isP2or3 = andSuccTp(
    tpf,
    orTp(tp.isPattern2Entry, tp.isPattern3Entry)
  );
  // wraps matched entries as parse results consuming the first token
  const toResults = (
    entries: W[],
    inflection: (0 | 1 | 2)[],
    gender: T.Gender[]
  ): T.ParseResult<T.InflectableBaseParse<W>>[] =>
    entries.map((entry) => ({
      tokens: remaining,
      body: {
        inflection: [...inflection],
        gender: [...gender],
        entry,
        given: word,
      },
      errors: [],
    }));
  switch (word.slice(-1)) {
    case "ی":
      return toResults(dictionary.queryP(word).filter(isP2or3), [0], ["masc"]);
    case "ي":
      return toResults(
        dictionary.queryP(withoutLast + "ی").filter(isP2or3),
        [1],
        ["masc"]
      );
    case "ې":
      return toResults(
        dictionary
          .queryP(withoutLast + "ی")
          .filter(andSuccTp(tpf, tp.isPattern2Entry)),
        [0, 1],
        ["fem"]
      );
    case "ۍ":
      return toResults(
        dictionary
          .queryP(withoutLast + "ی")
          .filter(andSuccTp(tpf, tp.isPattern3Entry)),
        [0, 1],
        ["fem"]
      );
    case "و": {
      // when the ی is still written before the و, only the و is dropped;
      // otherwise the ی has to be put back to find the entry
      const guess = word.endsWith("یو") ? withoutLast : withoutLast + "ی";
      return toResults(
        dictionary.queryP(guess).filter(isP2or3),
        [2],
        ["masc", "fem"]
      );
    }
    default:
      return [];
  }
}
/**
 * Reads the first token as a pattern 4 or pattern 5 word.
 *
 * Readings produced, in this order:
 *  - the token as written: masc plain
 *  - token ending in ه: masc first inflection (matched against `infap`) and
 *    fem plain (the token minus its last letter matched against `infbp`)
 *  - token ending in ې: fem first inflection (`infbp` + ې)
 *  - token ending in و: second inflection, either gender (`infbp` + و)
 */
function parsePattern4or5<W extends T.InflectableEntry>(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI,
  tpf: (e: T.DictionaryEntry) => e is W
): T.ParseResult<T.InflectableBaseParse<W>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const withoutLast = word.slice(0, -1);
  const isP4or5 = andSuccTp(
    tpf,
    orTp(tp.isPattern4Entry, tp.isPattern5Entry)
  );
  // wraps matched entries as parse results consuming the first token
  const toResults = (
    entries: W[],
    inflection: (0 | 1 | 2)[],
    gender: T.Gender[]
  ): T.ParseResult<T.InflectableBaseParse<W>>[] =>
    entries.map((entry) => ({
      tokens: remaining,
      body: {
        inflection: [...inflection],
        gender: [...gender],
        entry,
        given: word,
      },
      errors: [],
    }));
  const results = toResults(
    dictionary.queryP(word).filter(isP4or5),
    [0],
    ["masc"]
  );
  if (word.endsWith("ه")) {
    results.push(
      ...toResults(
        dictionary.otherLookup("infap", word).filter(isP4or5),
        [1],
        ["masc"]
      ),
      ...toResults(
        dictionary.otherLookup("infbp", withoutLast).filter(isP4or5),
        [0],
        ["fem"]
      )
    );
  }
  if (word.endsWith("ې")) {
    results.push(
      ...toResults(
        dictionary.otherLookup("infbp", withoutLast).filter(isP4or5),
        [1],
        ["fem"]
      )
    );
  }
  if (word.endsWith("و")) {
    results.push(
      ...toResults(
        dictionary.otherLookup("infbp", withoutLast).filter(isP4or5),
        [2],
        ["masc", "fem"]
      )
    );
  }
  return results;
}

View File

@ -0,0 +1,77 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import * as tp from "../type-predicates";
import { returnParseResults } from "./utils";
/**
 * Reads the first token as an irregular ("special") plural found through
 * `dictionary.specialPluralLookup`.
 *
 * Readings tried:
 *  - the token as written: plain plural
 *  - token ending in و: inflected plural, looked up without the و and with
 *    an optional ه / ې / ع in its place
 *  - token ending in وو: inflected plural of an entry whose `app` ends in ا
 *  - a following separate token "وو": same as above, consuming both tokens
 *
 * Gender is "fem" when the entry is a feminine noun entry, otherwise "masc".
 */
export function parseIrregularPlural(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const describe = (
    entry: T.NounEntry,
    inflected: boolean
  ): T.ParsedNounWord<T.NounEntry> => ({
    entry,
    gender: tp.isFemNounEntry(entry) ? "fem" : "masc",
    inflected,
    given: word,
    plural: true,
  });
  const words: T.ParsedNounWord<T.NounEntry>[] = dictionary
    .specialPluralLookup(word)
    .filter(tp.isNounEntry)
    .map<T.ParsedNounWord<T.NounEntry>>((entry) => describe(entry, false));
  if (word.endsWith("و")) {
    words.push(
      ...dictionary
        .specialPluralLookup(word.slice(0, -1) + "(ه|ې|ع)?")
        .filter(tp.isNounEntry)
        .map<T.ParsedNounWord<T.NounEntry>>((entry) => describe(entry, true))
    );
  }
  if (word.endsWith("وو")) {
    words.push(
      ...dictionary
        .specialPluralLookup(word.slice(0, -2))
        .filter((e) => tp.isNounEntry(e) && e.app?.endsWith("ا"))
        .map<T.ParsedNounWord<T.NounEntry>>((entry) => describe(entry, true))
    );
  }
  // NOTE(review): `given` here is only the first token, although the "وو"
  // token is consumed too; parseWeEndingNoun joins both tokens in the
  // separated case - confirm which is intended
  const separated =
    tokens.length >= 2 && tokens[1].s === "وو"
      ? returnParseResults(
          tokens.slice(2),
          dictionary
            .specialPluralLookup(word)
            .filter((e) => tp.isNounEntry(e) && e.app?.endsWith("ا"))
            .map<T.ParsedNounWord<T.NounEntry>>((entry) =>
              describe(entry, true)
            )
        )
      : [];
  return [...returnParseResults(remaining, words), ...separated];
}

View File

@ -0,0 +1,103 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { makeNounSelection } from "../phrase-building/make-selections";
import { parseAdjective } from "./parse-adjective-new";
import { parseNounWord } from "./parse-noun-word";
import { bindParseResult } from "./utils";
/**
 * What parseNoun yields for each reading: the noun selection that was built
 * and whether the noun word itself was given in an inflected form.
 */
type NounResult = { inflected: boolean; selection: T.NounSelection };
// ISSUES - fem nouns like ښځه کتابچه not working
// زاړه مېلمانه adjective agreement problem
/**
 * Parses a noun, optionally preceded by adjectives.
 *
 * Two routes are tried from the current position: reading a noun word
 * straight away, and reading one more adjective and then recursing with it
 * appended to `adjectives`. Noun readings come first in the returned list.
 * Adjectives that do not agree with the noun do not discard a reading; they
 * are reported in that result's `errors`.
 *
 * @param tokens - the tokens still to be parsed
 * @param dictionary - dictionary used for the lookups
 * @param possesor - possessor to attach to the resulting selection, if any
 * @param adjectives - adjectives already parsed in front of the noun
 */
export function parseNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI,
  possesor: T.PossesorSelection | undefined,
  adjectives: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    given: string;
    selection: T.AdjectiveSelection;
  }[]
): T.ParseResult<NounResult>[] {
  if (!tokens.length) {
    return [];
  }
  // TODO: add recognition of او between adjectives
  const afterAnotherAdjective = bindParseResult(
    parseAdjective(tokens, dictionary),
    (remaining, adjective) =>
      parseNoun(remaining, dictionary, possesor, [...adjectives, adjective])
  );
  const bareNouns = bindParseResult(
    parseNounWord(tokens, dictionary),
    (remaining, word) => {
      // check the collected adjectives against this noun reading
      const agreement = adjsMatch(
        adjectives,
        word.gender,
        word.inflected ? 1 : 0,
        word.plural
      );
      const baseSelection = makeNounSelection(word.entry, undefined);
      const body: NounResult = {
        inflected: word.inflected,
        selection: {
          ...baseSelection,
          gender: word.gender,
          number: word.plural ? "plural" : "singular",
          adjectives: adjectives.map((a) => a.selection),
          possesor,
        },
      };
      return [
        {
          body,
          tokens: remaining,
          errors: agreement.error.map((message) => ({ message })),
        },
      ];
    }
  );
  return [...bareNouns, ...afterAnotherAdjective];
}
/**
 * Checks that every adjective can agree with a noun of the given gender,
 * inflection and number.
 *
 * @param adjectives - the parsed adjectives with the genders/inflections
 * each given form could stand for
 * @param gender - gender of the noun reading
 * @param inf - inflection of the noun word itself
 * @param plural - whether the noun reading is plural
 * @returns `ok: true` with no errors when all adjectives agree; otherwise
 * `ok: false` and one message per disagreeing adjective
 */
function adjsMatch(
  adjectives: Parameters<typeof parseNoun>[3],
  gender: T.Gender,
  inf: 0 | 1 | 2,
  plural: boolean | undefined
): { ok: boolean; error: string[] } {
  // for a plural noun the adjective is expected one inflection step higher
  const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
  const inflectionFits = (adj: (typeof adjectives)[number]): boolean =>
    adj.inflection.some((i) => i === inflection);
  const unmatching = adjectives.filter(
    (adj) => !adj.gender.includes(gender) || !inflectionFits(adj)
  );
  if (unmatching.length === 0) {
    return {
      ok: true,
      error: [],
    };
  }
  return {
    ok: false,
    error: unmatching.map((adj) => {
      // show the dictionary form too when the given form differs from it
      const adjText =
        adj.given === adj.selection.entry.p
          ? adj.given
          : `${adj.given} (${adj.selection.entry.p})`;
      // name the inflection only when the inflection is part of the problem
      const expected = inflectionFits(adj)
        ? gender
        : `${showInflection(inflection)} ${gender}`;
      return `Adjective agreement error: ${adjText} should be ${expected}.`;
    }),
  };
}
/** Gives the human-readable name of an inflection number for messages. */
function showInflection(inf: 0 | 1 | 2): string {
  switch (inf) {
    case 0:
      return "plain";
    case 1:
      return "first inflection";
    default:
      return "second inflection";
  }
}

View File

@ -0,0 +1,117 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import { fFlatMapParseResult } from "../fp-ps";
import { getInflectionPattern } from "../inflection-pattern";
import { makeNounSelection } from "../phrase-building/make-selections";
import { isNounEntry, isPluralNounEntry } from "../type-predicates";
import { parseInflectableWord } from "./parse-inflectable-word";
import { parseFemNoun } from "./parse-fem-noun";
import { parsePluralEndingNoun } from "./parse-plural-ending-noun";
import { parseIrregularPlural } from "./parse-irregular-plural";
/**
 * Parses a single noun word from the front of `tokens`.
 *
 * Combines, in this order: readings derived from the inflectable-word and
 * fem-noun base parses (expanded into concrete number/inflection readings),
 * readings found through plural endings, and irregular plurals.
 */
export function parseNounWord(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const fromInflectableBases = fFlatMapParseResult(
    inflectableBaseParseToNounWordResults,
    [
      ...parseInflectableWord(tokens, dictionary, isNounEntry),
      ...parseFemNoun(tokens, dictionary),
    ]
  );
  const fromPluralEndings = parsePluralEndingNoun(tokens, dictionary);
  const fromIrregularPlurals = parseIrregularPlural(tokens, dictionary);
  return [
    ...fromInflectableBases,
    ...fromPluralEndings,
    ...fromIrregularPlurals,
  ];
}
/**
 * Expands one base parse (an entry with the genders and inflection numbers
 * the given form could stand for) into concrete noun-word readings.
 *
 * A gender is kept only if the noun's selection allows its gender to change
 * or already has that gender. Each remaining gender/inflection pair is then
 * turned into its possible inflected/number readings by convertInflection.
 */
function inflectableBaseParseToNounWordResults<N extends T.NounEntry>(
  wr: T.InflectableBaseParse<N>
): T.ParsedNounWord<N>[] {
  const selection = makeNounSelection(wr.entry, undefined);
  const readings: T.ParsedNounWord<N>[] = [];
  for (const gender of wr.gender) {
    if (!selection.genderCanChange && selection.gender !== gender) {
      continue;
    }
    for (const inflection of wr.inflection) {
      for (const { inflected, number } of convertInflection(
        inflection,
        wr.entry,
        gender
      )) {
        readings.push({
          inflected,
          plural: number === "plural",
          gender,
          given: wr.given,
          entry: wr.entry,
        });
      }
    }
  }
  return readings;
}
/**
 * Translates an inflection number found on a noun form into the
 * inflected/number readings it can represent.
 *
 * For entries that are plural nouns the inflection is first shifted up by
 * one (capped at 2). Then:
 *  - 0 is a plain singular
 *  - 1 may be an inflected singular and/or a plain plural, depending on the
 *    entry's pattern, ending and gender
 *  - 2 is an inflected plural
 */
function convertInflection(
  inflection: 0 | 1 | 2,
  entry: T.NounEntry,
  gender: T.Gender
): {
  inflected: boolean;
  number: T.NounNumber;
}[] {
  const plural = isPluralNounEntry(entry);
  const pattern = getInflectionPattern(entry);
  const inf = (plural && inflection < 2 ? inflection + 1 : inflection) as
    | 0
    | 1
    | 2;
  switch (inf) {
    case 0:
      return [
        {
          inflected: false,
          number: "singular",
        },
      ];
    case 1: {
      const readings: { inflected: boolean; number: T.NounNumber }[] = [];
      // inflected singular: not for plural entries, and not for masc
      // pattern 4 entries that already end in ه
      if (
        !plural &&
        !(pattern === 4 && entry.p.endsWith("ه") && gender === "masc")
      ) {
        readings.push({ inflected: true, number: "singular" });
      }
      // plain plural
      if (
        pattern > 1 ||
        (pattern > 0 && gender === "fem") ||
        (isNounEntry(entry) && isPluralNounEntry(entry)) ||
        plural
      ) {
        readings.push({ inflected: false, number: "plural" });
      }
      return readings;
    }
    default:
      return [
        {
          inflected: true,
          number: "plural",
        },
      ];
  }
}

View File

@ -0,0 +1,620 @@
import * as T from "../../../types";
import { DictionaryAPI } from "../dictionary/dictionary";
import {
endsInAaOrOo,
endsInConsonant,
endsWith,
hasShwaEnding,
} from "../p-text-helpers";
import * as tp from "../type-predicates";
import { returnParseResults } from "./utils";
/**
 * Tries to read the first token (and for some endings the token after it)
 * as a noun carrying one of the plural endings, running each ending-specific
 * parser and concatenating their results.
 */
export function parsePluralEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  // TODO: should maybe differentiate animate and inanimate aan endings ?
  return [
    // parseSpecialPlural,
    ...parseOonaEndingNoun(tokens, dictionary),
    ...parseAanEndingNoun(tokens, dictionary),
    ...parseAaneEndingNoun(tokens, dictionary),
    ...parseGaanEndingNoun(tokens, dictionary),
    ...parseGaaneEndingNoun(tokens, dictionary),
    ...parseWeEndingNoun(tokens, dictionary),
    ...parseIYaanEndingNoun(tokens, dictionary),
    ...parseIYaaneEndingNoun(tokens, dictionary),
  ];
}
// function parseSpecialPlural(
// tokens: Readonly<T.Token[]>,
// dictionary: DictionaryAPI
// ): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
// if (tokens.length === 0) {
// return [];
// }
// const [first, ...rest] = tokens;
// if (first.s.endsWith("یانې")) {
// const withoutIYaane = first.s.slice(0, -4);
// const eeEnding = dictionary
// .queryP(withoutIYaane + "ي")
// .filter(tp.isFemNounEntry);
// const pattern3UnisexAnim = dictionary
// .queryP(withoutIYaane + "ی")
// .filter(
// (e) => tp.isUnisexNounEntry(e) && tp.isPattern3Entry(e)
// ) as T.MascNounEntry[];
// const pattern3Fem = dictionary
// .queryP(withoutIYaane + "ۍ")
// .filter((e) => tp.isFemNounEntry(e) && tp.isPattern3Entry(e));
// return returnParseResults(
// rest,
// [...eeEnding, ...pattern3UnisexAnim, ...pattern3Fem].map<
// T.ParsedNounWord<T.MascNounEntry | T.FemNounEntry>
// >((entry) => ({
// inflected: false,
// gender: "masc",
// given: first.s,
// plural: true,
// entry,
// }))
// );
// }
// if (first.s.endsWith("یانو")) {
// const withoutIYaano = first.s.slice(0, -4);
// const eeEnding = dictionary
// .queryP(withoutIYaano + "ي")
// .filter((e) => tp.isFemNounEntry(e) && tp.isSingularEntry(e));
// const pattern3Anim = dictionary
// .queryP(withoutIYaano + "ۍ")
// .filter((e) => tp.isFemNounEntry(e) && tp.isPattern3Entry(e));
// return returnParseResults(
// rest,
// [...eeEnding, ...pattern3Anim].map<T.ParsedNounWord<T.FemNounEntry>>(
// (entry) => ({
// inflected: true,
// gender: "masc",
// given: first.s,
// plural: true,
// entry,
// })
// )
// );
// }
// return [];
// }
/**
 * Reads a token ending in ونه (plain) or ونو (inflected) as the plural of a
 * singular masculine noun that has no `ppp` of its own.
 *
 * The stem is looked up both as written (entry must end in a consonant) and
 * with ه added back (entry must have a shwa ending).
 */
function parseOonaEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const inflected = word.endsWith("ونو");
  if (!inflected && !word.endsWith("ونه")) {
    return [];
  }
  const stem = word.slice(0, -3);
  const lookup = (
    p: string,
    hasRightEnding: (e: T.MascNounEntry) => boolean
  ) =>
    dictionary
      .queryP(p)
      .filter(
        (e) =>
          tp.isMascNounEntry(e) &&
          hasRightEnding(e) &&
          !e.ppp &&
          tp.isSingularEntry(e)
      ) as T.MascNounEntry[];
  const entries = [
    ...lookup(stem, endsInConsonant),
    ...lookup(stem + "ه", hasShwaEnding),
  ];
  return returnParseResults(
    remaining,
    entries.map<T.ParsedNounWord<T.MascNounEntry>>((entry) => ({
      inflected,
      gender: "masc",
      given: word,
      plural: true,
      entry,
    }))
  );
}
/**
 * Reads a token ending in ان (plain) or انو (inflected) as the plural of a
 * singular masculine noun.
 *
 * The stem is looked up as written (consonant-ending entries) and with ه
 * added back (shwa-ending entries). For the inflected ending, entries that
 * are unisex animate nouns additionally get a feminine reading.
 */
function parseAanEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const entriesFor = (stem: string): T.MascNounEntry[] => [
    ...(dictionary
      .queryP(stem)
      .filter(
        (e) =>
          tp.isMascNounEntry(e) && endsInConsonant(e) && tp.isSingularEntry(e)
      ) as T.MascNounEntry[]),
    ...(dictionary
      .queryP(stem + "ه")
      .filter(
        (e) =>
          tp.isMascNounEntry(e) && hasShwaEnding(e) && tp.isSingularEntry(e)
      ) as T.MascNounEntry[]),
  ];
  if (word.endsWith("ان")) {
    return returnParseResults(
      remaining,
      entriesFor(word.slice(0, -2)).map<T.ParsedNounWord<T.MascNounEntry>>(
        (entry) => ({
          inflected: false,
          gender: "masc",
          given: word,
          plural: true,
          entry,
        })
      )
    );
  }
  if (word.endsWith("انو")) {
    const words: T.ParsedNounWord<T.MascNounEntry>[] = [];
    for (const entry of entriesFor(word.slice(0, -3))) {
      words.push({
        inflected: true,
        gender: "masc",
        given: word,
        plural: true,
        entry,
      });
      if (tp.isUnisexAnimNounEntry(entry)) {
        words.push({
          inflected: true,
          gender: "fem",
          given: word,
          plural: true,
          entry,
        });
      }
    }
    return returnParseResults(remaining, words);
  }
  return [];
}
/**
 * Reads a token ending in انې as the plain feminine plural of a unisex
 * animate noun, found by looking the token up without that ending.
 */
function parseAaneEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.NounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  if (!word.endsWith("انې")) {
    return [];
  }
  const words = dictionary
    .queryP(word.slice(0, -3))
    .filter(tp.isUnisexAnimNounEntry)
    .map<T.ParsedNounWord<T.UnisexAnimNounEntry>>((entry) => ({
      inflected: false,
      gender: "fem",
      given: word,
      plural: true,
      entry,
    }));
  return returnParseResults(remaining, words);
}
/**
 * Reads the plural ending ګان (plain) / ګانو (inflected) on a singular
 * masculine noun whose entry ends in aa or oo.
 *
 * The ending may be attached to the first token, or written as a separate
 * second token; in the separate case both tokens are consumed and `given`
 * is the two joined by a space. An attached ending takes precedence.
 */
function parseGaanEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const plurals = (
    query: string,
    inflected: boolean,
    given: string
  ): T.ParsedNounWord<T.MascNounEntry>[] =>
    dictionary
      .queryP(query)
      .filter(
        (e) =>
          tp.isMascNounEntry(e) && endsInAaOrOo(e) && tp.isSingularEntry(e)
      )
      .map<T.ParsedNounWord<T.MascNounEntry>>((entry) => ({
        inflected,
        gender: "masc",
        given,
        plural: true,
        entry: entry as T.MascNounEntry,
      }));
  if (word.endsWith("ګان")) {
    return returnParseResults(
      remaining,
      plurals(word.slice(0, -3), false, word)
    );
  }
  if (word.endsWith("ګانو")) {
    return returnParseResults(
      remaining,
      plurals(word.slice(0, -4), true, word)
    );
  }
  if (tokens.length < 2) {
    return [];
  }
  // ending written as its own token
  const ending = tokens[1];
  const afterEnding = tokens.slice(2);
  if (ending.s === "ګان") {
    return returnParseResults(
      afterEnding,
      plurals(word, false, word + " " + ending.s)
    );
  }
  if (ending.s === "ګانو") {
    return returnParseResults(
      afterEnding,
      plurals(word, true, word + " " + ending.s)
    );
  }
  return [];
}
/**
 * Reads the plural ending ګانې (plain) / ګانو (inflected) on a singular
 * feminine noun whose entry ends in و (o), ا (aa), ې (e) or ي (ee).
 *
 * Attached endings are tried in a fixed order, and the first one the token
 * ends with decides the result. The longer forms beginning with ی are looked
 * up with ي restored at the end of the stem. If no attached ending matches,
 * the ending is looked for as a separate second token, in which case both
 * tokens are consumed and `given` is the two joined by a space.
 */
function parseGaaneEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const canTakeGaane = (e: T.DictionaryEntry): e is T.FemNounEntry =>
    tp.isFemNounEntry(e) &&
    tp.isSingularEntry(e) &&
    endsWith(
      [
        { p: "و", f: "o" },
        { p: "ا", f: "aa" },
        { p: "ې", f: "e" },
        { p: "ي", f: "ee" },
      ],
      e
    );
  const plurals = (
    query: string,
    inflected: boolean,
    given: string
  ): T.ParsedNounWord<T.FemNounEntry>[] =>
    dictionary
      .queryP(query)
      .filter(canTakeGaane)
      .map<T.ParsedNounWord<T.FemNounEntry>>((entry) => ({
        inflected,
        gender: "fem",
        given,
        plural: true,
        entry,
      }));
  // order matters: the first ending the token ends with is the one used
  const attachedEndings: {
    ending: string;
    cut: number;
    restore: string;
    inflected: boolean;
  }[] = [
    { ending: "یګانې", cut: 5, restore: "ي", inflected: false },
    { ending: "ګانې", cut: 4, restore: "", inflected: false },
    { ending: "یګانو", cut: 5, restore: "ي", inflected: true },
    { ending: "ګانو", cut: 4, restore: "", inflected: true },
  ];
  const attached = attachedEndings.find(({ ending }) => word.endsWith(ending));
  if (attached) {
    return returnParseResults(
      remaining,
      plurals(
        word.slice(0, -attached.cut) + attached.restore,
        attached.inflected,
        word
      )
    );
  }
  if (tokens.length < 2) {
    return [];
  }
  // ending written as its own token
  const ending = tokens[1];
  const afterEnding = tokens.slice(2);
  if (ending.s === "ګانې") {
    return returnParseResults(
      afterEnding,
      plurals(word, false, word + " " + ending.s)
    );
  }
  if (ending.s === "ګانو") {
    return returnParseResults(
      afterEnding,
      plurals(word, true, word + " " + ending.s)
    );
  }
  return [];
}
/**
 * Reads the plural ending وې (plain) / وو (inflected) on a singular feminine
 * noun whose entry ends in و (o), ا (aa) or ې (e).
 *
 * The ending may be attached to the first token or written as a separate
 * second token; in the separate case both tokens are consumed and `given`
 * is the two joined by a space. An attached ending takes precedence.
 */
function parseWeEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.FemNounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const canTakeWe = (e: T.DictionaryEntry): e is T.FemNounEntry =>
    tp.isFemNounEntry(e) &&
    tp.isSingularEntry(e) &&
    endsWith(
      [
        { p: "و", f: "o" },
        { p: "ا", f: "aa" },
        { p: "ې", f: "e" },
      ],
      e
    );
  const plurals = (
    query: string,
    inflected: boolean,
    given: string
  ): T.ParsedNounWord<T.FemNounEntry>[] =>
    dictionary
      .queryP(query)
      .filter(canTakeWe)
      .map<T.ParsedNounWord<T.FemNounEntry>>((entry) => ({
        inflected,
        gender: "fem",
        given,
        plural: true,
        entry,
      }));
  if (word.endsWith("وې")) {
    return returnParseResults(
      remaining,
      plurals(word.slice(0, -2), false, word)
    );
  }
  if (word.endsWith("وو")) {
    return returnParseResults(
      remaining,
      plurals(word.slice(0, -2), true, word)
    );
  }
  if (tokens.length < 2) {
    return [];
  }
  // ending written as its own token
  const ending = tokens[1];
  const afterEnding = tokens.slice(2);
  if (ending.s === "وې") {
    return returnParseResults(
      afterEnding,
      plurals(word, false, word + " " + ending.s)
    );
  }
  if (ending.s === "وو") {
    return returnParseResults(
      afterEnding,
      plurals(word, true, word + " " + ending.s)
    );
  }
  return [];
}
/**
 * Reads a token ending in یان (plain) or یانو (inflected) as the plural of a
 * masculine noun.
 *
 * The stem is looked up with ي restored (singular masculine entries) and
 * with ی restored (masculine pattern 3 entries). For the inflected ending,
 * entries that are unisex nouns additionally get a feminine reading.
 */
function parseIYaanEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry>>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  const entriesFor = (stem: string): T.MascNounEntry[] => [
    ...(dictionary
      .queryP(stem + "ي")
      .filter(
        (e) => tp.isMascNounEntry(e) && tp.isSingularEntry(e)
      ) as T.MascNounEntry[]),
    ...(dictionary
      .queryP(stem + "ی")
      .filter(
        (e) => tp.isMascNounEntry(e) && tp.isPattern3Entry(e)
      ) as T.MascNounEntry[]),
  ];
  if (word.endsWith("یان")) {
    return returnParseResults(
      remaining,
      entriesFor(word.slice(0, -3)).map<T.ParsedNounWord<T.MascNounEntry>>(
        (entry) => ({
          inflected: false,
          gender: "masc",
          given: word,
          plural: true,
          entry,
        })
      )
    );
  }
  if (word.endsWith("یانو")) {
    const words: T.ParsedNounWord<T.MascNounEntry>[] = [];
    for (const entry of entriesFor(word.slice(0, -4))) {
      words.push({
        inflected: true,
        gender: "masc",
        given: word,
        plural: true,
        entry,
      });
      if (tp.isUnisexNounEntry(entry)) {
        words.push({
          inflected: true,
          gender: "fem",
          given: word,
          plural: true,
          entry,
        });
      }
    }
    return returnParseResults(remaining, words);
  }
  return [];
}
/**
 * Reads a token ending in یانې (plain) or یانو (inflected) as a feminine
 * plural.
 *
 * For یانې the stem is looked up with ي restored (feminine entries), with ی
 * restored (unisex pattern 3 entries) and with ۍ restored (feminine
 * pattern 3 entries). For یانو it is looked up with ي restored (singular
 * feminine entries) and with ۍ restored (feminine pattern 3 entries).
 *
 * Every candidate is selected as a feminine (or unisex) entry, so the
 * readings are reported with gender "fem". They were previously reported as
 * "masc", which contradicted the filters used to find them.
 */
function parseIYaaneEndingNoun(
  tokens: Readonly<T.Token[]>,
  dictionary: DictionaryAPI
): T.ParseResult<T.ParsedNounWord<T.MascNounEntry | T.FemNounEntry>>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  if (first.s.endsWith("یانې")) {
    const withoutIYaane = first.s.slice(0, -4);
    const eeEnding = dictionary
      .queryP(withoutIYaane + "ي")
      .filter(tp.isFemNounEntry);
    const pattern3UnisexAnim = dictionary
      .queryP(withoutIYaane + "ی")
      .filter(
        (e) => tp.isUnisexNounEntry(e) && tp.isPattern3Entry(e)
      ) as T.MascNounEntry[];
    const pattern3Fem = dictionary
      .queryP(withoutIYaane + "ۍ")
      .filter((e) => tp.isFemNounEntry(e) && tp.isPattern3Entry(e));
    return returnParseResults(
      rest,
      [...eeEnding, ...pattern3UnisexAnim, ...pattern3Fem].map<
        T.ParsedNounWord<T.MascNounEntry | T.FemNounEntry>
      >((entry) => ({
        inflected: false,
        gender: "fem",
        given: first.s,
        plural: true,
        entry,
      }))
    );
  }
  if (first.s.endsWith("یانو")) {
    const withoutIYaano = first.s.slice(0, -4);
    const eeEnding = dictionary
      .queryP(withoutIYaano + "ي")
      .filter((e) => tp.isFemNounEntry(e) && tp.isSingularEntry(e));
    const pattern3Anim = dictionary
      .queryP(withoutIYaano + "ۍ")
      .filter((e) => tp.isFemNounEntry(e) && tp.isPattern3Entry(e));
    return returnParseResults(
      rest,
      [...eeEnding, ...pattern3Anim].map<T.ParsedNounWord<T.FemNounEntry>>(
        (entry) => ({
          inflected: true,
          gender: "fem",
          given: first.s,
          plural: true,
          entry,
        })
      )
    );
  }
  return [];
}

View File

@ -1772,7 +1772,6 @@ const nouns: {
}, },
}, },
// Feminine animate ending in a consonant // Feminine animate ending in a consonant
// TODO: ALLOW FOR MULTIPLE PLURAL POSSIBILITIES میندې, میېنې etc.
{ {
in: { in: {
ts: 1527812928, ts: 1527812928,
@ -1780,17 +1779,32 @@ const nouns: {
f: "mor", f: "mor",
g: "", g: "",
e: "mother, mom", e: "mother, mom",
c: "n. f. anim.", c: "n. f. anim. fam.",
ppp: "میندې", ppp: "میندې, میېندې",
ppf: "maynde", ppf: "maynde, myende",
i: 11113, i: 11113,
}, },
out: { out: {
vocative: { vocative: {
fem: [[{ p: "مورې", f: "móre" }], [{ p: "میندو", f: "mayndo" }]], fem: [
[{ p: "مورې", f: "móre" }],
[
{ p: "میندو", f: "mayndo" },
{ p: "میېندو", f: "myendo" },
],
],
}, },
plural: { plural: {
fem: [[{ p: "میندې", f: "maynde" }], [{ p: "میندو", f: "mayndo" }]], fem: [
[
{ p: "میندې", f: "maynde" },
{ p: "میېندې", f: "myende" },
],
[
{ p: "میندو", f: "mayndo" },
{ p: "میېندو", f: "myendo" },
],
],
}, },
}, },
}, },
@ -2061,7 +2075,6 @@ const nouns: {
}, },
}, },
}, },
// TODO: Plaar plaroona paaraan - wrooNa
// Word with no inflections // Word with no inflections
{ {
in: { in: {
@ -2116,6 +2129,79 @@ const nouns: {
}, },
}, },
}, },
// masc familial word
{
in: {
i: 3205,
ts: 1527815177,
p: "پلار",
f: "plaar",
g: "plaar",
e: "father",
r: 4,
a: 1,
c: "n. m. anim. fam.",
ppp: "پلرونه",
ppf: "plaróona",
},
out: {
plural: {
masc: [
[{ p: "پلرونه", f: "plaróona" }],
[{ p: "پلرونو", f: "plaróono" }],
],
},
vocative: {
masc: [[{ p: "پلاره", f: "pláara" }], [{ p: "پلرونو", f: "plaróono" }]],
},
},
},
// Determiner
{
in: {
i: 13415,
ts: 1527813602,
p: "کوم",
f: "koom",
g: "koom",
e: "which, that, any, some, (a) certain",
r: 4,
a: 1,
c: "det.",
},
out: {
inflections: {
masc: [
[{ p: "کوم", f: "koom" }],
[{ p: "کوم", f: "koom" }],
[{ p: "کومو", f: "kóomo" }],
],
fem: [
[{ p: "کومه", f: "kóoma" }],
[{ p: "کومې", f: "kóome" }],
[{ p: "کومو", f: "kóomo" }],
],
},
vocative: {
masc: [[{ p: "کومه", f: "kóoma" }], [{ p: "کومو", f: "kóomo" }]],
fem: [[{ p: "کومې", f: "kóome" }], [{ p: "کومو", f: "kóomo" }]],
},
},
},
{
in: {
i: 17677,
ts: 1586519006879,
p: "هیڅ",
f: "heets, hits",
g: "heets,hits",
e: "negative determiner, used for nothing, never, no, none etc. (used with negative phrases)",
r: 4,
c: "det. / adv.",
noInf: true,
},
out: false,
},
]; ];
const others: T.DictionaryEntry[] = [ const others: T.DictionaryEntry[] = [
@ -2148,7 +2234,8 @@ adjectives.forEach((word) => {
nouns.forEach((word) => { nouns.forEach((word) => {
test(`${word.in.p} should inflect properly`, () => { test(`${word.in.p} should inflect properly`, () => {
expect(inflectWord(word.in)).toEqual(word.out); const res = inflectWord(word.in);
expect(res).toEqual(word.out);
}); });
}); });

View File

@ -48,6 +48,13 @@ export function isAdverbEntry(
return !!e.c?.includes("adv."); return !!e.c?.includes("adv.");
} }
export function isDeterminerEntry(
e: T.Entry | T.DictionaryEntry
): e is T.DeterminerEntry {
if ("entry" in e) return false;
return !!e.c?.includes("det.");
}
export function isLocativeAdverbEntry( export function isLocativeAdverbEntry(
e: T.Entry | T.DictionaryEntry e: T.Entry | T.DictionaryEntry
): e is T.LocativeAdverbEntry { ): e is T.LocativeAdverbEntry {
@ -70,7 +77,12 @@ export function isInflectableEntry(
if (isDeterminer(e)) { if (isDeterminer(e)) {
return true; return true;
} }
return isNounEntry(e) || isAdjectiveEntry(e) || isNumberEntry(e); return (
isNounEntry(e) ||
isAdjectiveEntry(e) ||
isNumberEntry(e) ||
isDeterminerEntry(e)
);
} }
export function isDeterminer( export function isDeterminer(
@ -103,7 +115,9 @@ export function isVerbEntry(
return "entry" in e && isVerbDictionaryEntry(e.entry); return "entry" in e && isVerbDictionaryEntry(e.entry);
} }
export function isMascNounEntry(e: T.InflectableEntry): e is T.MascNounEntry { export function isMascNounEntry(
e: T.InflectableEntry | T.DictionaryEntry
): e is T.MascNounEntry {
return !!e.c && e.c.includes("n. m."); return !!e.c && e.c.includes("n. m.");
} }
@ -112,7 +126,7 @@ export function isFemNounEntry(e: T.DictionaryEntry): e is T.FemNounEntry {
} }
export function isUnisexNounEntry( export function isUnisexNounEntry(
e: T.InflectableEntry e: T.InflectableEntry | T.DictionaryEntry
): e is T.UnisexNounEntry { ): e is T.UnisexNounEntry {
return isNounEntry(e) && e.c.includes("unisex"); return isNounEntry(e) && e.c.includes("unisex");
} }
@ -122,7 +136,7 @@ export function isAnimNounEntry(e: T.InflectableEntry): e is T.AnimNounEntry {
} }
export function isUnisexAnimNounEntry( export function isUnisexAnimNounEntry(
e: T.InflectableEntry e: T.InflectableEntry | T.DictionaryEntry
): e is T.UnisexAnimNounEntry { ): e is T.UnisexAnimNounEntry {
return isUnisexNounEntry(e) && isAnimNounEntry(e); return isUnisexNounEntry(e) && isAnimNounEntry(e);
} }
@ -137,13 +151,7 @@ export function isPattern(
p: T.InflectionPattern | "all" p: T.InflectionPattern | "all"
): (entry: T.InflectableEntry) => boolean { ): (entry: T.InflectableEntry) => boolean {
if (p === 0) { if (p === 0) {
return (e: T.InflectableEntry) => return isNonInflectingEntry;
!isPattern1Entry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e) &&
!isPattern5Entry(e) &&
!isPattern6FemEntry(e);
} }
if (p === 1) { if (p === 1) {
return isPattern1Entry; return isPattern1Entry;
@ -166,6 +174,21 @@ export function isPattern(
return () => true; return () => true;
} }
export function isNonInflectingEntry<T extends T.InflectableEntry>(
e: T
): e is T.NonInflecting<T> {
if (e.noInf) return true;
return (
!isPattern1Entry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e) &&
!isPattern5Entry(e) &&
!isPattern6FemEntry(e) &&
(!isNounEntry(e) || !isPluralNounEntry(e))
);
}
/** /**
* shows if a noun/adjective has the basic (consonant / ه) inflection pattern * shows if a noun/adjective has the basic (consonant / ه) inflection pattern
* *
@ -177,6 +200,10 @@ export function isPattern1Entry<T extends T.InflectableEntry>(
): e is T.Pattern1Entry<T> { ): e is T.Pattern1Entry<T> {
if (e.noInf) return false; if (e.noInf) return false;
if (e.infap || e.infbp) return false; if (e.infap || e.infbp) return false;
// family words like خور زوی etc with special plural don't follow pattern #1
if (e.c.includes("fam.")) {
return false;
}
if (isFemNounEntry(e)) { if (isFemNounEntry(e)) {
return ( return (
(endsWith( (endsWith(
@ -189,7 +216,7 @@ export function isPattern1Entry<T extends T.InflectableEntry>(
e e
) && ) &&
!e.p.endsWith("اع")) || !e.p.endsWith("اع")) ||
(endsWith({ p: pashtoConsonants }, e) && !(e.ppp && e.ppf)) endsWith({ p: pashtoConsonants }, e)
); );
} }
return endsInConsonant(e) || hasShwaEnding(e); return endsInConsonant(e) || hasShwaEnding(e);
@ -267,7 +294,7 @@ export function isPattern5Entry<T extends T.InflectableEntry>(
} }
export function isPattern6FemEntry( export function isPattern6FemEntry(
e: T.InflectableEntry e: T.InflectableEntry | T.DictionaryEntry
): e is T.Pattern6FemEntry<T.FemNounEntry> { ): e is T.Pattern6FemEntry<T.FemNounEntry> {
if (!isFemNounEntry(e)) return false; if (!isFemNounEntry(e)) return false;
if (e.c.includes("anim.")) return false; if (e.c.includes("anim.")) return false;

View File

@ -477,7 +477,8 @@ export type InflectionSet = ArrayFixed<ArrayOneOrMore<PsString>, 3>;
// Plural and Second Inflection // Plural and Second Inflection
export type PluralInflectionSet = ArrayFixed<ArrayOneOrMore<PsString>, 2>; export type PluralInflectionSet = ArrayFixed<ArrayOneOrMore<PsString>, 2>;
export type Gender = "masc" | "fem"; export const genders = ["masc", "fem"] as const;
export type Gender = (typeof genders)[number];
export type UnisexSet<T> = Record<Gender, T>; export type UnisexSet<T> = Record<Gender, T>;
export type GenderedSet<T> = export type GenderedSet<T> =
@ -634,7 +635,8 @@ export type InflectableEntry =
| NounEntry | NounEntry
| AdjectiveEntry | AdjectiveEntry
| NumberEntry | NumberEntry
| AdverbEntry; | AdverbEntry
| DeterminerEntry;
export type NounEntry = DictionaryEntry & { c: string } & { export type NounEntry = DictionaryEntry & { c: string } & {
__brand: "a noun entry"; __brand: "a noun entry";
}; };
@ -653,6 +655,9 @@ export type UnisexAnimNounEntry = UnisexNounEntry & {
export type AdverbEntry = DictionaryEntry & { c: string } & { export type AdverbEntry = DictionaryEntry & { c: string } & {
__brand: "an adverb entry"; __brand: "an adverb entry";
}; };
/**
 * A dictionary entry for a determiner.
 *
 * It is a `DictionaryEntry` whose `c` field is a required string, tagged with
 * a literal `__brand` so that it stays distinct from the other branded entry
 * types (noun, adverb, ...) at the type level.
 */
export type DeterminerEntry = DictionaryEntry & { c: string } & {
__brand: "a determiner entry";
};
export type LocativeAdverbEntry = AdverbEntry & { export type LocativeAdverbEntry = AdverbEntry & {
__brand2: "a locative adverb entry"; __brand2: "a locative adverb entry";
}; };
@ -1233,6 +1238,21 @@ export type EquativeBlock = { type: "equative"; equative: EquativeRendered };
export type NegativeBlock = { type: "negative"; imperative: boolean }; export type NegativeBlock = { type: "negative"; imperative: boolean };
/**
 * Parse result for an inflectable word, generic over the kind of entry found.
 *
 * - `inflection`: list of inflection numbers (0, 1 or 2) - presumably every
 *   inflection the given form could represent; TODO confirm against the parser
 * - `gender`: list of genders - presumably every gender the form could be
 * - `given`: NOTE(review): looks like the text as it was given to the parser - confirm
 * - `entry`: the entry (of type `E`) associated with this parse
 */
export type InflectableBaseParse<E extends InflectableEntry> = {
inflection: (0 | 1 | 2)[];
gender: Gender[];
given: string;
entry: E;
};
/**
 * Description of a single parsed noun word, generic over the noun entry type.
 *
 * Unlike `InflectableBaseParse`, each property holds one definite value:
 * - `inflected`: whether the word is inflected
 * - `plural`: whether the word is plural
 * - `gender`: the single gender of the word
 * - `given`: NOTE(review): looks like the text as it was given to the parser - confirm
 * - `entry`: the noun entry (of type `N`) associated with the word
 */
export type ParsedNounWord<N extends NounEntry> = {
inflected: boolean;
plural: boolean;
gender: Gender;
given: string;
entry: N;
};
export type Block = { export type Block = {
key: number; key: number;
block: block:

View File

@ -18,5 +18,5 @@
"noUnusedParameters": true, "noUnusedParameters": true,
"noFallthroughCasesInSwitch": true "noFallthroughCasesInSwitch": true
}, },
"include": ["vite.config.ts"] "include": ["vite.config.ts", "get-mini-dict.ts"]
} }

50
vocab/mini-dict-tss.ts Normal file
View File

@ -0,0 +1,50 @@
import * as T from "../src/types";
/**
 * The `ts` values of the dictionary entries that make up the mini dictionary.
 *
 * get-mini-dict.ts filters the full dictionary down to the entries whose `ts`
 * is listed here and writes them to vocab/mini-dict-entries.ts, so that file
 * must be regenerated after this list changes.
 *
 * The trailing comment on each line gives the Pashto word for that `ts`; the
 * section comments group the words by part of speech / gender.
 */
export const entries: T.DictionaryEntry["ts"][] = [
// adjectives
1527812798, // خفه
1527812625, // غټ
1527815306, // ستړی
1527819320, // نری
1527815451, // زوړ
1527815265, // شین
1527813293, // سور
// masc nouns
1527815251, // سړی
1527812828, // کور
1527812802, // خر
1527813593, // غر
// NOTE(review): مور ("mother") is normally a feminine noun - confirm whether it belongs in this group
1527812928, // مور
1527815177, // پلار
1527817330, // غنم
1527815206, // قاضي
1527815394, // واده
1527812878, // ماما
1527815450, // زوی
1527823093, // نبي
1527822456, // لفظ
1708911813165, // کارغه
1527820648, // الو
// unisex nouns
1527816747, // ډاکټر
1527814159, // ملګری
1527816249, // لمسی
1527812908, // مېلمه
1575924767041, // شپون
1527815333, // نتور
// fem nouns
1527811877, // دوستي
1586551382412, // وریژې
1527812797, // ښځه
1527814203, // کرسۍ
1527815129, // اوبه
1527814150, // لار
1527823526, // قاضۍ
1527815163, // پیشو
1527812582, // دعا
1591803598624, // هتکړۍ
1527812861, // لور - loor
];

View File

@ -2700,6 +2700,14 @@ json-buffer@3.0.1:
resolved "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz" resolved "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz"
integrity sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ== integrity sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==
json-edit-react@^1.15.7:
version "1.15.7"
resolved "https://registry.npmjs.org/json-edit-react/-/json-edit-react-1.15.7.tgz"
integrity sha512-PMw7FSTVrY23m5tdwDFxqjVL4OHyR/zSkPIu1BhKVJofW+l0I93moevs7CWTD+3ZWA/m8OtKpxYV3CQ30OlNJA==
dependencies:
object-property-assigner "^1.3.0"
object-property-extractor "^1.0.11"
json-parse-even-better-errors@^2.3.0: json-parse-even-better-errors@^2.3.0:
version "2.3.1" version "2.3.1"
resolved "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz" resolved "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz"
@ -2949,6 +2957,16 @@ object-assign@^4.0.1, object-assign@^4.1.1:
resolved "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz" resolved "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz"
integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg== integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==
object-property-assigner@^1.3.0:
version "1.3.0"
resolved "https://registry.npmjs.org/object-property-assigner/-/object-property-assigner-1.3.0.tgz"
integrity sha512-19A0RsC9rP9klCKHDPL/MeERxeopV9wyMNfP+eD2uKOafzLjF+OUEN4FoP6RAlCFHmerBPJ4ohNv/WrgaNpeIA==
object-property-extractor@^1.0.11:
version "1.0.11"
resolved "https://registry.npmjs.org/object-property-extractor/-/object-property-extractor-1.0.11.tgz"
integrity sha512-VnDQcyN0FTXZ0hMZS/CTb2QkIssZ9XKB8zlf5rnFh1HjFQX1P73EHawavSztBOiPDGqAPNXebv4agjhF9eACAw==
once@^1.3.0: once@^1.3.0:
version "1.4.0" version "1.4.0"
resolved "https://registry.npmjs.org/once/-/once-1.4.0.tgz" resolved "https://registry.npmjs.org/once/-/once-1.4.0.tgz"
@ -3265,7 +3283,7 @@ react-transition-group@^4.4.1:
loose-envify "^1.4.0" loose-envify "^1.4.0"
prop-types "^15.6.2" prop-types "^15.6.2"
react@^18.3.1, react@>=0.14.0, react@>=15.0.0, react@>=16.3.0, react@>=16.3.2, react@>=16.6.0, react@>=16.8.0: react@^18.3.1, react@>=0.14.0, react@>=15.0.0, react@>=16.0.0, react@>=16.3.0, react@>=16.3.2, react@>=16.6.0, react@>=16.8.0:
version "18.3.1" version "18.3.1"
resolved "https://registry.npmjs.org/react/-/react-18.3.1.tgz" resolved "https://registry.npmjs.org/react/-/react-18.3.1.tgz"
integrity sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ== integrity sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==