Compare commits

..

3 Commits

Author SHA1 Message Date
adueck f17ebddaa1 comment out tests for parser in progress 2024-12-06 15:10:17 +05:00
adueck 0ade410698 fix up masc inflection of demonstratives 2024-12-06 15:06:14 +05:00
adueck 73eb04d7e0 parser working - a bit slow/rough - with dictionary lookup 2024-10-14 20:22:32 -04:00
39 changed files with 1163 additions and 1002 deletions

1
.gitignore vendored
View File

@ -11,6 +11,7 @@ lerna-debug.log*
src/verbs.ts src/verbs.ts
src/nouns-adjs.ts src/nouns-adjs.ts
vocab/mini-dict-entries.ts vocab/mini-dict-entries.ts
src/lib/src/parsing/split-verbs.ts
# testing # testing
/coverage /coverage

View File

@ -0,0 +1,41 @@
import * as T from "./src/types";
import * as tp from "./src/lib/src/type-predicates";
import fs from "fs";
import { entries as collection } from "./vocab/mini-dict-tss";
// Build-time generator script.
// Downloads the full dictionary and writes two generated source files:
//  - ./vocab/mini-dict-entries.ts: the entries whose `ts` is listed in mini-dict-tss
//  - ./src/lib/src/parsing/split-verbs.ts: verb entries that have a `separationAtP`
//    value, excluding the entries whose `p` is کول or کېدل
const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
);
// fetch() only rejects on network failure; an HTTP error status still resolves,
// so check it here instead of trying to parse an error page as the dictionary
if (!res.ok) {
  throw new Error(
    `failed to download dictionary: ${res.status} ${res.statusText}`
  );
}
const dictionary = (await res.json()) as T.Dictionary;
// keep only the entries hand-picked for the mini dictionary
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  collection.includes(x.ts)
);
const splitEntries: T.VerbDictionaryEntry[] =
  dictionary.entries.filter<T.VerbDictionaryEntry>(
    (x): x is T.VerbDictionaryEntry =>
      tp.isVerbDictionaryEntry(x) &&
      !!x.separationAtP &&
      !["کول", "کېدل"].includes(x.p)
  );
// one JSON-serialized entry per line, tab indented, inside the generated array
const miniDictContents = `import { DictionaryEntry } from "../src/types";
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
export const entries: DictionaryEntry[] = [
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
];
`;
// each split verb is wrapped as `{ entry: ... }` so the array is typed as VerbEntry[]
const splitVerbContents = `import { VerbEntry, VerbDictionaryEntry } from "../../../types";
// DO NOT MODIFY - GENERATED
export const entries: VerbEntry[] = [
${splitEntries
  .map((e) => `\t{ entry: ${JSON.stringify(e)} as VerbDictionaryEntry },`)
  .join("\n")}
];
`;
fs.writeFileSync("./vocab/mini-dict-entries.ts", miniDictContents);
fs.writeFileSync("./src/lib/src/parsing/split-verbs.ts", splitVerbContents);

View File

@ -1,22 +0,0 @@
import * as T from "./src/types";
import fs from "fs";
import { entries as collection } from "./vocab/mini-dict-tss";
// Build-time generator script.
// Downloads the full dictionary and writes ./vocab/mini-dict-entries.ts,
// containing only the entries whose `ts` is listed in mini-dict-tss.
const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
);
// fetch() only rejects on network failure; an HTTP error status still resolves,
// so check it here instead of trying to parse an error page as the dictionary
if (!res.ok) {
  throw new Error(
    `failed to download dictionary: ${res.status} ${res.statusText}`
  );
}
const dictionary = (await res.json()) as T.Dictionary;
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  collection.includes(x.ts)
);
// one JSON-serialized entry per line, tab indented, inside the generated array
const contents = `import { DictionaryEntry } from "../src/types";
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
export const entries: DictionaryEntry[] = [
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
];
`;
fs.writeFileSync("./vocab/mini-dict-entries.ts", contents);

View File

@ -1,6 +1,6 @@
{ {
"name": "pashto-inflector-website", "name": "pashto-inflector-website",
"version": "7.7.1", "version": "7.7.3",
"type": "module", "type": "module",
"scripts": { "scripts": {
"patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version", "patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version",
@ -14,7 +14,7 @@
"build-website": "tsc -b && vite build", "build-website": "tsc -b && vite build",
"build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs", "build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs",
"build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib", "build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib",
"get-words": "node get-words.cjs && tsx get-mini-dict.ts", "get-words": "node get-words.cjs && tsx get-mini-dict-and-split-verbs.ts",
"check-all-inflections": "tsx check-all-inflections.ts" "check-all-inflections": "tsx check-all-inflections.ts"
}, },
"dependencies": { "dependencies": {

View File

@ -19,7 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
import Hider from "./components/src/Hider"; import Hider from "./components/src/Hider";
import InflectionDemo from "./demo-components/InflectionDemo"; import InflectionDemo from "./demo-components/InflectionDemo";
import SpellingDemo from "./demo-components/SpellingDemo"; import SpellingDemo from "./demo-components/SpellingDemo";
// import ParserDemo from "./demo-components/ParserDemo"; import ParserDemo from "./demo-components/ParserDemo";
// import InflectionTable from "./components/src/InflectionsTable"; // import InflectionTable from "./components/src/InflectionsTable";
function App() { function App() {
@ -163,7 +163,7 @@ function App() {
> >
<SpellingDemo opts={textOptions} onChange={setTextOptions} /> <SpellingDemo opts={textOptions} onChange={setTextOptions} />
</Hider> </Hider>
{/* <Hider <Hider
label="Parser (🚧 IN PROGRESS 🚧)" label="Parser (🚧 IN PROGRESS 🚧)"
hLevel={3} hLevel={3}
showing={showing === "parser"} showing={showing === "parser"}
@ -174,7 +174,7 @@ function App() {
entryFeeder={entryFeeder} entryFeeder={entryFeeder}
dictionary={dictionary} dictionary={dictionary}
/> />
</Hider> */} </Hider>
</div> </div>
</main> </main>
<Modal <Modal

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "7.7.1", "version": "7.7.3",
"description": "Pashto inflector library module with React components", "description": "Pashto inflector library module with React components",
"main": "dist/components/library.js", "main": "dist/components/library.js",
"module": "dist/components/library.js", "module": "dist/components/library.js",

View File

@ -3,13 +3,18 @@ import * as T from "../types";
// import { parsePhrase } from "../lib/src/parsing/parse-phrase"; // import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { tokenizer } from "../lib/src/parsing/tokenizer"; import { tokenizer } from "../lib/src/parsing/tokenizer";
// import { NPDisplay } from "../components/library"; // import { NPDisplay } from "../components/library";
// import EditableVP from "../components/src/vp-explorer/EditableVP"; import EditableVP from "../components/src/vp-explorer/EditableVP";
// import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools"; import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
import { parseNoun } from "../lib/src/parsing/parse-noun-new"; // import { parseNoun } from "../lib/src/parsing/parse-noun-new";
import { JsonEditor } from "json-edit-react"; import { JsonEditor } from "json-edit-react";
import { renderNounSelection } from "../lib/src/phrase-building/render-np"; // import { renderNounSelection } from "../lib/src/phrase-building/render-np";
import { NPBlock } from "../components/src/blocks/Block"; // import { NPBlock } from "../components/src/blocks/Block";
import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools"; // import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
//import { renderVP } from "../lib/src/phrase-building/render-vp";
// import VPDisplay from "../components/src/vp-explorer/VPDisplay";
import { entryFeeder } from "./entryFeeder";
import { removeRedundantVPSs } from "../lib/src/phrase-building/remove-redundant";
const working = [ const working = [
"limited demo vocab", "limited demo vocab",
@ -59,7 +64,7 @@ function ParserDemo({
}) { }) {
const [text, setText] = useState<string>(""); const [text, setText] = useState<string>("");
const [result, setResult] = useState< const [result, setResult] = useState<
ReturnType<typeof parseNoun>[number]["body"][] ReturnType<typeof parsePhrase>["success"]
>([]); >([]);
// ReturnType<typeof parsePhrase>["success"] // ReturnType<typeof parsePhrase>["success"]
const [errors, setErrors] = useState<string[]>([]); const [errors, setErrors] = useState<string[]>([]);
@ -70,16 +75,10 @@ function ParserDemo({
setErrors([]); setErrors([]);
return; return;
} }
const res = parseNoun(tokenizer(value), dictionary, undefined); const res = parsePhrase(tokenizer(value), dictionary);
const success: ReturnType<typeof parseNoun>[number]["body"][] = res
.filter((x) => !x.tokens.length)
.map((x) => x.body);
const errors = [
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
];
setText(value); setText(value);
setErrors(errors); setErrors(res.errors);
setResult(success); setResult(removeRedundantVPSs(res.success));
} }
return ( return (
<div className="mt-3" style={{ marginBottom: "1000px" }}> <div className="mt-3" style={{ marginBottom: "1000px" }}>
@ -141,34 +140,8 @@ function ParserDemo({
<div className="text-center">Did you mean:</div> <div className="text-center">Did you mean:</div>
</> </>
)} )}
{result.map((r) => { {result.map((res) => (
try { <>
const renderedNP: T.Rendered<T.NPSelection> = {
type: "NP",
selection: renderNounSelection(r.selection, r.inflected, "none"),
};
return (
<>
{r.inflected ? "INFLECTED" : "PLAIN"}
<NPBlock
opts={opts}
script="p"
english={getEnglishFromRendered(renderedNP)}
>
{renderedNP}
</NPBlock>
</>
);
} catch (e) {
console.error(e);
return <div>ERROR RENDERING</div>;
}
})}
<JsonEditor data={result} />
{/* {result.map((res) =>
"inflected" in res ? (
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
) : "verb" in res ? (
<EditableVP <EditableVP
opts={opts} opts={opts}
entryFeeder={entryFeeder} entryFeeder={entryFeeder}
@ -176,42 +149,76 @@ function ParserDemo({
> >
{uncompleteVPSelection(res)} {uncompleteVPSelection(res)}
</EditableVP> </EditableVP>
) : ( <details>
// (() => { <summary>AST</summary>
// try { <JsonEditor data={res} />
// const rendered = renderVP(res); </details>
// const compiled = compileVP(rendered, res.form); </>
// return ( ))}
// <div>
// <CompiledPTextDisplay compiled={compiled} opts={opts} />
// {compiled.e && (
// <div className={`text-muted mt-2 text-center`}>
// {compiled.e.map((e, i) => (
// <div key={i}>{e}</div>
// ))}
// </div>
// )}
// </div>
// );
// } catch (e) {
// console.error(e);
// console.log({ res });
// return <div>ERROR</div>;
// }
// })()
<samp>
<pre>{JSON.stringify(res, null, " ")}</pre>
</samp>
)
)} */}
<details>
<summary>AST</summary>
<samp>
<pre>{JSON.stringify(result, null, " ")}</pre>
</samp>
</details>
</div> </div>
); );
} }
export default ParserDemo; export default ParserDemo;
// {/* {result.map((res) =>
// "inflected" in res ? (
// <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
// ) : "verb" in res ? (
// <EditableVP
// opts={opts}
// entryFeeder={entryFeeder}
// allVariations={true}
// >
// {uncompleteVPSelection(res)}
// </EditableVP>
// ) : (
// (() => {
// try {
// const rendered = renderVP(res);
// const compiled = compileVP(rendered, res.form);
// return (
// <div>
// <CompiledPTextDisplay compiled={compiled} opts={opts} />
// {compiled.e && (
// <div className={`text-muted mt-2 text-center`}>
// {compiled.e.map((e, i) => (
// <div key={i}>{e}</div>
// ))}
// </div>
// )}
// </div>
// );
// } catch (e) {
// console.error(e);
// console.log({ res });
// return <div>ERROR</div>;
// }
// })()
// <samp>
// <pre>{JSON.stringify(res, null, " ")}</pre>
// </samp>
// )
// )} */}
// try {
// const renderedNP: T.Rendered<T.NPSelection> = {
// type: "NP",
// selection: renderNounSelection(r.selection, r.inflected, "none"),
// };
// return (
// <>
// {r.inflected ? "INFLECTED" : "PLAIN"}
// <NPBlock
// opts={opts}
// script="p"
// english={getEnglishFromRendered(renderedNP)}
// >
// {renderedNP}
// </NPBlock>
// </>
// );
// } catch (e) {
// console.error(e);
// return <div>ERROR RENDERING</div>;
// }

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/inflect", "name": "@lingdocs/inflect",
"version": "7.7.1", "version": "7.7.3",
"description": "Pashto inflector library", "description": "Pashto inflector library",
"main": "dist/lib/library.cjs", "main": "dist/lib/library.cjs",
"module": "dist/lib/library.js", "module": "dist/lib/library.js",

View File

@ -19,7 +19,19 @@ function queryP(p: string): T.DictionaryEntry[] {
} }
return dictDb.collection.find({ p }); return dictDb.collection.find({ p });
} }
const memoizedQueryP = queryP; const memoizedQueryP = memoize(queryP);
/**
 * Fetches the single dictionary entry with the given `ts` via `dictDb.findOneByTs`.
 *
 * @throws Error "dictionary not initialized yet" when `dictDb.collection` is not set
 * @throws Error "complement link broken" when no entry is found for `ts`
 */
function queryTs(ts: number): T.DictionaryEntry {
  if (dictDb.collection) {
    const found = dictDb.findOneByTs(ts);
    if (found) {
      return found;
    }
    throw new Error("complement link broken");
  }
  throw new Error("dictionary not initialized yet");
}
const memoizedQueryTs = memoize(queryTs);
function adjLookup(p: string): T.AdjectiveEntry[] { function adjLookup(p: string): T.AdjectiveEntry[] {
const res = memoizedQueryP(p); const res = memoizedQueryP(p);
@ -33,26 +45,51 @@ function nounLookup(p: string): T.NounEntry[] {
function otherLookup( function otherLookup(
key: keyof T.DictionaryEntry, key: keyof T.DictionaryEntry,
p: string p: string,
regex?: boolean
): T.DictionaryEntry[] { ): T.DictionaryEntry[] {
if (!dictDb.collection) { if (!dictDb.collection) {
return []; return [];
} }
return dictDb.collection.find({ [key]: p }); return dictDb.collection.find({ [key]: regex ? variationRegex(p) : p });
} }
function specialPluralLookup(p: string): T.NounEntry[] { function specialPluralLookup(p: string): T.NounEntry[] {
if (!dictDb.collection) { if (!dictDb.collection) {
return []; return [];
} }
const regex = new RegExp(`(^|\\s|,)${p}($|,)`); const regex = variationRegex(p);
return dictDb.collection return dictDb.collection
.find({ .find({
$or: [{ ppp: { $regex: regex } }, { app: { $regex: regex } }], $or: [{ ppp: regex }, { app: regex }],
}) })
.filter(tp.isNounEntry); .filter(tp.isNounEntry);
} }
/**
 * Finds the verb entries spelled exactly `p` and packages each one as a VerbEntry.
 * When an entry has an `l` link, the linked entry is fetched by ts and attached
 * as `complement`. Returns an empty array while the dictionary collection is not
 * available.
 */
function verbEntryLookup(p: string): T.VerbEntry[] {
  if (!dictDb.collection) {
    return [];
  }
  const verbs = memoizedQueryP(p).filter(tp.isVerbDictionaryEntry);
  return verbs.map((entry) => {
    if (entry.l) {
      return {
        entry,
        complement: memoizedQueryTs(entry.l),
      };
    }
    return { entry };
  });
}
/**
 * creates a RegEx mongo query to search for a variation in a certain field
 * ie. to search for کاته in کوت, کاته
 *
 * The variation must start at the beginning of the field or right after
 * whitespace or a comma, and must end at the end of the field or right before
 * a comma.
 *
 * `p` is matched literally: regex metacharacters in it are escaped, so input
 * such as "(" can no longer throw a SyntaxError or widen the match.
 *
 * @param p the variation to search for
 * @returns an object with the compiled pattern under `$regex`
 */
function variationRegex(p: string): { $regex: RegExp } {
  // escape everything that has a special meaning inside a RegExp source
  const literal = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${literal}($|,)`) };
}
export const dictionary: T.DictionaryAPI = { export const dictionary: T.DictionaryAPI = {
initialize: async () => await dictDb.initialize(), initialize: async () => await dictDb.initialize(),
update: async () => await dictDb.updateDictionary(() => null), update: async () => await dictDb.updateDictionary(() => null),
@ -61,4 +98,5 @@ export const dictionary: T.DictionaryAPI = {
nounLookup: memoize(nounLookup), nounLookup: memoize(nounLookup),
otherLookup: memoize(otherLookup), otherLookup: memoize(otherLookup),
specialPluralLookup: memoize(specialPluralLookup), specialPluralLookup: memoize(specialPluralLookup),
verbEntryLookup: memoize(verbEntryLookup),
}; };

View File

@ -1,7 +1,15 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; import {
isAdjectiveEntry,
isNounEntry,
isVerbDictionaryEntry,
} from "../type-predicates";
import { entries } from "../../../../vocab/mini-dict-entries"; import { entries } from "../../../../vocab/mini-dict-entries";
/**
 * Builds the pattern used to find `p` as one whole variation inside a
 * comma-separated field. The match must start at the beginning of the field or
 * right after whitespace or a comma, and must end at the end of the field or
 * right before a comma.
 *
 * `p` is matched literally: regex metacharacters in it are escaped, so input
 * such as "(" can no longer throw a SyntaxError or widen the match.
 *
 * @param p the variation to search for
 * @returns an object with the compiled pattern under `$regex`
 */
function variationRegex(p: string): { $regex: RegExp } {
  // escape everything that has a special meaning inside a RegExp source
  const literal = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${literal}($|,)`) };
}
const queryP = (p: string) => entries.filter((e) => e.p === p); const queryP = (p: string) => entries.filter((e) => e.p === p);
function adjLookup(p: string): T.AdjectiveEntry[] { function adjLookup(p: string): T.AdjectiveEntry[] {
return queryP(p).filter(isAdjectiveEntry) as T.AdjectiveEntry[]; return queryP(p).filter(isAdjectiveEntry) as T.AdjectiveEntry[];
@ -13,18 +21,37 @@ function nounLookup(p: string): T.NounEntry[] {
function otherLookup( function otherLookup(
key: keyof T.DictionaryEntry, key: keyof T.DictionaryEntry,
p: string p: string,
regex?: boolean
): T.DictionaryEntry[] { ): T.DictionaryEntry[] {
if (regex) {
const { $regex: regex } = variationRegex(p);
return entries.filter((e) => (e[key] as string)?.match(regex));
}
return entries.filter((e) => e[key] === p); return entries.filter((e) => e[key] === p);
} }
function specialPluralLookup(p: string): T.NounEntry[] { function specialPluralLookup(p: string): T.NounEntry[] {
const regex = new RegExp(`(^|\\s|,)${p}($|,)`); const { $regex: regex } = variationRegex(p);
return entries.filter( return entries.filter(
(e) => (e.ppp?.match(regex) || e.app?.match(regex)) && isNounEntry(e) (e) => (e.ppp?.match(regex) || e.app?.match(regex)) && isNounEntry(e)
) as T.NounEntry[]; ) as T.NounEntry[];
} }
/**
 * Collects the verb entries of the mini dictionary spelled exactly `p`.
 * When an entry has an `l` link, the entry whose `ts` equals that link is
 * attached as `complement` (left undefined if no such entry is in the list).
 */
function verbEntryLookup(p: string): T.VerbEntry[] {
  const found: T.VerbEntry[] = [];
  for (const entry of entries) {
    if (entry.p !== p || !isVerbDictionaryEntry(entry)) {
      continue;
    }
    if (entry.l) {
      found.push({
        entry,
        complement: entries.find((e) => e.ts === entry.l),
      });
    } else {
      found.push({ entry });
    }
  }
  return found;
}
export const testDictionary: T.DictionaryAPI = { export const testDictionary: T.DictionaryAPI = {
// @ts-expect-error we won't mock the initialization // @ts-expect-error we won't mock the initialization
initialize: async () => 0, initialize: async () => 0,
@ -35,4 +62,5 @@ export const testDictionary: T.DictionaryAPI = {
nounLookup, nounLookup,
otherLookup, otherLookup,
specialPluralLookup, specialPluralLookup,
verbEntryLookup,
}; };

View File

@ -1,16 +1,16 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup"; import { isAdverbEntry } from "../type-predicates";
import { returnParseResultS } from "./utils"; import { returnParseResultS } from "./utils";
export function parseAdverb( export function parseAdverb(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction dictionary: T.DictionaryAPI
): T.ParseResult<T.APSelection>[] { ): T.ParseResult<T.APSelection>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
const [first, ...rest] = tokens; const [first, ...rest] = tokens;
const adverbs = lookup(first.s, "adverb"); const adverbs = dictionary.queryP(first.s).filter(isAdverbEntry);
return adverbs.map((entry) => return adverbs.map((entry) =>
returnParseResultS(rest, { returnParseResultS(rest, {
type: "AP", type: "AP",

View File

@ -1,26 +1,25 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { fmapParseResult } from "../fp-ps"; import { fmapParseResult } from "../fp-ps";
import { LookupFunction } from "./lookup";
import { parseAdverb } from "./parse-adverb"; import { parseAdverb } from "./parse-adverb";
import { parseSandwich } from "./parse-sandwich"; import { parseSandwich } from "./parse-sandwich";
export function parseAP( export function parseAP(
s: Readonly<T.Token[]>, s: Readonly<T.Token[]>,
lookup: LookupFunction, dicitonary: T.DictionaryAPI,
possesor: T.PossesorSelection | undefined possesor: T.PossesorSelection | undefined
): T.ParseResult<T.APSelection>[] { ): T.ParseResult<T.APSelection>[] {
if (s.length === 0) { if (s.length === 0) {
return []; return [];
} }
return [ return [
...(!possesor ? parseAdverb(s, lookup) : []), ...(!possesor ? parseAdverb(s, dicitonary) : []),
...fmapParseResult( ...fmapParseResult(
(selection) => (selection) =>
({ ({
type: "AP", type: "AP",
selection, selection,
} as const), } as const),
parseSandwich(s, lookup, possesor) parseSandwich(s, dicitonary, possesor)
), ),
]; ];
} }

View File

@ -1,12 +1,11 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseEquative } from "./parse-equative"; import { parseEquative } from "./parse-equative";
import { parseKidsSection } from "./parse-kids-section"; import { parseKidsSection } from "./parse-kids-section";
import { parseNeg } from "./parse-negative"; import { parseNeg } from "./parse-negative";
import { parseNPAP } from "./parse-npap"; import { parseNPAP } from "./parse-npap";
import { parseVBP } from "./parse-vbp"; import { parseVBP } from "./parse-vbp";
import { parsePH } from "./parse-ph"; import { parsePH } from "./parse-ph";
import { parseVBE } from "./parse-vbe"; import { parseVBE } from "./parse-vbe-new";
import { import {
bindParseResult, bindParseResult,
returnParseResult, returnParseResult,
@ -18,7 +17,7 @@ import { isKedulStatEntry } from "./parse-verb-helpers";
export function parseBlocks( export function parseBlocks(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction, dicitonary: T.DictionaryAPI,
blocks: T.ParsedBlock[], blocks: T.ParsedBlock[],
kids: T.ParsedKid[] kids: T.ParsedKid[]
): T.ParseResult<{ ): T.ParseResult<{
@ -35,13 +34,13 @@ export function parseBlocks(
// TOOD: rather parse VBP / VBE // TOOD: rather parse VBP / VBE
const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [ const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
...(!inVerbSection ? parseNPAP(tokens, lookup) : []), ...(!inVerbSection ? parseNPAP(tokens, dicitonary) : []),
// ensure at most one of each PH, VBE, VBP // ensure at most one of each PH, VBE, VBP
...(prevPh ? [] : parsePH(tokens)), ...(prevPh ? [] : parsePH(tokens)),
...(blocks.some(isParsedVBE) ...(blocks.some(isParsedVBE)
? [] ? []
: [...parseVBE(tokens, lookup), ...parseEquative(tokens)]), : [...parseVBE(tokens, dicitonary), ...parseEquative(tokens)]),
...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, lookup)), ...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, dicitonary)),
...(blocks.some((b) => b.type === "negative") ? [] : parseNeg(tokens)), ...(blocks.some((b) => b.type === "negative") ? [] : parseNeg(tokens)),
...parseKidsSection(tokens, []), ...parseKidsSection(tokens, []),
]; ];
@ -50,7 +49,7 @@ export function parseBlocks(
const errors: T.ParseError[] = []; const errors: T.ParseError[] = [];
if (r.type === "kids") { if (r.type === "kids") {
return { return {
next: parseBlocks(tokens, lookup, blocks, [...kids, ...r.kids]), next: parseBlocks(tokens, dicitonary, blocks, [...kids, ...r.kids]),
errors: errors:
blocks.length !== 1 blocks.length !== 1
? [{ message: "kids' section out of place" }] ? [{ message: "kids' section out of place" }]
@ -71,7 +70,7 @@ export function parseBlocks(
return []; return [];
} }
return { return {
next: parseBlocks(tokens, lookup, [...blocks, r], kids), next: parseBlocks(tokens, dicitonary, [...blocks, r], kids),
errors, errors,
}; };
}); });

View File

@ -1736,7 +1736,7 @@ describe("parsing nouns", () => {
test(category, () => { test(category, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const res = parseNoun(tokens, testDictionary, undefined, []).flatMap( const res = parseNoun(tokens, testDictionary, undefined).flatMap(
// only take the ones that used all the tokens // only take the ones that used all the tokens
({ body, tokens }) => (tokens.length === 0 ? [body] : []) ({ body, tokens }) => (tokens.length === 0 ? [body] : [])
); );

View File

@ -3,7 +3,12 @@ import { makeNounSelection } from "../phrase-building/make-selections";
import { parseAdjective } from "./parse-adjective-new"; import { parseAdjective } from "./parse-adjective-new";
import { parseDeterminer } from "./parse-determiner"; import { parseDeterminer } from "./parse-determiner";
import { parseNounWord } from "./parse-noun-word"; import { parseNounWord } from "./parse-noun-word";
import { bindParseResult, parserCombMany, toParseError } from "./utils"; import {
bindParseResult,
parserCombMany,
parserCombSucc3,
toParseError,
} from "./utils";
type NounResult = { inflected: boolean; selection: T.NounSelection }; type NounResult = { inflected: boolean; selection: T.NounSelection };
@ -15,57 +20,54 @@ export function parseNoun(
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
const detRes = parserCombMany(parseDeterminer)(tokens, dictionary); const res = parserCombSucc3([
// TODO: add recognition of او between adjectives parserCombMany(parseDeterminer),
return bindParseResult(detRes, (t, determiners) => { parserCombMany(parseAdjective),
const adjRes = parserCombMany(parseAdjective)(t, dictionary); parseNounWord,
return bindParseResult(adjRes, (tk, adjectives) => { ])(tokens, dictionary);
const nounWord = parseNounWord(tk, dictionary); return bindParseResult(res, (tkns, [determiners, adjectives, nounWord]) => {
return bindParseResult(nounWord, (tkns, nr) => { const { error: adjErrors } = adjDetsMatch(
const { error: adjErrors } = adjDetsMatch( adjectives,
adjectives, nounWord.gender,
nr.gender, nounWord.inflected ? 1 : 0,
nr.inflected ? 1 : 0, nounWord.plural
nr.plural );
); const { error: detErrors } = adjDetsMatch(
const { error: detErrors } = adjDetsMatch( determiners,
determiners, nounWord.gender,
nr.gender, nounWord.inflected ? 1 : 0,
nr.inflected ? 1 : 0, nounWord.plural
nr.plural );
); const dupErrors = checkForDeterminerDuplicates(determiners);
const dupErrors = checkForDeterminerDuplicates(determiners); const s = makeNounSelection(nounWord.entry, undefined);
const s = makeNounSelection(nr.entry, undefined); const body: NounResult = {
const body: NounResult = { inflected: nounWord.inflected,
inflected: nr.inflected, selection: {
selection: { ...s,
...s, gender: nounWord.gender,
gender: nr.gender, number: nounWord.plural ? "plural" : "singular",
number: nr.plural ? "plural" : "singular", adjectives: adjectives.map((a) => a.selection),
adjectives: adjectives.map((a) => a.selection), determiners: determiners.length
determiners: determiners.length ? {
? { type: "determiners",
type: "determiners", withNoun: true,
withNoun: true, determiners: determiners.map((d) => d.selection),
determiners: determiners.map((d) => d.selection), }
} : undefined,
: undefined, possesor,
possesor, },
}, };
}; return [
return [ {
{ body,
body, tokens: tkns,
tokens: tkns, errors: [
errors: [ ...detErrors.map(toParseError),
...detErrors.map(toParseError), ...dupErrors.map(toParseError),
...dupErrors.map(toParseError), ...adjErrors.map(toParseError),
...adjErrors.map(toParseError), ],
], },
}, ];
];
});
});
}); });
} }

View File

@ -1,191 +0,0 @@
import * as T from "../../../types";
import { getInflectionPattern } from "../inflection-pattern";
import { makeNounSelection } from "../phrase-building/make-selections";
import {
isMascNounEntry,
isNounEntry,
isPluralNounEntry,
isUnisexNounEntry,
} from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { LookupFunction } from "./lookup";
import { parseAdjective } from "./parse-adjective";
import { bindParseResult } from "./utils";
type NounResult = { inflected: boolean; selection: T.NounSelection };
/**
 * Parses a noun, optionally preceded by adjectives, from the front of `tokens`.
 *
 * Two kinds of results are produced and concatenated:
 *  - results of first parsing an adjective and then recursing on the remaining
 *    tokens with that adjective appended to `adjectives`
 *  - results of reading the first token itself as a noun form
 *
 * @param tokens the tokens still to be parsed; an empty list gives no results
 * @param lookup dictionary lookup function used for the noun search
 * @param possesor possesor selection copied as-is into every noun selection
 * @param adjectives adjectives already parsed in front of this position
 * @returns every possible reading; adjective agreement problems are reported in
 * each result's `errors` rather than dropping the result
 */
export function parseNoun(
tokens: Readonly<T.Token[]>,
lookup: LookupFunction,
possesor: T.PossesorSelection | undefined,
adjectives: {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
given: string;
selection: T.AdjectiveSelection;
}[]
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return [];
}
// TODO: add recognition of او between adjectives
// branch 1: the first token(s) may be an adjective, in which case carry on
// parsing the noun after it with the adjective accumulated
const adjRes = parseAdjective(tokens, lookup);
const withAdj = bindParseResult(adjRes, (tkns, adj) =>
parseNoun(tkns, lookup, possesor, [...adjectives, adj])
);
// branch 2: the first token is the noun itself
const [first, ...rest] = tokens;
const searches = getInflectionQueries(first.s, true);
// accumulator for the noun readings found in branch 2
const w: ReturnType<typeof parseNoun> = [];
searches.forEach(({ search, details }) => {
const nounEntries = lookup(search, "nounAdj").filter(isNounEntry);
details.forEach((deets) => {
// only the entries that satisfy the predicate attached to this query detail
const fittingEntries = nounEntries.filter(deets.predicate);
fittingEntries.forEach((entry) => {
// genders the entry itself can take
const genders: T.Gender[] = isUnisexNounEntry(entry)
? ["masc", "fem"]
: isMascNounEntry(entry)
? ["masc"]
: ["fem"];
deets.gender.forEach((gender) => {
// skip genders proposed by the query that the entry does not allow
if (genders.includes(gender)) {
deets.inflection.forEach((inf) => {
const { error: adjErrors } = adjsMatch(
adjectives,
gender,
inf,
deets.plural
);
// one inflection number can stand for more than one inflected/number reading
convertInflection(inf, entry, gender, deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
const errors = [
...adjErrors.map((message) => ({
message,
})),
];
w.push({
tokens: rest,
body: {
inflected,
selection: {
...selection,
// the parsed gender/number only override the selection's own values
// when the selection says they can change
gender: selection.genderCanChange
? gender
: selection.gender,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection),
// TODO: could be nicer to validate that the possesor is inflected before
// and just pass in the selection
possesor,
},
},
errors,
});
}
);
});
}
});
});
});
});
return [...withAdj, ...w];
}
/**
 * Checks that every adjective agrees with a noun reading in gender and inflection.
 *
 * When `plural` is set, the inflection the adjectives are compared against is
 * shifted up by one (never past 2).
 *
 * @param adjectives the adjectives parsed in front of the noun
 * @param gender gender of the noun reading
 * @param inf inflection number of the noun reading
 * @param plural whether the noun form is marked as plural
 * @returns `ok: true` with no errors when everything agrees, otherwise
 * `ok: false` with one message per disagreeing adjective
 */
function adjsMatch(
  adjectives: Parameters<typeof parseNoun>[3],
  gender: T.Gender,
  inf: 0 | 1 | 2,
  plural: boolean | undefined
): { ok: boolean; error: string[] } {
  const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
  const unmatching = adjectives.filter(
    (adj) =>
      !adj.gender.includes(gender) ||
      !adj.inflection.some((i) => i === inflection)
  );
  if (unmatching.length === 0) {
    return {
      ok: true,
      error: [],
    };
  }
  return {
    ok: false,
    error: unmatching.map((x) => {
      // show the dictionary form in brackets when it differs from what was typed
      const adjText =
        x.given === x.selection.entry.p
          ? x.given
          : `${x.given} (${x.selection.entry.p})`;
      const wrongInflection = !x.inflection.some((i) => i === inflection);
      // name the required inflection only when the inflection is what is wrong;
      // "should be" appears once in the message (it used to be duplicated)
      const expected = wrongInflection
        ? `${showInflection(inflection)} ${gender}`
        : gender;
      return `Adjective agreement error: ${adjText} should be ${expected}.`;
    }),
  };
}
/**
 * Lists the (inflected, number) readings that an inflection number can stand
 * for on the given entry.
 *
 * When `plural` is set, the inflection is first shifted up by one (never past 2).
 * A shifted value of 0 is always plain singular and 2 is always inflected plural.
 * A shifted value of 1 can be inflected singular, plain plural, both, or neither,
 * depending on the entry's inflection pattern, its gender and whether it is plural.
 */
function convertInflection(
  inflection: 0 | 1 | 2,
  entry: T.NounEntry | T.AdjectiveEntry,
  gender: T.Gender,
  plural: boolean | undefined
): {
  inflected: boolean;
  number: T.NounNumber;
}[] {
  const pattern = getInflectionPattern(entry);
  const shifted = (plural && inflection < 2 ? inflection + 1 : inflection) as
    | 0
    | 1
    | 2;
  if (shifted === 0) {
    return [{ inflected: false, number: "singular" }];
  }
  if (shifted !== 1) {
    return [{ inflected: true, number: "plural" }];
  }
  // from here on: first inflection, which is ambiguous
  const readings: { inflected: boolean; number: T.NounNumber }[] = [];
  const isPluralWord =
    (isNounEntry(entry) && isPluralNounEntry(entry)) || plural;
  const isMascPattern4WithFinalHe =
    pattern === 4 && entry.p.endsWith("ه") && gender === "masc";
  if (!isPluralWord && !isMascPattern4WithFinalHe) {
    readings.push({ inflected: true, number: "singular" });
  }
  if (pattern > 1 || (pattern > 0 && gender === "fem") || isPluralWord) {
    readings.push({ inflected: false, number: "plural" });
  }
  return readings;
}
/**
 * Gives the human-readable name of an inflection number, for use in messages:
 * 0 is "plain", 1 is "first inflection", anything else is "second inflection".
 */
function showInflection(inf: 0 | 1 | 2): string {
  if (inf === 0) {
    return "plain";
  }
  if (inf === 1) {
    return "first inflection";
  }
  return "second inflection";
}

View File

@ -1,13 +1,12 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { parsePronoun } from "./parse-pronoun"; import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun"; import { parseNoun } from "./parse-noun-new";
import { fmapParseResult } from "../fp-ps"; import { fmapParseResult } from "../fp-ps";
import { parseParticiple } from "./parse-participle"; import { parseParticiple } from "./parse-participle";
import { LookupFunction } from "./lookup";
export function parseNP( export function parseNP(
s: Readonly<T.Token[]>, s: Readonly<T.Token[]>,
lookup: LookupFunction, dicitonary: T.DictionaryAPI,
possesor: T.PossesorSelection | undefined possesor: T.PossesorSelection | undefined
): T.ParseResult<T.ParsedNP>[] { ): T.ParseResult<T.ParsedNP>[] {
if (s.length === 0) { if (s.length === 0) {
@ -41,7 +40,7 @@ export function parseNP(
return fmapParseResult(makeNPSl, [ return fmapParseResult(makeNPSl, [
...(!possesor ? parsePronoun(s) : []), ...(!possesor ? parsePronoun(s) : []),
...parseNoun(s, lookup, possesor, []), ...parseNoun(s, dicitonary, possesor),
...parseParticiple(s, lookup, possesor), ...parseParticiple(s, dicitonary, possesor),
]); ]);
} }

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseAP } from "./parse-ap"; import { parseAP } from "./parse-ap";
import { parseNP } from "./parse-np"; import { parseNP } from "./parse-np";
import { parsePossesor } from "./parse-possesor"; import { parsePossesor } from "./parse-possesor";
@ -7,19 +6,25 @@ import { bindParseResult } from "./utils";
export function parseNPAP( export function parseNPAP(
s: Readonly<T.Token[]>, s: Readonly<T.Token[]>,
lookup: LookupFunction dictionary: T.DictionaryAPI
): T.ParseResult<T.APSelection | T.ParsedNP>[] { ): T.ParseResult<T.APSelection | T.ParsedNP>[] {
if (s.length === 0) { if (s.length === 0) {
return []; return [];
} }
const possesor = parsePossesor(s, lookup, undefined); const possesor = parsePossesor(s, dictionary, undefined);
if (!possesor.length) { if (!possesor.length) {
return [...parseNP(s, lookup, undefined), ...parseAP(s, lookup, undefined)]; return [
...parseNP(s, dictionary, undefined),
...parseAP(s, dictionary, undefined),
];
} }
return bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>( return bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>(
possesor, possesor,
(tokens, p) => { (tokens, p) => {
return [...parseNP(tokens, lookup, p), ...parseAP(tokens, lookup, p)]; return [
...parseNP(tokens, dictionary, p),
...parseAP(tokens, dictionary, p),
];
} }
); );
} }

View File

@ -4,14 +4,16 @@ import {
makePossesorSelection, makePossesorSelection,
} from "../phrase-building/make-selections"; } from "../phrase-building/make-selections";
import * as T from "../../../types"; import * as T from "../../../types";
import { lookup, wordQuery } from "./lookup"; import { testDictionary } from "./mini-test-dictionary";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { parseNPAP } from "./parse-npap"; import { parseNPAP } from "./parse-npap";
const leedul = wordQuery("لیدل", "verb"); const leedul = testDictionary.verbEntryLookup("لیدل")[0];
const akheestul = wordQuery("اخیستل", "verb"); const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
const wahul = wordQuery("وهل", "verb"); const wahul = testDictionary.verbEntryLookup("وهل")[0];
const saray = wordQuery("سړی", "noun"); const saray = testDictionary.nounLookup("سړی")[0];
// TODO: uncomment and get parsing of short participles working
const tests: { const tests: {
label: string; label: string;
@ -113,20 +115,20 @@ const tests: {
}, },
], ],
}, },
{ // {
input: "د سړي لیدو", // input: "د سړي لیدو",
output: [ // output: [
{ // {
inflected: true, // inflected: true,
selection: { // selection: {
...makeParticipleSelection(leedul), // ...makeParticipleSelection(leedul),
possesor: makePossesorSelection( // possesor: makePossesorSelection(
makeNounSelection(saray, undefined) // makeNounSelection(saray, undefined)
), // ),
}, // },
}, // },
], // ],
}, // },
], ],
}, },
]; ];
@ -136,7 +138,7 @@ describe("parsing participles", () => {
test(label, () => { test(label, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const res = parseNPAP(tokens, lookup).map(({ body }) => body); const res = parseNPAP(tokens, testDictionary).map(({ body }) => body);
expect(res).toEqual( expect(res).toEqual(
output.map( output.map(
(x): T.ParsedNP => ({ (x): T.ParsedNP => ({

View File

@ -1,5 +1,5 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup"; import { shortVerbEndConsonant } from "./misc";
type ParticipleResult = { type ParticipleResult = {
inflected: boolean; inflected: boolean;
@ -7,9 +7,10 @@ type ParticipleResult = {
}; };
// TODO: should have adverbs with participle // TODO: should have adverbs with participle
// TODO: NOTE this does not work with compound verbs yet
export function parseParticiple( export function parseParticiple(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction, dicitonary: T.DictionaryAPI,
possesor: T.PossesorSelection | undefined possesor: T.PossesorSelection | undefined
): T.ParseResult<ParticipleResult>[] { ): T.ParseResult<ParticipleResult>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -20,8 +21,13 @@ export function parseParticiple(
return []; return [];
} }
const inflected = first.s.endsWith("و"); const inflected = first.s.endsWith("و");
const matches = lookup(first.s, "participle");
return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({ return [
...dicitonary.verbEntryLookup(inflected ? first.s.slice(0, -1) : first.s),
...(inflected && shortVerbEndConsonant.includes(first.s.at(-2) || "")
? dicitonary.verbEntryLookup(first.s.slice(0, -1) + "ل")
: []),
].map<T.ParseResult<ParticipleResult>>((verb) => ({
tokens: rest, tokens: rest,
body: { body: {
inflected, inflected,

View File

@ -1,24 +1,24 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseVP } from "./parse-vp"; import { parseVP } from "./parse-vp";
// شو should not be sheyaano !! // شو should not be sheyaano !!
export function parsePhrase(s: T.Token[]): { export function parsePhrase(
success: ( s: T.Token[],
| { dicitonary: T.DictionaryAPI
inflected: boolean; ): {
selection: T.NPSelection; success: // | {
} // inflected: boolean;
| Omit<T.VBE, "ps"> // selection: T.NPSelection;
| T.VPSelectionComplete // }
)[]; // | Omit<T.VBE, "ps">
T.VPSelectionComplete[];
errors: string[]; errors: string[];
} { } {
const res = [ const res = [
// ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length), // ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
// ...parseVerb(s, verbLookup), // ...parseVerb(s, verbLookup),
...parseVP(s, lookup), ...parseVP(s, dicitonary),
]; ];
const success = res.filter((x) => !x.tokens.length).map((x) => x.body); const success = res.filter((x) => !x.tokens.length).map((x) => x.body);

View File

@ -4,16 +4,16 @@ import {
makeNounSelection, makeNounSelection,
makePronounSelection, makePronounSelection,
} from "../phrase-building/make-selections"; } from "../phrase-building/make-selections";
import { lookup, wordQuery } from "./lookup";
import { parsePossesor } from "./parse-possesor"; import { parsePossesor } from "./parse-possesor";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils"; import { isCompleteResult } from "./utils";
import { testDictionary as dictionary } from "./mini-test-dictionary";
const sturey = wordQuery("ستړی", "adj"); const sturey = dictionary.adjLookup("ستړی")[0];
const sarey = wordQuery("سړی", "noun"); const sarey = dictionary.nounLookup("سړی")[0];
const maashoom = wordQuery("ماشوم", "noun"); const maashoom = dictionary.nounLookup("ماشوم")[0];
const malguray = wordQuery("ملګری", "noun"); const malguray = dictionary.nounLookup("ملګری")[0];
const plaar = wordQuery("پلار", "noun"); const plaar = dictionary.nounLookup("پلار")[0];
const tests: { const tests: {
input: string; input: string;
@ -109,12 +109,12 @@ const tests: {
test("parse possesor", () => { test("parse possesor", () => {
tests.forEach(({ input, output }) => { tests.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const parsed = parsePossesor(tokens, lookup, undefined); const parsed = parsePossesor(tokens, dictionary, undefined);
if (output === "error") { if (output === "error") {
expect(parsed.some((x) => x.errors.length)).toBe(true); expect(parsed.some((x) => x.errors.length)).toBe(true);
} else { } else {
expect( expect(
parsePossesor(tokens, lookup, undefined) parsePossesor(tokens, dictionary, undefined)
.filter(isCompleteResult) .filter(isCompleteResult)
.map((x) => x.body.np.selection) .map((x) => x.body.np.selection)
).toEqual(output); ).toEqual(output);

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseNP } from "./parse-np"; import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils"; import { bindParseResult } from "./utils";
// TODO: maybe contractions should just be male to cut down on the // TODO: maybe contractions should just be male to cut down on the
@ -19,7 +18,7 @@ const contractions: [string[], T.Person[]][] = [
export function parsePossesor( export function parsePossesor(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction, dictionary: T.DictionaryAPI,
prevPossesor: T.PossesorSelection | undefined prevPossesor: T.PossesorSelection | undefined
): T.ParseResult<T.PossesorSelection>[] { ): T.ParseResult<T.PossesorSelection>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -43,14 +42,14 @@ export function parsePossesor(
? [{ message: "a pronoun cannot have a possesor" }] ? [{ message: "a pronoun cannot have a possesor" }]
: []; : [];
return contractions return contractions
.flatMap((p) => parsePossesor(rest, lookup, p)) .flatMap((p) => parsePossesor(rest, dictionary, p))
.map((x) => ({ .map((x) => ({
...x, ...x,
errors: [...errors, ...x.errors], errors: [...errors, ...x.errors],
})); }));
} }
if (first.s === "د") { if (first.s === "د") {
const np = parseNP(rest, lookup, undefined); const np = parseNP(rest, dictionary, undefined);
return bindParseResult(np, (tokens, body) => { return bindParseResult(np, (tokens, body) => {
const possesor: T.PossesorSelection = { const possesor: T.PossesorSelection = {
shrunken: false, shrunken: false,
@ -63,7 +62,11 @@ export function parsePossesor(
[{ message: `possesor should be inflected` }] [{ message: `possesor should be inflected` }]
: [], : [],
// add and check error - can't add possesor to pronoun // add and check error - can't add possesor to pronoun
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)), next: parsePossesor(
tokens,
dictionary,
addPoss(prevPossesor, possesor)
),
}; };
}); });
} }

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { sandwiches } from "../sandwiches"; import { sandwiches } from "../sandwiches";
import { parseNP } from "./parse-np"; import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils"; import { bindParseResult } from "./utils";
@ -14,7 +13,7 @@ import { bindParseResult } from "./utils";
export function parseSandwich( export function parseSandwich(
s: Readonly<T.Token[]>, s: Readonly<T.Token[]>,
lookup: LookupFunction, dictionary: T.DictionaryAPI,
possesor: T.PossesorSelection | undefined possesor: T.PossesorSelection | undefined
): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] { ): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
if (s.length === 0) { if (s.length === 0) {
@ -27,7 +26,7 @@ export function parseSandwich(
(x) => x.before && x.before.p === first.s (x) => x.before && x.before.p === first.s
); );
// TODO: this could be be really repetitive... // TODO: this could be be really repetitive...
const nps = parseNP(startMatches.length ? rest : s, lookup, possesor); const nps = parseNP(startMatches.length ? rest : s, dictionary, possesor);
return bindParseResult(nps, (tokens, np) => { return bindParseResult(nps, (tokens, np) => {
if (!tokens.length) { if (!tokens.length) {
return []; return [];

View File

@ -7,28 +7,35 @@ import {
wartlul, wartlul,
raatlul, raatlul,
} from "./irreg-verbs"; } from "./irreg-verbs";
import { lookup, wordQuery } from "./lookup"; import { parseVBE } from "./parse-vbe-new";
import { parseVBE } from "./parse-vbe";
import { tokenizer } from "./tokenizer"; import { tokenizer } from "./tokenizer";
import { getPeople, removeKeys } from "./utils"; import { getPeople, removeKeys } from "./utils";
import { testDictionary } from "./mini-test-dictionary";
const wahul = wordQuery("وهل", "verb"); const wahul = testDictionary.verbEntryLookup("وهل")[0];
const leekul = wordQuery("لیکل", "verb"); const leekul = testDictionary.verbEntryLookup("لیکل")[0];
const manul = wordQuery("منل", "verb"); const manul = testDictionary.verbEntryLookup("منل")[0];
// const gaalul = wordQuery("ګالل", "verb"); const gaalul = testDictionary.verbEntryLookup("ګالل")[0];
const rasedul = wordQuery("رسېدل", "verb"); const rasedul = testDictionary.verbEntryLookup("رسېدل")[0];
const leedul = wordQuery("لیدل", "verb"); const leedul = testDictionary.verbEntryLookup("لیدل")[0];
const khorul = wordQuery("خوړل", "verb"); const awuxtul = testDictionary.verbEntryLookup("اوښتل")[0];
const kenaastul = wordQuery("کېناستل", "verb"); const khorul = testDictionary.verbEntryLookup("خوړل")[0];
const prexodul = wordQuery("پرېښودل", "verb"); const kenaastul = testDictionary.verbEntryLookup("کېناستل")[0];
const xodul = wordQuery("ښودل", "verb"); const kxenaastul = testDictionary.verbEntryLookup("کښېناستل")[0];
const kexodul = wordQuery("کېښودل", "verb"); const prexodul = testDictionary.verbEntryLookup("پرېښودل")[0];
const katul = wordQuery("کتل", "verb"); const prexowul = testDictionary.verbEntryLookup("پرېښوول")[0];
const watul = wordQuery("وتل", "verb"); const prexawul = testDictionary.verbEntryLookup("پرېښول")[0];
const wurul = wordQuery("وړل", "verb"); const xodul = testDictionary.verbEntryLookup("ښودل")[0];
const akheestul = wordQuery("اخیستل", "verb"); const kexodul = testDictionary.verbEntryLookup("کېښودل")[0];
const alwatul = wordQuery("الوتل", "verb"); const kxexodul = testDictionary.verbEntryLookup("کښېښودل")[0];
// const dartlul = wordQuery("درتلل", "verb") const katul = testDictionary.verbEntryLookup("کتل")[0];
const watul = testDictionary.verbEntryLookup("وتل")[0];
const wurul = testDictionary.verbEntryLookup("وړل")[0];
const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
const alwatul = testDictionary.verbEntryLookup("الوتل")[0];
const dartlul = testDictionary.verbEntryLookup("درتلل")[0];
// TODO: Prefix searching on split verbs for perfective head parsing
// TODO: azmoyul etc // TODO: azmoyul etc
// TODO: cleaner and more thorough handling of ا seperating verbs ee - wee etc // TODO: cleaner and more thorough handling of ا seperating verbs ee - wee etc
@ -311,19 +318,6 @@ const tests: {
}, },
], ],
}, },
{
input: "وینم",
output: [
{
stem: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["imperfective", "perfective"],
},
verb: leedul,
},
],
},
// TODO!! THESE COULD ALSO BE MALE
{ {
input: "لیده", input: "لیده",
output: [ output: [
@ -364,42 +358,6 @@ const tests: {
}, },
], ],
}, },
{
input: "خوړ",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: khorul,
},
],
},
{
input: "کوت",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{
input: "کاته",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{ {
input: "خلم", input: "خلم",
output: [ output: [
@ -436,6 +394,11 @@ const tests: {
}, },
], ],
}, },
],
},
{
label: "verbs with seperating perfective heads",
cases: [
{ {
input: "الوځې", input: "الوځې",
output: [ output: [
@ -460,6 +423,18 @@ const tests: {
}, },
], ],
}, },
{
input: "لوتلم",
output: [
{
root: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: alwatul,
},
],
},
], ],
}, },
{ {
@ -492,6 +467,13 @@ const tests: {
}, },
verb: kenaastul, verb: kenaastul,
}, },
{
stem: {
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
aspects: ["perfective"],
},
verb: kxenaastul,
},
], ],
}, },
{ {
@ -507,46 +489,64 @@ const tests: {
], ],
}, },
{ {
input: "ناست", input: "کېناسته",
output: [ output: [
{ {
root: { root: {
persons: [T.Person.ThirdSingMale], persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["perfective"], aspects: ["imperfective"],
}, },
verb: kenaastul, verb: kenaastul,
}, },
], ],
}, },
{ {
input: "پرېږدو", input: "ناست",
output: [ output: [kenaastul, kxenaastul].map((verb) => ({
{ root: {
stem: { persons: [T.Person.ThirdSingMale],
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], aspects: ["perfective"],
aspects: ["imperfective"],
},
verb: prexodul,
}, },
], verb,
})),
},
{
input: "ناسته",
output: [kenaastul, kxenaastul].map((verb) => ({
root: {
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb,
})),
},
{
input: "پرېږدو",
output: [prexodul, prexowul, prexawul].map((verb) => ({
stem: {
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
aspects: ["imperfective"],
},
verb,
})),
}, },
{ {
input: "ږدو", input: "ږدو",
output: [ output: [
{ ...[prexodul, prexawul, prexowul, kexodul, kxexodul].map((verb) => ({
stem: { stem: {
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
aspects: ["perfective"], aspects: ["perfective"] satisfies T.Aspect[],
}, },
verb: prexodul, verb,
}, })),
{ ...[kexodul, kxexodul].map((verb) => ({
stem: { stem: {
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
aspects: ["imperfective", "perfective"], aspects: ["imperfective"] satisfies T.Aspect[],
}, },
verb: kexodul, verb,
}, })),
], ],
}, },
{ {
@ -571,20 +571,13 @@ const tests: {
}, },
verb: xodul, verb: xodul,
}, },
{ ...[prexodul, kexodul, kxexodul].map((verb) => ({
root: { root: {
persons: [T.Person.ThirdSingFemale], persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"], aspects: ["perfective"] satisfies T.Aspect[],
}, },
verb: prexodul, verb,
}, })),
{
root: {
persons: [T.Person.ThirdSingFemale],
aspects: ["perfective"],
},
verb: kexodul,
},
], ],
}, },
{ {
@ -661,43 +654,9 @@ const tests: {
}, },
], ],
}, },
{
input: "ړلم",
output: [
{
root: {
persons: getPeople(1, "sing"),
aspects: ["perfective"],
},
verb: wurul,
},
{
root: {
persons: getPeople(1, "sing"),
aspects: ["perfective"],
},
verb: tlul,
},
],
},
{ {
input: "ړ", input: "ړ",
output: [ output: [],
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: wurul,
},
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: tlul,
},
],
}, },
// should not match with the prefix for perfective // should not match with the prefix for perfective
{ {
@ -713,6 +672,78 @@ const tests: {
{ {
label: "verbs with different 3rd pers sing past endings", label: "verbs with different 3rd pers sing past endings",
cases: [ cases: [
{
input: "خوړ",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: khorul,
},
],
},
{
input: "خوړه",
output: [
{
root: {
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["imperfective", "perfective"],
},
verb: khorul,
},
],
},
{
input: "کوت",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{
input: "کاته",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: katul,
},
],
},
{
input: "واته",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: watul,
},
],
},
{
input: "ووت",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"],
},
verb: watul,
},
],
},
{ {
input: "رسېد", input: "رسېد",
output: [ output: [
@ -725,6 +756,18 @@ const tests: {
}, },
], ],
}, },
{
input: "رسېده",
output: [
{
root: {
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["imperfective", "perfective"],
},
verb: rasedul,
},
],
},
{ {
input: "کېناسته", input: "کېناسته",
output: [ output: [
@ -766,27 +809,69 @@ const tests: {
], ],
}, },
{ {
input: "واته", input: "اوښت",
output: [ output: [
{ {
root: { root: {
persons: [T.Person.ThirdSingMale], persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"], aspects: ["imperfective"],
}, },
verb: watul, verb: awuxtul,
}, },
], ],
}, },
{ {
input: "ووت", input: "ښت",
output: [],
},
{
input: "اوښته",
output: [
{
root: {
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["imperfective"],
},
verb: awuxtul,
},
],
},
{
input: "ښود",
output: [ output: [
{ {
root: { root: {
persons: [T.Person.ThirdSingMale], persons: [T.Person.ThirdSingMale],
aspects: ["imperfective", "perfective"], aspects: ["imperfective", "perfective"],
}, },
verb: watul, verb: xodul,
}, },
...[prexodul, kexodul, kxexodul].map((verb) => ({
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"] satisfies T.Aspect[],
},
verb,
})),
],
},
{
input: "ښوده",
output: [
{
root: {
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["imperfective", "perfective"],
},
verb: xodul,
},
...[prexodul, kexodul, kxexodul].map((verb) => ({
root: {
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
aspects: ["perfective"] satisfies T.Aspect[],
},
verb,
})),
], ],
}, },
], ],
@ -971,7 +1056,7 @@ tests.forEach(({ label, cases }) => {
test(label, () => { test(label, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const vbs = parseVBE(tokens, lookup).map((r) => r.body); const vbs = parseVBE(tokens, testDictionary).map((r) => r.body);
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => { const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
return [ return [
...acc, ...acc,

View File

@ -0,0 +1,387 @@
import * as T from "../../../types";
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
import { parseKedul } from "./parse-kedul";
import { getVerbEnding } from "./parse-verb-helpers";
import { returnParseResults } from "./utils";
import { entries as splitVerbEntries } from "./split-verbs";
import * as tp from "../type-predicates";
import memoize from "micro-memoize";
import { pashtoConsonants } from "../pashto-consonants";
// TODO: و ارزول
// TODO: کول verbs!
// check that aawu stuff is working
// check oo`azmooy -
// TODO: proper use of sepOo (hasBreakawayAleph) when checking for perfective roots/stems
// check څاته
// laaRa shum etc
// TODO: proper use of perfective with sh
// TODO: use of raa, dar, war with sh
// TODO: هغه لاړ
// TODO: don't have کول کېدل in split-verbs
type BaseInfo = Extract<T.ParsedVBE["info"], { type: "verb" }>;
type StemInfo = Omit<BaseInfo, "base"> & {
base: "stem";
};
type RootInfo = Omit<BaseInfo, "base"> & {
base: "root";
};
/**
 * Tries to read the first token as a verb block with an ending.
 *
 * The hard-coded irregular forms (see `parseIrregularVerb`) are tried first
 * and, if any match, are returned on their own. Otherwise the results of
 * `parseKedul` are combined with every stem-based and root-based reading
 * found for the token.
 *
 * @param tokens - the remaining tokens; only the first one is examined here
 * @param dictionary - dictionary API used for the stem/root lookups
 * @returns one parse result per possible reading, each carrying the tokens
 *   that follow the first one; empty when there are no tokens
 */
export function parseVBE(
  tokens: Readonly<T.Token[]>,
  dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedVBE>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const irregulars = parseIrregularVerb(first.s);
  if (irregulars.length > 0) {
    return returnParseResults(rest, irregulars);
  }
  const kedulStat = parseKedul(tokens);
  const ending = first.s.at(-1) || "";
  // a final ل stays part of the base; any other final letter is taken off as the ending
  const base = ending === "ل" ? first.s : first.s.slice(0, -1);
  const people = getVerbEnding(ending);
  // TODO: imperative for separating verbs
  const imperativePeople = getImperativeVerbEnding(ending);

  // stem-based readings: regular persons plus imperative persons for each stem found
  const stemInfos = [
    ...findImperfectiveStem(base, dictionary),
    ...findPerfectiveStem(base, dictionary),
  ];
  const stemBodies: T.ParsedVBE[] = [];
  for (const info of stemInfos) {
    for (const person of people.stem) {
      stemBodies.push({
        type: "VB",
        person,
        info,
      });
    }
    for (const person of imperativePeople) {
      stemBodies.push({
        type: "VB",
        person,
        info: {
          ...info,
          imperative: true,
        },
      });
    }
  }
  const stemRes = returnParseResults(rest, stemBodies);

  // root-based readings: the short 3rd pers. masc. sing. form (if applicable)
  // comes before the regular persons for each root found
  const rootInfos = [
    ...findImperfectiveRoot(base, dictionary),
    ...findPerfectiveRoot(base, dictionary),
  ];
  const rootBodies: T.ParsedVBE[] = [];
  for (const info of rootInfos) {
    rootBodies.push(...thirdPersSingMascShortFromRoot(base, ending, info));
    for (const person of people.root) {
      rootBodies.push({
        type: "VB",
        person,
        info,
      });
    }
  }
  rootBodies.push(...specialThirdPersMascSingForm(base, ending, dictionary));
  const rootRes = returnParseResults(rest, rootBodies);

  return [...kedulStat, ...stemRes, ...rootRes];
}
/**
 * Finds third person masculine singular past readings that cannot be produced
 * by simply attaching a regular ending to a root.
 *
 * Two sources are combined:
 *  - "regular": dictionary entries found through their `tppp` field
 *  - "hardEnding": forms ending in د (after ې / و) or ت (after س / ښ) which
 *    are looked up as roots by adding ل back on
 *
 * @param base - the token without its final letter (or the whole token when it ends in ل)
 * @param ending - the final letter of the token
 * @param dicitonary - dictionary API used for the lookups
 * @returns the matching readings, `tppp`-based ones first; empty unless the
 *   ending is ه or a Pashto consonant
 */
function specialThirdPersMascSingForm(
  base: string,
  ending: string,
  dicitonary: T.DictionaryAPI
): T.ParsedVBE[] {
  const endingAllowed = ending === "ه" || pashtoConsonants.includes(ending);
  if (!endingAllowed) {
    return [];
  }
  // const imperfectiveWSep = [base + ending, ...(ending === "ه" ? [base] : [])]
  //   .flatMap((v) =>
  //     splitVerbEntries.filter((entry) => entry.entry.p.slice(0, -1) === v)
  //   )
  //   .map<T.ParsedVBE>((verb) => ({
  //     type: "VB",
  //     person: T.Person.ThirdSingMale,
  //     info: {
  //       type: "verb",
  //       aspect: "imperfective",
  //       base: "root",
  //       verb,
  //     },
  //   }));
  // const perfectiveWSep = [base + ending, ...(ending === "ه" ? [base] : [])]
  //   .flatMap((v) => {
  //     const b = splitVerbEntries.filter(({ entry }) => {
  //       if (entry.tppp) {
  //         return splitVarients(entry.tppp).some(
  //           (varient) => varient.slice(entry.separationAtP) === v
  //         );
  //       } else {
  //         return entry.p.slice(entry.separationAtP, -1) === v;
  //       }
  //     });
  //     return b;
  //   })
  //   .map<T.ParsedVBE>((verb) => ({
  //     type: "VB",
  //     person: T.Person.ThirdSingMale,
  //     info: {
  //       type: "verb",
  //       aspect: "perfective",
  //       base: "root",
  //       verb,
  //     },
  //   }));

  const baseEndsWithAny = (letters: string[]): boolean =>
    letters.some((x) => base.endsWith(x));
  const hasHardEnding =
    (ending === "د" && baseEndsWithAny(["ې", "و"])) ||
    (ending === "ت" && baseEndsWithAny(["س", "ښ"]) && base.length > 1);
  // the bare consonant-final form is treated as a root missing its final ل
  const longForm = base + ending + "ل";
  const hardEnding: T.ParsedVBE[] = hasHardEnding
    ? [
        ...findPerfectiveRoot(longForm, dicitonary),
        ...findImperfectiveRoot(longForm, dicitonary),
      ].map<T.ParsedVBE>((info) => ({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info,
      }))
    : [];

  // which aspects a tppp match may stand for
  const aspectsFor = (
    entry: T.VerbDictionaryEntry
  ): readonly ("imperfective" | "perfective")[] => {
    if (entry.separationAtP) {
      return ["imperfective"];
    }
    // NOT IF STARTS WITH ALEPH!
    // the verb starts with an aleph but the input does not, so only the
    // perfective reading is offered
    if (startsWithAleph(entry.p) && !startsWithAleph(base)) {
      return ["perfective"];
    }
    return ["imperfective", "perfective"];
  };

  // with a final ه the form is tried both with and without that ه
  const candidates = ending === "ه" ? [base + ending, base] : [base + ending];
  const regular: T.ParsedVBE[] = candidates
    .flatMap((c) => withAlefAdded(c))
    // NOTE(review): the meaning of the third argument (`true`) is defined by
    // the DictionaryAPI implementation and is not visible here
    .flatMap((v) => dicitonary.otherLookup("tppp", v, true))
    .filter(
      (e): e is T.VerbDictionaryEntry =>
        tp.isVerbDictionaryEntry(e) && !e.l && !!e.tppp
    )
    .flatMap((entry) =>
      aspectsFor(entry).map<T.ParsedVBE>((aspect) => ({
        type: "VB" as const,
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect,
          base: "root",
          verb: { entry },
        } as const,
      }))
    );

  return [...regular, ...hardEnding];
  // ...imperfectiveWSep, ...perfectiveWSep];
}
/**
 * Adds a third person masculine singular reading for a root match when the
 * token ends in ه on a short root.
 *
 * @param base - the token without its final letter
 * @param ending - the final letter of the token
 * @param info - the root match being considered
 * @returns a single ThirdSingMale reading when the verb entry has no `tppp`,
 *   the ending is ه and the base does not end in ل; otherwise nothing
 */
function thirdPersSingMascShortFromRoot(
  base: string,
  ending: string,
  info: RootInfo
): T.ParsedVBE[] {
  const applies =
    !info.verb.entry.tppp && ending === "ه" && !base.endsWith("ل");
  return applies
    ? [
        {
          type: "VB",
          person: T.Person.ThirdSingMale,
          info,
        },
      ]
    : [];
}
/**
 * Looks for verbs whose imperfective stem is `s`.
 *
 * Regular stems are found through `regStemSearch`; irregular ones through
 * dictionary entries looked up by their `psp` field.
 *
 * @param s - the candidate stem (token with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 * @returns imperfective stem infos, regular matches first; empty for the
 *   excluded کېدل / شول forms
 */
function findImperfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
): StemInfo[] {
  const excluded = ["کېږ", "کېد", "ش", "شو", "شول"];
  if (excluded.includes(s)) {
    return [];
  }
  const regulars = regStemSearch(s, dicitonary);
  const irregularEntries = dicitonary
    .otherLookup("psp", s)
    .filter(
      (e): e is T.VerbDictionaryEntry => tp.isVerbDictionaryEntry(e) && !e.l
    );
  const irregulars = irregularEntries.map<T.VerbEntry>((entry) => ({ entry }));
  const verbs = [...regulars, ...irregulars];
  return verbs.map<StemInfo>((verb) => ({
    type: "verb",
    aspect: "imperfective",
    base: "stem",
    verb,
  }));
}
/**
 * Expands a search string with the variants that have an aleph in front.
 *
 * @param s - the string to expand
 * @returns just `s` when it already starts with ا or آ; otherwise `s`
 *   followed by `s` prefixed with ا and with آ
 */
function withAlefAdded(s: string): string[] {
  if (startsWithAleph(s)) {
    return [s];
  }
  return [s, "ا" + s, "آ" + s];
}
/**
 * Memoized search of the split-verb entries for those whose stem form,
 * taken from `separationAtP` onwards, equals `s`. The stem form is the first
 * of `ssp`, `psp` and `p` that is present on the entry.
 */
const stemSplitLookup = memoize((s: string) =>
  splitVerbEntries.filter(({ entry }) => {
    const stemForm = entry.ssp || entry.psp || entry.p;
    const afterHead = stemForm.slice(entry.separationAtP || 0);
    return afterHead === s;
  })
);
/**
 * Looks for verbs whose perfective stem is `s`.
 *
 * Three sources are combined, in this order:
 *  - regular stems (via `regStemSearch`) of verbs without `separationAtP`
 *  - entries found by `psp` that have no `ssp` and no `separationAtP`
 *  - split-verb entries matched by `stemSplitLookup`
 * The first two are searched for `s` as well as `s` with an aleph added.
 *
 * @param s - the candidate stem (token with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 * @returns perfective stem infos; empty for the excluded کېدل / شول forms
 *   and for anything starting with an aleph
 */
function findPerfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
): StemInfo[] {
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s) || startsWithAleph(s)) {
    return [];
  }
  const queries = withAlefAdded(s);

  const regulars: T.VerbEntry[] = [];
  for (const q of queries) {
    for (const found of regStemSearch(q, dicitonary)) {
      if (!found.entry.separationAtP) {
        regulars.push(found);
      }
    }
  }

  const irregularsBasedOnImperf = queries
    .flatMap((q) => dicitonary.otherLookup("psp", q))
    .filter(
      (e): e is T.VerbDictionaryEntry =>
        tp.isVerbDictionaryEntry(e) && !e.l && !e.ssp && !e.separationAtP
    )
    .map<T.VerbEntry>((entry) => ({ entry }));

  const verbs = [
    ...regulars,
    ...irregularsBasedOnImperf,
    ...stemSplitLookup(s),
  ];
  return verbs.map<StemInfo>((verb) => ({
    type: "verb",
    aspect: "perfective",
    base: "stem",
    verb,
  }));
}
/**
 * Finds verbs with a regular stem matching `s` by rebuilding the dictionary
 * form and looking that up.
 *
 * - transitive guess: `s` + ل, keeping entries whose `c` does not include "intrans"
 * - intransitive guess: `s` (minus a trailing ېږ) + ېدل, keeping entries
 *   whose `c` includes "intrans"
 * In both cases entries whose `c` includes "comp", or that have an `ssp` or
 * `psp`, are left out.
 *
 * @param s - the candidate stem
 * @param dicitonary - dictionary API used for the lookups
 * @returns transitive matches followed by intransitive matches
 */
function regStemSearch(s: string, dicitonary: T.DictionaryAPI): T.VerbEntry[] {
  const isPlainRegular = ({ entry }: T.VerbEntry): boolean =>
    !entry.c.includes("comp") && !entry.ssp && !entry.psp;
  const isIntransitive = ({ entry }: T.VerbEntry): boolean =>
    entry.c.includes("intrans");

  const transitiveForm = s + "ل";
  const intransitiveForm = (s.endsWith("ېږ") ? s.slice(0, -2) : s) + "ېدل";

  const regTrans = dicitonary
    .verbEntryLookup(transitiveForm)
    .filter((e) => isPlainRegular(e) && !isIntransitive(e));
  const regIntrans = dicitonary
    .verbEntryLookup(intransitiveForm)
    .filter((e) => isPlainRegular(e) && isIntransitive(e));
  return [...regTrans, ...regIntrans];
}
/**
 * Looks for verbs whose imperfective root is `s`, trying `s` both as it is
 * and with ل added on the end.
 *
 * @param s - the candidate root (token with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 * @returns imperfective root infos for every match whose `c` does not
 *   include "comp"; empty for the excluded کېدل / شول forms
 */
function findImperfectiveRoot(
  s: string,
  dicitonary: T.DictionaryAPI
): RootInfo[] {
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  const reg = [s, s + "ل"]
    // Call the lookup through an arrow function rather than handing the
    // method reference straight to flatMap: that keeps `this` bound to the
    // dictionary and stops flatMap's extra (index, array) arguments from
    // reaching the lookup (which would also break any argument-keyed caching
    // inside it).
    .flatMap((query) => dicitonary.verbEntryLookup(query))
    .filter((e) => !e.entry.c.includes("comp"));
  return reg.map((verb) => ({
    type: "verb",
    aspect: "imperfective",
    base: "root",
    verb,
  }));
}
/**
 * Memoized search of the split-verb entries for those whose root form, taken
 * from `separationAtP` onwards, equals `s` or `s` + ل. The root form is `prp`
 * when present, otherwise `p`.
 */
const rootSplitLookup = memoize((s: string) => {
  const withLaam = s + "ل";
  return splitVerbEntries.filter(({ entry }) => {
    const afterHead = (entry.prp || entry.p).slice(entry.separationAtP || 0);
    return afterHead === s || afterHead === withLaam;
  });
});
/**
 * Looks for verbs whose perfective root is `s`.
 *
 * Regular matches are searched with `s` and `s` + ل, each also tried with an
 * aleph added in front, and are limited to entries that have no `prp`, no
 * `separationAtP` and whose `c` does not include "comp". Split-verb matches
 * from `rootSplitLookup` are appended after them.
 *
 * @param s - the candidate root (token with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 * @returns perfective root infos; empty for anything starting with an aleph
 *   and for the excluded کېدل / شول forms
 */
function findPerfectiveRoot(
  s: string,
  dicitonary: T.DictionaryAPI
): RootInfo[] {
  if (startsWithAleph(s) || ["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  const reg = [s, s + "ل"]
    .flatMap((query) => withAlefAdded(query))
    // Call the lookup through an arrow function rather than handing the
    // method reference straight to flatMap: that keeps `this` bound to the
    // dictionary and stops flatMap's extra (index, array) arguments from
    // reaching the lookup (which would also break any argument-keyed caching
    // inside it).
    .flatMap((query) => dicitonary.verbEntryLookup(query))
    .filter(
      (e) =>
        !e.entry.c.includes("comp") && !e.entry.prp && !e.entry.separationAtP
    );
  return [...reg, ...rootSplitLookup(s)].map((verb) => ({
    type: "verb",
    aspect: "perfective",
    base: "root",
    verb,
  }));
}
/**
 * Maps a verb ending to the persons it can stand for in the imperative.
 *
 * @param e - the final letter of the token
 * @returns the second person singular pair for ه, the second person plural
 *   pair for ئ, and nothing for any other ending
 */
function getImperativeVerbEnding(e: string): T.Person[] {
  switch (e) {
    case "ه":
      return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
    case "ئ":
      return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
    default:
      return [];
  }
}
// TODO: could handle all sh- verbs for efficiencies sake
/**
 * Recognises the hard-coded irregular forms ته / راته / ورته / درته.
 *
 * @param s - the whole token text
 * @returns a single imperfective root, third person masculine singular
 *   reading of the matching verb (tlul, raatlul, wartlul or dartlul), or
 *   nothing when `s` is not one of the four forms
 */
function parseIrregularVerb(s: string): T.ParsedVBE[] {
  const verb =
    s === "راته"
      ? raatlul
      : s === "ورته"
      ? wartlul
      : s === "درته"
      ? dartlul
      : s === "ته"
      ? tlul
      : undefined;
  if (!verb) {
    return [];
  }
  return [
    {
      type: "VB",
      info: {
        aspect: "imperfective",
        base: "root",
        type: "verb",
        verb,
      },
      person: T.Person.ThirdSingMale,
    },
  ];
}
// function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
// return !e.sepOo && startsWithAleph(e.p);
// }
/**
 * Tells whether a string begins with one of the two aleph letters (ا or آ).
 *
 * @param base - the string to check; an empty string gives `false`
 */
function startsWithAleph(base: string): boolean {
  const firstLetter = base.charAt(0);
  return firstLetter === "ا" || firstLetter === "آ";
}

View File

@ -1,354 +0,0 @@
import * as T from "../../../types";
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
import { isInVarients, lastVowelNotA } from "../p-text-helpers";
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
import { LookupFunction } from "./lookup";
import { shortVerbEndConsonant } from "./misc";
import { parseKedul } from "./parse-kedul";
import { getVerbEnding } from "./parse-verb-helpers";
// TODO: کول verbs!
// check that aawu stuff is working
// check oo`azmooy -
// check څاته
// laaRa shum etc
// TODO: proper use of perfective with sh
// TODO: use of raa, dar, war with sh
// TODO: هغه لاړ
/**
 * Tries to read the first token as a verb block with an ending, using a
 * broad dictionary lookup followed by exact matching.
 *
 * The hard-coded irregular forms are tried first and returned on their own
 * when they match. Otherwise the `parseKedul` results are followed by every
 * reading that `matchVerbs` confirms among the looked-up verbs.
 *
 * @param tokens - the remaining tokens; only the first one is examined here
 * @param lookup - lookup function used to fetch the pool of candidate verbs
 * @returns one parse result per possible reading, each carrying the tokens
 *   that follow the first one and no errors; empty when there are no tokens
 */
export function parseVBE(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.ParsedVBE>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  // wraps a parsed verb into a parse result that consumes the first token
  const toResult = (body: T.ParsedVBE) => ({
    tokens: rest,
    body,
    errors: [],
  });
  const irregResults = parseIrregularVerb(first.s);
  if (irregResults.length > 0) {
    return irregResults.map((body) => toResult(body));
  }
  const kedulStat = parseKedul(tokens);
  const ending = first.s.at(-1) || "";
  const people = getVerbEnding(ending);
  const imperativePeople = getImperativeVerbEnding(ending);
  // First do a rough verb lookup to grab a wide pool of possible verbs
  // (low searching complexity for fast lookup)
  // TODO: can optimize this to not have to look for possible stems/roots if none
  const verbs = lookup(first.s, "verb");
  // Then find out which ones match exactly and how
  const matched = matchVerbs(first.s, verbs, people, imperativePeople);
  return [...kedulStat, ...matched.map((body) => toResult(body))];
}
/**
 * Works out every way the word `s` can be read as a form of one of the
 * candidate verb `entries`.
 *
 * Three families of readings are tried, in this order:
 *  1. stem forms (only when `people.stem` or `imperativePeople` is non-empty),
 *     producing one result per matching entry, aspect and person, plus
 *     imperative results for each person in `imperativePeople`;
 *  2. root forms (only when `people.root` is non-empty);
 *  3. third person singular masculine forms, which are always tried and are
 *     reported with base "root" and person ThirdSingMale.
 *
 * Entries whose `c` field includes "comp" are skipped in every family.
 *
 * @param s - the word to analyse, including its ending
 * @param entries - candidate verbs to test against
 * @param people - persons the word's ending allows for root and stem forms
 * @param imperativePeople - persons the ending allows for imperative forms
 * @returns all readings found (possibly empty), imperfective before perfective
 *   within each family
 */
function matchVerbs(
  s: string,
  entries: T.VerbEntry[],
  people: {
    root: T.Person[];
    stem: T.Person[];
  },
  imperativePeople: T.Person[]
): T.ParsedVBE[] {
  const w: T.ParsedVBE[] = [];
  const lEnding = s.endsWith("ل");
  // a word ending in ل is compared whole; otherwise the last letter is
  // treated as the ending and dropped
  const base = s.endsWith("ل") ? s : s.slice(0, -1);
  // these bases are never matched here
  // NOTE(review): presumably because parseKedul covers them - confirm
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(base)) {
    return [];
  }
  // matches x itself or, when the word does not end in ل, x minus its last letter
  const matchShortOrLong = (b: string, x: string) => {
    return b === x || (!lEnding && b === x.slice(0, -1));
  };
  if (people.stem.length || imperativePeople.length) {
    const stemMatches = {
      imperfective: entries.filter(({ entry: e }) => {
        if (e.c.includes("comp")) {
          return false;
        }
        if (e.psp) {
          return e.psp === base;
        }
        if (e.c.includes("intrans.")) {
          // miniRoot is false for کېدل itself, so neither comparison can match
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          return miniRoot + "ېږ" === base || miniRoot === base;
        } else {
          return e.p.slice(0, -1) === base;
        }
      }),
      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        // the base with a leading alef put back on
        const baseWAa = "ا" + base;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.ssp) {
          if (e.separationAtP) {
            // only the part after the separation point is compared
            const bRest = e.ssp.slice(e.separationAtP);
            if (bRest === base) {
              return [...acc, entry];
            }
          } else {
            if (e.ssp === base) {
              return [...acc, entry];
            }
          }
        } else if (e.psp) {
          if (hasBreakawayAlef(e) && startsWithAleph(base)) {
            return acc;
          }
          if (e.separationAtP) {
            const bRest = e.psp.slice(e.separationAtP);
            if (bRest === base) {
              return [...acc, entry];
            }
          } else {
            if (!e.sepOo) {
              if (baseWAa === e.psp) {
                return [...acc, entry];
              }
            }
            if (base === e.psp) {
              return [...acc, entry];
            }
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        } else if (e.c.includes("intrans.")) {
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
          if ([miniRoot, miniRootEg].includes(base)) {
            return [...acc, entry];
          }
        } else {
          const eb = e.p.slice(0, -1);
          if (eb === base) {
            return [...acc, entry];
          } else if (!e.sepOo) {
            // compare the alef-restored base with the entry's own base, as the
            // psp branch above does; the previous comparison against
            // base.slice(1) could never be true because of the length mismatch
            if (baseWAa === eb) {
              return [...acc, entry];
            }
          }
        }
        return acc;
      }, []),
    };
    Object.entries(stemMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.stem.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
        imperativePeople.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
              imperative: true,
            },
          });
        });
      });
    });
  }
  if (people.root.length) {
    const rootMatches = {
      imperfective: entries.filter(
        ({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
      ),
      perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
        const e = entry.entry;
        if (e.c.includes("comp")) {
          return acc;
        }
        if (e.separationAtP) {
          const b = e.prp || e.p;
          const bRest = b.slice(e.separationAtP);
          if (matchShortOrLong(base, bRest)) {
            return [...acc, entry];
          }
        } else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
          return acc;
        } else {
          const p = e.prp || e.p;
          // also try the base with a leading alef put back on
          if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
            return [...acc, entry];
          }
        }
        return acc;
      }, []),
    };
    Object.entries(rootMatches).forEach(([aspect, entries]) => {
      entries.forEach((verb) => {
        people.root.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "root",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
      });
    });
  }
  // third person singular masculine forms, classified by how the word ends
  const hamzaEnd = s.at(-1) === "ه";
  const oEnd = s.at(-1) === "و";
  const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
  const tppMatches = {
    imperfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      if (!e.prp && isInVarients(e.tppp, s)) {
        return [...acc, entry];
      }
      if (oEnd && matchShortOrLong(base, e.p)) {
        return [...acc, entry];
      }
      if (
        lastVowelNotA(e.g.slice(0, -2)) &&
        // "XX" is a placeholder that is not expected to equal any entry
        (hamzaEnd ? base : abruptEnd ? s : "XX") === e.p.slice(0, -1)
      ) {
        return [...acc, entry];
      }
      // TODO: if check for modified aaXu thing!
      return acc;
    }, []),
    perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
      const e = entry.entry;
      if (e.c.includes("comp")) {
        return acc;
      }
      if (e.separationAtP) {
        const b = e.prp || e.p;
        const bRest = b.slice(e.separationAtP);
        if (bRest === "شول") {
          return acc;
        }
        if (abruptEnd) {
          if (s === bRest.slice(0, -1)) {
            return [...acc, entry];
          }
        } else if (hamzaEnd) {
          if (base === bRest.slice(0, -1)) {
            return [...acc, entry];
          }
        } else if (oEnd) {
          if ([bRest, bRest.slice(0, -1)].includes(base)) {
            return [...acc, entry];
          }
        }
      } else if (!e.prp) {
        if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return acc;
        }
        if (oEnd) {
          if ([e.p, e.p.slice(0, -1)].includes(base)) {
            return [...acc, entry];
          }
        } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
          const b = hamzaEnd ? base : s;
          const p = e.p.slice(0, -1);
          if (b === p) {
            return [...acc, entry];
          }
        }
      }
      if (!e.separationAtP) {
        // fall back to the entry's listed tppp variants, with and without a
        // leading alef put back on the word
        if (isInVarients(e.tppp, s)) {
          return [...acc, entry];
        } else if (isInVarients(e.tppp, "ا" + s)) {
          return [...acc, entry];
        }
      }
      return acc;
    }, []),
  };
  Object.entries(tppMatches).forEach(([aspect, entries]) => {
    entries.forEach((verb) => {
      w.push({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect: aspect as T.Aspect,
          base: "root",
          verb: removeFVarientsFromVerb(verb),
        },
      });
    });
  });
  return w;
}
/**
 * Maps the final letter of a word to the persons it can mark in the
 * imperative: ه gives the second person singular (both genders), ئ gives the
 * second person plural (both genders), anything else gives none.
 *
 * @param e - the last letter of the word
 * @returns the persons that letter can express as an imperative ending
 */
function getImperativeVerbEnding(e: string): T.Person[] {
  switch (e) {
    case "ه":
      return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
    case "ئ":
      return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
    default:
      return [];
  }
}
// TODO: could handle all sh- verbs for efficiency's sake
/**
 * Recognises the four irregular forms ته, راته, ورته and درته.
 *
 * Each is reported as a single imperfective, root-based, third person
 * singular masculine reading; the verb is picked from the word's prefix
 * (را -> raatlul, ور -> wartlul, در -> dartlul, none -> tlul).
 *
 * @param s - the word to check
 * @returns a one-element array for a recognised form, otherwise []
 */
function parseIrregularVerb(s: string): T.ParsedVBE[] {
  const isRecognised =
    s === "ته" || s === "راته" || s === "ورته" || s === "درته";
  if (!isRecognised) {
    return [];
  }
  // resolved only after the guard, so unrecognised words never touch the verbs
  const pickVerb = () => {
    if (s.startsWith("را")) {
      return raatlul;
    }
    if (s.startsWith("ور")) {
      return wartlul;
    }
    if (s.startsWith("در")) {
      return dartlul;
    }
    return tlul;
  };
  return [
    {
      type: "VB",
      info: {
        aspect: "imperfective",
        base: "root",
        type: "verb",
        verb: pickVerb(),
      },
      person: T.Person.ThirdSingMale,
    },
  ];
}
/**
 * True when the entry is not flagged `sepOo` and its `p` field begins with
 * ا or آ.
 */
function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
  if (e.sepOo) {
    return false;
  }
  const initial = e.p[0];
  return initial === "ا" || initial === "آ";
}
/**
 * True when the given text begins with ا or آ (false for an empty string).
 */
function startsWithAleph(base: string): boolean {
  const initial = base.charAt(0);
  return initial === "ا" || initial === "آ";
}

View File

@ -1,46 +1,46 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { LookupFunction } from "./lookup"; // import { returnParseResult } from "./utils";
import { returnParseResult } from "./utils";
export function parseVBP( export function parseVBP(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedVBP>[] { ): T.ParseResult<T.ParsedVBP>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
return [ return [];
...parsePastPart(tokens, lookup), // return [
// ...parseAbility(tokens), // ...parsePastPart(tokens, lookup),
]; // // ...parseAbility(tokens),
// ];
} }
function parsePastPart( // function parsePastPart(
tokens: Readonly<T.Token[]>, // tokens: Readonly<T.Token[]>,
lookup: LookupFunction // dicitonary: T.DictionaryAPI,
): T.ParseResult<T.ParsedVBP>[] { // ): T.ParseResult<T.ParsedVBP>[] {
const [{ s }, ...rest] = tokens; // const [{ s }, ...rest] = tokens;
const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې"; // const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
if (!ending || !["ی", "ي", "ې"].includes(ending)) { // if (!ending || !["ی", "ي", "ې"].includes(ending)) {
return []; // return [];
} // }
// TODO: ALSO HANDLE SHORT FORMS // // TODO: ALSO HANDLE SHORT FORMS
const wOutEnd = s.slice(0, -1); // const wOutEnd = s.slice(0, -1);
const matches = lookup(wOutEnd, "pPart"); // const matches = lookup(wOutEnd, "pPart");
const genNums = endingGenderNum(ending); // const genNums = endingGenderNum(ending);
return matches // return matches
.flatMap<T.ParsedVBP>((verb) => // .flatMap<T.ParsedVBP>((verb) =>
genNums.map<T.ParsedVBP>((genNum) => ({ // genNums.map<T.ParsedVBP>((genNum) => ({
type: "VB", // type: "VB",
info: { // info: {
type: "ppart", // type: "ppart",
verb, // verb,
genNum, // genNum,
}, // },
})) // }))
) // )
.flatMap((m) => returnParseResult(rest, m)); // .flatMap((m) => returnParseResult(rest, m));
} // }
// function parseAbility( // function parseAbility(
// tokens: Readonly<T.Token[]>, // tokens: Readonly<T.Token[]>,
@ -70,33 +70,33 @@ function parsePastPart(
// .flatMap((m) => returnParseResult(rest, m)); // .flatMap((m) => returnParseResult(rest, m));
// } // }
function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] { // function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
if (ending === "ی") { // if (ending === "ی") {
return [ // return [
{ // {
gender: "masc", // gender: "masc",
number: "singular", // number: "singular",
}, // },
]; // ];
} // }
if (ending === "ي") { // if (ending === "ي") {
return [ // return [
{ // {
gender: "masc", // gender: "masc",
number: "plural", // number: "plural",
}, // },
]; // ];
} // }
// if (ending === "ې") { // // if (ending === "ې") {
return [ // return [
{ // {
gender: "fem", // gender: "fem",
number: "singular", // number: "singular",
}, // },
{ // {
gender: "fem", // gender: "fem",
number: "plural", // number: "plural",
}, // },
]; // ];
// } // // }
} // }

View File

@ -4,6 +4,10 @@ export function isKedulStatEntry(v: T.VerbDictionaryEntry): boolean {
return v.p === "کېدل" && v.e === "to become _____"; return v.p === "کېدل" && v.e === "to become _____";
} }
/**
* gets the possible people for stem and root endings
* but DOES NOT INCLUDE short third pers masc sing
*/
export function getVerbEnding(e: string): { export function getVerbEnding(e: string): {
stem: T.Person[]; stem: T.Person[];
root: T.Person[]; root: T.Person[];
@ -34,7 +38,11 @@ export function getVerbEnding(e: string): {
}; };
} else if (e === "و") { } else if (e === "و") {
return { return {
root: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], root: [
T.Person.FirstPlurMale,
T.Person.FirstPlurFemale,
T.Person.ThirdSingMale,
],
stem: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale], stem: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
}; };
} else if (e === "ئ") { } else if (e === "ئ") {

View File

@ -24,7 +24,6 @@ import {
import { parseBlocks } from "./parse-blocks"; import { parseBlocks } from "./parse-blocks";
import { makePronounSelection } from "../phrase-building/make-selections"; import { makePronounSelection } from "../phrase-building/make-selections";
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp"; import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
import { LookupFunction } from "./lookup";
import { isSecondPerson, personToGenNum } from "../misc-helpers"; import { isSecondPerson, personToGenNum } from "../misc-helpers";
import { equals, zip } from "rambda"; import { equals, zip } from "rambda";
import { isImperativeTense } from "../type-predicates"; import { isImperativeTense } from "../type-predicates";
@ -41,12 +40,12 @@ import { isImperativeTense } from "../type-predicates";
export function parseVP( export function parseVP(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: LookupFunction dictionary: T.DictionaryAPI
): T.ParseResult<T.VPSelectionComplete>[] { ): T.ParseResult<T.VPSelectionComplete>[] {
if (tokens.length === 0) { if (tokens.length === 0) {
return []; return [];
} }
const blocks = parseBlocks(tokens, lookup, [], []); const blocks = parseBlocks(tokens, dictionary, [], []);
return bindParseResult( return bindParseResult(
createPossesivePossibilities(blocks), createPossesivePossibilities(blocks),
(tokens, { blocks, kids }) => { (tokens, { blocks, kids }) => {
@ -892,7 +891,7 @@ function getMiniPronouns(kids: T.ParsedKid[]): T.ParsedMiniPronoun[] {
function getPeopleFromMiniPronouns(kids: T.ParsedKid[]): T.Person[] { function getPeopleFromMiniPronouns(kids: T.ParsedKid[]): T.Person[] {
const p: T.Person[] = []; const p: T.Person[] = [];
for (let k of kids) { for (const k of kids) {
if (k === "me") { if (k === "me") {
p.push(T.Person.FirstSingMale); p.push(T.Person.FirstSingMale);
p.push(T.Person.FirstSingFemale); p.push(T.Person.FirstSingFemale);

View File

@ -163,6 +163,38 @@ export function parserCombMany<R>(parser: Parser<R>): Parser<R[]> {
return r; return r;
} }
/**
 * Sequences two parsers: the second runs on whatever tokens each result of
 * the first leaves behind, and the two bodies are returned together as a pair.
 *
 * @param parsers - the two parsers, in the order they should be applied
 * @returns a parser producing `[a, b]` for every combination chained through
 *   bindParseResult
 */
export function parserCombSucc2<A, B>(
  parsers: [Parser<A>, Parser<B>]
): Parser<[A, B]> {
  return (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B]>[] => {
    const [first, second] = parsers;
    return bindParseResult(first(tokens, dictionary), (afterFirst, a) =>
      bindParseResult(second(afterFirst, dictionary), (afterSecond, b) => {
        const pair: [A, B] = [a, b];
        return returnParseResult(afterSecond, pair);
      })
    );
  };
}
/**
 * Sequences three parsers: each one runs on the tokens left over by the one
 * before it, and the three bodies are returned together as a triple.
 *
 * @param parsers - the three parsers, in the order they should be applied
 * @returns a parser producing `[a, b, c]` for every combination chained
 *   through bindParseResult
 */
export function parserCombSucc3<A, B, C>(
  parsers: [Parser<A>, Parser<B>, Parser<C>]
): Parser<[A, B, C]> {
  return (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B, C]>[] => {
    const [first, second, third] = parsers;
    return bindParseResult(first(tokens, dictionary), (afterFirst, a) =>
      bindParseResult(second(afterFirst, dictionary), (afterSecond, b) =>
        bindParseResult(third(afterSecond, dictionary), (afterThird, c) => {
          const triple: [A, B, C] = [a, b, c];
          return returnParseResult(afterThird, triple);
        })
      )
    );
  };
}
export function isCompleteResult<C extends object>( export function isCompleteResult<C extends object>(
r: T.ParseResult<C> r: T.ParseResult<C>
): boolean { ): boolean {

View File

@ -214,7 +214,7 @@ function addArticlesAndAdjs(
? np.determiners.determiners ? np.determiners.determiners
// @ts-ignore - weird, ts is not recognizing this as rendered // @ts-ignore - weird, ts is not recognizing this as rendered
.map((x) => (moreThanOneDet ? `(${x.e})` : x.e)) .map((x) => (moreThanOneDet ? `(${x.e})` : x.e))
.join(" ") .join(" ") + " "
: ""; : "";
const detsWithoutNoun = np.determiners && !np.determiners.withNoun; const detsWithoutNoun = np.determiners && !np.determiners.withNoun;
return `${np.determiners ? "" : articles}${determiners}${ return `${np.determiners ? "" : articles}${determiners}${

View File

@ -0,0 +1,58 @@
import * as T from "../../../types";
import { compileVP } from "./compile";
import { renderVP } from "./render-vp";
/**
 * Drops verb phrase selections whose compiled output duplicates that of an
 * earlier selection (as judged by isDuplicate), keeping the first of each
 * group and the original order.
 *
 * @param vs - the selections to de-duplicate
 * @returns a new array without the redundant selections
 */
export function removeRedundantVPSs(
  vs: T.VPSelectionComplete[]
): T.VPSelectionComplete[] {
  const compiled = vs.map((selection) =>
    compileVP(renderVP(selection), selection.form)
  );
  const redundant = new Set<number>();
  compiled.forEach((current, i) => {
    // an item already marked as redundant is not used to mark later ones
    if (redundant.has(i)) {
      return;
    }
    const laterOnes = compiled.slice(i + 1);
    const hits = findAllIndices(laterOnes, (other) =>
      isDuplicate(current, other)
    );
    // hits are offsets into the slice, so shift them back to full-array indices
    for (const offset of hits) {
      redundant.add(offset + i + 1);
    }
  });
  return vs.filter((_, i) => !redundant.has(i));
}
/**
 * Decides whether two compiled outputs are the same apart from gender
 * glosses in their English lines.
 *
 * They count as duplicates only when both have English lines, the same
 * number of them, identical `ps` content (compared via JSON serialization),
 * and pairwise equal English lines once removeGenderGloss has been applied.
 *
 * The `ps` comparison is done once, before looking at the English lines, so
 * that it also applies when both English arrays are empty and is not
 * re-serialized for every line.
 *
 * @param a - first compiled output
 * @param b - second compiled output
 * @returns true when b is redundant given a
 */
function isDuplicate(
  a: {
    ps: T.SingleOrLengthOpts<T.PsString[]>;
    e?: string[];
  },
  b: { ps: T.SingleOrLengthOpts<T.PsString[]>; e?: string[] }
): boolean {
  const aLines = a.e;
  const bLines = b.e;
  if (!aLines || !bLines) {
    return false;
  }
  if (aLines.length !== bLines.length) {
    return false;
  }
  if (JSON.stringify(a.ps) !== JSON.stringify(b.ps)) {
    return false;
  }
  return aLines.every(
    (line, i) => removeGenderGloss(line) === removeGenderGloss(bLines[i] ?? "")
  );
}
/**
 * Strips the gender markers "(m.)", "(f.)", "(m. pl.)" and "(f. pl.)" from an
 * English gloss. Surrounding whitespace is left untouched.
 */
function removeGenderGloss(s: string): string {
  // TODO: combine into one RegEx
  const singularMarker = /\((m|f)\.\)/g;
  const pluralMarker = /\((m|f)\. pl\.\)/g;
  // two passes, singular markers first, then plural ones
  const withoutSingular = s.replace(singularMarker, "");
  return withoutSingular.replace(pluralMarker, "");
}
/**
 * Collects, in ascending order, the index of every element for which the
 * predicate returns true.
 *
 * @param arr - the array to scan
 * @param f - predicate applied to each element
 * @returns the matching indices (empty when nothing matches)
 */
function findAllIndices<N>(arr: N[], f: (x: N) => boolean): number[] {
  return arr.reduce<number[]>((found, item, i) => {
    if (f(item)) {
      found.push(i);
    }
    return found;
  }, []);
}

View File

@ -178,7 +178,7 @@ function renderDeterminer({
? number === "plural" ? number === "plural"
? { p: "دغو", f: "dágho" } ? { p: "دغو", f: "dágho" }
: gender === "masc" : gender === "masc"
? { p: "دغه", f: "dághu" } ? { p: "دغه", f: "dágha" }
: { p: "دغې", f: "dághe" } : { p: "دغې", f: "dághe" }
: { p: "دغه", f: "dágha" }; : { p: "دغه", f: "dágha" };
return { return {
@ -196,7 +196,7 @@ function renderDeterminer({
? number === "plural" ? number === "plural"
? { p: "هغو", f: "hágho" } ? { p: "هغو", f: "hágho" }
: gender === "masc" : gender === "masc"
? { p: "هغه", f: "hághu" } ? { p: "هغه", f: "hágha" }
: { p: "هغې", f: "hághe" } : { p: "هغې", f: "hághe" }
: { p: "هغه", f: "hágha" }; : { p: "هغه", f: "hágha" };
return { return {

View File

@ -1259,8 +1259,13 @@ export type DictionaryAPI = {
queryP: (p: string) => DictionaryEntry[]; queryP: (p: string) => DictionaryEntry[];
adjLookup: (p: string) => AdjectiveEntry[]; adjLookup: (p: string) => AdjectiveEntry[];
nounLookup: (p: string) => NounEntry[]; nounLookup: (p: string) => NounEntry[];
otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[]; otherLookup: (
key: keyof DictionaryEntry,
p: string,
regex?: boolean
) => DictionaryEntry[];
specialPluralLookup: (p: string) => NounEntry[]; specialPluralLookup: (p: string) => NounEntry[];
verbEntryLookup: (p: string) => VerbEntry[];
}; };
export type Parser<R> = ( export type Parser<R> = (

View File

@ -18,5 +18,5 @@
"noUnusedParameters": true, "noUnusedParameters": true,
"noFallthroughCasesInSwitch": true "noFallthroughCasesInSwitch": true
}, },
"include": ["vite.config.ts", "get-mini-dict.ts"] "include": ["vite.config.ts", "get-mini-dict-and-split-verbs.ts"]
} }

View File

@ -34,6 +34,7 @@ export const entries: T.DictionaryEntry["ts"][] = [
1527812908, // مېلمه 1527812908, // مېلمه
1575924767041, // شپون 1575924767041, // شپون
1527815333, // نتور 1527815333, // نتور
1527812881, // ماشوم
// fem nouns // fem nouns
1527811877, // دوستي 1527811877, // دوستي
@ -50,4 +51,28 @@ export const entries: T.DictionaryEntry["ts"][] = [
1589023873660, // فتح - fatha 1589023873660, // فتح - fatha
1527814342, // نفع - nafa 1527814342, // نفع - nafa
1527815329, // تجربه 1527815329, // تجربه
// verbs
1527815399, // وهل
1527817298, // اخیستل
1527812275, // لیدل
1527812856, // لیکل
1527815085, // منل
1527817661, // ګالل
1527813573, // رسېدل
1527812790, // خوړل
1527812759, // کېناستل
1527812758, // کښېناستل
1527815190, // پرېښودل
1527811293, // ښودل
1527812284, // کېښودل
1527812751, // کتل
1527823376, // وتل
1527816865, // وړل
1527813473, // الوتل
1585228551150, // درتلل
1527817577, // کښېښودل
1527814012, // اوښتل
1577390597820, // پرېښوول
1527815191, // پرېښول
]; ];