Compare commits
3 Commits
191abc5778
...
f17ebddaa1
Author | SHA1 | Date |
---|---|---|
adueck | f17ebddaa1 | |
adueck | 0ade410698 | |
adueck | 73eb04d7e0 |
|
@ -11,6 +11,7 @@ lerna-debug.log*
|
||||||
src/verbs.ts
|
src/verbs.ts
|
||||||
src/nouns-adjs.ts
|
src/nouns-adjs.ts
|
||||||
vocab/mini-dict-entries.ts
|
vocab/mini-dict-entries.ts
|
||||||
|
src/lib/src/parsing/split-verbs.ts
|
||||||
|
|
||||||
# testing
|
# testing
|
||||||
/coverage
|
/coverage
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
import * as T from "./src/types";
|
||||||
|
import * as tp from "./src/lib/src/type-predicates";
|
||||||
|
import fs from "fs";
|
||||||
|
|
||||||
|
import { entries as collection } from "./vocab/mini-dict-tss";
|
||||||
|
|
||||||
|
// Build script: downloads the full dictionary and generates two source files
//  - ./vocab/mini-dict-entries.ts: the entries whose `ts` is listed in
//    vocab/mini-dict-tss
//  - ./src/lib/src/parsing/split-verbs.ts: the verb entries that have
//    `separationAtP` set, excluding کول and کېدل themselves

const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
);
if (!res.ok) {
  // Fail loudly before touching the generated files; otherwise an error
  // response would be parsed as if it were the dictionary.
  throw new Error(
    `failed to download dictionary: ${res.status} ${res.statusText}`
  );
}
const dictionary = (await res.json()) as T.Dictionary;

// A Set keeps the membership check constant-time for each dictionary entry
const wantedTs = new Set(collection);
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  wantedTs.has(x.ts)
);

const splitEntries: T.VerbDictionaryEntry[] =
  dictionary.entries.filter<T.VerbDictionaryEntry>(
    (x): x is T.VerbDictionaryEntry =>
      tp.isVerbDictionaryEntry(x) &&
      !!x.separationAtP &&
      !["کول", "کېدل"].includes(x.p)
  );

// One JSON-serialized entry per line, wrapped in a typed array literal
const miniDictContents = `import { DictionaryEntry } from "../src/types";
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
export const entries: DictionaryEntry[] = [
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
];
`;

// Each split verb is emitted as a VerbEntry holding only its `entry` field
const splitVerbContents = `import { VerbEntry, VerbDictionaryEntry } from "../../../types";
// DO NOT MODIFY - GENERATED
export const entries: VerbEntry[] = [
${splitEntries
  .map((e) => `\t{ entry: ${JSON.stringify(e)} as VerbDictionaryEntry },`)
  .join("\n")}
];
`;

fs.writeFileSync("./vocab/mini-dict-entries.ts", miniDictContents);
fs.writeFileSync("./src/lib/src/parsing/split-verbs.ts", splitVerbContents);
|
|
@ -1,22 +0,0 @@
|
||||||
import * as T from "./src/types";
|
|
||||||
import fs from "fs";
|
|
||||||
|
|
||||||
import { entries as collection } from "./vocab/mini-dict-tss";
|
|
||||||
|
|
||||||
const res = await fetch(
|
|
||||||
"https://storage.lingdocs.com/dictionary/dictionary.json"
|
|
||||||
);
|
|
||||||
const dictionary = (await res.json()) as T.Dictionary;
|
|
||||||
|
|
||||||
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
|
|
||||||
collection.includes(x.ts)
|
|
||||||
);
|
|
||||||
|
|
||||||
const contents = `import { DictionaryEntry } from "../src/types";
|
|
||||||
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
|
|
||||||
export const entries: DictionaryEntry[] = [
|
|
||||||
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
|
|
||||||
];
|
|
||||||
`;
|
|
||||||
|
|
||||||
fs.writeFileSync("./vocab/mini-dict-entries.ts", contents);
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "pashto-inflector-website",
|
"name": "pashto-inflector-website",
|
||||||
"version": "7.7.1",
|
"version": "7.7.3",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version",
|
"patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version",
|
||||||
|
@ -14,7 +14,7 @@
|
||||||
"build-website": "tsc -b && vite build",
|
"build-website": "tsc -b && vite build",
|
||||||
"build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs",
|
"build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs",
|
||||||
"build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib",
|
"build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib",
|
||||||
"get-words": "node get-words.cjs && tsx get-mini-dict.ts",
|
"get-words": "node get-words.cjs && tsx get-mini-dict-and-split-verbs.ts",
|
||||||
"check-all-inflections": "tsx check-all-inflections.ts"
|
"check-all-inflections": "tsx check-all-inflections.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|
|
@ -19,7 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
|
||||||
import Hider from "./components/src/Hider";
|
import Hider from "./components/src/Hider";
|
||||||
import InflectionDemo from "./demo-components/InflectionDemo";
|
import InflectionDemo from "./demo-components/InflectionDemo";
|
||||||
import SpellingDemo from "./demo-components/SpellingDemo";
|
import SpellingDemo from "./demo-components/SpellingDemo";
|
||||||
// import ParserDemo from "./demo-components/ParserDemo";
|
import ParserDemo from "./demo-components/ParserDemo";
|
||||||
// import InflectionTable from "./components/src/InflectionsTable";
|
// import InflectionTable from "./components/src/InflectionsTable";
|
||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
|
@ -163,7 +163,7 @@ function App() {
|
||||||
>
|
>
|
||||||
<SpellingDemo opts={textOptions} onChange={setTextOptions} />
|
<SpellingDemo opts={textOptions} onChange={setTextOptions} />
|
||||||
</Hider>
|
</Hider>
|
||||||
{/* <Hider
|
<Hider
|
||||||
label="Parser (🚧 IN PROGRESS 🚧)"
|
label="Parser (🚧 IN PROGRESS 🚧)"
|
||||||
hLevel={3}
|
hLevel={3}
|
||||||
showing={showing === "parser"}
|
showing={showing === "parser"}
|
||||||
|
@ -174,7 +174,7 @@ function App() {
|
||||||
entryFeeder={entryFeeder}
|
entryFeeder={entryFeeder}
|
||||||
dictionary={dictionary}
|
dictionary={dictionary}
|
||||||
/>
|
/>
|
||||||
</Hider> */}
|
</Hider>
|
||||||
</div>
|
</div>
|
||||||
</main>
|
</main>
|
||||||
<Modal
|
<Modal
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "@lingdocs/ps-react",
|
"name": "@lingdocs/ps-react",
|
||||||
"version": "7.7.1",
|
"version": "7.7.3",
|
||||||
"description": "Pashto inflector library module with React components",
|
"description": "Pashto inflector library module with React components",
|
||||||
"main": "dist/components/library.js",
|
"main": "dist/components/library.js",
|
||||||
"module": "dist/components/library.js",
|
"module": "dist/components/library.js",
|
||||||
|
|
|
@ -3,13 +3,18 @@ import * as T from "../types";
|
||||||
// import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
// import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||||
import { tokenizer } from "../lib/src/parsing/tokenizer";
|
import { tokenizer } from "../lib/src/parsing/tokenizer";
|
||||||
// import { NPDisplay } from "../components/library";
|
// import { NPDisplay } from "../components/library";
|
||||||
// import EditableVP from "../components/src/vp-explorer/EditableVP";
|
import EditableVP from "../components/src/vp-explorer/EditableVP";
|
||||||
// import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
|
import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
|
||||||
import { parseNoun } from "../lib/src/parsing/parse-noun-new";
|
// import { parseNoun } from "../lib/src/parsing/parse-noun-new";
|
||||||
import { JsonEditor } from "json-edit-react";
|
import { JsonEditor } from "json-edit-react";
|
||||||
import { renderNounSelection } from "../lib/src/phrase-building/render-np";
|
// import { renderNounSelection } from "../lib/src/phrase-building/render-np";
|
||||||
import { NPBlock } from "../components/src/blocks/Block";
|
// import { NPBlock } from "../components/src/blocks/Block";
|
||||||
import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
|
// import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
|
||||||
|
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||||
|
//import { renderVP } from "../lib/src/phrase-building/render-vp";
|
||||||
|
// import VPDisplay from "../components/src/vp-explorer/VPDisplay";
|
||||||
|
import { entryFeeder } from "./entryFeeder";
|
||||||
|
import { removeRedundantVPSs } from "../lib/src/phrase-building/remove-redundant";
|
||||||
|
|
||||||
const working = [
|
const working = [
|
||||||
"limited demo vocab",
|
"limited demo vocab",
|
||||||
|
@ -59,7 +64,7 @@ function ParserDemo({
|
||||||
}) {
|
}) {
|
||||||
const [text, setText] = useState<string>("");
|
const [text, setText] = useState<string>("");
|
||||||
const [result, setResult] = useState<
|
const [result, setResult] = useState<
|
||||||
ReturnType<typeof parseNoun>[number]["body"][]
|
ReturnType<typeof parsePhrase>["success"]
|
||||||
>([]);
|
>([]);
|
||||||
// ReturnType<typeof parsePhrase>["success"]
|
// ReturnType<typeof parsePhrase>["success"]
|
||||||
const [errors, setErrors] = useState<string[]>([]);
|
const [errors, setErrors] = useState<string[]>([]);
|
||||||
|
@ -70,16 +75,10 @@ function ParserDemo({
|
||||||
setErrors([]);
|
setErrors([]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const res = parseNoun(tokenizer(value), dictionary, undefined);
|
const res = parsePhrase(tokenizer(value), dictionary);
|
||||||
const success: ReturnType<typeof parseNoun>[number]["body"][] = res
|
|
||||||
.filter((x) => !x.tokens.length)
|
|
||||||
.map((x) => x.body);
|
|
||||||
const errors = [
|
|
||||||
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
|
|
||||||
];
|
|
||||||
setText(value);
|
setText(value);
|
||||||
setErrors(errors);
|
setErrors(res.errors);
|
||||||
setResult(success);
|
setResult(removeRedundantVPSs(res.success));
|
||||||
}
|
}
|
||||||
return (
|
return (
|
||||||
<div className="mt-3" style={{ marginBottom: "1000px" }}>
|
<div className="mt-3" style={{ marginBottom: "1000px" }}>
|
||||||
|
@ -141,34 +140,8 @@ function ParserDemo({
|
||||||
<div className="text-center">Did you mean:</div>
|
<div className="text-center">Did you mean:</div>
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
{result.map((r) => {
|
{result.map((res) => (
|
||||||
try {
|
<>
|
||||||
const renderedNP: T.Rendered<T.NPSelection> = {
|
|
||||||
type: "NP",
|
|
||||||
selection: renderNounSelection(r.selection, r.inflected, "none"),
|
|
||||||
};
|
|
||||||
return (
|
|
||||||
<>
|
|
||||||
{r.inflected ? "INFLECTED" : "PLAIN"}
|
|
||||||
<NPBlock
|
|
||||||
opts={opts}
|
|
||||||
script="p"
|
|
||||||
english={getEnglishFromRendered(renderedNP)}
|
|
||||||
>
|
|
||||||
{renderedNP}
|
|
||||||
</NPBlock>
|
|
||||||
</>
|
|
||||||
);
|
|
||||||
} catch (e) {
|
|
||||||
console.error(e);
|
|
||||||
return <div>ERROR RENDERING</div>;
|
|
||||||
}
|
|
||||||
})}
|
|
||||||
<JsonEditor data={result} />
|
|
||||||
{/* {result.map((res) =>
|
|
||||||
"inflected" in res ? (
|
|
||||||
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
|
||||||
) : "verb" in res ? (
|
|
||||||
<EditableVP
|
<EditableVP
|
||||||
opts={opts}
|
opts={opts}
|
||||||
entryFeeder={entryFeeder}
|
entryFeeder={entryFeeder}
|
||||||
|
@ -176,42 +149,76 @@ function ParserDemo({
|
||||||
>
|
>
|
||||||
{uncompleteVPSelection(res)}
|
{uncompleteVPSelection(res)}
|
||||||
</EditableVP>
|
</EditableVP>
|
||||||
) : (
|
<details>
|
||||||
// (() => {
|
<summary>AST</summary>
|
||||||
// try {
|
<JsonEditor data={res} />
|
||||||
// const rendered = renderVP(res);
|
</details>
|
||||||
// const compiled = compileVP(rendered, res.form);
|
</>
|
||||||
// return (
|
))}
|
||||||
// <div>
|
|
||||||
// <CompiledPTextDisplay compiled={compiled} opts={opts} />
|
|
||||||
// {compiled.e && (
|
|
||||||
// <div className={`text-muted mt-2 text-center`}>
|
|
||||||
// {compiled.e.map((e, i) => (
|
|
||||||
// <div key={i}>{e}</div>
|
|
||||||
// ))}
|
|
||||||
// </div>
|
|
||||||
// )}
|
|
||||||
// </div>
|
|
||||||
// );
|
|
||||||
// } catch (e) {
|
|
||||||
// console.error(e);
|
|
||||||
// console.log({ res });
|
|
||||||
// return <div>ERROR</div>;
|
|
||||||
// }
|
|
||||||
// })()
|
|
||||||
<samp>
|
|
||||||
<pre>{JSON.stringify(res, null, " ")}</pre>
|
|
||||||
</samp>
|
|
||||||
)
|
|
||||||
)} */}
|
|
||||||
<details>
|
|
||||||
<summary>AST</summary>
|
|
||||||
<samp>
|
|
||||||
<pre>{JSON.stringify(result, null, " ")}</pre>
|
|
||||||
</samp>
|
|
||||||
</details>
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export default ParserDemo;
|
export default ParserDemo;
|
||||||
|
|
||||||
|
// {/* {result.map((res) =>
|
||||||
|
// "inflected" in res ? (
|
||||||
|
// <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
||||||
|
// ) : "verb" in res ? (
|
||||||
|
// <EditableVP
|
||||||
|
// opts={opts}
|
||||||
|
// entryFeeder={entryFeeder}
|
||||||
|
// allVariations={true}
|
||||||
|
// >
|
||||||
|
// {uncompleteVPSelection(res)}
|
||||||
|
// </EditableVP>
|
||||||
|
// ) : (
|
||||||
|
// (() => {
|
||||||
|
// try {
|
||||||
|
// const rendered = renderVP(res);
|
||||||
|
// const compiled = compileVP(rendered, res.form);
|
||||||
|
// return (
|
||||||
|
// <div>
|
||||||
|
// <CompiledPTextDisplay compiled={compiled} opts={opts} />
|
||||||
|
// {compiled.e && (
|
||||||
|
// <div className={`text-muted mt-2 text-center`}>
|
||||||
|
// {compiled.e.map((e, i) => (
|
||||||
|
// <div key={i}>{e}</div>
|
||||||
|
// ))}
|
||||||
|
// </div>
|
||||||
|
// )}
|
||||||
|
// </div>
|
||||||
|
// );
|
||||||
|
// } catch (e) {
|
||||||
|
// console.error(e);
|
||||||
|
// console.log({ res });
|
||||||
|
// return <div>ERROR</div>;
|
||||||
|
// }
|
||||||
|
// })()
|
||||||
|
// <samp>
|
||||||
|
// <pre>{JSON.stringify(res, null, " ")}</pre>
|
||||||
|
// </samp>
|
||||||
|
// )
|
||||||
|
// )} */}
|
||||||
|
|
||||||
|
// try {
|
||||||
|
// const renderedNP: T.Rendered<T.NPSelection> = {
|
||||||
|
// type: "NP",
|
||||||
|
// selection: renderNounSelection(r.selection, r.inflected, "none"),
|
||||||
|
// };
|
||||||
|
// return (
|
||||||
|
// <>
|
||||||
|
// {r.inflected ? "INFLECTED" : "PLAIN"}
|
||||||
|
// <NPBlock
|
||||||
|
// opts={opts}
|
||||||
|
// script="p"
|
||||||
|
// english={getEnglishFromRendered(renderedNP)}
|
||||||
|
// >
|
||||||
|
// {renderedNP}
|
||||||
|
// </NPBlock>
|
||||||
|
// </>
|
||||||
|
// );
|
||||||
|
// } catch (e) {
|
||||||
|
// console.error(e);
|
||||||
|
// return <div>ERROR RENDERING</div>;
|
||||||
|
// }
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "@lingdocs/inflect",
|
"name": "@lingdocs/inflect",
|
||||||
"version": "7.7.1",
|
"version": "7.7.3",
|
||||||
"description": "Pashto inflector library",
|
"description": "Pashto inflector library",
|
||||||
"main": "dist/lib/library.cjs",
|
"main": "dist/lib/library.cjs",
|
||||||
"module": "dist/lib/library.js",
|
"module": "dist/lib/library.js",
|
||||||
|
|
|
@ -19,7 +19,19 @@ function queryP(p: string): T.DictionaryEntry[] {
|
||||||
}
|
}
|
||||||
return dictDb.collection.find({ p });
|
return dictDb.collection.find({ p });
|
||||||
}
|
}
|
||||||
const memoizedQueryP = queryP;
|
const memoizedQueryP = memoize(queryP);
|
||||||
|
|
||||||
|
/**
 * Fetches the dictionary entry with the given `ts` value.
 *
 * @param ts - the `ts` of the entry to fetch
 * @returns the entry found by `dictDb.findOneByTs`
 * @throws if `dictDb.collection` is not available yet, or if no entry is found
 */
function queryTs(ts: number): T.DictionaryEntry {
  if (dictDb.collection) {
    const found = dictDb.findOneByTs(ts);
    if (found) {
      return found;
    }
    throw new Error("complement link broken");
  }
  throw new Error("dictionary not initialized yet");
}
|
||||||
|
const memoizedQueryTs = memoize(queryTs);
|
||||||
|
|
||||||
function adjLookup(p: string): T.AdjectiveEntry[] {
|
function adjLookup(p: string): T.AdjectiveEntry[] {
|
||||||
const res = memoizedQueryP(p);
|
const res = memoizedQueryP(p);
|
||||||
|
@ -33,26 +45,51 @@ function nounLookup(p: string): T.NounEntry[] {
|
||||||
|
|
||||||
function otherLookup(
|
function otherLookup(
|
||||||
key: keyof T.DictionaryEntry,
|
key: keyof T.DictionaryEntry,
|
||||||
p: string
|
p: string,
|
||||||
|
regex?: boolean
|
||||||
): T.DictionaryEntry[] {
|
): T.DictionaryEntry[] {
|
||||||
if (!dictDb.collection) {
|
if (!dictDb.collection) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
return dictDb.collection.find({ [key]: p });
|
return dictDb.collection.find({ [key]: regex ? variationRegex(p) : p });
|
||||||
}
|
}
|
||||||
|
|
||||||
function specialPluralLookup(p: string): T.NounEntry[] {
|
function specialPluralLookup(p: string): T.NounEntry[] {
|
||||||
if (!dictDb.collection) {
|
if (!dictDb.collection) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
|
const regex = variationRegex(p);
|
||||||
return dictDb.collection
|
return dictDb.collection
|
||||||
.find({
|
.find({
|
||||||
$or: [{ ppp: { $regex: regex } }, { app: { $regex: regex } }],
|
$or: [{ ppp: regex }, { app: regex }],
|
||||||
})
|
})
|
||||||
.filter(tp.isNounEntry);
|
.filter(tp.isNounEntry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Looks up the verb entries whose `p` field equals the given word.
 *
 * When a verb entry has a link (`l`), the linked entry is fetched and
 * attached as `complement`.
 *
 * @param p - the word to look up
 * @returns the matching verb entries, or an empty array when the dictionary
 * collection is not available yet
 */
function verbEntryLookup(p: string): T.VerbEntry[] {
  if (!dictDb.collection) {
    return [];
  }
  const verbs = memoizedQueryP(p).filter(tp.isVerbDictionaryEntry);
  return verbs.map((entry) => {
    if (entry.l) {
      return {
        entry,
        complement: memoizedQueryTs(entry.l),
      };
    }
    return { entry };
  });
}
|
||||||
|
|
||||||
|
/**
 * creates a RegEx mongo query to search for a variation in a certain field
 * ie. to search for کاته in کوت, کاته
 *
 * The word must be preceded by the start of the field, whitespace or a comma,
 * and followed by a comma or the end of the field.
 *
 * @param p - the word to search for; it is always matched literally
 * @returns a query object of the form `{ $regex: RegExp }`
 */
function variationRegex(p: string): { $regex: RegExp } {
  // Escape regex metacharacters so that input such as "(" or "?" can neither
  // throw a SyntaxError nor change what the pattern matches.
  const escaped = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${escaped}($|,)`) };
}
|
||||||
|
|
||||||
export const dictionary: T.DictionaryAPI = {
|
export const dictionary: T.DictionaryAPI = {
|
||||||
initialize: async () => await dictDb.initialize(),
|
initialize: async () => await dictDb.initialize(),
|
||||||
update: async () => await dictDb.updateDictionary(() => null),
|
update: async () => await dictDb.updateDictionary(() => null),
|
||||||
|
@ -61,4 +98,5 @@ export const dictionary: T.DictionaryAPI = {
|
||||||
nounLookup: memoize(nounLookup),
|
nounLookup: memoize(nounLookup),
|
||||||
otherLookup: memoize(otherLookup),
|
otherLookup: memoize(otherLookup),
|
||||||
specialPluralLookup: memoize(specialPluralLookup),
|
specialPluralLookup: memoize(specialPluralLookup),
|
||||||
|
verbEntryLookup: memoize(verbEntryLookup),
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,7 +1,15 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
|
import {
|
||||||
|
isAdjectiveEntry,
|
||||||
|
isNounEntry,
|
||||||
|
isVerbDictionaryEntry,
|
||||||
|
} from "../type-predicates";
|
||||||
import { entries } from "../../../../vocab/mini-dict-entries";
|
import { entries } from "../../../../vocab/mini-dict-entries";
|
||||||
|
|
||||||
|
/**
 * Builds the regex used to find a word as one variation inside a
 * comma-separated field, ie. to find کاته in کوت, کاته
 *
 * @param p - the word to search for; it is always matched literally
 * @returns an object of the form `{ $regex: RegExp }`
 */
function variationRegex(p: string): { $regex: RegExp } {
  // Escape regex metacharacters so that input such as "(" or "?" can neither
  // throw a SyntaxError nor change what the pattern matches.
  const escaped = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${escaped}($|,)`) };
}
|
||||||
|
|
||||||
const queryP = (p: string) => entries.filter((e) => e.p === p);
|
const queryP = (p: string) => entries.filter((e) => e.p === p);
|
||||||
function adjLookup(p: string): T.AdjectiveEntry[] {
|
function adjLookup(p: string): T.AdjectiveEntry[] {
|
||||||
return queryP(p).filter(isAdjectiveEntry) as T.AdjectiveEntry[];
|
return queryP(p).filter(isAdjectiveEntry) as T.AdjectiveEntry[];
|
||||||
|
@ -13,18 +21,37 @@ function nounLookup(p: string): T.NounEntry[] {
|
||||||
|
|
||||||
function otherLookup(
|
function otherLookup(
|
||||||
key: keyof T.DictionaryEntry,
|
key: keyof T.DictionaryEntry,
|
||||||
p: string
|
p: string,
|
||||||
|
regex?: boolean
|
||||||
): T.DictionaryEntry[] {
|
): T.DictionaryEntry[] {
|
||||||
|
if (regex) {
|
||||||
|
const { $regex: regex } = variationRegex(p);
|
||||||
|
return entries.filter((e) => (e[key] as string)?.match(regex));
|
||||||
|
}
|
||||||
return entries.filter((e) => e[key] === p);
|
return entries.filter((e) => e[key] === p);
|
||||||
}
|
}
|
||||||
|
|
||||||
function specialPluralLookup(p: string): T.NounEntry[] {
|
function specialPluralLookup(p: string): T.NounEntry[] {
|
||||||
const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
|
const { $regex: regex } = variationRegex(p);
|
||||||
return entries.filter(
|
return entries.filter(
|
||||||
(e) => (e.ppp?.match(regex) || e.app?.match(regex)) && isNounEntry(e)
|
(e) => (e.ppp?.match(regex) || e.app?.match(regex)) && isNounEntry(e)
|
||||||
) as T.NounEntry[];
|
) as T.NounEntry[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Test-dictionary version of the verb lookup: finds the verb entries in the
 * mini dictionary whose `p` field equals the given word.
 *
 * When a verb entry has a link (`l`), the entry with that `ts` is searched
 * for in the same list and attached as `complement`.
 *
 * @param p - the word to look up
 * @returns the matching verb entries
 */
function verbEntryLookup(p: string): T.VerbEntry[] {
  const sameWord = entries.filter((e) => e.p === p);
  const verbs = sameWord.filter(isVerbDictionaryEntry);
  return verbs.map<T.VerbEntry>((entry) => {
    if (!entry.l) {
      return { entry };
    }
    return {
      entry,
      complement: entries.find((e) => e.ts === entry.l),
    };
  });
}
|
||||||
|
|
||||||
export const testDictionary: T.DictionaryAPI = {
|
export const testDictionary: T.DictionaryAPI = {
|
||||||
// @ts-expect-error we won't mock the initialization
|
// @ts-expect-error we won't mock the initialization
|
||||||
initialize: async () => 0,
|
initialize: async () => 0,
|
||||||
|
@ -35,4 +62,5 @@ export const testDictionary: T.DictionaryAPI = {
|
||||||
nounLookup,
|
nounLookup,
|
||||||
otherLookup,
|
otherLookup,
|
||||||
specialPluralLookup,
|
specialPluralLookup,
|
||||||
|
verbEntryLookup,
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { LookupFunction } from "./lookup";
|
import { isAdverbEntry } from "../type-predicates";
|
||||||
import { returnParseResultS } from "./utils";
|
import { returnParseResultS } from "./utils";
|
||||||
|
|
||||||
export function parseAdverb(
|
export function parseAdverb(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction
|
dictionary: T.DictionaryAPI
|
||||||
): T.ParseResult<T.APSelection>[] {
|
): T.ParseResult<T.APSelection>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const [first, ...rest] = tokens;
|
const [first, ...rest] = tokens;
|
||||||
const adverbs = lookup(first.s, "adverb");
|
const adverbs = dictionary.queryP(first.s).filter(isAdverbEntry);
|
||||||
return adverbs.map((entry) =>
|
return adverbs.map((entry) =>
|
||||||
returnParseResultS(rest, {
|
returnParseResultS(rest, {
|
||||||
type: "AP",
|
type: "AP",
|
||||||
|
|
|
@ -1,26 +1,25 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { fmapParseResult } from "../fp-ps";
|
import { fmapParseResult } from "../fp-ps";
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { parseAdverb } from "./parse-adverb";
|
import { parseAdverb } from "./parse-adverb";
|
||||||
import { parseSandwich } from "./parse-sandwich";
|
import { parseSandwich } from "./parse-sandwich";
|
||||||
|
|
||||||
export function parseAP(
|
export function parseAP(
|
||||||
s: Readonly<T.Token[]>,
|
s: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction,
|
dicitonary: T.DictionaryAPI,
|
||||||
possesor: T.PossesorSelection | undefined
|
possesor: T.PossesorSelection | undefined
|
||||||
): T.ParseResult<T.APSelection>[] {
|
): T.ParseResult<T.APSelection>[] {
|
||||||
if (s.length === 0) {
|
if (s.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
return [
|
return [
|
||||||
...(!possesor ? parseAdverb(s, lookup) : []),
|
...(!possesor ? parseAdverb(s, dicitonary) : []),
|
||||||
...fmapParseResult(
|
...fmapParseResult(
|
||||||
(selection) =>
|
(selection) =>
|
||||||
({
|
({
|
||||||
type: "AP",
|
type: "AP",
|
||||||
selection,
|
selection,
|
||||||
} as const),
|
} as const),
|
||||||
parseSandwich(s, lookup, possesor)
|
parseSandwich(s, dicitonary, possesor)
|
||||||
),
|
),
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { parseEquative } from "./parse-equative";
|
import { parseEquative } from "./parse-equative";
|
||||||
import { parseKidsSection } from "./parse-kids-section";
|
import { parseKidsSection } from "./parse-kids-section";
|
||||||
import { parseNeg } from "./parse-negative";
|
import { parseNeg } from "./parse-negative";
|
||||||
import { parseNPAP } from "./parse-npap";
|
import { parseNPAP } from "./parse-npap";
|
||||||
import { parseVBP } from "./parse-vbp";
|
import { parseVBP } from "./parse-vbp";
|
||||||
import { parsePH } from "./parse-ph";
|
import { parsePH } from "./parse-ph";
|
||||||
import { parseVBE } from "./parse-vbe";
|
import { parseVBE } from "./parse-vbe-new";
|
||||||
import {
|
import {
|
||||||
bindParseResult,
|
bindParseResult,
|
||||||
returnParseResult,
|
returnParseResult,
|
||||||
|
@ -18,7 +17,7 @@ import { isKedulStatEntry } from "./parse-verb-helpers";
|
||||||
|
|
||||||
export function parseBlocks(
|
export function parseBlocks(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction,
|
dicitonary: T.DictionaryAPI,
|
||||||
blocks: T.ParsedBlock[],
|
blocks: T.ParsedBlock[],
|
||||||
kids: T.ParsedKid[]
|
kids: T.ParsedKid[]
|
||||||
): T.ParseResult<{
|
): T.ParseResult<{
|
||||||
|
@ -35,13 +34,13 @@ export function parseBlocks(
|
||||||
|
|
||||||
// TODO: rather parse VBP / VBE
|
// TODO: rather parse VBP / VBE
|
||||||
const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
|
const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
|
||||||
...(!inVerbSection ? parseNPAP(tokens, lookup) : []),
|
...(!inVerbSection ? parseNPAP(tokens, dicitonary) : []),
|
||||||
// ensure at most one of each PH, VBE, VBP
|
// ensure at most one of each PH, VBE, VBP
|
||||||
...(prevPh ? [] : parsePH(tokens)),
|
...(prevPh ? [] : parsePH(tokens)),
|
||||||
...(blocks.some(isParsedVBE)
|
...(blocks.some(isParsedVBE)
|
||||||
? []
|
? []
|
||||||
: [...parseVBE(tokens, lookup), ...parseEquative(tokens)]),
|
: [...parseVBE(tokens, dicitonary), ...parseEquative(tokens)]),
|
||||||
...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, lookup)),
|
...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, dicitonary)),
|
||||||
...(blocks.some((b) => b.type === "negative") ? [] : parseNeg(tokens)),
|
...(blocks.some((b) => b.type === "negative") ? [] : parseNeg(tokens)),
|
||||||
...parseKidsSection(tokens, []),
|
...parseKidsSection(tokens, []),
|
||||||
];
|
];
|
||||||
|
@ -50,7 +49,7 @@ export function parseBlocks(
|
||||||
const errors: T.ParseError[] = [];
|
const errors: T.ParseError[] = [];
|
||||||
if (r.type === "kids") {
|
if (r.type === "kids") {
|
||||||
return {
|
return {
|
||||||
next: parseBlocks(tokens, lookup, blocks, [...kids, ...r.kids]),
|
next: parseBlocks(tokens, dicitonary, blocks, [...kids, ...r.kids]),
|
||||||
errors:
|
errors:
|
||||||
blocks.length !== 1
|
blocks.length !== 1
|
||||||
? [{ message: "kids' section out of place" }]
|
? [{ message: "kids' section out of place" }]
|
||||||
|
@ -71,7 +70,7 @@ export function parseBlocks(
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
next: parseBlocks(tokens, lookup, [...blocks, r], kids),
|
next: parseBlocks(tokens, dicitonary, [...blocks, r], kids),
|
||||||
errors,
|
errors,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
|
@ -1736,7 +1736,7 @@ describe("parsing nouns", () => {
|
||||||
test(category, () => {
|
test(category, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const res = parseNoun(tokens, testDictionary, undefined, []).flatMap(
|
const res = parseNoun(tokens, testDictionary, undefined).flatMap(
|
||||||
// only take the ones that used all the tokens
|
// only take the ones that used all the tokens
|
||||||
({ body, tokens }) => (tokens.length === 0 ? [body] : [])
|
({ body, tokens }) => (tokens.length === 0 ? [body] : [])
|
||||||
);
|
);
|
||||||
|
|
|
@ -3,7 +3,12 @@ import { makeNounSelection } from "../phrase-building/make-selections";
|
||||||
import { parseAdjective } from "./parse-adjective-new";
|
import { parseAdjective } from "./parse-adjective-new";
|
||||||
import { parseDeterminer } from "./parse-determiner";
|
import { parseDeterminer } from "./parse-determiner";
|
||||||
import { parseNounWord } from "./parse-noun-word";
|
import { parseNounWord } from "./parse-noun-word";
|
||||||
import { bindParseResult, parserCombMany, toParseError } from "./utils";
|
import {
|
||||||
|
bindParseResult,
|
||||||
|
parserCombMany,
|
||||||
|
parserCombSucc3,
|
||||||
|
toParseError,
|
||||||
|
} from "./utils";
|
||||||
|
|
||||||
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
||||||
|
|
||||||
|
@ -15,57 +20,54 @@ export function parseNoun(
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const detRes = parserCombMany(parseDeterminer)(tokens, dictionary);
|
const res = parserCombSucc3([
|
||||||
// TODO: add recognition of او between adjectives
|
parserCombMany(parseDeterminer),
|
||||||
return bindParseResult(detRes, (t, determiners) => {
|
parserCombMany(parseAdjective),
|
||||||
const adjRes = parserCombMany(parseAdjective)(t, dictionary);
|
parseNounWord,
|
||||||
return bindParseResult(adjRes, (tk, adjectives) => {
|
])(tokens, dictionary);
|
||||||
const nounWord = parseNounWord(tk, dictionary);
|
return bindParseResult(res, (tkns, [determiners, adjectives, nounWord]) => {
|
||||||
return bindParseResult(nounWord, (tkns, nr) => {
|
const { error: adjErrors } = adjDetsMatch(
|
||||||
const { error: adjErrors } = adjDetsMatch(
|
adjectives,
|
||||||
adjectives,
|
nounWord.gender,
|
||||||
nr.gender,
|
nounWord.inflected ? 1 : 0,
|
||||||
nr.inflected ? 1 : 0,
|
nounWord.plural
|
||||||
nr.plural
|
);
|
||||||
);
|
const { error: detErrors } = adjDetsMatch(
|
||||||
const { error: detErrors } = adjDetsMatch(
|
determiners,
|
||||||
determiners,
|
nounWord.gender,
|
||||||
nr.gender,
|
nounWord.inflected ? 1 : 0,
|
||||||
nr.inflected ? 1 : 0,
|
nounWord.plural
|
||||||
nr.plural
|
);
|
||||||
);
|
const dupErrors = checkForDeterminerDuplicates(determiners);
|
||||||
const dupErrors = checkForDeterminerDuplicates(determiners);
|
const s = makeNounSelection(nounWord.entry, undefined);
|
||||||
const s = makeNounSelection(nr.entry, undefined);
|
const body: NounResult = {
|
||||||
const body: NounResult = {
|
inflected: nounWord.inflected,
|
||||||
inflected: nr.inflected,
|
selection: {
|
||||||
selection: {
|
...s,
|
||||||
...s,
|
gender: nounWord.gender,
|
||||||
gender: nr.gender,
|
number: nounWord.plural ? "plural" : "singular",
|
||||||
number: nr.plural ? "plural" : "singular",
|
adjectives: adjectives.map((a) => a.selection),
|
||||||
adjectives: adjectives.map((a) => a.selection),
|
determiners: determiners.length
|
||||||
determiners: determiners.length
|
? {
|
||||||
? {
|
type: "determiners",
|
||||||
type: "determiners",
|
withNoun: true,
|
||||||
withNoun: true,
|
determiners: determiners.map((d) => d.selection),
|
||||||
determiners: determiners.map((d) => d.selection),
|
}
|
||||||
}
|
: undefined,
|
||||||
: undefined,
|
possesor,
|
||||||
possesor,
|
},
|
||||||
},
|
};
|
||||||
};
|
return [
|
||||||
return [
|
{
|
||||||
{
|
body,
|
||||||
body,
|
tokens: tkns,
|
||||||
tokens: tkns,
|
errors: [
|
||||||
errors: [
|
...detErrors.map(toParseError),
|
||||||
...detErrors.map(toParseError),
|
...dupErrors.map(toParseError),
|
||||||
...dupErrors.map(toParseError),
|
...adjErrors.map(toParseError),
|
||||||
...adjErrors.map(toParseError),
|
],
|
||||||
],
|
},
|
||||||
},
|
];
|
||||||
];
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,191 +0,0 @@
|
||||||
import * as T from "../../../types";
|
|
||||||
import { getInflectionPattern } from "../inflection-pattern";
|
|
||||||
import { makeNounSelection } from "../phrase-building/make-selections";
|
|
||||||
import {
|
|
||||||
isMascNounEntry,
|
|
||||||
isNounEntry,
|
|
||||||
isPluralNounEntry,
|
|
||||||
isUnisexNounEntry,
|
|
||||||
} from "../type-predicates";
|
|
||||||
import { getInflectionQueries } from "./inflection-query";
|
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { parseAdjective } from "./parse-adjective";
|
|
||||||
import { bindParseResult } from "./utils";
|
|
||||||
|
|
||||||
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
|
||||||
|
|
||||||
export function parseNoun(
|
|
||||||
tokens: Readonly<T.Token[]>,
|
|
||||||
lookup: LookupFunction,
|
|
||||||
possesor: T.PossesorSelection | undefined,
|
|
||||||
adjectives: {
|
|
||||||
inflection: (0 | 1 | 2)[];
|
|
||||||
gender: T.Gender[];
|
|
||||||
given: string;
|
|
||||||
selection: T.AdjectiveSelection;
|
|
||||||
}[]
|
|
||||||
): T.ParseResult<NounResult>[] {
|
|
||||||
if (tokens.length === 0) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
// TODO: add recognition of او between adjectives
|
|
||||||
const adjRes = parseAdjective(tokens, lookup);
|
|
||||||
const withAdj = bindParseResult(adjRes, (tkns, adj) =>
|
|
||||||
parseNoun(tkns, lookup, possesor, [...adjectives, adj])
|
|
||||||
);
|
|
||||||
const [first, ...rest] = tokens;
|
|
||||||
const searches = getInflectionQueries(first.s, true);
|
|
||||||
|
|
||||||
const w: ReturnType<typeof parseNoun> = [];
|
|
||||||
searches.forEach(({ search, details }) => {
|
|
||||||
const nounEntries = lookup(search, "nounAdj").filter(isNounEntry);
|
|
||||||
details.forEach((deets) => {
|
|
||||||
const fittingEntries = nounEntries.filter(deets.predicate);
|
|
||||||
fittingEntries.forEach((entry) => {
|
|
||||||
const genders: T.Gender[] = isUnisexNounEntry(entry)
|
|
||||||
? ["masc", "fem"]
|
|
||||||
: isMascNounEntry(entry)
|
|
||||||
? ["masc"]
|
|
||||||
: ["fem"];
|
|
||||||
deets.gender.forEach((gender) => {
|
|
||||||
if (genders.includes(gender)) {
|
|
||||||
deets.inflection.forEach((inf) => {
|
|
||||||
const { error: adjErrors } = adjsMatch(
|
|
||||||
adjectives,
|
|
||||||
gender,
|
|
||||||
inf,
|
|
||||||
deets.plural
|
|
||||||
);
|
|
||||||
convertInflection(inf, entry, gender, deets.plural).forEach(
|
|
||||||
({ inflected, number }) => {
|
|
||||||
const selection = makeNounSelection(entry, undefined);
|
|
||||||
const errors = [
|
|
||||||
...adjErrors.map((message) => ({
|
|
||||||
message,
|
|
||||||
})),
|
|
||||||
];
|
|
||||||
w.push({
|
|
||||||
tokens: rest,
|
|
||||||
body: {
|
|
||||||
inflected,
|
|
||||||
selection: {
|
|
||||||
...selection,
|
|
||||||
gender: selection.genderCanChange
|
|
||||||
? gender
|
|
||||||
: selection.gender,
|
|
||||||
number: selection.numberCanChange
|
|
||||||
? number
|
|
||||||
: selection.number,
|
|
||||||
adjectives: adjectives.map((a) => a.selection),
|
|
||||||
// TODO: could be nicer to validate that the possesor is inflected before
|
|
||||||
// and just pass in the selection
|
|
||||||
possesor,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
errors,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
return [...withAdj, ...w];
|
|
||||||
}
|
|
||||||
|
|
||||||
function adjsMatch(
|
|
||||||
adjectives: Parameters<typeof parseNoun>[3],
|
|
||||||
gender: T.Gender,
|
|
||||||
inf: 0 | 1 | 2,
|
|
||||||
plural: boolean | undefined
|
|
||||||
): { ok: boolean; error: string[] } {
|
|
||||||
const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
|
|
||||||
const unmatching = adjectives.filter(
|
|
||||||
(adj) =>
|
|
||||||
!adj.gender.includes(gender) ||
|
|
||||||
!adj.inflection.some((i) => i === inflection)
|
|
||||||
);
|
|
||||||
if (unmatching.length) {
|
|
||||||
return {
|
|
||||||
ok: false,
|
|
||||||
error: unmatching.map((x) => {
|
|
||||||
const adjText =
|
|
||||||
x.given === x.selection.entry.p
|
|
||||||
? x.given
|
|
||||||
: `${x.given} (${x.selection.entry.p})`;
|
|
||||||
const inflectionIssue = !x.inflection.some((x) => x === inflection)
|
|
||||||
? ` should be ${showInflection(inflection)}`
|
|
||||||
: ``;
|
|
||||||
return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`;
|
|
||||||
}),
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
return {
|
|
||||||
ok: true,
|
|
||||||
error: [],
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function convertInflection(
|
|
||||||
inflection: 0 | 1 | 2,
|
|
||||||
entry: T.NounEntry | T.AdjectiveEntry,
|
|
||||||
gender: T.Gender,
|
|
||||||
plural: boolean | undefined
|
|
||||||
): {
|
|
||||||
inflected: boolean;
|
|
||||||
number: T.NounNumber;
|
|
||||||
}[] {
|
|
||||||
const pattern = getInflectionPattern(entry);
|
|
||||||
const inf = (plural && inflection < 2 ? inflection + 1 : inflection) as
|
|
||||||
| 0
|
|
||||||
| 1
|
|
||||||
| 2;
|
|
||||||
if (inf === 0) {
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
inflected: false,
|
|
||||||
number: "singular",
|
|
||||||
},
|
|
||||||
];
|
|
||||||
} else if (inf === 1) {
|
|
||||||
return [
|
|
||||||
...(!((isNounEntry(entry) && isPluralNounEntry(entry)) || plural) &&
|
|
||||||
!(pattern === 4 && entry.p.endsWith("ه") && gender === "masc")
|
|
||||||
? [
|
|
||||||
{
|
|
||||||
inflected: true,
|
|
||||||
number: "singular" as T.NounNumber,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
: []),
|
|
||||||
...(pattern > 1 ||
|
|
||||||
(pattern > 0 && gender === "fem") ||
|
|
||||||
(isNounEntry(entry) && isPluralNounEntry(entry)) ||
|
|
||||||
plural
|
|
||||||
? [
|
|
||||||
{
|
|
||||||
inflected: false,
|
|
||||||
number: "plural" as T.NounNumber,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
: []),
|
|
||||||
];
|
|
||||||
}
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
inflected: true,
|
|
||||||
number: "plural",
|
|
||||||
},
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
function showInflection(inf: 0 | 1 | 2): string {
|
|
||||||
return inf === 0
|
|
||||||
? "plain"
|
|
||||||
: inf === 1
|
|
||||||
? "first inflection"
|
|
||||||
: "second inflection";
|
|
||||||
}
|
|
|
@ -1,13 +1,12 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { parsePronoun } from "./parse-pronoun";
|
import { parsePronoun } from "./parse-pronoun";
|
||||||
import { parseNoun } from "./parse-noun";
|
import { parseNoun } from "./parse-noun-new";
|
||||||
import { fmapParseResult } from "../fp-ps";
|
import { fmapParseResult } from "../fp-ps";
|
||||||
import { parseParticiple } from "./parse-participle";
|
import { parseParticiple } from "./parse-participle";
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
|
|
||||||
export function parseNP(
|
export function parseNP(
|
||||||
s: Readonly<T.Token[]>,
|
s: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction,
|
dicitonary: T.DictionaryAPI,
|
||||||
possesor: T.PossesorSelection | undefined
|
possesor: T.PossesorSelection | undefined
|
||||||
): T.ParseResult<T.ParsedNP>[] {
|
): T.ParseResult<T.ParsedNP>[] {
|
||||||
if (s.length === 0) {
|
if (s.length === 0) {
|
||||||
|
@ -41,7 +40,7 @@ export function parseNP(
|
||||||
|
|
||||||
return fmapParseResult(makeNPSl, [
|
return fmapParseResult(makeNPSl, [
|
||||||
...(!possesor ? parsePronoun(s) : []),
|
...(!possesor ? parsePronoun(s) : []),
|
||||||
...parseNoun(s, lookup, possesor, []),
|
...parseNoun(s, dicitonary, possesor),
|
||||||
...parseParticiple(s, lookup, possesor),
|
...parseParticiple(s, dicitonary, possesor),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { parseAP } from "./parse-ap";
|
import { parseAP } from "./parse-ap";
|
||||||
import { parseNP } from "./parse-np";
|
import { parseNP } from "./parse-np";
|
||||||
import { parsePossesor } from "./parse-possesor";
|
import { parsePossesor } from "./parse-possesor";
|
||||||
|
@ -7,19 +6,25 @@ import { bindParseResult } from "./utils";
|
||||||
|
|
||||||
export function parseNPAP(
|
export function parseNPAP(
|
||||||
s: Readonly<T.Token[]>,
|
s: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction
|
dictionary: T.DictionaryAPI
|
||||||
): T.ParseResult<T.APSelection | T.ParsedNP>[] {
|
): T.ParseResult<T.APSelection | T.ParsedNP>[] {
|
||||||
if (s.length === 0) {
|
if (s.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const possesor = parsePossesor(s, lookup, undefined);
|
const possesor = parsePossesor(s, dictionary, undefined);
|
||||||
if (!possesor.length) {
|
if (!possesor.length) {
|
||||||
return [...parseNP(s, lookup, undefined), ...parseAP(s, lookup, undefined)];
|
return [
|
||||||
|
...parseNP(s, dictionary, undefined),
|
||||||
|
...parseAP(s, dictionary, undefined),
|
||||||
|
];
|
||||||
}
|
}
|
||||||
return bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>(
|
return bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>(
|
||||||
possesor,
|
possesor,
|
||||||
(tokens, p) => {
|
(tokens, p) => {
|
||||||
return [...parseNP(tokens, lookup, p), ...parseAP(tokens, lookup, p)];
|
return [
|
||||||
|
...parseNP(tokens, dictionary, p),
|
||||||
|
...parseAP(tokens, dictionary, p),
|
||||||
|
];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,14 +4,16 @@ import {
|
||||||
makePossesorSelection,
|
makePossesorSelection,
|
||||||
} from "../phrase-building/make-selections";
|
} from "../phrase-building/make-selections";
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { lookup, wordQuery } from "./lookup";
|
import { testDictionary } from "./mini-test-dictionary";
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { parseNPAP } from "./parse-npap";
|
import { parseNPAP } from "./parse-npap";
|
||||||
|
|
||||||
const leedul = wordQuery("لیدل", "verb");
|
const leedul = testDictionary.verbEntryLookup("لیدل")[0];
|
||||||
const akheestul = wordQuery("اخیستل", "verb");
|
const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
|
||||||
const wahul = wordQuery("وهل", "verb");
|
const wahul = testDictionary.verbEntryLookup("وهل")[0];
|
||||||
const saray = wordQuery("سړی", "noun");
|
const saray = testDictionary.nounLookup("سړی")[0];
|
||||||
|
|
||||||
|
// TODO: uncomment and get parsing of short participles working
|
||||||
|
|
||||||
const tests: {
|
const tests: {
|
||||||
label: string;
|
label: string;
|
||||||
|
@ -113,20 +115,20 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
// {
|
||||||
input: "د سړي لیدو",
|
// input: "د سړي لیدو",
|
||||||
output: [
|
// output: [
|
||||||
{
|
// {
|
||||||
inflected: true,
|
// inflected: true,
|
||||||
selection: {
|
// selection: {
|
||||||
...makeParticipleSelection(leedul),
|
// ...makeParticipleSelection(leedul),
|
||||||
possesor: makePossesorSelection(
|
// possesor: makePossesorSelection(
|
||||||
makeNounSelection(saray, undefined)
|
// makeNounSelection(saray, undefined)
|
||||||
),
|
// ),
|
||||||
},
|
// },
|
||||||
},
|
// },
|
||||||
],
|
// ],
|
||||||
},
|
// },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
@ -136,7 +138,7 @@ describe("parsing participles", () => {
|
||||||
test(label, () => {
|
test(label, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const res = parseNPAP(tokens, lookup).map(({ body }) => body);
|
const res = parseNPAP(tokens, testDictionary).map(({ body }) => body);
|
||||||
expect(res).toEqual(
|
expect(res).toEqual(
|
||||||
output.map(
|
output.map(
|
||||||
(x): T.ParsedNP => ({
|
(x): T.ParsedNP => ({
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { LookupFunction } from "./lookup";
|
import { shortVerbEndConsonant } from "./misc";
|
||||||
|
|
||||||
type ParticipleResult = {
|
type ParticipleResult = {
|
||||||
inflected: boolean;
|
inflected: boolean;
|
||||||
|
@ -7,9 +7,10 @@ type ParticipleResult = {
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO: should have adverbs with participle
|
// TODO: should have adverbs with participle
|
||||||
|
// TODO: NOTE this does not work with compound verbs yet
|
||||||
export function parseParticiple(
|
export function parseParticiple(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction,
|
dicitonary: T.DictionaryAPI,
|
||||||
possesor: T.PossesorSelection | undefined
|
possesor: T.PossesorSelection | undefined
|
||||||
): T.ParseResult<ParticipleResult>[] {
|
): T.ParseResult<ParticipleResult>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
|
@ -20,8 +21,13 @@ export function parseParticiple(
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const inflected = first.s.endsWith("و");
|
const inflected = first.s.endsWith("و");
|
||||||
const matches = lookup(first.s, "participle");
|
|
||||||
return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({
|
return [
|
||||||
|
...dicitonary.verbEntryLookup(inflected ? first.s.slice(0, -1) : first.s),
|
||||||
|
...(inflected && shortVerbEndConsonant.includes(first.s.at(-2) || "")
|
||||||
|
? dicitonary.verbEntryLookup(first.s.slice(0, -1) + "ل")
|
||||||
|
: []),
|
||||||
|
].map<T.ParseResult<ParticipleResult>>((verb) => ({
|
||||||
tokens: rest,
|
tokens: rest,
|
||||||
body: {
|
body: {
|
||||||
inflected,
|
inflected,
|
||||||
|
|
|
@ -1,24 +1,24 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { lookup } from "./lookup";
|
|
||||||
import { parseVP } from "./parse-vp";
|
import { parseVP } from "./parse-vp";
|
||||||
|
|
||||||
// شو should not be sheyaano !!
|
// شو should not be sheyaano !!
|
||||||
|
|
||||||
export function parsePhrase(s: T.Token[]): {
|
export function parsePhrase(
|
||||||
success: (
|
s: T.Token[],
|
||||||
| {
|
dicitonary: T.DictionaryAPI
|
||||||
inflected: boolean;
|
): {
|
||||||
selection: T.NPSelection;
|
success: // | {
|
||||||
}
|
// inflected: boolean;
|
||||||
| Omit<T.VBE, "ps">
|
// selection: T.NPSelection;
|
||||||
| T.VPSelectionComplete
|
// }
|
||||||
)[];
|
// | Omit<T.VBE, "ps">
|
||||||
|
T.VPSelectionComplete[];
|
||||||
errors: string[];
|
errors: string[];
|
||||||
} {
|
} {
|
||||||
const res = [
|
const res = [
|
||||||
// ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
|
// ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
|
||||||
// ...parseVerb(s, verbLookup),
|
// ...parseVerb(s, verbLookup),
|
||||||
...parseVP(s, lookup),
|
...parseVP(s, dicitonary),
|
||||||
];
|
];
|
||||||
|
|
||||||
const success = res.filter((x) => !x.tokens.length).map((x) => x.body);
|
const success = res.filter((x) => !x.tokens.length).map((x) => x.body);
|
||||||
|
|
|
@ -4,16 +4,16 @@ import {
|
||||||
makeNounSelection,
|
makeNounSelection,
|
||||||
makePronounSelection,
|
makePronounSelection,
|
||||||
} from "../phrase-building/make-selections";
|
} from "../phrase-building/make-selections";
|
||||||
import { lookup, wordQuery } from "./lookup";
|
|
||||||
import { parsePossesor } from "./parse-possesor";
|
import { parsePossesor } from "./parse-possesor";
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { isCompleteResult } from "./utils";
|
import { isCompleteResult } from "./utils";
|
||||||
|
import { testDictionary as dictionary } from "./mini-test-dictionary";
|
||||||
|
|
||||||
const sturey = wordQuery("ستړی", "adj");
|
const sturey = dictionary.adjLookup("ستړی")[0];
|
||||||
const sarey = wordQuery("سړی", "noun");
|
const sarey = dictionary.nounLookup("سړی")[0];
|
||||||
const maashoom = wordQuery("ماشوم", "noun");
|
const maashoom = dictionary.nounLookup("ماشوم")[0];
|
||||||
const malguray = wordQuery("ملګری", "noun");
|
const malguray = dictionary.nounLookup("ملګری")[0];
|
||||||
const plaar = wordQuery("پلار", "noun");
|
const plaar = dictionary.nounLookup("پلار")[0];
|
||||||
|
|
||||||
const tests: {
|
const tests: {
|
||||||
input: string;
|
input: string;
|
||||||
|
@ -109,12 +109,12 @@ const tests: {
|
||||||
test("parse possesor", () => {
|
test("parse possesor", () => {
|
||||||
tests.forEach(({ input, output }) => {
|
tests.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const parsed = parsePossesor(tokens, lookup, undefined);
|
const parsed = parsePossesor(tokens, dictionary, undefined);
|
||||||
if (output === "error") {
|
if (output === "error") {
|
||||||
expect(parsed.some((x) => x.errors.length)).toBe(true);
|
expect(parsed.some((x) => x.errors.length)).toBe(true);
|
||||||
} else {
|
} else {
|
||||||
expect(
|
expect(
|
||||||
parsePossesor(tokens, lookup, undefined)
|
parsePossesor(tokens, dictionary, undefined)
|
||||||
.filter(isCompleteResult)
|
.filter(isCompleteResult)
|
||||||
.map((x) => x.body.np.selection)
|
.map((x) => x.body.np.selection)
|
||||||
).toEqual(output);
|
).toEqual(output);
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { parseNP } from "./parse-np";
|
import { parseNP } from "./parse-np";
|
||||||
import { bindParseResult } from "./utils";
|
import { bindParseResult } from "./utils";
|
||||||
// TODO: maybe contractions should just be male to cut down on the
|
// TODO: maybe contractions should just be male to cut down on the
|
||||||
|
@ -19,7 +18,7 @@ const contractions: [string[], T.Person[]][] = [
|
||||||
|
|
||||||
export function parsePossesor(
|
export function parsePossesor(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction,
|
dictionary: T.DictionaryAPI,
|
||||||
prevPossesor: T.PossesorSelection | undefined
|
prevPossesor: T.PossesorSelection | undefined
|
||||||
): T.ParseResult<T.PossesorSelection>[] {
|
): T.ParseResult<T.PossesorSelection>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
|
@ -43,14 +42,14 @@ export function parsePossesor(
|
||||||
? [{ message: "a pronoun cannot have a possesor" }]
|
? [{ message: "a pronoun cannot have a possesor" }]
|
||||||
: [];
|
: [];
|
||||||
return contractions
|
return contractions
|
||||||
.flatMap((p) => parsePossesor(rest, lookup, p))
|
.flatMap((p) => parsePossesor(rest, dictionary, p))
|
||||||
.map((x) => ({
|
.map((x) => ({
|
||||||
...x,
|
...x,
|
||||||
errors: [...errors, ...x.errors],
|
errors: [...errors, ...x.errors],
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
if (first.s === "د") {
|
if (first.s === "د") {
|
||||||
const np = parseNP(rest, lookup, undefined);
|
const np = parseNP(rest, dictionary, undefined);
|
||||||
return bindParseResult(np, (tokens, body) => {
|
return bindParseResult(np, (tokens, body) => {
|
||||||
const possesor: T.PossesorSelection = {
|
const possesor: T.PossesorSelection = {
|
||||||
shrunken: false,
|
shrunken: false,
|
||||||
|
@ -63,7 +62,11 @@ export function parsePossesor(
|
||||||
[{ message: `possesor should be inflected` }]
|
[{ message: `possesor should be inflected` }]
|
||||||
: [],
|
: [],
|
||||||
// add and check error - can't add possesor to pronoun
|
// add and check error - can't add possesor to pronoun
|
||||||
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
|
next: parsePossesor(
|
||||||
|
tokens,
|
||||||
|
dictionary,
|
||||||
|
addPoss(prevPossesor, possesor)
|
||||||
|
),
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { sandwiches } from "../sandwiches";
|
import { sandwiches } from "../sandwiches";
|
||||||
import { parseNP } from "./parse-np";
|
import { parseNP } from "./parse-np";
|
||||||
import { bindParseResult } from "./utils";
|
import { bindParseResult } from "./utils";
|
||||||
|
@ -14,7 +13,7 @@ import { bindParseResult } from "./utils";
|
||||||
|
|
||||||
export function parseSandwich(
|
export function parseSandwich(
|
||||||
s: Readonly<T.Token[]>,
|
s: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction,
|
dictionary: T.DictionaryAPI,
|
||||||
possesor: T.PossesorSelection | undefined
|
possesor: T.PossesorSelection | undefined
|
||||||
): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
|
): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
|
||||||
if (s.length === 0) {
|
if (s.length === 0) {
|
||||||
|
@ -27,7 +26,7 @@ export function parseSandwich(
|
||||||
(x) => x.before && x.before.p === first.s
|
(x) => x.before && x.before.p === first.s
|
||||||
);
|
);
|
||||||
// TODO: this could be be really repetitive...
|
// TODO: this could be be really repetitive...
|
||||||
const nps = parseNP(startMatches.length ? rest : s, lookup, possesor);
|
const nps = parseNP(startMatches.length ? rest : s, dictionary, possesor);
|
||||||
return bindParseResult(nps, (tokens, np) => {
|
return bindParseResult(nps, (tokens, np) => {
|
||||||
if (!tokens.length) {
|
if (!tokens.length) {
|
||||||
return [];
|
return [];
|
||||||
|
|
|
@ -7,28 +7,35 @@ import {
|
||||||
wartlul,
|
wartlul,
|
||||||
raatlul,
|
raatlul,
|
||||||
} from "./irreg-verbs";
|
} from "./irreg-verbs";
|
||||||
import { lookup, wordQuery } from "./lookup";
|
import { parseVBE } from "./parse-vbe-new";
|
||||||
import { parseVBE } from "./parse-vbe";
|
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { getPeople, removeKeys } from "./utils";
|
import { getPeople, removeKeys } from "./utils";
|
||||||
|
import { testDictionary } from "./mini-test-dictionary";
|
||||||
|
|
||||||
const wahul = wordQuery("وهل", "verb");
|
const wahul = testDictionary.verbEntryLookup("وهل")[0];
|
||||||
const leekul = wordQuery("لیکل", "verb");
|
const leekul = testDictionary.verbEntryLookup("لیکل")[0];
|
||||||
const manul = wordQuery("منل", "verb");
|
const manul = testDictionary.verbEntryLookup("منل")[0];
|
||||||
// const gaalul = wordQuery("ګالل", "verb");
|
const gaalul = testDictionary.verbEntryLookup("ګالل")[0];
|
||||||
const rasedul = wordQuery("رسېدل", "verb");
|
const rasedul = testDictionary.verbEntryLookup("رسېدل")[0];
|
||||||
const leedul = wordQuery("لیدل", "verb");
|
const leedul = testDictionary.verbEntryLookup("لیدل")[0];
|
||||||
const khorul = wordQuery("خوړل", "verb");
|
const awuxtul = testDictionary.verbEntryLookup("اوښتل")[0];
|
||||||
const kenaastul = wordQuery("کېناستل", "verb");
|
const khorul = testDictionary.verbEntryLookup("خوړل")[0];
|
||||||
const prexodul = wordQuery("پرېښودل", "verb");
|
const kenaastul = testDictionary.verbEntryLookup("کېناستل")[0];
|
||||||
const xodul = wordQuery("ښودل", "verb");
|
const kxenaastul = testDictionary.verbEntryLookup("کښېناستل")[0];
|
||||||
const kexodul = wordQuery("کېښودل", "verb");
|
const prexodul = testDictionary.verbEntryLookup("پرېښودل")[0];
|
||||||
const katul = wordQuery("کتل", "verb");
|
const prexowul = testDictionary.verbEntryLookup("پرېښوول")[0];
|
||||||
const watul = wordQuery("وتل", "verb");
|
const prexawul = testDictionary.verbEntryLookup("پرېښول")[0];
|
||||||
const wurul = wordQuery("وړل", "verb");
|
const xodul = testDictionary.verbEntryLookup("ښودل")[0];
|
||||||
const akheestul = wordQuery("اخیستل", "verb");
|
const kexodul = testDictionary.verbEntryLookup("کېښودل")[0];
|
||||||
const alwatul = wordQuery("الوتل", "verb");
|
const kxexodul = testDictionary.verbEntryLookup("کښېښودل")[0];
|
||||||
// const dartlul = wordQuery("درتلل", "verb")
|
const katul = testDictionary.verbEntryLookup("کتل")[0];
|
||||||
|
const watul = testDictionary.verbEntryLookup("وتل")[0];
|
||||||
|
const wurul = testDictionary.verbEntryLookup("وړل")[0];
|
||||||
|
const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
|
||||||
|
const alwatul = testDictionary.verbEntryLookup("الوتل")[0];
|
||||||
|
const dartlul = testDictionary.verbEntryLookup("درتلل")[0];
|
||||||
|
|
||||||
|
// TODO: Prefix searching on split verbs for perfective head parsing
|
||||||
|
|
||||||
// TODO: azmoyul etc
|
// TODO: azmoyul etc
|
||||||
// TODO: cleaner and more thorough handling of ا seperating verbs ee - wee etc
|
// TODO: cleaner and more thorough handling of ا seperating verbs ee - wee etc
|
||||||
|
@ -311,19 +318,6 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
|
||||||
input: "وینم",
|
|
||||||
output: [
|
|
||||||
{
|
|
||||||
stem: {
|
|
||||||
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
|
||||||
aspects: ["imperfective", "perfective"],
|
|
||||||
},
|
|
||||||
verb: leedul,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
// TODO!! THESE COULD ALSO BE MALE
|
|
||||||
{
|
{
|
||||||
input: "لیده",
|
input: "لیده",
|
||||||
output: [
|
output: [
|
||||||
|
@ -364,42 +358,6 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
|
||||||
input: "خوړ",
|
|
||||||
output: [
|
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: [T.Person.ThirdSingMale],
|
|
||||||
aspects: ["imperfective", "perfective"],
|
|
||||||
},
|
|
||||||
verb: khorul,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
input: "کوت",
|
|
||||||
output: [
|
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: [T.Person.ThirdSingMale],
|
|
||||||
aspects: ["imperfective", "perfective"],
|
|
||||||
},
|
|
||||||
verb: katul,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
input: "کاته",
|
|
||||||
output: [
|
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: [T.Person.ThirdSingMale],
|
|
||||||
aspects: ["imperfective", "perfective"],
|
|
||||||
},
|
|
||||||
verb: katul,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
input: "خلم",
|
input: "خلم",
|
||||||
output: [
|
output: [
|
||||||
|
@ -436,6 +394,11 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: "verbs with seperating perfective heads",
|
||||||
|
cases: [
|
||||||
{
|
{
|
||||||
input: "الوځې",
|
input: "الوځې",
|
||||||
output: [
|
output: [
|
||||||
|
@ -460,6 +423,18 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
input: "لوتلم",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb: alwatul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -492,6 +467,13 @@ const tests: {
|
||||||
},
|
},
|
||||||
verb: kenaastul,
|
verb: kenaastul,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
stem: {
|
||||||
|
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb: kxenaastul,
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -507,46 +489,64 @@ const tests: {
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: "ناست",
|
input: "کېناسته",
|
||||||
output: [
|
output: [
|
||||||
{
|
{
|
||||||
root: {
|
root: {
|
||||||
persons: [T.Person.ThirdSingMale],
|
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||||
aspects: ["perfective"],
|
aspects: ["imperfective"],
|
||||||
},
|
},
|
||||||
verb: kenaastul,
|
verb: kenaastul,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: "پرېږدو",
|
input: "ناست",
|
||||||
output: [
|
output: [kenaastul, kxenaastul].map((verb) => ({
|
||||||
{
|
root: {
|
||||||
stem: {
|
persons: [T.Person.ThirdSingMale],
|
||||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
aspects: ["perfective"],
|
||||||
aspects: ["imperfective"],
|
|
||||||
},
|
|
||||||
verb: prexodul,
|
|
||||||
},
|
},
|
||||||
],
|
verb,
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "ناسته",
|
||||||
|
output: [kenaastul, kxenaastul].map((verb) => ({
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb,
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "پرېږدو",
|
||||||
|
output: [prexodul, prexowul, prexawul].map((verb) => ({
|
||||||
|
stem: {
|
||||||
|
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||||
|
aspects: ["imperfective"],
|
||||||
|
},
|
||||||
|
verb,
|
||||||
|
})),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: "ږدو",
|
input: "ږدو",
|
||||||
output: [
|
output: [
|
||||||
{
|
...[prexodul, prexawul, prexowul, kexodul, kxexodul].map((verb) => ({
|
||||||
stem: {
|
stem: {
|
||||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||||
aspects: ["perfective"],
|
aspects: ["perfective"] satisfies T.Aspect[],
|
||||||
},
|
},
|
||||||
verb: prexodul,
|
verb,
|
||||||
},
|
})),
|
||||||
{
|
...[kexodul, kxexodul].map((verb) => ({
|
||||||
stem: {
|
stem: {
|
||||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||||
aspects: ["imperfective", "perfective"],
|
aspects: ["imperfective"] satisfies T.Aspect[],
|
||||||
},
|
},
|
||||||
verb: kexodul,
|
verb,
|
||||||
},
|
})),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -571,20 +571,13 @@ const tests: {
|
||||||
},
|
},
|
||||||
verb: xodul,
|
verb: xodul,
|
||||||
},
|
},
|
||||||
{
|
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||||
root: {
|
root: {
|
||||||
persons: [T.Person.ThirdSingFemale],
|
persons: [T.Person.ThirdSingFemale],
|
||||||
aspects: ["perfective"],
|
aspects: ["perfective"] satisfies T.Aspect[],
|
||||||
},
|
},
|
||||||
verb: prexodul,
|
verb,
|
||||||
},
|
})),
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: [T.Person.ThirdSingFemale],
|
|
||||||
aspects: ["perfective"],
|
|
||||||
},
|
|
||||||
verb: kexodul,
|
|
||||||
},
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -661,43 +654,9 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
|
||||||
input: "ړلم",
|
|
||||||
output: [
|
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: getPeople(1, "sing"),
|
|
||||||
aspects: ["perfective"],
|
|
||||||
},
|
|
||||||
verb: wurul,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: getPeople(1, "sing"),
|
|
||||||
aspects: ["perfective"],
|
|
||||||
},
|
|
||||||
verb: tlul,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
input: "ړ",
|
input: "ړ",
|
||||||
output: [
|
output: [],
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: [T.Person.ThirdSingMale],
|
|
||||||
aspects: ["perfective"],
|
|
||||||
},
|
|
||||||
verb: wurul,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
root: {
|
|
||||||
persons: [T.Person.ThirdSingMale],
|
|
||||||
aspects: ["perfective"],
|
|
||||||
},
|
|
||||||
verb: tlul,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
},
|
||||||
// should not match with the prefix for perfective
|
// should not match with the prefix for perfective
|
||||||
{
|
{
|
||||||
|
@ -713,6 +672,78 @@ const tests: {
|
||||||
{
|
{
|
||||||
label: "verbs with different 3rd pers sing past endings",
|
label: "verbs with different 3rd pers sing past endings",
|
||||||
cases: [
|
cases: [
|
||||||
|
{
|
||||||
|
input: "خوړ",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: khorul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "خوړه",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: khorul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "کوت",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: katul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "کاته",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: katul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "واته",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: watul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "ووت",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: watul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
input: "رسېد",
|
input: "رسېد",
|
||||||
output: [
|
output: [
|
||||||
|
@ -725,6 +756,18 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
input: "رسېده",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: rasedul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
input: "کېناسته",
|
input: "کېناسته",
|
||||||
output: [
|
output: [
|
||||||
|
@ -766,27 +809,69 @@ const tests: {
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: "واته",
|
input: "اوښت",
|
||||||
output: [
|
output: [
|
||||||
{
|
{
|
||||||
root: {
|
root: {
|
||||||
persons: [T.Person.ThirdSingMale],
|
persons: [T.Person.ThirdSingMale],
|
||||||
aspects: ["imperfective", "perfective"],
|
aspects: ["imperfective"],
|
||||||
},
|
},
|
||||||
verb: watul,
|
verb: awuxtul,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
input: "ووت",
|
input: "ښت",
|
||||||
|
output: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "اوښته",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||||
|
aspects: ["imperfective"],
|
||||||
|
},
|
||||||
|
verb: awuxtul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "ښود",
|
||||||
output: [
|
output: [
|
||||||
{
|
{
|
||||||
root: {
|
root: {
|
||||||
persons: [T.Person.ThirdSingMale],
|
persons: [T.Person.ThirdSingMale],
|
||||||
aspects: ["imperfective", "perfective"],
|
aspects: ["imperfective", "perfective"],
|
||||||
},
|
},
|
||||||
verb: watul,
|
verb: xodul,
|
||||||
},
|
},
|
||||||
|
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["perfective"] satisfies T.Aspect[],
|
||||||
|
},
|
||||||
|
verb,
|
||||||
|
})),
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "ښوده",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||||
|
aspects: ["imperfective", "perfective"],
|
||||||
|
},
|
||||||
|
verb: xodul,
|
||||||
|
},
|
||||||
|
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||||
|
aspects: ["perfective"] satisfies T.Aspect[],
|
||||||
|
},
|
||||||
|
verb,
|
||||||
|
})),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -971,7 +1056,7 @@ tests.forEach(({ label, cases }) => {
|
||||||
test(label, () => {
|
test(label, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const vbs = parseVBE(tokens, lookup).map((r) => r.body);
|
const vbs = parseVBE(tokens, testDictionary).map((r) => r.body);
|
||||||
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
|
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
|
||||||
return [
|
return [
|
||||||
...acc,
|
...acc,
|
|
@ -0,0 +1,387 @@
|
||||||
|
import * as T from "../../../types";
|
||||||
|
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
|
||||||
|
import { parseKedul } from "./parse-kedul";
|
||||||
|
import { getVerbEnding } from "./parse-verb-helpers";
|
||||||
|
import { returnParseResults } from "./utils";
|
||||||
|
import { entries as splitVerbEntries } from "./split-verbs";
|
||||||
|
import * as tp from "../type-predicates";
|
||||||
|
import memoize from "micro-memoize";
|
||||||
|
import { pashtoConsonants } from "../pashto-consonants";
|
||||||
|
|
||||||
|
// TODO: و ارزول
|
||||||
|
|
||||||
|
// TODO: کول verbs!
|
||||||
|
// check that aawu stuff is working
|
||||||
|
// check oo`azmooy -
|
||||||
|
// TODO: proper use of sepOo (hasBreakawayAleph) when checking for perfective roots/stems
|
||||||
|
// check څاته
|
||||||
|
// laaRa shum etc
|
||||||
|
// TODO: proper use of perfective with sh
|
||||||
|
// TODO: use of raa, dar, war with sh
|
||||||
|
// TODO: هغه لاړ
|
||||||
|
// TODO: don't have کول کېدل in split-verbs
|
||||||
|
|
||||||
|
/** The `info` payload of a parsed verb block whose `type` is "verb". */
type BaseInfo = Extract<T.ParsedVBE["info"], { type: "verb" }>;

/** `BaseInfo` with its `base` field pinned to the given literal. */
type InfoWithBase<B> = Omit<BaseInfo, "base"> & { base: B };

/** Verb info built on a stem. */
type StemInfo = InfoWithBase<"stem">;

/** Verb info built on a root. */
type RootInfo = InfoWithBase<"root">;
|
||||||
|
|
||||||
|
/**
 * Parses the first token as a verb block with an ending (VBE).
 *
 * The hard-coded irregular forms are tried first and, when they match,
 * are the only results returned. Otherwise the last character of the token
 * is treated as the ending (a token ending in ل is kept whole as the base)
 * and the base is looked up as an imperfective/perfective stem and root.
 *
 * @param tokens - remaining tokens; only the first one is consumed
 * @param dictionary - dictionary API used for the verb lookups
 * @returns the results of `parseKedul`, followed by stem-based results,
 *   followed by root-based results
 */
export function parseVBE(
  tokens: Readonly<T.Token[]>,
  dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedVBE>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;

  const irregResults = parseIrregularVerb(first.s);
  if (irregResults.length) {
    return returnParseResults(rest, irregResults);
  }

  const kedulStat = parseKedul(tokens);
  const ending = first.s.at(-1) || "";
  const base = ending === "ل" ? first.s : first.s.slice(0, -1);
  const { stem: stemPeople, root: rootPeople } = getVerbEnding(ending);
  // TODO: imperative for separating verbs
  const imperativePeople = getImperativeVerbEnding(ending);

  // Stem-based forms: every matching stem combined with every person the
  // ending allows, plus the imperative readings of the same ending.
  const stemVBs: T.ParsedVBE[] = [];
  const stemInfos = [
    ...findImperfectiveStem(base, dictionary),
    ...findPerfectiveStem(base, dictionary),
  ];
  for (const info of stemInfos) {
    for (const person of stemPeople) {
      stemVBs.push({ type: "VB", person, info });
    }
    for (const person of imperativePeople) {
      stemVBs.push({
        type: "VB",
        person,
        info: { ...info, imperative: true },
      });
    }
  }
  const stemRes = returnParseResults(rest, stemVBs);

  // Root-based forms: for each matching root, the short 3rd pers. masc.
  // sing. reading (if any) comes before the regular person readings.
  const rootVBs: T.ParsedVBE[] = [];
  const rootInfos = [
    ...findImperfectiveRoot(base, dictionary),
    ...findPerfectiveRoot(base, dictionary),
  ];
  for (const info of rootInfos) {
    rootVBs.push(...thirdPersSingMascShortFromRoot(base, ending, info));
    for (const person of rootPeople) {
      rootVBs.push({ type: "VB", person, info });
    }
  }
  rootVBs.push(...specialThirdPersMascSingForm(base, ending, dictionary));
  const rootRes = returnParseResults(rest, rootVBs);

  return [...kedulStat, ...stemRes, ...rootRes];
}
|
||||||
|
|
||||||
|
/**
 * Finds 3rd pers. masc. sing. past forms that do not follow the regular
 * root + ending pattern.
 *
 * Two sources are combined, in this order:
 *  1. "regular": dictionary entries whose `tppp` field matches the whole
 *     word (or, when the word ends in ه, also the word without it), tried
 *     with and without a leading alef;
 *  2. "hardEnding": words ending in ېد / ود / ست / ښت, looked up as a root
 *     after appending ل.
 *
 * NOTE: lookups through the split-verb list (imperfective and perfective
 * forms of separating verbs) are not handled here yet.
 *
 * @param base - the word without its final character
 * @param ending - the final character of the word
 * @param dicitonary - dictionary API used for the lookups
 */
function specialThirdPersMascSingForm(
  base: string,
  ending: string,
  dicitonary: T.DictionaryAPI
): T.ParsedVBE[] {
  const endingQualifies =
    ending === "ه" || pashtoConsonants.includes(ending);
  if (!endingQualifies) {
    return [];
  }
  const whole = base + ending;

  // The length check only applies to the ت case.
  const hasHardEnding =
    (ending === "د" && (base.endsWith("ې") || base.endsWith("و"))) ||
    (ending === "ت" &&
      (base.endsWith("س") || base.endsWith("ښ")) &&
      base.length > 1);
  const hardEnding: T.ParsedVBE[] = [];
  if (hasHardEnding) {
    const longForm = whole + "ل";
    const infos = [
      ...findPerfectiveRoot(longForm, dicitonary),
      ...findImperfectiveRoot(longForm, dicitonary),
    ];
    for (const info of infos) {
      hardEnding.push({ type: "VB", person: T.Person.ThirdSingMale, info });
    }
  }

  const candidates = ending === "ه" ? [whole, base] : [whole];
  const tpppEntries = candidates
    .flatMap((c) => withAlefAdded(c))
    .flatMap((c) => dicitonary.otherLookup("tppp", c, true))
    .filter(
      (e): e is T.VerbDictionaryEntry =>
        tp.isVerbDictionaryEntry(e) && !e.l && !!e.tppp
    );

  const regular: T.ParsedVBE[] = [];
  for (const entry of tpppEntries) {
    let aspects: readonly T.Aspect[];
    if (entry.separationAtP) {
      aspects = ["imperfective"];
    } else if (startsWithAleph(entry.p) && !startsWithAleph(base)) {
      // the entry starts with an alef but the word does not, so only the
      // perfective reading is produced
      aspects = ["perfective"];
    } else {
      aspects = ["imperfective", "perfective"];
    }
    for (const aspect of aspects) {
      regular.push({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect,
          base: "root",
          verb: { entry },
        },
      });
    }
  }

  return [...regular, ...hardEnding];
}
|
||||||
|
|
||||||
|
/**
 * Produces the 3rd pers. masc. sing. reading for a short root followed
 * by ه.
 *
 * It applies only when the verb has no `tppp` form of its own, the ending
 * is ه, and the base does not already end in ل.
 *
 * @param base - the word without its final character
 * @param ending - the final character of the word
 * @param info - the root match the reading would be attached to
 * @returns a single-element array, or an empty array when it does not apply
 */
function thirdPersSingMascShortFromRoot(
  base: string,
  ending: string,
  info: RootInfo
): T.ParsedVBE[] {
  const applies =
    !info.verb.entry.tppp && ending === "ه" && !base.endsWith("ل");
  return applies
    ? [{ type: "VB", person: T.Person.ThirdSingMale, info }]
    : [];
}
|
||||||
|
|
||||||
|
/**
 * Looks up verbs whose imperfective stem is `s`.
 *
 * Regular stems come from `regStemSearch`; irregular ones are entries whose
 * `psp` field matches `s`. The bare kedul/shwul forms are left to the
 * dedicated kedul parser and yield nothing here.
 *
 * @param s - candidate stem (the word with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 */
function findImperfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
): StemInfo[] {
  const kedulForms = ["کېږ", "کېد", "ش", "شو", "شول"];
  if (kedulForms.includes(s)) {
    return [];
  }
  const regulars = regStemSearch(s, dicitonary);
  const irregulars: T.VerbEntry[] = [];
  for (const e of dicitonary.otherLookup("psp", s)) {
    if (tp.isVerbDictionaryEntry(e) && !e.l) {
      irregulars.push({ entry: e });
    }
  }
  const found: StemInfo[] = [];
  for (const verb of [...regulars, ...irregulars]) {
    found.push({
      type: "verb",
      aspect: "imperfective",
      base: "stem",
      verb,
    });
  }
  return found;
}
|
||||||
|
|
||||||
|
/**
 * Returns `s` together with its variants prefixed by ا and آ.
 * A string that already starts with an alef is returned on its own.
 */
function withAlefAdded(s: string): string[] {
  if (startsWithAleph(s)) {
    return [s];
  }
  return [s, "ا" + s, "آ" + s];
}
|
||||||
|
|
||||||
|
/**
 * Memoized search of the split-verb list: keeps the entries whose stem
 * (`ssp`, else `psp`, else `p`), with the part before `separationAtP`
 * cut off, equals `s`.
 */
const stemSplitLookup = memoize((s: string) =>
  splitVerbEntries.filter(({ entry }) => {
    const stem = entry.ssp || entry.psp || entry.p;
    const afterSplit = stem.slice(entry.separationAtP || 0);
    return afterSplit === s;
  })
);
|
||||||
|
|
||||||
|
/**
 * Looks up verbs whose perfective stem is `s`.
 *
 * Nothing is returned for the bare kedul/shwul forms or for a stem that
 * starts with an alef. Otherwise `s` is searched as-is and with a leading
 * alef added, and three sources are combined in this order:
 *  1. regular stems (non-separating verbs only),
 *  2. entries matched on `psp` that have no `ssp` and do not separate,
 *  3. separating verbs from the split-verb list.
 *
 * @param s - candidate stem (the word with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 */
function findPerfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
): StemInfo[] {
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s) || startsWithAleph(s)) {
    return [];
  }
  const queries = withAlefAdded(s);

  const regulars: T.VerbEntry[] = [];
  for (const q of queries) {
    for (const verb of regStemSearch(q, dicitonary)) {
      if (!verb.entry.separationAtP) {
        regulars.push(verb);
      }
    }
  }

  const irregularsBasedOnImperf: T.VerbEntry[] = [];
  for (const q of queries) {
    for (const e of dicitonary.otherLookup("psp", q)) {
      if (
        tp.isVerbDictionaryEntry(e) &&
        !e.l &&
        !e.ssp &&
        !e.separationAtP
      ) {
        irregularsBasedOnImperf.push({ entry: e });
      }
    }
  }

  const separating = stemSplitLookup(s);

  const found: StemInfo[] = [];
  for (const verb of [
    ...regulars,
    ...irregularsBasedOnImperf,
    ...separating,
  ]) {
    found.push({
      type: "verb",
      aspect: "perfective",
      base: "stem",
      verb,
    });
  }
  return found;
}
|
||||||
|
|
||||||
|
/**
 * Finds verbs for which `s` is a regular (rule-derived) stem.
 *
 * Two lookups are made:
 *  - `s` + ل, keeping entries not marked "intrans";
 *  - `s` (minus a trailing ېږ, if present) + ېدل, keeping entries marked
 *    "intrans".
 * In both cases compound verbs and verbs with their own `ssp` / `psp`
 * stems are dropped.
 *
 * @param s - candidate stem
 * @param dicitonary - dictionary API used for the lookups
 * @returns matches from the first lookup followed by those from the second
 */
function regStemSearch(s: string, dicitonary: T.DictionaryAPI): T.VerbEntry[] {
  const found: T.VerbEntry[] = [];

  for (const verb of dicitonary.verbEntryLookup(s + "ل")) {
    const e = verb.entry;
    if (e.c.includes("comp") || e.ssp || e.psp || e.c.includes("intrans")) {
      continue;
    }
    found.push(verb);
  }

  const intransBase = s.endsWith("ېږ") ? s.slice(0, -2) : s;
  for (const verb of dicitonary.verbEntryLookup(intransBase + "ېدل")) {
    const e = verb.entry;
    if (e.c.includes("comp") || e.ssp || e.psp || !e.c.includes("intrans")) {
      continue;
    }
    found.push(verb);
  }

  return found;
}
|
||||||
|
|
||||||
|
/**
 * Looks up verbs whose imperfective root is `s`.
 *
 * Both the short root (`s`) and the long root (`s` + ل) are looked up as
 * dictionary verbs; compound verbs are dropped. The bare kedul/shwul forms
 * are left to the dedicated kedul parser and yield nothing here.
 *
 * @param s - candidate root (the word with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 */
function findImperfectiveRoot(
  s: string,
  dicitonary: T.DictionaryAPI
): RootInfo[] {
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  return (
    [s, s + "ل"]
      // Call the lookup through the dictionary object with exactly one
      // argument: passing the method reference straight to flatMap would
      // detach it from `dicitonary` and also feed it the element index and
      // the array as extra arguments.
      .flatMap((form) => dicitonary.verbEntryLookup(form))
      .filter((e) => !e.entry.c.includes("comp"))
      .map((verb) => ({
        type: "verb",
        aspect: "imperfective",
        base: "root",
        verb,
      }))
  );
}
|
||||||
|
|
||||||
|
/**
 * Memoized search of the split-verb list: keeps the entries whose root
 * (`prp`, else `p`), with the part before `separationAtP` cut off, equals
 * `s` or `s` + ل.
 */
const rootSplitLookup = memoize((s: string) => {
  const longForm = s + "ل";
  return splitVerbEntries.filter(({ entry }) => {
    const afterSplit = (entry.prp || entry.p).slice(entry.separationAtP || 0);
    return afterSplit === s || afterSplit === longForm;
  });
});
|
||||||
|
|
||||||
|
/**
 * Looks up verbs whose perfective root is `s`.
 *
 * Nothing is returned for a root that starts with an alef or for the bare
 * kedul/shwul forms. Otherwise the short root (`s`) and the long root
 * (`s` + ل) are looked up, each also with a leading alef added; compound
 * verbs, verbs with their own `prp` root and separating verbs are dropped
 * from those results. Separating verbs are then added from the split-verb
 * list.
 *
 * @param s - candidate root (the word with its ending removed)
 * @param dicitonary - dictionary API used for the lookups
 */
function findPerfectiveRoot(
  s: string,
  dicitonary: T.DictionaryAPI
): RootInfo[] {
  if (startsWithAleph(s) || ["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  const reg = [s, s + "ل"]
    .flatMap((form) => withAlefAdded(form))
    // Call the lookup through the dictionary object with exactly one
    // argument: passing the method reference straight to flatMap would
    // detach it from `dicitonary` and also feed it the element index and
    // the array as extra arguments.
    .flatMap((form) => dicitonary.verbEntryLookup(form))
    .filter(
      (e) =>
        !e.entry.c.includes("comp") && !e.entry.prp && !e.entry.separationAtP
    );
  return [...reg, ...rootSplitLookup(s)].map((verb) => ({
    type: "verb",
    aspect: "perfective",
    base: "root",
    verb,
  }));
}
|
||||||
|
|
||||||
|
/**
 * Maps a verb ending to the persons it can mark in the imperative:
 * ه gives second person singular, ئ gives second person plural, and any
 * other ending gives none.
 */
function getImperativeVerbEnding(e: string): T.Person[] {
  switch (e) {
    case "ه":
      return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
    case "ئ":
      return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
    default:
      return [];
  }
}
|
||||||
|
|
||||||
|
// TODO: could handle all sh- verbs for efficiencies sake
|
||||||
|
/**
 * Recognises the irregular 3rd pers. masc. sing. imperfective root forms
 * ته / راته / ورته / درته and maps each one to its verb.
 *
 * @param s - the full token text
 * @returns a single parsed verb block, or an empty array for any other text
 */
function parseIrregularVerb(s: string): T.ParsedVBE[] {
  let verb: T.VerbEntry;
  switch (s) {
    case "ته":
      verb = tlul;
      break;
    case "راته":
      verb = raatlul;
      break;
    case "ورته":
      verb = wartlul;
      break;
    case "درته":
      verb = dartlul;
      break;
    default:
      return [];
  }
  return [
    {
      type: "VB",
      info: {
        aspect: "imperfective",
        base: "root",
        type: "verb",
        verb,
      },
      person: T.Person.ThirdSingMale,
    },
  ];
}
|
||||||
|
|
||||||
|
// function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
|
||||||
|
// return !e.sepOo && startsWithAleph(e.p);
|
||||||
|
// }
|
||||||
|
|
||||||
|
/** Tells whether the string begins with ا or آ (false for an empty string). */
function startsWithAleph(base: string): boolean {
  const firstLetter = base.charAt(0);
  return firstLetter === "ا" || firstLetter === "آ";
}
|
|
@ -1,354 +0,0 @@
|
||||||
import * as T from "../../../types";
|
|
||||||
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
|
|
||||||
import { isInVarients, lastVowelNotA } from "../p-text-helpers";
|
|
||||||
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
|
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { shortVerbEndConsonant } from "./misc";
|
|
||||||
import { parseKedul } from "./parse-kedul";
|
|
||||||
import { getVerbEnding } from "./parse-verb-helpers";
|
|
||||||
|
|
||||||
// TODO: کول verbs!
|
|
||||||
// check that aawu stuff is working
|
|
||||||
// check oo`azmooy -
|
|
||||||
// check څاته
|
|
||||||
// laaRa shum etc
|
|
||||||
// TODO: proper use of perfective with sh
|
|
||||||
// TODO: use of raa, dar, war with sh
|
|
||||||
// TODO: هغه لاړ
|
|
||||||
|
|
||||||
export function parseVBE(
|
|
||||||
tokens: Readonly<T.Token[]>,
|
|
||||||
lookup: LookupFunction
|
|
||||||
): T.ParseResult<T.ParsedVBE>[] {
|
|
||||||
if (tokens.length === 0) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
const [first, ...rest] = tokens;
|
|
||||||
const irregResults = parseIrregularVerb(first.s);
|
|
||||||
if (irregResults.length) {
|
|
||||||
return irregResults.map((body) => ({
|
|
||||||
tokens: rest,
|
|
||||||
body,
|
|
||||||
errors: [],
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
const kedulStat = parseKedul(tokens);
|
|
||||||
const ending = first.s.at(-1) || "";
|
|
||||||
const people = getVerbEnding(ending);
|
|
||||||
const imperativePeople = getImperativeVerbEnding(ending);
|
|
||||||
// First do rough verb lookup, grab wide pool of possible verbs (low searching complexity for fast lookup)
|
|
||||||
// TODO: can optimize this to not have to look for possible stems/roots if none
|
|
||||||
const verbs = lookup(first.s, "verb");
|
|
||||||
// if (first.s === "سم") {
|
|
||||||
// console.log({ verbs: JSON.stringify(verbs) });
|
|
||||||
// }
|
|
||||||
// Then find out which ones match exactly and how
|
|
||||||
return [
|
|
||||||
...kedulStat,
|
|
||||||
...matchVerbs(first.s, verbs, people, imperativePeople).map((body) => ({
|
|
||||||
tokens: rest,
|
|
||||||
body,
|
|
||||||
errors: [],
|
|
||||||
})),
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
function matchVerbs(
|
|
||||||
s: string,
|
|
||||||
entries: T.VerbEntry[],
|
|
||||||
people: {
|
|
||||||
root: T.Person[];
|
|
||||||
stem: T.Person[];
|
|
||||||
},
|
|
||||||
imperativePeople: T.Person[]
|
|
||||||
): T.ParsedVBE[] {
|
|
||||||
const w: T.ParsedVBE[] = [];
|
|
||||||
const lEnding = s.endsWith("ل");
|
|
||||||
const base = s.endsWith("ل") ? s : s.slice(0, -1);
|
|
||||||
if (["کېږ", "کېد", "ش", "شو", "شول"].includes(base)) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
const matchShortOrLong = (b: string, x: string) => {
|
|
||||||
return b === x || (!lEnding && b === x.slice(0, -1));
|
|
||||||
};
|
|
||||||
if (people.stem.length || imperativePeople.length) {
|
|
||||||
const stemMatches = {
|
|
||||||
imperfective: entries.filter(({ entry: e }) => {
|
|
||||||
if (e.c.includes("comp")) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (e.psp) {
|
|
||||||
return e.psp === base;
|
|
||||||
}
|
|
||||||
if (e.c.includes("intrans.")) {
|
|
||||||
const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
|
|
||||||
return miniRoot + "ېږ" === base || miniRoot === base;
|
|
||||||
} else {
|
|
||||||
return e.p.slice(0, -1) === base;
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
|
||||||
const e = entry.entry;
|
|
||||||
const baseWAa = "ا" + base;
|
|
||||||
if (e.c.includes("comp")) {
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
if (e.ssp) {
|
|
||||||
if (e.separationAtP) {
|
|
||||||
const bRest = e.ssp.slice(e.separationAtP);
|
|
||||||
if (bRest === base) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (e.ssp === base) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (e.psp) {
|
|
||||||
if (hasBreakawayAlef(e) && startsWithAleph(base)) {
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
if (e.separationAtP) {
|
|
||||||
const bRest = e.psp.slice(e.separationAtP);
|
|
||||||
if (bRest === base) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!e.sepOo) {
|
|
||||||
if (baseWAa === e.psp) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (base === e.psp) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (hasBreakawayAlef(e) && startsWithAleph(base)) {
|
|
||||||
return acc;
|
|
||||||
} else if (e.c.includes("intrans.")) {
|
|
||||||
const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
|
|
||||||
const miniRootEg = miniRoot + "ېږ";
|
|
||||||
if ([miniRoot, miniRootEg].includes(base)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const eb = e.p.slice(0, -1);
|
|
||||||
if (eb === base) {
|
|
||||||
return [...acc, entry];
|
|
||||||
} else if (!e.sepOo) {
|
|
||||||
if (baseWAa === base.slice(1)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return acc;
|
|
||||||
}, []),
|
|
||||||
};
|
|
||||||
Object.entries(stemMatches).forEach(([aspect, entries]) => {
|
|
||||||
entries.forEach((verb) => {
|
|
||||||
people.stem.forEach((person) => {
|
|
||||||
w.push({
|
|
||||||
type: "VB",
|
|
||||||
person,
|
|
||||||
info: {
|
|
||||||
type: "verb",
|
|
||||||
aspect: aspect as T.Aspect,
|
|
||||||
base: "stem",
|
|
||||||
verb: removeFVarientsFromVerb(verb),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
imperativePeople.forEach((person) => {
|
|
||||||
w.push({
|
|
||||||
type: "VB",
|
|
||||||
person,
|
|
||||||
info: {
|
|
||||||
type: "verb",
|
|
||||||
aspect: aspect as T.Aspect,
|
|
||||||
base: "stem",
|
|
||||||
verb: removeFVarientsFromVerb(verb),
|
|
||||||
imperative: true,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if (people.root.length) {
|
|
||||||
const rootMatches = {
|
|
||||||
imperfective: entries.filter(
|
|
||||||
({ entry: e }) => !e.c.includes("comp") && matchShortOrLong(base, e.p)
|
|
||||||
),
|
|
||||||
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
|
||||||
const e = entry.entry;
|
|
||||||
if (e.c.includes("comp")) {
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
if (e.separationAtP) {
|
|
||||||
const b = e.prp || e.p;
|
|
||||||
const bRest = b.slice(e.separationAtP);
|
|
||||||
if (matchShortOrLong(base, bRest)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
} else if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
|
|
||||||
return acc;
|
|
||||||
} else {
|
|
||||||
const p = e.prp || e.p;
|
|
||||||
if (matchShortOrLong(base, p) || matchShortOrLong("ا" + base, p)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return acc;
|
|
||||||
}, []),
|
|
||||||
};
|
|
||||||
|
|
||||||
Object.entries(rootMatches).forEach(([aspect, entries]) => {
|
|
||||||
entries.forEach((verb) => {
|
|
||||||
people.root.forEach((person) => {
|
|
||||||
w.push({
|
|
||||||
type: "VB",
|
|
||||||
person,
|
|
||||||
info: {
|
|
||||||
type: "verb",
|
|
||||||
aspect: aspect as T.Aspect,
|
|
||||||
base: "root",
|
|
||||||
verb: removeFVarientsFromVerb(verb),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
const hamzaEnd = s.at(-1) === "ه";
|
|
||||||
const oEnd = s.at(-1) === "و";
|
|
||||||
const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
|
|
||||||
const tppMatches = {
|
|
||||||
imperfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
|
||||||
const e = entry.entry;
|
|
||||||
if (e.c.includes("comp")) {
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
if (!e.prp && isInVarients(e.tppp, s)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
if (oEnd && matchShortOrLong(base, e.p)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
lastVowelNotA(e.g.slice(0, -2)) &&
|
|
||||||
(hamzaEnd ? base : abruptEnd ? s : "XX") === e.p.slice(0, -1)
|
|
||||||
) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
// TODO: if check for modified aaXu thing!
|
|
||||||
return acc;
|
|
||||||
}, []),
|
|
||||||
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
|
||||||
const e = entry.entry;
|
|
||||||
if (e.c.includes("comp")) {
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
if (e.separationAtP) {
|
|
||||||
const b = e.prp || e.p;
|
|
||||||
const bRest = b.slice(e.separationAtP);
|
|
||||||
if (bRest === "شول") {
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
if (abruptEnd) {
|
|
||||||
if (s === bRest.slice(0, -1)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
} else if (hamzaEnd) {
|
|
||||||
if (base === bRest.slice(0, -1)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
} else if (oEnd) {
|
|
||||||
if ([bRest, bRest.slice(0, -1)].includes(base)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (!e.prp) {
|
|
||||||
if (hasBreakawayAlef(e) && startsWithAleph(base)) {
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
if (oEnd) {
|
|
||||||
if ([e.p, e.p.slice(0, -1)].includes(base)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
} else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
|
|
||||||
const b = hamzaEnd ? base : s;
|
|
||||||
const p = e.p.slice(0, -1);
|
|
||||||
if (b === p) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!e.separationAtP) {
|
|
||||||
if (isInVarients(e.tppp, s)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
} else if (isInVarients(e.tppp, "ا" + s)) {
|
|
||||||
return [...acc, entry];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return acc;
|
|
||||||
}, []),
|
|
||||||
};
|
|
||||||
Object.entries(tppMatches).forEach(([aspect, entries]) => {
|
|
||||||
entries.forEach((verb) => {
|
|
||||||
w.push({
|
|
||||||
type: "VB",
|
|
||||||
person: T.Person.ThirdSingMale,
|
|
||||||
info: {
|
|
||||||
type: "verb",
|
|
||||||
aspect: aspect as T.Aspect,
|
|
||||||
base: "root",
|
|
||||||
verb: removeFVarientsFromVerb(verb),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
return w;
|
|
||||||
}
|
|
||||||
|
|
||||||
function getImperativeVerbEnding(e: string): T.Person[] {
|
|
||||||
if (e === "ه") {
|
|
||||||
return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
|
|
||||||
}
|
|
||||||
if (e === "ئ") {
|
|
||||||
return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
|
|
||||||
}
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: could handle all sh- verbs for efficiencies sake
|
|
||||||
function parseIrregularVerb(s: string): T.ParsedVBE[] {
|
|
||||||
if (["ته", "راته", "ورته", "درته"].includes(s)) {
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
type: "VB",
|
|
||||||
info: {
|
|
||||||
aspect: "imperfective",
|
|
||||||
base: "root",
|
|
||||||
type: "verb",
|
|
||||||
verb: s.startsWith("را")
|
|
||||||
? raatlul
|
|
||||||
: s.startsWith("ور")
|
|
||||||
? wartlul
|
|
||||||
: s.startsWith("در")
|
|
||||||
? dartlul
|
|
||||||
: tlul,
|
|
||||||
},
|
|
||||||
person: T.Person.ThirdSingMale,
|
|
||||||
},
|
|
||||||
];
|
|
||||||
}
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
|
|
||||||
return !e.sepOo && ["ا", "آ"].includes(e.p[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
function startsWithAleph(base: string): boolean {
|
|
||||||
return ["ا", "آ"].includes(base[0]);
|
|
||||||
}
|
|
|
@ -1,46 +1,46 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { LookupFunction } from "./lookup";
|
// import { returnParseResult } from "./utils";
|
||||||
import { returnParseResult } from "./utils";
|
|
||||||
|
|
||||||
export function parseVBP(
|
export function parseVBP(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction
|
dictionary: T.DictionaryAPI
|
||||||
): T.ParseResult<T.ParsedVBP>[] {
|
): T.ParseResult<T.ParsedVBP>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
return [
|
return [];
|
||||||
...parsePastPart(tokens, lookup),
|
// return [
|
||||||
// ...parseAbility(tokens),
|
// ...parsePastPart(tokens, lookup),
|
||||||
];
|
// // ...parseAbility(tokens),
|
||||||
|
// ];
|
||||||
}
|
}
|
||||||
|
|
||||||
function parsePastPart(
|
// function parsePastPart(
|
||||||
tokens: Readonly<T.Token[]>,
|
// tokens: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction
|
// dicitonary: T.DictionaryAPI,
|
||||||
): T.ParseResult<T.ParsedVBP>[] {
|
// ): T.ParseResult<T.ParsedVBP>[] {
|
||||||
const [{ s }, ...rest] = tokens;
|
// const [{ s }, ...rest] = tokens;
|
||||||
const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
|
// const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
|
||||||
if (!ending || !["ی", "ي", "ې"].includes(ending)) {
|
// if (!ending || !["ی", "ي", "ې"].includes(ending)) {
|
||||||
return [];
|
// return [];
|
||||||
}
|
// }
|
||||||
// TODO: ALSO HANDLE SHORT FORMS
|
// // TODO: ALSO HANDLE SHORT FORMS
|
||||||
const wOutEnd = s.slice(0, -1);
|
// const wOutEnd = s.slice(0, -1);
|
||||||
const matches = lookup(wOutEnd, "pPart");
|
// const matches = lookup(wOutEnd, "pPart");
|
||||||
const genNums = endingGenderNum(ending);
|
// const genNums = endingGenderNum(ending);
|
||||||
return matches
|
// return matches
|
||||||
.flatMap<T.ParsedVBP>((verb) =>
|
// .flatMap<T.ParsedVBP>((verb) =>
|
||||||
genNums.map<T.ParsedVBP>((genNum) => ({
|
// genNums.map<T.ParsedVBP>((genNum) => ({
|
||||||
type: "VB",
|
// type: "VB",
|
||||||
info: {
|
// info: {
|
||||||
type: "ppart",
|
// type: "ppart",
|
||||||
verb,
|
// verb,
|
||||||
genNum,
|
// genNum,
|
||||||
},
|
// },
|
||||||
}))
|
// }))
|
||||||
)
|
// )
|
||||||
.flatMap((m) => returnParseResult(rest, m));
|
// .flatMap((m) => returnParseResult(rest, m));
|
||||||
}
|
// }
|
||||||
|
|
||||||
// function parseAbility(
|
// function parseAbility(
|
||||||
// tokens: Readonly<T.Token[]>,
|
// tokens: Readonly<T.Token[]>,
|
||||||
|
@ -70,33 +70,33 @@ function parsePastPart(
|
||||||
// .flatMap((m) => returnParseResult(rest, m));
|
// .flatMap((m) => returnParseResult(rest, m));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
|
// function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
|
||||||
if (ending === "ی") {
|
// if (ending === "ی") {
|
||||||
return [
|
// return [
|
||||||
{
|
// {
|
||||||
gender: "masc",
|
// gender: "masc",
|
||||||
number: "singular",
|
// number: "singular",
|
||||||
},
|
// },
|
||||||
];
|
// ];
|
||||||
}
|
// }
|
||||||
if (ending === "ي") {
|
// if (ending === "ي") {
|
||||||
return [
|
// return [
|
||||||
{
|
// {
|
||||||
gender: "masc",
|
// gender: "masc",
|
||||||
number: "plural",
|
// number: "plural",
|
||||||
},
|
// },
|
||||||
];
|
// ];
|
||||||
}
|
// }
|
||||||
// if (ending === "ې") {
|
// // if (ending === "ې") {
|
||||||
return [
|
// return [
|
||||||
{
|
// {
|
||||||
gender: "fem",
|
// gender: "fem",
|
||||||
number: "singular",
|
// number: "singular",
|
||||||
},
|
// },
|
||||||
{
|
// {
|
||||||
gender: "fem",
|
// gender: "fem",
|
||||||
number: "plural",
|
// number: "plural",
|
||||||
},
|
// },
|
||||||
];
|
// ];
|
||||||
// }
|
// // }
|
||||||
}
|
// }
|
||||||
|
|
|
@ -4,6 +4,10 @@ export function isKedulStatEntry(v: T.VerbDictionaryEntry): boolean {
|
||||||
return v.p === "کېدل" && v.e === "to become _____";
|
return v.p === "کېدل" && v.e === "to become _____";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the possible people for stem and root endings
|
||||||
|
* but DOES NOT INCLUDE short third pers masc sing
|
||||||
|
*/
|
||||||
export function getVerbEnding(e: string): {
|
export function getVerbEnding(e: string): {
|
||||||
stem: T.Person[];
|
stem: T.Person[];
|
||||||
root: T.Person[];
|
root: T.Person[];
|
||||||
|
@ -34,7 +38,11 @@ export function getVerbEnding(e: string): {
|
||||||
};
|
};
|
||||||
} else if (e === "و") {
|
} else if (e === "و") {
|
||||||
return {
|
return {
|
||||||
root: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
root: [
|
||||||
|
T.Person.FirstPlurMale,
|
||||||
|
T.Person.FirstPlurFemale,
|
||||||
|
T.Person.ThirdSingMale,
|
||||||
|
],
|
||||||
stem: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
stem: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||||
};
|
};
|
||||||
} else if (e === "ئ") {
|
} else if (e === "ئ") {
|
||||||
|
|
|
@ -24,7 +24,6 @@ import {
|
||||||
import { parseBlocks } from "./parse-blocks";
|
import { parseBlocks } from "./parse-blocks";
|
||||||
import { makePronounSelection } from "../phrase-building/make-selections";
|
import { makePronounSelection } from "../phrase-building/make-selections";
|
||||||
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
||||||
import { LookupFunction } from "./lookup";
|
|
||||||
import { isSecondPerson, personToGenNum } from "../misc-helpers";
|
import { isSecondPerson, personToGenNum } from "../misc-helpers";
|
||||||
import { equals, zip } from "rambda";
|
import { equals, zip } from "rambda";
|
||||||
import { isImperativeTense } from "../type-predicates";
|
import { isImperativeTense } from "../type-predicates";
|
||||||
|
@ -41,12 +40,12 @@ import { isImperativeTense } from "../type-predicates";
|
||||||
|
|
||||||
export function parseVP(
|
export function parseVP(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: LookupFunction
|
dictionary: T.DictionaryAPI
|
||||||
): T.ParseResult<T.VPSelectionComplete>[] {
|
): T.ParseResult<T.VPSelectionComplete>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const blocks = parseBlocks(tokens, lookup, [], []);
|
const blocks = parseBlocks(tokens, dictionary, [], []);
|
||||||
return bindParseResult(
|
return bindParseResult(
|
||||||
createPossesivePossibilities(blocks),
|
createPossesivePossibilities(blocks),
|
||||||
(tokens, { blocks, kids }) => {
|
(tokens, { blocks, kids }) => {
|
||||||
|
@ -892,7 +891,7 @@ function getMiniPronouns(kids: T.ParsedKid[]): T.ParsedMiniPronoun[] {
|
||||||
|
|
||||||
function getPeopleFromMiniPronouns(kids: T.ParsedKid[]): T.Person[] {
|
function getPeopleFromMiniPronouns(kids: T.ParsedKid[]): T.Person[] {
|
||||||
const p: T.Person[] = [];
|
const p: T.Person[] = [];
|
||||||
for (let k of kids) {
|
for (const k of kids) {
|
||||||
if (k === "me") {
|
if (k === "me") {
|
||||||
p.push(T.Person.FirstSingMale);
|
p.push(T.Person.FirstSingMale);
|
||||||
p.push(T.Person.FirstSingFemale);
|
p.push(T.Person.FirstSingFemale);
|
||||||
|
|
|
@ -163,6 +163,38 @@ export function parserCombMany<R>(parser: Parser<R>): Parser<R[]> {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a parser that applies two parsers in succession and pairs up
 * their results.
 *
 * The first parser is run on the incoming tokens; its results are chained
 * through `bindParseResult` into the second parser, and each combination
 * is wrapped with `returnParseResult` as an `[a, b]` tuple.
 * NOTE(review): presumably `bindParseResult` hands the callback the tokens
 * left over after each result - confirm against its definition.
 *
 * @param parsers - the two parsers to run, in order
 * @returns a parser producing `[A, B]` tuples
 */
export function parserCombSucc2<A, B>(
  parsers: [Parser<A>, Parser<B>]
): Parser<[A, B]> {
  return (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B]>[] => {
    const [first, second] = parsers;
    const firstResults = first(tokens, dictionary);
    return bindParseResult(firstResults, (afterFirst, a) => {
      const secondResults = second(afterFirst, dictionary);
      return bindParseResult(secondResults, (afterSecond, b) =>
        returnParseResult(afterSecond, [a, b])
      );
    });
  };
}
|
||||||
|
|
||||||
|
/**
 * Builds a parser that applies three parsers in succession and groups
 * their results into a triple.
 *
 * Each parser's results are chained into the next one through
 * `bindParseResult`; the innermost step wraps the collected values with
 * `returnParseResult` as an `[a, b, c]` tuple.
 * NOTE(review): presumably `bindParseResult` hands the callback the tokens
 * left over after each result - confirm against its definition.
 *
 * @param parsers - the three parsers to run, in order
 * @returns a parser producing `[A, B, C]` tuples
 */
export function parserCombSucc3<A, B, C>(
  parsers: [Parser<A>, Parser<B>, Parser<C>]
): Parser<[A, B, C]> {
  return (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B, C]>[] => {
    const [first, second, third] = parsers;
    const firstResults = first(tokens, dictionary);
    return bindParseResult(firstResults, (afterFirst, a) => {
      const secondResults = second(afterFirst, dictionary);
      return bindParseResult(secondResults, (afterSecond, b) => {
        const thirdResults = third(afterSecond, dictionary);
        return bindParseResult(thirdResults, (afterThird, c) =>
          returnParseResult(afterThird, [a, b, c])
        );
      });
    });
  };
}
|
||||||
|
|
||||||
export function isCompleteResult<C extends object>(
|
export function isCompleteResult<C extends object>(
|
||||||
r: T.ParseResult<C>
|
r: T.ParseResult<C>
|
||||||
): boolean {
|
): boolean {
|
||||||
|
|
|
@ -214,7 +214,7 @@ function addArticlesAndAdjs(
|
||||||
? np.determiners.determiners
|
? np.determiners.determiners
|
||||||
// @ts-ignore - weird, ts is not recognizing this as rendered
|
// @ts-ignore - weird, ts is not recognizing this as rendered
|
||||||
.map((x) => (moreThanOneDet ? `(${x.e})` : x.e))
|
.map((x) => (moreThanOneDet ? `(${x.e})` : x.e))
|
||||||
.join(" ")
|
.join(" ") + " "
|
||||||
: "";
|
: "";
|
||||||
const detsWithoutNoun = np.determiners && !np.determiners.withNoun;
|
const detsWithoutNoun = np.determiners && !np.determiners.withNoun;
|
||||||
return `${np.determiners ? "" : articles}${determiners}${
|
return `${np.determiners ? "" : articles}${determiners}${
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
import * as T from "../../../types";
|
||||||
|
import { compileVP } from "./compile";
|
||||||
|
import { renderVP } from "./render-vp";
|
||||||
|
|
||||||
|
/**
 * Drops verb-phrase selections that compile to the same output as an
 * earlier selection in the list.
 *
 * Every selection is rendered and compiled once; a later entry is
 * discarded when `isDuplicate` reports it equal to an earlier entry that
 * has itself been kept. The first occurrence always survives and the
 * relative order of the survivors is unchanged.
 *
 * @param vs - the complete VP selections to de-duplicate
 * @returns a new array without the redundant selections
 */
export function removeRedundantVPSs(
  vs: T.VPSelectionComplete[]
): T.VPSelectionComplete[] {
  const compiled = vs.map((selection) =>
    compileVP(renderVP(selection), selection.form)
  );
  const redundant = new Set<number>();
  compiled.forEach((current, i) => {
    // an entry already marked as redundant does not knock out others
    if (redundant.has(i)) {
      return;
    }
    compiled.forEach((other, j) => {
      // only look forward so the earliest occurrence is the one kept
      if (j > i && isDuplicate(current, other)) {
        redundant.add(j);
      }
    });
  });
  return vs.filter((_, i) => !redundant.has(i));
}
|
||||||
|
|
||||||
|
/**
 * Decides whether two compiled phrases are duplicates of each other.
 *
 * Two phrases count as duplicates when both carry English glosses (`e`),
 * the gloss lists have the same length, their `ps` values serialise to the
 * same JSON, and every pair of glosses matches once gender markers are
 * stripped with `removeGenderGloss`.
 *
 * The `ps` comparison is done once, before the glosses are walked.
 * Previously it lived inside the `every` callback, so two phrases with
 * empty gloss lists were reported as duplicates without `ps` ever being
 * compared, and `ps` was re-serialised for every gloss.
 *
 * @param a - first compiled phrase
 * @param b - second compiled phrase
 * @returns `true` when the phrases are duplicates; `false` if either one
 *   has no `e` at all
 */
function isDuplicate(
  a: {
    ps: T.SingleOrLengthOpts<T.PsString[]>;
    e?: string[];
  },
  b: { ps: T.SingleOrLengthOpts<T.PsString[]>; e?: string[] }
): boolean {
  const aGlosses = a.e;
  const bGlosses = b.e;
  if (!aGlosses || !bGlosses) {
    return false;
  }
  if (aGlosses.length !== bGlosses.length) {
    return false;
  }
  // compare the Pashto side exactly once, independent of gloss count
  if (JSON.stringify(a.ps) !== JSON.stringify(b.ps)) {
    return false;
  }
  return aGlosses.every(
    (gloss, i) =>
      removeGenderGloss(gloss) === removeGenderGloss(bGlosses[i] ?? "")
  );
}
|
||||||
|
|
||||||
|
/**
 * Strips gender markers from an English gloss.
 *
 * Removes every "(m.)" / "(f.)" marker first, then every "(m. pl.)" /
 * "(f. pl.)" marker. Surrounding whitespace is left untouched.
 *
 * @param s - the gloss text
 * @returns the text with the markers removed
 */
function removeGenderGloss(s: string): string {
  // TODO: the two patterns could be merged into a single RegEx
  const markers = [/\((m|f)\.\)/g, /\((m|f)\. pl\.\)/g];
  return markers.reduce((text, marker) => text.replace(marker, ""), s);
}
|
||||||
|
|
||||||
|
/**
 * Collects the positions of every element that satisfies a predicate.
 *
 * @param arr - the array to scan
 * @param f - predicate applied to each element
 * @returns the indices, in ascending order, of the elements for which `f`
 *   returned `true`; empty when nothing matches
 */
function findAllIndices<N>(arr: N[], f: (x: N) => boolean): number[] {
  return arr.reduce<number[]>((hits, item, position) => {
    if (f(item)) {
      hits.push(position);
    }
    return hits;
  }, []);
}
|
|
@ -178,7 +178,7 @@ function renderDeterminer({
|
||||||
? number === "plural"
|
? number === "plural"
|
||||||
? { p: "دغو", f: "dágho" }
|
? { p: "دغو", f: "dágho" }
|
||||||
: gender === "masc"
|
: gender === "masc"
|
||||||
? { p: "دغه", f: "dághu" }
|
? { p: "دغه", f: "dágha" }
|
||||||
: { p: "دغې", f: "dághe" }
|
: { p: "دغې", f: "dághe" }
|
||||||
: { p: "دغه", f: "dágha" };
|
: { p: "دغه", f: "dágha" };
|
||||||
return {
|
return {
|
||||||
|
@ -196,7 +196,7 @@ function renderDeterminer({
|
||||||
? number === "plural"
|
? number === "plural"
|
||||||
? { p: "هغو", f: "hágho" }
|
? { p: "هغو", f: "hágho" }
|
||||||
: gender === "masc"
|
: gender === "masc"
|
||||||
? { p: "هغه", f: "hághu" }
|
? { p: "هغه", f: "hágha" }
|
||||||
: { p: "هغې", f: "hághe" }
|
: { p: "هغې", f: "hághe" }
|
||||||
: { p: "هغه", f: "hágha" };
|
: { p: "هغه", f: "hágha" };
|
||||||
return {
|
return {
|
||||||
|
|
|
@ -1259,8 +1259,13 @@ export type DictionaryAPI = {
|
||||||
queryP: (p: string) => DictionaryEntry[];
|
queryP: (p: string) => DictionaryEntry[];
|
||||||
adjLookup: (p: string) => AdjectiveEntry[];
|
adjLookup: (p: string) => AdjectiveEntry[];
|
||||||
nounLookup: (p: string) => NounEntry[];
|
nounLookup: (p: string) => NounEntry[];
|
||||||
otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[];
|
otherLookup: (
|
||||||
|
key: keyof DictionaryEntry,
|
||||||
|
p: string,
|
||||||
|
regex?: boolean
|
||||||
|
) => DictionaryEntry[];
|
||||||
specialPluralLookup: (p: string) => NounEntry[];
|
specialPluralLookup: (p: string) => NounEntry[];
|
||||||
|
verbEntryLookup: (p: string) => VerbEntry[];
|
||||||
};
|
};
|
||||||
|
|
||||||
export type Parser<R> = (
|
export type Parser<R> = (
|
||||||
|
|
|
@ -18,5 +18,5 @@
|
||||||
"noUnusedParameters": true,
|
"noUnusedParameters": true,
|
||||||
"noFallthroughCasesInSwitch": true
|
"noFallthroughCasesInSwitch": true
|
||||||
},
|
},
|
||||||
"include": ["vite.config.ts", "get-mini-dict.ts"]
|
"include": ["vite.config.ts", "get-mini-dict-and-split-verbs.ts"]
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,7 @@ export const entries: T.DictionaryEntry["ts"][] = [
|
||||||
1527812908, // مېلمه
|
1527812908, // مېلمه
|
||||||
1575924767041, // شپون
|
1575924767041, // شپون
|
||||||
1527815333, // نتور
|
1527815333, // نتور
|
||||||
|
1527812881, // ماشوم
|
||||||
|
|
||||||
// fem nouns
|
// fem nouns
|
||||||
1527811877, // دوستي
|
1527811877, // دوستي
|
||||||
|
@ -50,4 +51,28 @@ export const entries: T.DictionaryEntry["ts"][] = [
|
||||||
1589023873660, // فتح - fatha
|
1589023873660, // فتح - fatha
|
||||||
1527814342, // نفع - nafa
|
1527814342, // نفع - nafa
|
||||||
1527815329, // تجربه
|
1527815329, // تجربه
|
||||||
|
|
||||||
|
// verbs
|
||||||
|
1527815399, // وهل
|
||||||
|
1527817298, // اخیستل
|
||||||
|
1527812275, // لیدل
|
||||||
|
1527812856, // لیکل
|
||||||
|
1527815085, // منل
|
||||||
|
1527817661, // ګالل
|
||||||
|
1527813573, // رسېدل
|
||||||
|
1527812790, // خوړل
|
||||||
|
1527812759, // کېناستل
|
||||||
|
1527812758, // کښېناستل
|
||||||
|
1527815190, // پرېښودل
|
||||||
|
1527811293, // ښودل
|
||||||
|
1527812284, // کېښودل
|
||||||
|
1527812751, // کتل
|
||||||
|
1527823376, // وتل
|
||||||
|
1527816865, // وړل
|
||||||
|
1527813473, // الوتل
|
||||||
|
1585228551150, // درتلل
|
||||||
|
1527817577, // کښېښودل
|
||||||
|
1527814012, // اوښتل
|
||||||
|
1577390597820, // پرېښوول
|
||||||
|
1527815191, // پرېښول
|
||||||
];
|
];
|
||||||
|
|
Loading…
Reference in New Issue