Compare commits
No commits in common. "f17ebddaa1de79a8bd531b9abe653b85eef12d8a" and "191abc57786101cf44e74d663f5db45f0f2a554e" have entirely different histories.
f17ebddaa1
...
191abc5778
|
@ -11,7 +11,6 @@ lerna-debug.log*
|
|||
src/verbs.ts
|
||||
src/nouns-adjs.ts
|
||||
vocab/mini-dict-entries.ts
|
||||
src/lib/src/parsing/split-verbs.ts
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
import * as T from "./src/types";
|
||||
import * as tp from "./src/lib/src/type-predicates";
|
||||
import fs from "fs";
|
||||
|
||||
import { entries as collection } from "./vocab/mini-dict-tss";
|
||||
|
||||
// Build script: downloads the full LingDocs dictionary and regenerates two
// source files from it:
//   - ./vocab/mini-dict-entries.ts            (entries listed in mini-dict-tss)
//   - ./src/lib/src/parsing/split-verbs.ts    (separable verbs)

const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
);
// fetch() only rejects on network failure; an HTTP error status must be checked
// explicitly, otherwise an error page would be parsed as the dictionary.
if (!res.ok) {
  throw new Error(
    `failed to download dictionary: ${res.status} ${res.statusText}`
  );
}
// NOTE(review): the payload is asserted to be a T.Dictionary, not validated.
const dictionary = (await res.json()) as T.Dictionary;

// Set lookup instead of Array.includes for every dictionary entry
const wantedTs = new Set(collection);
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  wantedTs.has(x.ts)
);

// verb entries that have a separation point, leaving out کول and کېدل themselves
const splitEntries: T.VerbDictionaryEntry[] =
  dictionary.entries.filter<T.VerbDictionaryEntry>(
    (x): x is T.VerbDictionaryEntry =>
      tp.isVerbDictionaryEntry(x) &&
      !!x.separationAtP &&
      !["کول", "کېدل"].includes(x.p)
  );

const miniDictContents = `import { DictionaryEntry } from "../src/types";
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
export const entries: DictionaryEntry[] = [
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
];
`;

const splitVerbContents = `import { VerbEntry, VerbDictionaryEntry } from "../../../types";
// DO NOT MODIFY - GENERATED
export const entries: VerbEntry[] = [
${splitEntries
  .map((e) => `\t{ entry: ${JSON.stringify(e)} as VerbDictionaryEntry },`)
  .join("\n")}
];
`;

fs.writeFileSync("./vocab/mini-dict-entries.ts", miniDictContents);
fs.writeFileSync("./src/lib/src/parsing/split-verbs.ts", splitVerbContents);
|
|
@ -0,0 +1,22 @@
|
|||
import * as T from "./src/types";
|
||||
import fs from "fs";
|
||||
|
||||
import { entries as collection } from "./vocab/mini-dict-tss";
|
||||
|
||||
// Build script: downloads the full LingDocs dictionary and regenerates
// ./vocab/mini-dict-entries.ts with the entries listed in mini-dict-tss.

const res = await fetch(
  "https://storage.lingdocs.com/dictionary/dictionary.json"
);
// fetch() only rejects on network failure; an HTTP error status must be checked
// explicitly, otherwise an error page would be parsed as the dictionary.
if (!res.ok) {
  throw new Error(
    `failed to download dictionary: ${res.status} ${res.statusText}`
  );
}
// NOTE(review): the payload is asserted to be a T.Dictionary, not validated.
const dictionary = (await res.json()) as T.Dictionary;

// Set lookup instead of Array.includes for every dictionary entry
const wantedTs = new Set(collection);
const entries: T.DictionaryEntry[] = dictionary.entries.filter((x) =>
  wantedTs.has(x.ts)
);

const contents = `import { DictionaryEntry } from "../src/types";
// DO NOT MODIFY - GENERATED FROM mini-dict-tss.ts
export const entries: DictionaryEntry[] = [
${entries.map((e) => `\t${JSON.stringify(e)},`).join("\n")}
];
`;

fs.writeFileSync("./vocab/mini-dict-entries.ts", contents);
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "pashto-inflector-website",
|
||||
"version": "7.7.3",
|
||||
"version": "7.7.1",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"patch": "npm version patch --no-git-tag-version && cd src/lib && npm version patch --no-git-tag-version && cd ../components && npm version patch --no-git-tag-version",
|
||||
|
@ -14,7 +14,7 @@
|
|||
"build-website": "tsc -b && vite build",
|
||||
"build-components": "rm -rf src/components/dist && tsc --project src/components/tsconfig.json && cd src/components && node post-build.cjs",
|
||||
"build-lib": "rm -rf src/lib/dist && tsc --project src/lib/tsconfig.json && tsup src/lib/library.ts --format cjs && mv dist/library.cjs src/lib/dist/lib",
|
||||
"get-words": "node get-words.cjs && tsx get-mini-dict-and-split-verbs.ts",
|
||||
"get-words": "node get-words.cjs && tsx get-mini-dict.ts",
|
||||
"check-all-inflections": "tsx check-all-inflections.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
|
|
|
@ -19,7 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
|
|||
import Hider from "./components/src/Hider";
|
||||
import InflectionDemo from "./demo-components/InflectionDemo";
|
||||
import SpellingDemo from "./demo-components/SpellingDemo";
|
||||
import ParserDemo from "./demo-components/ParserDemo";
|
||||
// import ParserDemo from "./demo-components/ParserDemo";
|
||||
// import InflectionTable from "./components/src/InflectionsTable";
|
||||
|
||||
function App() {
|
||||
|
@ -163,7 +163,7 @@ function App() {
|
|||
>
|
||||
<SpellingDemo opts={textOptions} onChange={setTextOptions} />
|
||||
</Hider>
|
||||
<Hider
|
||||
{/* <Hider
|
||||
label="Parser (🚧 IN PROGRESS 🚧)"
|
||||
hLevel={3}
|
||||
showing={showing === "parser"}
|
||||
|
@ -174,7 +174,7 @@ function App() {
|
|||
entryFeeder={entryFeeder}
|
||||
dictionary={dictionary}
|
||||
/>
|
||||
</Hider>
|
||||
</Hider> */}
|
||||
</div>
|
||||
</main>
|
||||
<Modal
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@lingdocs/ps-react",
|
||||
"version": "7.7.3",
|
||||
"version": "7.7.1",
|
||||
"description": "Pashto inflector library module with React components",
|
||||
"main": "dist/components/library.js",
|
||||
"module": "dist/components/library.js",
|
||||
|
|
|
@ -3,18 +3,13 @@ import * as T from "../types";
|
|||
// import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||
import { tokenizer } from "../lib/src/parsing/tokenizer";
|
||||
// import { NPDisplay } from "../components/library";
|
||||
import EditableVP from "../components/src/vp-explorer/EditableVP";
|
||||
import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
|
||||
// import { parseNoun } from "../lib/src/parsing/parse-noun-new";
|
||||
// import EditableVP from "../components/src/vp-explorer/EditableVP";
|
||||
// import { uncompleteVPSelection } from "../lib/src/phrase-building/vp-tools";
|
||||
import { parseNoun } from "../lib/src/parsing/parse-noun-new";
|
||||
import { JsonEditor } from "json-edit-react";
|
||||
// import { renderNounSelection } from "../lib/src/phrase-building/render-np";
|
||||
// import { NPBlock } from "../components/src/blocks/Block";
|
||||
// import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
|
||||
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||
//import { renderVP } from "../lib/src/phrase-building/render-vp";
|
||||
// import VPDisplay from "../components/src/vp-explorer/VPDisplay";
|
||||
import { entryFeeder } from "./entryFeeder";
|
||||
import { removeRedundantVPSs } from "../lib/src/phrase-building/remove-redundant";
|
||||
import { renderNounSelection } from "../lib/src/phrase-building/render-np";
|
||||
import { NPBlock } from "../components/src/blocks/Block";
|
||||
import { getEnglishFromRendered } from "../lib/src/phrase-building/np-tools";
|
||||
|
||||
const working = [
|
||||
"limited demo vocab",
|
||||
|
@ -64,7 +59,7 @@ function ParserDemo({
|
|||
}) {
|
||||
const [text, setText] = useState<string>("");
|
||||
const [result, setResult] = useState<
|
||||
ReturnType<typeof parsePhrase>["success"]
|
||||
ReturnType<typeof parseNoun>[number]["body"][]
|
||||
>([]);
|
||||
// ReturnType<typeof parsePhrase>["success"]
|
||||
const [errors, setErrors] = useState<string[]>([]);
|
||||
|
@ -75,10 +70,16 @@ function ParserDemo({
|
|||
setErrors([]);
|
||||
return;
|
||||
}
|
||||
const res = parsePhrase(tokenizer(value), dictionary);
|
||||
const res = parseNoun(tokenizer(value), dictionary, undefined);
|
||||
const success: ReturnType<typeof parseNoun>[number]["body"][] = res
|
||||
.filter((x) => !x.tokens.length)
|
||||
.map((x) => x.body);
|
||||
const errors = [
|
||||
...new Set(res.flatMap(({ errors }) => errors.map((e) => e.message))),
|
||||
];
|
||||
setText(value);
|
||||
setErrors(res.errors);
|
||||
setResult(removeRedundantVPSs(res.success));
|
||||
setErrors(errors);
|
||||
setResult(success);
|
||||
}
|
||||
return (
|
||||
<div className="mt-3" style={{ marginBottom: "1000px" }}>
|
||||
|
@ -140,8 +141,34 @@ function ParserDemo({
|
|||
<div className="text-center">Did you mean:</div>
|
||||
</>
|
||||
)}
|
||||
{result.map((res) => (
|
||||
{result.map((r) => {
|
||||
try {
|
||||
const renderedNP: T.Rendered<T.NPSelection> = {
|
||||
type: "NP",
|
||||
selection: renderNounSelection(r.selection, r.inflected, "none"),
|
||||
};
|
||||
return (
|
||||
<>
|
||||
{r.inflected ? "INFLECTED" : "PLAIN"}
|
||||
<NPBlock
|
||||
opts={opts}
|
||||
script="p"
|
||||
english={getEnglishFromRendered(renderedNP)}
|
||||
>
|
||||
{renderedNP}
|
||||
</NPBlock>
|
||||
</>
|
||||
);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
return <div>ERROR RENDERING</div>;
|
||||
}
|
||||
})}
|
||||
<JsonEditor data={result} />
|
||||
{/* {result.map((res) =>
|
||||
"inflected" in res ? (
|
||||
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
||||
) : "verb" in res ? (
|
||||
<EditableVP
|
||||
opts={opts}
|
||||
entryFeeder={entryFeeder}
|
||||
|
@ -149,76 +176,42 @@ function ParserDemo({
|
|||
>
|
||||
{uncompleteVPSelection(res)}
|
||||
</EditableVP>
|
||||
) : (
|
||||
// (() => {
|
||||
// try {
|
||||
// const rendered = renderVP(res);
|
||||
// const compiled = compileVP(rendered, res.form);
|
||||
// return (
|
||||
// <div>
|
||||
// <CompiledPTextDisplay compiled={compiled} opts={opts} />
|
||||
// {compiled.e && (
|
||||
// <div className={`text-muted mt-2 text-center`}>
|
||||
// {compiled.e.map((e, i) => (
|
||||
// <div key={i}>{e}</div>
|
||||
// ))}
|
||||
// </div>
|
||||
// )}
|
||||
// </div>
|
||||
// );
|
||||
// } catch (e) {
|
||||
// console.error(e);
|
||||
// console.log({ res });
|
||||
// return <div>ERROR</div>;
|
||||
// }
|
||||
// })()
|
||||
<samp>
|
||||
<pre>{JSON.stringify(res, null, " ")}</pre>
|
||||
</samp>
|
||||
)
|
||||
)} */}
|
||||
<details>
|
||||
<summary>AST</summary>
|
||||
<JsonEditor data={res} />
|
||||
<samp>
|
||||
<pre>{JSON.stringify(result, null, " ")}</pre>
|
||||
</samp>
|
||||
</details>
|
||||
</>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default ParserDemo;
|
||||
|
||||
// {/* {result.map((res) =>
|
||||
// "inflected" in res ? (
|
||||
// <NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
||||
// ) : "verb" in res ? (
|
||||
// <EditableVP
|
||||
// opts={opts}
|
||||
// entryFeeder={entryFeeder}
|
||||
// allVariations={true}
|
||||
// >
|
||||
// {uncompleteVPSelection(res)}
|
||||
// </EditableVP>
|
||||
// ) : (
|
||||
// (() => {
|
||||
// try {
|
||||
// const rendered = renderVP(res);
|
||||
// const compiled = compileVP(rendered, res.form);
|
||||
// return (
|
||||
// <div>
|
||||
// <CompiledPTextDisplay compiled={compiled} opts={opts} />
|
||||
// {compiled.e && (
|
||||
// <div className={`text-muted mt-2 text-center`}>
|
||||
// {compiled.e.map((e, i) => (
|
||||
// <div key={i}>{e}</div>
|
||||
// ))}
|
||||
// </div>
|
||||
// )}
|
||||
// </div>
|
||||
// );
|
||||
// } catch (e) {
|
||||
// console.error(e);
|
||||
// console.log({ res });
|
||||
// return <div>ERROR</div>;
|
||||
// }
|
||||
// })()
|
||||
// <samp>
|
||||
// <pre>{JSON.stringify(res, null, " ")}</pre>
|
||||
// </samp>
|
||||
// )
|
||||
// )} */}
|
||||
|
||||
// try {
|
||||
// const renderedNP: T.Rendered<T.NPSelection> = {
|
||||
// type: "NP",
|
||||
// selection: renderNounSelection(r.selection, r.inflected, "none"),
|
||||
// };
|
||||
// return (
|
||||
// <>
|
||||
// {r.inflected ? "INFLECTED" : "PLAIN"}
|
||||
// <NPBlock
|
||||
// opts={opts}
|
||||
// script="p"
|
||||
// english={getEnglishFromRendered(renderedNP)}
|
||||
// >
|
||||
// {renderedNP}
|
||||
// </NPBlock>
|
||||
// </>
|
||||
// );
|
||||
// } catch (e) {
|
||||
// console.error(e);
|
||||
// return <div>ERROR RENDERING</div>;
|
||||
// }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@lingdocs/inflect",
|
||||
"version": "7.7.3",
|
||||
"version": "7.7.1",
|
||||
"description": "Pashto inflector library",
|
||||
"main": "dist/lib/library.cjs",
|
||||
"module": "dist/lib/library.js",
|
||||
|
|
|
@ -19,19 +19,7 @@ function queryP(p: string): T.DictionaryEntry[] {
|
|||
}
|
||||
return dictDb.collection.find({ p });
|
||||
}
|
||||
const memoizedQueryP = memoize(queryP);
|
||||
|
||||
/**
 * Looks up the single dictionary entry with the given `ts` id.
 *
 * @throws Error("dictionary not initialized yet") when the collection is not loaded
 * @throws Error("complement link broken") when no entry has that `ts`
 */
function queryTs(ts: number): T.DictionaryEntry {
  if (dictDb.collection) {
    const found = dictDb.findOneByTs(ts);
    if (found) {
      return found;
    }
    throw new Error("complement link broken");
  }
  throw new Error("dictionary not initialized yet");
}
|
||||
const memoizedQueryTs = memoize(queryTs);
|
||||
const memoizedQueryP = queryP;
|
||||
|
||||
function adjLookup(p: string): T.AdjectiveEntry[] {
|
||||
const res = memoizedQueryP(p);
|
||||
|
@ -45,51 +33,26 @@ function nounLookup(p: string): T.NounEntry[] {
|
|||
|
||||
function otherLookup(
|
||||
key: keyof T.DictionaryEntry,
|
||||
p: string,
|
||||
regex?: boolean
|
||||
p: string
|
||||
): T.DictionaryEntry[] {
|
||||
if (!dictDb.collection) {
|
||||
return [];
|
||||
}
|
||||
return dictDb.collection.find({ [key]: regex ? variationRegex(p) : p });
|
||||
return dictDb.collection.find({ [key]: p });
|
||||
}
|
||||
|
||||
function specialPluralLookup(p: string): T.NounEntry[] {
|
||||
if (!dictDb.collection) {
|
||||
return [];
|
||||
}
|
||||
const regex = variationRegex(p);
|
||||
const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
|
||||
return dictDb.collection
|
||||
.find({
|
||||
$or: [{ ppp: regex }, { app: regex }],
|
||||
$or: [{ ppp: { $regex: regex } }, { app: { $regex: regex } }],
|
||||
})
|
||||
.filter(tp.isNounEntry);
|
||||
}
|
||||
|
||||
/**
 * Finds all verb entries spelled `p`, attaching the complement entry
 * (looked up by the entry's `l` link) when the entry has one.
 * Returns an empty list while the dictionary collection is not loaded.
 */
function verbEntryLookup(p: string): T.VerbEntry[] {
  if (!dictDb.collection) {
    return [];
  }
  const verbs = memoizedQueryP(p).filter(tp.isVerbDictionaryEntry);
  return verbs.map((entry) => {
    if (entry.l) {
      return { entry, complement: memoizedQueryTs(entry.l) };
    }
    return { entry };
  });
}
|
||||
|
||||
/**
 * creates a RegEx mongo query to search for a variation in a certain field
 * ie. to search for کاته in کوت, کاته
 *
 * The variation must start at the beginning of the field or after whitespace
 * or a comma, and must end at the end of the field or before a comma.
 *
 * @param p - the variation to look for; it is matched literally (regex
 *   metacharacters are escaped, so input like "(" or "a.b" is safe)
 * @returns a `{ $regex }` query object
 */
function variationRegex(p: string): { $regex: RegExp } {
  // escape metacharacters so that p cannot break or widen the pattern
  const literal = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${literal}($|,)`) };
}
|
||||
|
||||
export const dictionary: T.DictionaryAPI = {
|
||||
initialize: async () => await dictDb.initialize(),
|
||||
update: async () => await dictDb.updateDictionary(() => null),
|
||||
|
@ -98,5 +61,4 @@ export const dictionary: T.DictionaryAPI = {
|
|||
nounLookup: memoize(nounLookup),
|
||||
otherLookup: memoize(otherLookup),
|
||||
specialPluralLookup: memoize(specialPluralLookup),
|
||||
verbEntryLookup: memoize(verbEntryLookup),
|
||||
};
|
||||
|
|
|
@ -1,15 +1,7 @@
|
|||
import * as T from "../../../types";
|
||||
import {
|
||||
isAdjectiveEntry,
|
||||
isNounEntry,
|
||||
isVerbDictionaryEntry,
|
||||
} from "../type-predicates";
|
||||
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
|
||||
import { entries } from "../../../../vocab/mini-dict-entries";
|
||||
|
||||
/**
 * Builds a `{ $regex }` query that finds `p` as one variation inside a
 * comma-separated field: it must start at the beginning of the field or after
 * whitespace or a comma, and end at the end of the field or before a comma.
 *
 * @param p - the variation to look for; it is matched literally (regex
 *   metacharacters are escaped, so input like "(" or "a.b" is safe)
 */
function variationRegex(p: string): { $regex: RegExp } {
  // escape metacharacters so that p cannot break or widen the pattern
  const literal = p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return { $regex: new RegExp(`(^|\\s|,)${literal}($|,)`) };
}
|
||||
|
||||
const queryP = (p: string) => entries.filter((e) => e.p === p);
|
||||
function adjLookup(p: string): T.AdjectiveEntry[] {
|
||||
return queryP(p).filter(isAdjectiveEntry) as T.AdjectiveEntry[];
|
||||
|
@ -21,37 +13,18 @@ function nounLookup(p: string): T.NounEntry[] {
|
|||
|
||||
function otherLookup(
|
||||
key: keyof T.DictionaryEntry,
|
||||
p: string,
|
||||
regex?: boolean
|
||||
p: string
|
||||
): T.DictionaryEntry[] {
|
||||
if (regex) {
|
||||
const { $regex: regex } = variationRegex(p);
|
||||
return entries.filter((e) => (e[key] as string)?.match(regex));
|
||||
}
|
||||
return entries.filter((e) => e[key] === p);
|
||||
}
|
||||
|
||||
function specialPluralLookup(p: string): T.NounEntry[] {
|
||||
const { $regex: regex } = variationRegex(p);
|
||||
const regex = new RegExp(`(^|\\s|,)${p}($|,)`);
|
||||
return entries.filter(
|
||||
(e) => (e.ppp?.match(regex) || e.app?.match(regex)) && isNounEntry(e)
|
||||
) as T.NounEntry[];
|
||||
}
|
||||
|
||||
/**
 * Finds all verb entries spelled `p` in the mini dictionary. When an entry has
 * an `l` link, the entry whose `ts` equals it is attached as `complement`
 * (left undefined if the mini dictionary has no such entry).
 */
function verbEntryLookup(p: string): T.VerbEntry[] {
  const found: T.VerbEntry[] = [];
  for (const e of entries) {
    if (e.p !== p || !isVerbDictionaryEntry(e)) {
      continue;
    }
    const entry = e;
    found.push(
      entry.l
        ? {
            entry,
            complement: entries.find((c) => c.ts === entry.l),
          }
        : { entry }
    );
  }
  return found;
}
|
||||
|
||||
export const testDictionary: T.DictionaryAPI = {
|
||||
// @ts-expect-error we won't mock the initialization
|
||||
initialize: async () => 0,
|
||||
|
@ -62,5 +35,4 @@ export const testDictionary: T.DictionaryAPI = {
|
|||
nounLookup,
|
||||
otherLookup,
|
||||
specialPluralLookup,
|
||||
verbEntryLookup,
|
||||
};
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
import * as T from "../../../types";
|
||||
import { isAdverbEntry } from "../type-predicates";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { returnParseResultS } from "./utils";
|
||||
|
||||
export function parseAdverb(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI
|
||||
lookup: LookupFunction
|
||||
): T.ParseResult<T.APSelection>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const adverbs = dictionary.queryP(first.s).filter(isAdverbEntry);
|
||||
const adverbs = lookup(first.s, "adverb");
|
||||
return adverbs.map((entry) =>
|
||||
returnParseResultS(rest, {
|
||||
type: "AP",
|
||||
|
|
|
@ -1,25 +1,26 @@
|
|||
import * as T from "../../../types";
|
||||
import { fmapParseResult } from "../fp-ps";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseAdverb } from "./parse-adverb";
|
||||
import { parseSandwich } from "./parse-sandwich";
|
||||
|
||||
export function parseAP(
|
||||
s: Readonly<T.Token[]>,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
lookup: LookupFunction,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.APSelection>[] {
|
||||
if (s.length === 0) {
|
||||
return [];
|
||||
}
|
||||
return [
|
||||
...(!possesor ? parseAdverb(s, dicitonary) : []),
|
||||
...(!possesor ? parseAdverb(s, lookup) : []),
|
||||
...fmapParseResult(
|
||||
(selection) =>
|
||||
({
|
||||
type: "AP",
|
||||
selection,
|
||||
} as const),
|
||||
parseSandwich(s, dicitonary, possesor)
|
||||
parseSandwich(s, lookup, possesor)
|
||||
),
|
||||
];
|
||||
}
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseEquative } from "./parse-equative";
|
||||
import { parseKidsSection } from "./parse-kids-section";
|
||||
import { parseNeg } from "./parse-negative";
|
||||
import { parseNPAP } from "./parse-npap";
|
||||
import { parseVBP } from "./parse-vbp";
|
||||
import { parsePH } from "./parse-ph";
|
||||
import { parseVBE } from "./parse-vbe-new";
|
||||
import { parseVBE } from "./parse-vbe";
|
||||
import {
|
||||
bindParseResult,
|
||||
returnParseResult,
|
||||
|
@ -17,7 +18,7 @@ import { isKedulStatEntry } from "./parse-verb-helpers";
|
|||
|
||||
export function parseBlocks(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
lookup: LookupFunction,
|
||||
blocks: T.ParsedBlock[],
|
||||
kids: T.ParsedKid[]
|
||||
): T.ParseResult<{
|
||||
|
@ -34,13 +35,13 @@ export function parseBlocks(
|
|||
|
||||
// TODO: rather parse VBP / VBE
|
||||
const allBlocks: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
|
||||
...(!inVerbSection ? parseNPAP(tokens, dicitonary) : []),
|
||||
...(!inVerbSection ? parseNPAP(tokens, lookup) : []),
|
||||
// ensure at most one of each PH, VBE, VBP
|
||||
...(prevPh ? [] : parsePH(tokens)),
|
||||
...(blocks.some(isParsedVBE)
|
||||
? []
|
||||
: [...parseVBE(tokens, dicitonary), ...parseEquative(tokens)]),
|
||||
...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, dicitonary)),
|
||||
: [...parseVBE(tokens, lookup), ...parseEquative(tokens)]),
|
||||
...(blocks.some(isParsedVBP) ? [] : parseVBP(tokens, lookup)),
|
||||
...(blocks.some((b) => b.type === "negative") ? [] : parseNeg(tokens)),
|
||||
...parseKidsSection(tokens, []),
|
||||
];
|
||||
|
@ -49,7 +50,7 @@ export function parseBlocks(
|
|||
const errors: T.ParseError[] = [];
|
||||
if (r.type === "kids") {
|
||||
return {
|
||||
next: parseBlocks(tokens, dicitonary, blocks, [...kids, ...r.kids]),
|
||||
next: parseBlocks(tokens, lookup, blocks, [...kids, ...r.kids]),
|
||||
errors:
|
||||
blocks.length !== 1
|
||||
? [{ message: "kids' section out of place" }]
|
||||
|
@ -70,7 +71,7 @@ export function parseBlocks(
|
|||
return [];
|
||||
}
|
||||
return {
|
||||
next: parseBlocks(tokens, dicitonary, [...blocks, r], kids),
|
||||
next: parseBlocks(tokens, lookup, [...blocks, r], kids),
|
||||
errors,
|
||||
};
|
||||
});
|
||||
|
|
|
@ -1736,7 +1736,7 @@ describe("parsing nouns", () => {
|
|||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const res = parseNoun(tokens, testDictionary, undefined).flatMap(
|
||||
const res = parseNoun(tokens, testDictionary, undefined, []).flatMap(
|
||||
// only take the ones that used all the tokens
|
||||
({ body, tokens }) => (tokens.length === 0 ? [body] : [])
|
||||
);
|
||||
|
|
|
@ -3,12 +3,7 @@ import { makeNounSelection } from "../phrase-building/make-selections";
|
|||
import { parseAdjective } from "./parse-adjective-new";
|
||||
import { parseDeterminer } from "./parse-determiner";
|
||||
import { parseNounWord } from "./parse-noun-word";
|
||||
import {
|
||||
bindParseResult,
|
||||
parserCombMany,
|
||||
parserCombSucc3,
|
||||
toParseError,
|
||||
} from "./utils";
|
||||
import { bindParseResult, parserCombMany, toParseError } from "./utils";
|
||||
|
||||
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
||||
|
||||
|
@ -20,32 +15,33 @@ export function parseNoun(
|
|||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const res = parserCombSucc3([
|
||||
parserCombMany(parseDeterminer),
|
||||
parserCombMany(parseAdjective),
|
||||
parseNounWord,
|
||||
])(tokens, dictionary);
|
||||
return bindParseResult(res, (tkns, [determiners, adjectives, nounWord]) => {
|
||||
const detRes = parserCombMany(parseDeterminer)(tokens, dictionary);
|
||||
// TODO: add recognition of او between adjectives
|
||||
return bindParseResult(detRes, (t, determiners) => {
|
||||
const adjRes = parserCombMany(parseAdjective)(t, dictionary);
|
||||
return bindParseResult(adjRes, (tk, adjectives) => {
|
||||
const nounWord = parseNounWord(tk, dictionary);
|
||||
return bindParseResult(nounWord, (tkns, nr) => {
|
||||
const { error: adjErrors } = adjDetsMatch(
|
||||
adjectives,
|
||||
nounWord.gender,
|
||||
nounWord.inflected ? 1 : 0,
|
||||
nounWord.plural
|
||||
nr.gender,
|
||||
nr.inflected ? 1 : 0,
|
||||
nr.plural
|
||||
);
|
||||
const { error: detErrors } = adjDetsMatch(
|
||||
determiners,
|
||||
nounWord.gender,
|
||||
nounWord.inflected ? 1 : 0,
|
||||
nounWord.plural
|
||||
nr.gender,
|
||||
nr.inflected ? 1 : 0,
|
||||
nr.plural
|
||||
);
|
||||
const dupErrors = checkForDeterminerDuplicates(determiners);
|
||||
const s = makeNounSelection(nounWord.entry, undefined);
|
||||
const s = makeNounSelection(nr.entry, undefined);
|
||||
const body: NounResult = {
|
||||
inflected: nounWord.inflected,
|
||||
inflected: nr.inflected,
|
||||
selection: {
|
||||
...s,
|
||||
gender: nounWord.gender,
|
||||
number: nounWord.plural ? "plural" : "singular",
|
||||
gender: nr.gender,
|
||||
number: nr.plural ? "plural" : "singular",
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
determiners: determiners.length
|
||||
? {
|
||||
|
@ -69,6 +65,8 @@ export function parseNoun(
|
|||
},
|
||||
];
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function checkForDeterminerDuplicates(
|
||||
|
|
|
@ -0,0 +1,191 @@
|
|||
import * as T from "../../../types";
|
||||
import { getInflectionPattern } from "../inflection-pattern";
|
||||
import { makeNounSelection } from "../phrase-building/make-selections";
|
||||
import {
|
||||
isMascNounEntry,
|
||||
isNounEntry,
|
||||
isPluralNounEntry,
|
||||
isUnisexNounEntry,
|
||||
} from "../type-predicates";
|
||||
import { getInflectionQueries } from "./inflection-query";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseAdjective } from "./parse-adjective";
|
||||
import { bindParseResult } from "./utils";
|
||||
|
||||
type NounResult = { inflected: boolean; selection: T.NounSelection };
|
||||
|
||||
/**
 * Parses a noun at the head of `tokens`, optionally preceded by adjectives.
 *
 * Two kinds of results are combined:
 *  1. the first token is an adjective: recurse on the remaining tokens with
 *     that adjective appended to `adjectives`
 *  2. the first token is a noun form: one result per matching entry, gender,
 *     inflection and number reading, with agreement errors for `adjectives`
 *
 * @param tokens - remaining input tokens
 * @param lookup - dictionary lookup function
 * @param possesor - possesor to attach to the resulting noun selection, if any
 * @param adjectives - adjectives already parsed in front of this noun
 * @returns every possible parse; each result's `tokens` is what is left over
 */
export function parseNoun(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction,
  possesor: T.PossesorSelection | undefined,
  adjectives: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    given: string;
    selection: T.AdjectiveSelection;
  }[]
): T.ParseResult<NounResult>[] {
  if (tokens.length === 0) {
    return [];
  }
  // TODO: add recognition of او between adjectives
  const withAdj = bindParseResult(
    parseAdjective(tokens, lookup),
    (tkns, adj) => parseNoun(tkns, lookup, possesor, [...adjectives, adj])
  );
  const [first, ...rest] = tokens;

  const nounResults: ReturnType<typeof parseNoun> = [];
  for (const { search, details } of getInflectionQueries(first.s, true)) {
    const nounEntries = lookup(search, "nounAdj").filter(isNounEntry);
    for (const deets of details) {
      for (const entry of nounEntries.filter(deets.predicate)) {
        // genders the dictionary entry itself allows
        const genders: T.Gender[] = isUnisexNounEntry(entry)
          ? ["masc", "fem"]
          : isMascNounEntry(entry)
          ? ["masc"]
          : ["fem"];
        for (const gender of deets.gender) {
          if (!genders.includes(gender)) {
            continue;
          }
          for (const inf of deets.inflection) {
            const { error: adjErrors } = adjsMatch(
              adjectives,
              gender,
              inf,
              deets.plural
            );
            const readings = convertInflection(
              inf,
              entry,
              gender,
              deets.plural
            );
            for (const { inflected, number } of readings) {
              const selection = makeNounSelection(entry, undefined);
              nounResults.push({
                tokens: rest,
                body: {
                  inflected,
                  selection: {
                    ...selection,
                    // only override gender/number when the entry allows it
                    gender: selection.genderCanChange
                      ? gender
                      : selection.gender,
                    number: selection.numberCanChange
                      ? number
                      : selection.number,
                    adjectives: adjectives.map((a) => a.selection),
                    // TODO: could be nicer to validate that the possesor is inflected before
                    // and just pass in the selection
                    possesor,
                  },
                },
                errors: adjErrors.map((message) => ({ message })),
              });
            }
          }
        }
      }
    }
  }
  return [...withAdj, ...nounResults];
}
|
||||
|
||||
/**
 * Checks that every adjective agrees with the noun's gender and inflection.
 *
 * A plural noun shifts the required adjective inflection up by one
 * (capped at 2), so a plain plural noun needs first-inflection adjectives.
 *
 * @param adjectives - the adjectives parsed in front of the noun
 * @param gender - gender of the noun reading
 * @param inf - inflection of the noun reading
 * @param plural - whether the noun form is an explicit plural
 * @returns `ok: true` with no errors when all agree, otherwise one message per
 *   disagreeing adjective
 */
function adjsMatch(
  adjectives: Parameters<typeof parseNoun>[3],
  gender: T.Gender,
  inf: 0 | 1 | 2,
  plural: boolean | undefined
): { ok: boolean; error: string[] } {
  const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
  const error = adjectives.flatMap((adj) => {
    const inflectionOk = adj.inflection.includes(inflection);
    if (adj.gender.includes(gender) && inflectionOk) {
      return [];
    }
    // show the dictionary form too when the given form differs from it
    const adjText =
      adj.given === adj.selection.entry.p
        ? adj.given
        : `${adj.given} (${adj.selection.entry.p})`;
    // the template supplies the single "should be"; only name the inflection
    // when the inflection is what's wrong
    const inflectionIssue = inflectionOk
      ? ""
      : `${showInflection(inflection)} `;
    return [
      `Adjective agreement error: ${adjText} should be ${inflectionIssue}${gender}.`,
    ];
  });
  return { ok: error.length === 0, error };
}
|
||||
|
||||
/**
 * Lists the (inflected, number) readings that a word form with the given
 * inflection can have for this entry and gender.
 *
 * An explicit plural form shifts the inflection up by one (capped at 2).
 * Inflection 0 is always plain singular and inflection 2 always inflected
 * plural; inflection 1 can be inflected singular and/or plain plural,
 * depending on the entry's inflection pattern, gender and plurality.
 */
function convertInflection(
  inflection: 0 | 1 | 2,
  entry: T.NounEntry | T.AdjectiveEntry,
  gender: T.Gender,
  plural: boolean | undefined
): {
  inflected: boolean;
  number: T.NounNumber;
}[] {
  const pattern = getInflectionPattern(entry);
  const inf = (plural && inflection < 2 ? inflection + 1 : inflection) as
    | 0
    | 1
    | 2;
  if (inf === 0) {
    return [{ inflected: false, number: "singular" }];
  }
  if (inf !== 1) {
    return [{ inflected: true, number: "plural" }];
  }
  // the word can only be read as plural
  const pluralOnly = (isNounEntry(entry) && isPluralNounEntry(entry)) || plural;
  const readings: { inflected: boolean; number: T.NounNumber }[] = [];
  if (
    !pluralOnly &&
    !(pattern === 4 && entry.p.endsWith("ه") && gender === "masc")
  ) {
    readings.push({ inflected: true, number: "singular" });
  }
  if (pattern > 1 || (pattern > 0 && gender === "fem") || pluralOnly) {
    readings.push({ inflected: false, number: "plural" });
  }
  return readings;
}
|
||||
|
||||
/** Human-readable name of an inflection number, used in agreement error messages. */
function showInflection(inf: 0 | 1 | 2): string {
  if (inf === 0) {
    return "plain";
  }
  if (inf === 1) {
    return "first inflection";
  }
  return "second inflection";
}
|
|
@ -1,12 +1,13 @@
|
|||
import * as T from "../../../types";
|
||||
import { parsePronoun } from "./parse-pronoun";
|
||||
import { parseNoun } from "./parse-noun-new";
|
||||
import { parseNoun } from "./parse-noun";
|
||||
import { fmapParseResult } from "../fp-ps";
|
||||
import { parseParticiple } from "./parse-participle";
|
||||
import { LookupFunction } from "./lookup";
|
||||
|
||||
export function parseNP(
|
||||
s: Readonly<T.Token[]>,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
lookup: LookupFunction,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.ParsedNP>[] {
|
||||
if (s.length === 0) {
|
||||
|
@ -40,7 +41,7 @@ export function parseNP(
|
|||
|
||||
return fmapParseResult(makeNPSl, [
|
||||
...(!possesor ? parsePronoun(s) : []),
|
||||
...parseNoun(s, dicitonary, possesor),
|
||||
...parseParticiple(s, dicitonary, possesor),
|
||||
...parseNoun(s, lookup, possesor, []),
|
||||
...parseParticiple(s, lookup, possesor),
|
||||
]);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseAP } from "./parse-ap";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { parsePossesor } from "./parse-possesor";
|
||||
|
@ -6,25 +7,19 @@ import { bindParseResult } from "./utils";
|
|||
|
||||
export function parseNPAP(
|
||||
s: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI
|
||||
lookup: LookupFunction
|
||||
): T.ParseResult<T.APSelection | T.ParsedNP>[] {
|
||||
if (s.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const possesor = parsePossesor(s, dictionary, undefined);
|
||||
const possesor = parsePossesor(s, lookup, undefined);
|
||||
if (!possesor.length) {
|
||||
return [
|
||||
...parseNP(s, dictionary, undefined),
|
||||
...parseAP(s, dictionary, undefined),
|
||||
];
|
||||
return [...parseNP(s, lookup, undefined), ...parseAP(s, lookup, undefined)];
|
||||
}
|
||||
return bindParseResult<T.PossesorSelection, T.APSelection | T.ParsedNP>(
|
||||
possesor,
|
||||
(tokens, p) => {
|
||||
return [
|
||||
...parseNP(tokens, dictionary, p),
|
||||
...parseAP(tokens, dictionary, p),
|
||||
];
|
||||
return [...parseNP(tokens, lookup, p), ...parseAP(tokens, lookup, p)];
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
|
@ -4,16 +4,14 @@ import {
|
|||
makePossesorSelection,
|
||||
} from "../phrase-building/make-selections";
|
||||
import * as T from "../../../types";
|
||||
import { testDictionary } from "./mini-test-dictionary";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { parseNPAP } from "./parse-npap";
|
||||
|
||||
const leedul = testDictionary.verbEntryLookup("لیدل")[0];
|
||||
const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
|
||||
const wahul = testDictionary.verbEntryLookup("وهل")[0];
|
||||
const saray = testDictionary.nounLookup("سړی")[0];
|
||||
|
||||
// TODO: uncomment and get parsing of short participles working
|
||||
const leedul = wordQuery("لیدل", "verb");
|
||||
const akheestul = wordQuery("اخیستل", "verb");
|
||||
const wahul = wordQuery("وهل", "verb");
|
||||
const saray = wordQuery("سړی", "noun");
|
||||
|
||||
const tests: {
|
||||
label: string;
|
||||
|
@ -115,20 +113,20 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
// {
|
||||
// input: "د سړي لیدو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeParticipleSelection(leedul),
|
||||
// possesor: makePossesorSelection(
|
||||
// makeNounSelection(saray, undefined)
|
||||
// ),
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
{
|
||||
input: "د سړي لیدو",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeParticipleSelection(leedul),
|
||||
possesor: makePossesorSelection(
|
||||
makeNounSelection(saray, undefined)
|
||||
),
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
@ -138,7 +136,7 @@ describe("parsing participles", () => {
|
|||
test(label, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const res = parseNPAP(tokens, testDictionary).map(({ body }) => body);
|
||||
const res = parseNPAP(tokens, lookup).map(({ body }) => body);
|
||||
expect(res).toEqual(
|
||||
output.map(
|
||||
(x): T.ParsedNP => ({
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import * as T from "../../../types";
|
||||
import { shortVerbEndConsonant } from "./misc";
|
||||
import { LookupFunction } from "./lookup";
|
||||
|
||||
type ParticipleResult = {
|
||||
inflected: boolean;
|
||||
|
@ -7,10 +7,9 @@ type ParticipleResult = {
|
|||
};
|
||||
|
||||
// TODO: should have adverbs with participle
|
||||
// TODO: NOTE this does not work with compound verbs yet
|
||||
export function parseParticiple(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dicitonary: T.DictionaryAPI,
|
||||
lookup: LookupFunction,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<ParticipleResult>[] {
|
||||
if (tokens.length === 0) {
|
||||
|
@ -21,13 +20,8 @@ export function parseParticiple(
|
|||
return [];
|
||||
}
|
||||
const inflected = first.s.endsWith("و");
|
||||
|
||||
return [
|
||||
...dicitonary.verbEntryLookup(inflected ? first.s.slice(0, -1) : first.s),
|
||||
...(inflected && shortVerbEndConsonant.includes(first.s.at(-2) || "")
|
||||
? dicitonary.verbEntryLookup(first.s.slice(0, -1) + "ل")
|
||||
: []),
|
||||
].map<T.ParseResult<ParticipleResult>>((verb) => ({
|
||||
const matches = lookup(first.s, "participle");
|
||||
return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({
|
||||
tokens: rest,
|
||||
body: {
|
||||
inflected,
|
||||
|
|
|
@ -1,24 +1,24 @@
|
|||
import * as T from "../../../types";
|
||||
import { lookup } from "./lookup";
|
||||
import { parseVP } from "./parse-vp";
|
||||
|
||||
// شو should not be sheyaano !!
|
||||
|
||||
export function parsePhrase(
|
||||
s: T.Token[],
|
||||
dicitonary: T.DictionaryAPI
|
||||
): {
|
||||
success: // | {
|
||||
// inflected: boolean;
|
||||
// selection: T.NPSelection;
|
||||
// }
|
||||
// | Omit<T.VBE, "ps">
|
||||
T.VPSelectionComplete[];
|
||||
export function parsePhrase(s: T.Token[]): {
|
||||
success: (
|
||||
| {
|
||||
inflected: boolean;
|
||||
selection: T.NPSelection;
|
||||
}
|
||||
| Omit<T.VBE, "ps">
|
||||
| T.VPSelectionComplete
|
||||
)[];
|
||||
errors: string[];
|
||||
} {
|
||||
const res = [
|
||||
// ...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
|
||||
// ...parseVerb(s, verbLookup),
|
||||
...parseVP(s, dicitonary),
|
||||
...parseVP(s, lookup),
|
||||
];
|
||||
|
||||
const success = res.filter((x) => !x.tokens.length).map((x) => x.body);
|
||||
|
|
|
@ -4,16 +4,16 @@ import {
|
|||
makeNounSelection,
|
||||
makePronounSelection,
|
||||
} from "../phrase-building/make-selections";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { parsePossesor } from "./parse-possesor";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { isCompleteResult } from "./utils";
|
||||
import { testDictionary as dictionary } from "./mini-test-dictionary";
|
||||
|
||||
const sturey = dictionary.adjLookup("ستړی")[0];
|
||||
const sarey = dictionary.nounLookup("سړی")[0];
|
||||
const maashoom = dictionary.nounLookup("ماشوم")[0];
|
||||
const malguray = dictionary.nounLookup("ملګری")[0];
|
||||
const plaar = dictionary.nounLookup("پلار")[0];
|
||||
const sturey = wordQuery("ستړی", "adj");
|
||||
const sarey = wordQuery("سړی", "noun");
|
||||
const maashoom = wordQuery("ماشوم", "noun");
|
||||
const malguray = wordQuery("ملګری", "noun");
|
||||
const plaar = wordQuery("پلار", "noun");
|
||||
|
||||
const tests: {
|
||||
input: string;
|
||||
|
@ -109,12 +109,12 @@ const tests: {
|
|||
test("parse possesor", () => {
|
||||
tests.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const parsed = parsePossesor(tokens, dictionary, undefined);
|
||||
const parsed = parsePossesor(tokens, lookup, undefined);
|
||||
if (output === "error") {
|
||||
expect(parsed.some((x) => x.errors.length)).toBe(true);
|
||||
} else {
|
||||
expect(
|
||||
parsePossesor(tokens, dictionary, undefined)
|
||||
parsePossesor(tokens, lookup, undefined)
|
||||
.filter(isCompleteResult)
|
||||
.map((x) => x.body.np.selection)
|
||||
).toEqual(output);
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { bindParseResult } from "./utils";
|
||||
// TODO: maybe contractions should just be male to cut down on the
|
||||
|
@ -18,7 +19,7 @@ const contractions: [string[], T.Person[]][] = [
|
|||
|
||||
export function parsePossesor(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI,
|
||||
lookup: LookupFunction,
|
||||
prevPossesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.PossesorSelection>[] {
|
||||
if (tokens.length === 0) {
|
||||
|
@ -42,14 +43,14 @@ export function parsePossesor(
|
|||
? [{ message: "a pronoun cannot have a possesor" }]
|
||||
: [];
|
||||
return contractions
|
||||
.flatMap((p) => parsePossesor(rest, dictionary, p))
|
||||
.flatMap((p) => parsePossesor(rest, lookup, p))
|
||||
.map((x) => ({
|
||||
...x,
|
||||
errors: [...errors, ...x.errors],
|
||||
}));
|
||||
}
|
||||
if (first.s === "د") {
|
||||
const np = parseNP(rest, dictionary, undefined);
|
||||
const np = parseNP(rest, lookup, undefined);
|
||||
return bindParseResult(np, (tokens, body) => {
|
||||
const possesor: T.PossesorSelection = {
|
||||
shrunken: false,
|
||||
|
@ -62,11 +63,7 @@ export function parsePossesor(
|
|||
[{ message: `possesor should be inflected` }]
|
||||
: [],
|
||||
// add and check error - can't add possesor to pronoun
|
||||
next: parsePossesor(
|
||||
tokens,
|
||||
dictionary,
|
||||
addPoss(prevPossesor, possesor)
|
||||
),
|
||||
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import * as T from "../../../types";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { sandwiches } from "../sandwiches";
|
||||
import { parseNP } from "./parse-np";
|
||||
import { bindParseResult } from "./utils";
|
||||
|
@ -13,7 +14,7 @@ import { bindParseResult } from "./utils";
|
|||
|
||||
export function parseSandwich(
|
||||
s: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI,
|
||||
lookup: LookupFunction,
|
||||
possesor: T.PossesorSelection | undefined
|
||||
): T.ParseResult<T.SandwichSelection<T.Sandwich>>[] {
|
||||
if (s.length === 0) {
|
||||
|
@ -26,7 +27,7 @@ export function parseSandwich(
|
|||
(x) => x.before && x.before.p === first.s
|
||||
);
|
||||
// TODO: this could be really repetitive...
|
||||
const nps = parseNP(startMatches.length ? rest : s, dictionary, possesor);
|
||||
const nps = parseNP(startMatches.length ? rest : s, lookup, possesor);
|
||||
return bindParseResult(nps, (tokens, np) => {
|
||||
if (!tokens.length) {
|
||||
return [];
|
||||
|
|
|
@ -1,387 +0,0 @@
|
|||
import * as T from "../../../types";
|
||||
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
|
||||
import { parseKedul } from "./parse-kedul";
|
||||
import { getVerbEnding } from "./parse-verb-helpers";
|
||||
import { returnParseResults } from "./utils";
|
||||
import { entries as splitVerbEntries } from "./split-verbs";
|
||||
import * as tp from "../type-predicates";
|
||||
import memoize from "micro-memoize";
|
||||
import { pashtoConsonants } from "../pashto-consonants";
|
||||
|
||||
// TODO: و ارزول
|
||||
|
||||
// TODO: کول verbs!
|
||||
// check that aawu stuff is working
|
||||
// check oo`azmooy -
|
||||
// TODO: proper use of sepOo (hasBreakawayAleph) when checking for perfective roots/stems
|
||||
// check څاته
|
||||
// laaRa shum etc
|
||||
// TODO: proper use of perfective with sh
|
||||
// TODO: use of raa, dar, war with sh
|
||||
// TODO: هغه لاړ
|
||||
// TODO: don't have کول کېدل in split-verbs
|
||||
|
||||
/** The `type: "verb"` member of the `info` union carried by a parsed VBE. */
type BaseInfo = Extract<T.ParsedVBE["info"], { type: "verb" }>;

/** Verb info whose `base` is pinned to `"stem"`. */
type StemInfo = Omit<BaseInfo, "base"> & { base: "stem" };

/** Verb info whose `base` is pinned to `"root"`. */
type RootInfo = Omit<BaseInfo, "base"> & { base: "root" };
|
||||
|
||||
/**
 * Tries to read the first token as a conjugated verb form.
 *
 * If `parseIrregularVerb` recognises the token, only those results are
 * returned. Otherwise the results are, in order: whatever `parseKedul` yields
 * for the tokens, then the stem-based candidates, then the root-based
 * candidates.
 *
 * @param tokens - input tokens; only the first one is inspected here
 * @param dictionary - dictionary used by the stem and root searches
 * @returns the collected parse results; empty when there are no tokens
 */
export function parseVBE(
  tokens: Readonly<T.Token[]>,
  dictionary: T.DictionaryAPI
): T.ParseResult<T.ParsedVBE>[] {
  if (tokens.length === 0) {
    return [];
  }
  const [first, ...rest] = tokens;
  const irregular = parseIrregularVerb(first.s);
  if (irregular.length > 0) {
    return returnParseResults(rest, irregular);
  }
  const kedulStat = parseKedul(tokens);
  const ending = first.s.at(-1) || "";
  // a token ending in ل is used whole as the base; otherwise the last
  // character is taken off as the ending
  const base = ending === "ل" ? first.s : first.s.slice(0, -1);
  const endings = getVerbEnding(ending);
  // todo imperative for seperating
  const imperativePeople = getImperativeVerbEnding(ending);

  // stem-based candidates: every stem match gets the regular persons first,
  // then the imperative persons
  const stemVBs: T.ParsedVBE[] = [];
  const stemInfos = [
    ...findImperfectiveStem(base, dictionary),
    ...findPerfectiveStem(base, dictionary),
  ];
  for (const info of stemInfos) {
    for (const person of endings.stem) {
      stemVBs.push({ type: "VB", person, info });
    }
    for (const person of imperativePeople) {
      stemVBs.push({
        type: "VB",
        person,
        info: { ...info, imperative: true },
      });
    }
  }
  const stemRes = returnParseResults(rest, stemVBs);

  // root-based candidates: every root match gets its short 3rd person form
  // (if any) first, then the regular persons; the special 3rd person forms
  // come last
  const rootVBs: T.ParsedVBE[] = [];
  const rootInfos = [
    ...findImperfectiveRoot(base, dictionary),
    ...findPerfectiveRoot(base, dictionary),
  ];
  for (const info of rootInfos) {
    rootVBs.push(...thirdPersSingMascShortFromRoot(base, ending, info));
    for (const person of endings.root) {
      rootVBs.push({ type: "VB", person, info });
    }
  }
  rootVBs.push(...specialThirdPersMascSingForm(base, ending, dictionary));
  const rootRes = returnParseResults(rest, rootVBs);

  return [...kedulStat, ...stemRes, ...rootRes];
}
|
||||
|
||||
/**
 * Looks for third person singular masculine root forms that are not produced
 * by the regular ending tables.
 *
 * Two sources are combined, in this order:
 *  - "regular": dictionary entries found through their `tppp` field, queried
 *    with `base + ending` (and with `base` alone when the ending is ه), each
 *    query also tried with an added alef;
 *  - "hardEnding": for bases ending in ې/و followed by د, or in س/ښ followed by
 *    ت (base longer than one character), root matches for
 *    `base + ending + "ل"`.
 *
 * NOTE: an earlier variant that matched directly against the split-verb
 * entries (imperfective and perfective) was disabled and is not part of the
 * result.
 *
 * @param base - the token without its last character (or the whole token; see the caller)
 * @param ending - the last character of the token
 * @param dicitonary - dictionary used for the lookups
 * @returns the matching forms; empty unless the ending is ه or a Pashto consonant
 */
function specialThirdPersMascSingForm(
  base: string,
  ending: string,
  dicitonary: T.DictionaryAPI
): T.ParsedVBE[] {
  const endingQualifies = ending === "ه" || pashtoConsonants.includes(ending);
  if (!endingQualifies) {
    return [];
  }

  const baseEndsWithAny = (options: string[]): boolean =>
    options.some((x) => base.endsWith(x));
  const isHardEnding =
    (ending === "د" && baseEndsWithAny(["ې", "و"])) ||
    (ending === "ت" && baseEndsWithAny(["س", "ښ"]) && base.length > 1);

  const hardEnding: T.ParsedVBE[] = [];
  if (isHardEnding) {
    const withL = base + ending + "ل";
    const infos = [
      ...findPerfectiveRoot(withL, dicitonary),
      ...findImperfectiveRoot(withL, dicitonary),
    ];
    for (const info of infos) {
      hardEnding.push({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info,
      });
    }
  }

  const queries = [base + ending];
  if (ending === "ه") {
    queries.push(base);
  }
  const regular: T.ParsedVBE[] = [];
  for (const query of queries.flatMap((q) => withAlefAdded(q))) {
    for (const entry of dicitonary.otherLookup("tppp", query, true)) {
      if (!tp.isVerbDictionaryEntry(entry) || entry.l || !entry.tppp) {
        continue;
      }
      // NOT IF STARTS WITH ALEPH!
      const aspects: readonly ("imperfective" | "perfective")[] =
        entry.separationAtP
          ? ["imperfective"]
          : startsWithAleph(entry.p) && !startsWithAleph(base)
          ? ["perfective"]
          : ["imperfective", "perfective"];
      for (const aspect of aspects) {
        regular.push({
          type: "VB",
          person: T.Person.ThirdSingMale,
          info: {
            type: "verb",
            aspect,
            base: "root",
            verb: { entry },
          },
        });
      }
    }
  }

  return [...regular, ...hardEnding];
}
|
||||
|
||||
/**
 * Produces the third person singular masculine reading for a root match when
 * the token ends in ه, its base does not end in ل, and the verb's entry has no
 * `tppp` value of its own.
 *
 * @param base - the token without its ending
 * @param ending - the last character of the token
 * @param info - the root match to wrap
 * @returns a single-element array for the case above, otherwise an empty array
 */
function thirdPersSingMascShortFromRoot(
  base: string,
  ending: string,
  info: RootInfo
): T.ParsedVBE[] {
  const entryHasTppp = Boolean(info.verb.entry.tppp);
  const looksLikeShortForm = ending === "ه" && !base.endsWith("ل");
  if (entryHasTppp || !looksLikeShortForm) {
    return [];
  }
  return [
    {
      type: "VB",
      person: T.Person.ThirdSingMale,
      info,
    },
  ];
}
|
||||
|
||||
/**
 * Finds verbs for which `s` is an imperfective stem.
 *
 * Candidates come from `regStemSearch` (regular stems) followed by dictionary
 * entries whose `psp` field matches `s`, limited to verb entries without an
 * `l` value.
 *
 * @param s - the stem candidate
 * @param dicitonary - dictionary used for the lookups
 * @returns one imperfective stem info per matching verb
 */
function findImperfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
): StemInfo[] {
  // these bases are never looked up here (presumably left to parseKedul; confirm)
  const skipped = ["کېږ", "کېد", "ش", "شو", "شول"];
  if (skipped.includes(s)) {
    return [];
  }
  const verbs: T.VerbEntry[] = [...regStemSearch(s, dicitonary)];
  for (const e of dicitonary.otherLookup("psp", s)) {
    if (tp.isVerbDictionaryEntry(e) && !e.l) {
      verbs.push({ entry: e });
    }
  }
  return verbs.map(
    (verb): StemInfo => ({
      type: "verb",
      aspect: "imperfective",
      base: "stem",
      verb,
    })
  );
}
|
||||
|
||||
/**
 * Returns `s` together with its alef-prefixed variants.
 *
 * @param s - the string to expand
 * @returns `[s]` when `s` already starts with an alef, otherwise
 *          `s` followed by `s` prefixed with ا and with آ
 */
function withAlefAdded(s: string): string[] {
  if (startsWithAleph(s)) {
    return [s];
  }
  return [s, "ا" + s, "آ" + s];
}
|
||||
|
||||
/**
 * Memoized search of the split-verb entries: returns those whose stem text
 * (`ssp`, else `psp`, else `p`), with the first `separationAtP` characters
 * removed, equals `s`.
 */
const stemSplitLookup = memoize((s: string) =>
  splitVerbEntries.filter(({ entry }) => {
    const stemText = entry.ssp || entry.psp || entry.p;
    const afterHead = stemText.slice(entry.separationAtP || 0);
    return afterHead === s;
  })
);
|
||||
|
||||
/**
 * Finds verbs for which `s` is a perfective stem.
 *
 * `s` is queried as-is and with an added alef. Results are, in order:
 *  - regular stems from `regStemSearch`, without separating entries;
 *  - entries matched on `psp` that are verbs with no `l`, no `ssp` and no
 *    `separationAtP`;
 *  - split-verb entries from `stemSplitLookup(s)`.
 *
 * @param s - the stem candidate
 * @param dicitonary - dictionary used for the lookups
 * @returns one perfective stem info per match; empty when `s` starts with an
 *          alef or is one of the skipped bases
 */
function findPerfectiveStem(
  s: string,
  dicitonary: T.DictionaryAPI
): StemInfo[] {
  const skipped = ["کېږ", "کېد", "ش", "شو", "شول"];
  if (skipped.includes(s) || startsWithAleph(s)) {
    return [];
  }
  const queries = withAlefAdded(s);
  const verbs: T.VerbEntry[] = [];
  for (const q of queries) {
    for (const found of regStemSearch(q, dicitonary)) {
      if (!found.entry.separationAtP) {
        verbs.push(found);
      }
    }
  }
  for (const q of queries) {
    for (const e of dicitonary.otherLookup("psp", q)) {
      if (
        tp.isVerbDictionaryEntry(e) &&
        !e.l &&
        !e.ssp &&
        !e.separationAtP
      ) {
        verbs.push({ entry: e });
      }
    }
  }
  verbs.push(...stemSplitLookup(s));
  return verbs.map(
    (verb): StemInfo => ({
      type: "verb",
      aspect: "perfective",
      base: "stem",
      verb,
    })
  );
}
|
||||
|
||||
/**
 * Looks up verbs whose regular stem is `s`.
 *
 * Two dictionary queries are made: `s + "ل"`, keeping entries whose `c` does
 * not contain "intrans", and `s` (minus a trailing ېږ, if present) + "ېدل",
 * keeping entries whose `c` does contain "intrans". In both cases entries whose
 * `c` contains "comp", or that have an `ssp` or `psp` value, are dropped.
 *
 * @param s - the stem candidate
 * @param dicitonary - dictionary providing `verbEntryLookup`
 * @returns the first query's matches followed by the second's
 */
function regStemSearch(s: string, dicitonary: T.DictionaryAPI): T.VerbEntry[] {
  const hasPlainStem = (e: T.VerbEntry): boolean =>
    !e.entry.c.includes("comp") && !e.entry.ssp && !e.entry.psp;

  const transQuery = s + "ل";
  const regTrans = dicitonary
    .verbEntryLookup(transQuery)
    .filter((e) => hasPlainStem(e) && !e.entry.c.includes("intrans"));

  const intransBase = s.endsWith("ېږ") ? s.slice(0, -2) : s;
  const regIntrans = dicitonary
    .verbEntryLookup(intransBase + "ېدل")
    .filter((e) => hasPlainStem(e) && e.entry.c.includes("intrans"));

  return [...regTrans, ...regIntrans];
}
|
||||
|
||||
/**
 * Finds verbs for which `s` is an imperfective root.
 *
 * The dictionary is queried with `s` and with `s + "ل"`; entries whose `c`
 * contains "comp" are dropped.
 *
 * @param s - the root candidate
 * @param dicitonary - dictionary providing `verbEntryLookup`
 * @returns one imperfective root info per matching verb; empty for the
 *          skipped bases listed below
 */
function findImperfectiveRoot(
  s: string,
  dicitonary: T.DictionaryAPI
): RootInfo[] {
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  const reg = [s, s + "ل"]
    // call through the dictionary object: passing `verbEntryLookup` itself to
    // flatMap would detach it from `dicitonary` and hand it flatMap's index
    // and array as extra arguments
    .flatMap((query) => dicitonary.verbEntryLookup(query))
    .filter((e) => !e.entry.c.includes("comp"));
  return reg.map((verb) => ({
    type: "verb",
    aspect: "imperfective",
    base: "root",
    verb,
  }));
}
|
||||
|
||||
/**
 * Memoized search of the split-verb entries: returns those whose root text
 * (`prp`, else `p`), with the first `separationAtP` characters removed, equals
 * `s` or `s + "ل"`.
 */
const rootSplitLookup = memoize((s: string) => {
  const targets = [s, s + "ل"];
  return splitVerbEntries.filter(({ entry }) => {
    const afterHead = (entry.prp || entry.p).slice(entry.separationAtP || 0);
    return targets.includes(afterHead);
  });
});
|
||||
|
||||
/**
 * Finds verbs for which `s` is a perfective root.
 *
 * `s` and `s + "ل"` are each queried as-is and with an added alef. Dictionary
 * matches are kept only when their `c` does not contain "comp" and they have
 * neither `prp` nor `separationAtP`. Split-verb entries from
 * `rootSplitLookup(s)` are appended after them.
 *
 * @param s - the root candidate
 * @param dicitonary - dictionary providing `verbEntryLookup`
 * @returns one perfective root info per match; empty when `s` starts with an
 *          alef or is one of the skipped bases
 */
function findPerfectiveRoot(
  s: string,
  dicitonary: T.DictionaryAPI
): RootInfo[] {
  if (startsWithAleph(s) || ["کېږ", "کېد", "ش", "شو", "شول"].includes(s)) {
    return [];
  }
  const reg = [s, s + "ل"]
    .flatMap((query) => withAlefAdded(query))
    // call through the dictionary object: passing `verbEntryLookup` itself to
    // flatMap would detach it from `dicitonary` and hand it flatMap's index
    // and array as extra arguments
    .flatMap((query) => dicitonary.verbEntryLookup(query))
    .filter(
      (e) =>
        !e.entry.c.includes("comp") && !e.entry.prp && !e.entry.separationAtP
    );
  return [...reg, ...rootSplitLookup(s)].map((verb) => ({
    type: "verb",
    aspect: "perfective",
    base: "root",
    verb,
  }));
}
|
||||
|
||||
/**
 * Maps a final character to the persons it marks in the imperative.
 *
 * @param e - the last character of the token
 * @returns both second person singular persons for ه, both second person
 *          plural persons for ئ, otherwise an empty array
 */
function getImperativeVerbEnding(e: string): T.Person[] {
  switch (e) {
    case "ه":
      return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
    case "ئ":
      return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
    default:
      return [];
  }
}
|
||||
|
||||
// TODO: could handle all sh- verbs for efficiencies sake
|
||||
/**
 * Recognises the four irregular forms ته, راته, ورته and درته.
 *
 * @param s - the token text
 * @returns a single imperfective-root, third person singular masculine result
 *          for `tlul`, `raatlul`, `wartlul` or `dartlul` respectively; an
 *          empty array for any other input
 */
function parseIrregularVerb(s: string): T.ParsedVBE[] {
  const verb =
    s === "ته"
      ? tlul
      : s === "راته"
      ? raatlul
      : s === "ورته"
      ? wartlul
      : s === "درته"
      ? dartlul
      : undefined;
  if (verb === undefined) {
    return [];
  }
  return [
    {
      type: "VB",
      info: {
        aspect: "imperfective",
        base: "root",
        type: "verb",
        verb,
      },
      person: T.Person.ThirdSingMale,
    },
  ];
}
|
||||
|
||||
// function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
|
||||
// return !e.sepOo && startsWithAleph(e.p);
|
||||
// }
|
||||
|
||||
/**
 * Tells whether `base` begins with ا or آ.
 *
 * @param base - the string to inspect
 * @returns `true` when the first character is one of the two alef forms;
 *          `false` otherwise, including for the empty string
 */
function startsWithAleph(base: string): boolean {
  const firstChar = base.charAt(0);
  return firstChar === "ا" || firstChar === "آ";
}
|
|
@ -7,35 +7,28 @@ import {
|
|||
wartlul,
|
||||
raatlul,
|
||||
} from "./irreg-verbs";
|
||||
import { parseVBE } from "./parse-vbe-new";
|
||||
import { lookup, wordQuery } from "./lookup";
|
||||
import { parseVBE } from "./parse-vbe";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
import { getPeople, removeKeys } from "./utils";
|
||||
import { testDictionary } from "./mini-test-dictionary";
|
||||
|
||||
const wahul = testDictionary.verbEntryLookup("وهل")[0];
|
||||
const leekul = testDictionary.verbEntryLookup("لیکل")[0];
|
||||
const manul = testDictionary.verbEntryLookup("منل")[0];
|
||||
const gaalul = testDictionary.verbEntryLookup("ګالل")[0];
|
||||
const rasedul = testDictionary.verbEntryLookup("رسېدل")[0];
|
||||
const leedul = testDictionary.verbEntryLookup("لیدل")[0];
|
||||
const awuxtul = testDictionary.verbEntryLookup("اوښتل")[0];
|
||||
const khorul = testDictionary.verbEntryLookup("خوړل")[0];
|
||||
const kenaastul = testDictionary.verbEntryLookup("کېناستل")[0];
|
||||
const kxenaastul = testDictionary.verbEntryLookup("کښېناستل")[0];
|
||||
const prexodul = testDictionary.verbEntryLookup("پرېښودل")[0];
|
||||
const prexowul = testDictionary.verbEntryLookup("پرېښوول")[0];
|
||||
const prexawul = testDictionary.verbEntryLookup("پرېښول")[0];
|
||||
const xodul = testDictionary.verbEntryLookup("ښودل")[0];
|
||||
const kexodul = testDictionary.verbEntryLookup("کېښودل")[0];
|
||||
const kxexodul = testDictionary.verbEntryLookup("کښېښودل")[0];
|
||||
const katul = testDictionary.verbEntryLookup("کتل")[0];
|
||||
const watul = testDictionary.verbEntryLookup("وتل")[0];
|
||||
const wurul = testDictionary.verbEntryLookup("وړل")[0];
|
||||
const akheestul = testDictionary.verbEntryLookup("اخیستل")[0];
|
||||
const alwatul = testDictionary.verbEntryLookup("الوتل")[0];
|
||||
const dartlul = testDictionary.verbEntryLookup("درتلل")[0];
|
||||
|
||||
// TODO: Prefix searching on split verbs for perfective head parsing
|
||||
const wahul = wordQuery("وهل", "verb");
|
||||
const leekul = wordQuery("لیکل", "verb");
|
||||
const manul = wordQuery("منل", "verb");
|
||||
// const gaalul = wordQuery("ګالل", "verb");
|
||||
const rasedul = wordQuery("رسېدل", "verb");
|
||||
const leedul = wordQuery("لیدل", "verb");
|
||||
const khorul = wordQuery("خوړل", "verb");
|
||||
const kenaastul = wordQuery("کېناستل", "verb");
|
||||
const prexodul = wordQuery("پرېښودل", "verb");
|
||||
const xodul = wordQuery("ښودل", "verb");
|
||||
const kexodul = wordQuery("کېښودل", "verb");
|
||||
const katul = wordQuery("کتل", "verb");
|
||||
const watul = wordQuery("وتل", "verb");
|
||||
const wurul = wordQuery("وړل", "verb");
|
||||
const akheestul = wordQuery("اخیستل", "verb");
|
||||
const alwatul = wordQuery("الوتل", "verb");
|
||||
// const dartlul = wordQuery("درتلل", "verb")
|
||||
|
||||
// TODO: azmoyul etc
|
||||
// TODO: cleaner and more thorough handling of ا separating verbs ee - wee etc
|
||||
|
@ -318,6 +311,19 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "وینم",
|
||||
output: [
|
||||
{
|
||||
stem: {
|
||||
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: leedul,
|
||||
},
|
||||
],
|
||||
},
|
||||
// TODO!! THESE COULD ALSO BE MALE
|
||||
{
|
||||
input: "لیده",
|
||||
output: [
|
||||
|
@ -358,6 +364,42 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "خوړ",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: khorul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کوت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کاته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "خلم",
|
||||
output: [
|
||||
|
@ -394,11 +436,6 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
label: "verbs with seperating perfective heads",
|
||||
cases: [
|
||||
{
|
||||
input: "الوځې",
|
||||
output: [
|
||||
|
@ -423,18 +460,6 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "لوتلم",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: alwatul,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -467,13 +492,6 @@ const tests: {
|
|||
},
|
||||
verb: kenaastul,
|
||||
},
|
||||
{
|
||||
stem: {
|
||||
persons: [T.Person.FirstSingMale, T.Person.FirstSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: kxenaastul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -489,64 +507,46 @@ const tests: {
|
|||
],
|
||||
},
|
||||
{
|
||||
input: "کېناسته",
|
||||
input: "ناست",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective"],
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: kenaastul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ناست",
|
||||
output: [kenaastul, kxenaastul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
},
|
||||
{
|
||||
input: "ناسته",
|
||||
output: [kenaastul, kxenaastul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
},
|
||||
{
|
||||
input: "پرېږدو",
|
||||
output: [prexodul, prexowul, prexawul].map((verb) => ({
|
||||
output: [
|
||||
{
|
||||
stem: {
|
||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
aspects: ["imperfective"],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
verb: prexodul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ږدو",
|
||||
output: [
|
||||
...[prexodul, prexawul, prexowul, kexodul, kxexodul].map((verb) => ({
|
||||
{
|
||||
stem: {
|
||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
...[kexodul, kxexodul].map((verb) => ({
|
||||
verb: prexodul,
|
||||
},
|
||||
{
|
||||
stem: {
|
||||
persons: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
aspects: ["imperfective"] satisfies T.Aspect[],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: kexodul,
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -571,13 +571,20 @@ const tests: {
|
|||
},
|
||||
verb: xodul,
|
||||
},
|
||||
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: prexodul,
|
||||
},
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: kexodul,
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -654,9 +661,43 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ړلم",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: getPeople(1, "sing"),
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: wurul,
|
||||
},
|
||||
{
|
||||
root: {
|
||||
persons: getPeople(1, "sing"),
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: tlul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ړ",
|
||||
output: [],
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: wurul,
|
||||
},
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"],
|
||||
},
|
||||
verb: tlul,
|
||||
},
|
||||
],
|
||||
},
|
||||
// should not match with the prefix for perfective
|
||||
{
|
||||
|
@ -672,78 +713,6 @@ const tests: {
|
|||
{
|
||||
label: "verbs with different 3rd pers sing past endings",
|
||||
cases: [
|
||||
{
|
||||
input: "خوړ",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: khorul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "خوړه",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: khorul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کوت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کاته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: katul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "واته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: watul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ووت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: watul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "رسېد",
|
||||
output: [
|
||||
|
@ -756,18 +725,6 @@ const tests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "رسېده",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: rasedul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کېناسته",
|
||||
output: [
|
||||
|
@ -809,69 +766,27 @@ const tests: {
|
|||
],
|
||||
},
|
||||
{
|
||||
input: "اوښت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective"],
|
||||
},
|
||||
verb: awuxtul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ښت",
|
||||
output: [],
|
||||
},
|
||||
{
|
||||
input: "اوښته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["imperfective"],
|
||||
},
|
||||
verb: awuxtul,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ښود",
|
||||
input: "واته",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: xodul,
|
||||
verb: watul,
|
||||
},
|
||||
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ښوده",
|
||||
input: "ووت",
|
||||
output: [
|
||||
{
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
persons: [T.Person.ThirdSingMale],
|
||||
aspects: ["imperfective", "perfective"],
|
||||
},
|
||||
verb: xodul,
|
||||
verb: watul,
|
||||
},
|
||||
...[prexodul, kexodul, kxexodul].map((verb) => ({
|
||||
root: {
|
||||
persons: [T.Person.ThirdSingMale, T.Person.ThirdSingFemale],
|
||||
aspects: ["perfective"] satisfies T.Aspect[],
|
||||
},
|
||||
verb,
|
||||
})),
|
||||
],
|
||||
},
|
||||
],
|
||||
|
@ -1056,7 +971,7 @@ tests.forEach(({ label, cases }) => {
|
|||
test(label, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const vbs = parseVBE(tokens, testDictionary).map((r) => r.body);
|
||||
const vbs = parseVBE(tokens, lookup).map((r) => r.body);
|
||||
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
|
||||
return [
|
||||
...acc,
|
|
@ -0,0 +1,354 @@
|
|||
import * as T from "../../../types";
|
||||
import { removeFVarientsFromVerb } from "../accent-and-ps-utils";
|
||||
import { isInVarients, lastVowelNotA } from "../p-text-helpers";
|
||||
import { dartlul, raatlul, tlul, wartlul } from "./irreg-verbs";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { shortVerbEndConsonant } from "./misc";
|
||||
import { parseKedul } from "./parse-kedul";
|
||||
import { getVerbEnding } from "./parse-verb-helpers";
|
||||
|
||||
// TODO: کول verbs!
|
||||
// check that aawu stuff is working
|
||||
// check oo`azmooy -
|
||||
// check څاته
|
||||
// laaRa shum etc
|
||||
// TODO: proper use of perfective with sh
|
||||
// TODO: use of raa, dar, war with sh
|
||||
// TODO: هغه لاړ
|
||||
|
||||
/**
 * Tries to read the first token as a conjugated verb.
 *
 * The short irregular forms handled by `parseIrregularVerb` win outright:
 * when one is recognised nothing else is attempted. Otherwise the results
 * of `parseKedul` are combined with every dictionary verb (as returned by
 * `lookup`) that `matchVerbs` accepts for this word.
 *
 * @param tokens - remaining input; only the first token is examined here
 * @param lookup - dictionary lookup used to fetch candidate verbs
 * @returns one parse result per reading; the readings produced here leave
 *   every token after the first still to be consumed
 */
export function parseVBE(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.ParsedVBE>[] {
  if (!tokens.length) {
    return [];
  }
  const [head, ...remaining] = tokens;
  const word = head.s;
  // wraps a parsed verb as a result that consumed exactly one token
  const consumeOne = (body: T.ParsedVBE) => ({
    tokens: remaining,
    body,
    errors: [],
  });

  const irregular = parseIrregularVerb(word);
  if (irregular.length > 0) {
    return irregular.map((body) => consumeOne(body));
  }

  const kedulStat = parseKedul(tokens);
  // the last character is what the person endings are derived from
  const lastChar = word.at(-1) ?? "";
  const people = getVerbEnding(lastChar);
  const imperativePeople = getImperativeVerbEnding(lastChar);
  // First do a rough verb lookup to grab a wide pool of possible verbs
  // (low searching complexity for fast lookup) ...
  // TODO: can optimize this to not have to look for possible stems/roots if none
  const candidates = lookup(word, "verb");
  // ... then find out which ones match exactly and how
  const matched = matchVerbs(word, candidates, people, imperativePeople);
  return [...kedulStat, ...matched.map((body) => consumeOne(body))];
}
|
||||
|
||||
/**
 * Works out which of the candidate verb `entries` the word `s` can be an
 * inflected form of, and in which way.
 *
 * Three families of readings are collected, in this order:
 *  1. stem forms (plain and imperative), for each person in `people.stem`
 *     / `imperativePeople`;
 *  2. root forms, for each person in `people.root`;
 *  3. third person singular masculine root forms, which are tried for
 *     every word regardless of `people`.
 * Within each family imperfective readings come before perfective ones.
 *
 * Entries whose `c` field contains "comp" are never matched here.
 *
 * @param s - the word being parsed
 * @param entries - candidate verbs (a rough pool from the dictionary lookup)
 * @param people - persons the word's ending allows for root and stem forms
 * @param imperativePeople - persons the ending allows for imperative forms
 * @returns every reading found; empty when nothing matches
 */
function matchVerbs(
  s: string,
  entries: T.VerbEntry[],
  people: {
    root: T.Person[];
    stem: T.Person[];
  },
  imperativePeople: T.Person[]
): T.ParsedVBE[] {
  const w: T.ParsedVBE[] = [];
  const lEnding = s.endsWith("ل");
  // a word ending in ل is compared whole; otherwise the final character
  // (presumably the personal ending) is dropped before comparing
  const base = lEnding ? s : s.slice(0, -1);
  // these bases are rejected outright
  // NOTE(review): presumably because parseKedul covers them - confirm
  if (["کېږ", "کېد", "ش", "شو", "شول"].includes(base)) {
    return [];
  }
  // the base with a leading alef restored, for verbs whose initial alef
  // is not part of the token being matched
  const baseWAa = "ا" + base;
  const isComp = (e: T.VerbDictionaryEntry) => e.c.includes("comp");
  // matches x in full, or (when the word does not end in ل) x without its
  // last character
  const matchShortOrLong = (b: string, x: string) => {
    return b === x || (!lEnding && b === x.slice(0, -1));
  };

  if (people.stem.length || imperativePeople.length) {
    const stemMatches = {
      imperfective: entries.filter(({ entry: e }) => {
        if (isComp(e)) {
          return false;
        }
        if (e.psp) {
          return e.psp === base;
        }
        if (e.c.includes("intrans.")) {
          // miniRoot is `false` for کېدل itself, which then never matches
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          return miniRoot + "ېږ" === base || miniRoot === base;
        }
        return e.p.slice(0, -1) === base;
      }),
      perfective: entries.filter(({ entry: e }) => {
        if (isComp(e)) {
          return false;
        }
        if (e.ssp) {
          // with a separation point only the part after it is compared
          const ssp = e.separationAtP ? e.ssp.slice(e.separationAtP) : e.ssp;
          return ssp === base;
        }
        if (e.psp) {
          if (hasBreakawayAlef(e) && startsWithAleph(base)) {
            return false;
          }
          if (e.separationAtP) {
            return e.psp.slice(e.separationAtP) === base;
          }
          return (!e.sepOo && baseWAa === e.psp) || base === e.psp;
        }
        if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return false;
        }
        if (e.c.includes("intrans.")) {
          // miniRoot is `false` for کېدل itself, which then never matches
          const miniRoot = e.p !== "کېدل" && e.p.slice(0, -3);
          const miniRootEg = miniRoot + "ېږ";
          return [miniRoot, miniRootEg].includes(base);
        }
        const eb = e.p.slice(0, -1);
        // FIX: the alef-restored base must be compared with the entry's
        // stem. It used to be compared with `base.slice(1)`, which can
        // never be equal, so this match was unreachable.
        return eb === base || (!e.sepOo && baseWAa === eb);
      }),
    };
    Object.entries(stemMatches).forEach(([aspect, matched]) => {
      matched.forEach((verb) => {
        people.stem.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
        imperativePeople.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "stem",
              verb: removeFVarientsFromVerb(verb),
              imperative: true,
            },
          });
        });
      });
    });
  }

  if (people.root.length) {
    const rootMatches = {
      imperfective: entries.filter(
        ({ entry: e }) => !isComp(e) && matchShortOrLong(base, e.p)
      ),
      perfective: entries.filter(({ entry: e }) => {
        if (isComp(e)) {
          return false;
        }
        if (e.separationAtP) {
          const b = e.prp || e.p;
          return matchShortOrLong(base, b.slice(e.separationAtP));
        }
        if (hasBreakawayAlef(e) && startsWithAleph(base) && !e.prp) {
          return false;
        }
        const p = e.prp || e.p;
        return matchShortOrLong(base, p) || matchShortOrLong(baseWAa, p);
      }),
    };
    Object.entries(rootMatches).forEach(([aspect, matched]) => {
      matched.forEach((verb) => {
        people.root.forEach((person) => {
          w.push({
            type: "VB",
            person,
            info: {
              type: "verb",
              aspect: aspect as T.Aspect,
              base: "root",
              verb: removeFVarientsFromVerb(verb),
            },
          });
        });
      });
    });
  }

  // third person singular masculine root forms: tried for every word
  const hamzaEnd = s.at(-1) === "ه";
  const oEnd = s.at(-1) === "و";
  const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
  const tppMatches = {
    imperfective: entries.filter(({ entry: e }) => {
      if (isComp(e)) {
        return false;
      }
      if (!e.prp && isInVarients(e.tppp, s)) {
        return true;
      }
      if (oEnd && matchShortOrLong(base, e.p)) {
        return true;
      }
      // "XX" is a sentinel that matches no entry when the word has
      // neither of the two endings
      if (
        lastVowelNotA(e.g.slice(0, -2)) &&
        (hamzaEnd ? base : abruptEnd ? s : "XX") === e.p.slice(0, -1)
      ) {
        return true;
      }
      // TODO: if check for modified aaXu thing!
      return false;
    }),
    perfective: entries.filter(({ entry: e }) => {
      if (isComp(e)) {
        return false;
      }
      if (e.separationAtP) {
        const b = e.prp || e.p;
        const bRest = b.slice(e.separationAtP);
        if (bRest === "شول") {
          return false;
        }
        if (abruptEnd) {
          return s === bRest.slice(0, -1);
        }
        if (hamzaEnd) {
          return base === bRest.slice(0, -1);
        }
        if (oEnd) {
          return [bRest, bRest.slice(0, -1)].includes(base);
        }
        return false;
      }
      if (!e.prp) {
        if (hasBreakawayAlef(e) && startsWithAleph(base)) {
          return false;
        }
        if (oEnd) {
          if ([e.p, e.p.slice(0, -1)].includes(base)) {
            return true;
          }
        } else if ((hamzaEnd || abruptEnd) && lastVowelNotA(e.g.slice(0, -2))) {
          const b = hamzaEnd ? base : s;
          if (b === e.p.slice(0, -1)) {
            return true;
          }
        }
      }
      // finally compare against the entry's listed tppp variants, with
      // and without a restored leading alef
      return isInVarients(e.tppp, s) || isInVarients(e.tppp, "ا" + s);
    }),
  };
  Object.entries(tppMatches).forEach(([aspect, matched]) => {
    matched.forEach((verb) => {
      w.push({
        type: "VB",
        person: T.Person.ThirdSingMale,
        info: {
          type: "verb",
          aspect: aspect as T.Aspect,
          base: "root",
          verb: removeFVarientsFromVerb(verb),
        },
      });
    });
  });
  return w;
}
|
||||
|
||||
/**
 * Maps the final character of a word to the persons it can mark on an
 * imperative form.
 *
 * @param e - the last character of the word
 * @returns both second person singulars for "ه", both second person
 *   plurals for "ئ", otherwise an empty list
 */
function getImperativeVerbEnding(e: string): T.Person[] {
  switch (e) {
    case "ه":
      return [T.Person.SecondSingMale, T.Person.SecondSingFemale];
    case "ئ":
      return [T.Person.SecondPlurMale, T.Person.SecondPlurFemale];
    default:
      return [];
  }
}
|
||||
|
||||
// TODO: could handle all sh- verbs here for efficiency's sake
|
||||
/**
 * Recognises the four short forms ته / راته / ورته / درته and returns
 * each as an imperfective third person singular masculine root form of
 * `tlul`, `raatlul`, `wartlul` or `dartlul` respectively.
 *
 * @param s - the word to check
 * @returns a single reading for one of the four forms, otherwise nothing
 */
function parseIrregularVerb(s: string): T.ParsedVBE[] {
  const isShortForm =
    s === "ته" || s === "راته" || s === "ورته" || s === "درته";
  if (!isShortForm) {
    return [];
  }
  // the prefix decides which of the verbs this is
  const verb =
    s === "راته"
      ? raatlul
      : s === "ورته"
      ? wartlul
      : s === "درته"
      ? dartlul
      : tlul;
  return [
    {
      type: "VB",
      info: {
        aspect: "imperfective",
        base: "root",
        type: "verb",
        verb,
      },
      person: T.Person.ThirdSingMale,
    },
  ];
}
|
||||
|
||||
/**
 * Tells whether a verb entry begins with an alef (ا or آ) and is not
 * flagged `sepOo`.
 *
 * @param e - the dictionary entry of the verb
 */
function hasBreakawayAlef(e: T.VerbDictionaryEntry): boolean {
  if (e.sepOo) {
    return false;
  }
  const initial = e.p.charAt(0);
  return initial === "ا" || initial === "آ";
}
|
||||
|
||||
/**
 * Tells whether the given string begins with an alef (ا or آ).
 *
 * @param base - the string to inspect; an empty string yields `false`
 */
function startsWithAleph(base: string): boolean {
  const initial = base.charAt(0);
  return initial === "ا" || initial === "آ";
}
|
|
@ -1,46 +1,46 @@
|
|||
import * as T from "../../../types";
|
||||
// import { returnParseResult } from "./utils";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { returnParseResult } from "./utils";
|
||||
|
||||
export function parseVBP(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI
|
||||
lookup: LookupFunction
|
||||
): T.ParseResult<T.ParsedVBP>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
return [];
|
||||
// return [
|
||||
// ...parsePastPart(tokens, lookup),
|
||||
// // ...parseAbility(tokens),
|
||||
// ];
|
||||
return [
|
||||
...parsePastPart(tokens, lookup),
|
||||
// ...parseAbility(tokens),
|
||||
];
|
||||
}
|
||||
|
||||
// function parsePastPart(
|
||||
// tokens: Readonly<T.Token[]>,
|
||||
// dicitonary: T.DictionaryAPI,
|
||||
// ): T.ParseResult<T.ParsedVBP>[] {
|
||||
// const [{ s }, ...rest] = tokens;
|
||||
// const ending: "ی" | "ي" | "ې" = s.at(-1) as "ی" | "ي" | "ې";
|
||||
// if (!ending || !["ی", "ي", "ې"].includes(ending)) {
|
||||
// return [];
|
||||
// }
|
||||
// // TODO: ALSO HANDLE SHORT FORMS
|
||||
// const wOutEnd = s.slice(0, -1);
|
||||
// const matches = lookup(wOutEnd, "pPart");
|
||||
// const genNums = endingGenderNum(ending);
|
||||
// return matches
|
||||
// .flatMap<T.ParsedVBP>((verb) =>
|
||||
// genNums.map<T.ParsedVBP>((genNum) => ({
|
||||
// type: "VB",
|
||||
// info: {
|
||||
// type: "ppart",
|
||||
// verb,
|
||||
// genNum,
|
||||
// },
|
||||
// }))
|
||||
// )
|
||||
// .flatMap((m) => returnParseResult(rest, m));
|
||||
// }
|
||||
/**
 * Tries to read the first token as a past participle.
 *
 * The word must end in ی, ي or ې. The rest of the word is looked up as a
 * past participle ("pPart"), and every verb found is paired with each
 * gender/number that `endingGenderNum` gives for that ending.
 *
 * @param tokens - remaining input; must not be empty
 * @param lookup - dictionary lookup used to find the participle's verb
 * @returns one result per (verb, gender/number) pair, each leaving the
 *   tokens after the first still to be consumed
 */
function parsePastPart(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<T.ParsedVBP>[] {
  const [{ s }, ...rest] = tokens;
  const ending = s.at(-1);
  if (ending !== "ی" && ending !== "ي" && ending !== "ې") {
    return [];
  }
  // TODO: ALSO HANDLE SHORT FORMS
  const verbs = lookup(s.slice(0, -1), "pPart");
  const genNums = endingGenderNum(ending);
  const participles: T.ParsedVBP[] = [];
  for (const verb of verbs) {
    for (const genNum of genNums) {
      participles.push({
        type: "VB",
        info: {
          type: "ppart",
          verb,
          genNum,
        },
      });
    }
  }
  return participles.flatMap((m) => returnParseResult(rest, m));
}
|
||||
|
||||
// function parseAbility(
|
||||
// tokens: Readonly<T.Token[]>,
|
||||
|
@ -70,33 +70,33 @@ export function parseVBP(
|
|||
// .flatMap((m) => returnParseResult(rest, m));
|
||||
// }
|
||||
|
||||
// function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
|
||||
// if (ending === "ی") {
|
||||
// return [
|
||||
// {
|
||||
// gender: "masc",
|
||||
// number: "singular",
|
||||
// },
|
||||
// ];
|
||||
// }
|
||||
// if (ending === "ي") {
|
||||
// return [
|
||||
// {
|
||||
// gender: "masc",
|
||||
// number: "plural",
|
||||
// },
|
||||
// ];
|
||||
// }
|
||||
// // if (ending === "ې") {
|
||||
// return [
|
||||
// {
|
||||
// gender: "fem",
|
||||
// number: "singular",
|
||||
// },
|
||||
// {
|
||||
// gender: "fem",
|
||||
// number: "plural",
|
||||
// },
|
||||
// ];
|
||||
// // }
|
||||
// }
|
||||
/**
 * Gives the gender/number combinations that a participle ending can mark.
 *
 * @param ending - the final character of the participle
 * @returns masculine singular for ی, masculine plural for ي, and both
 *   feminine singular and feminine plural for ې
 */
function endingGenderNum(ending: "ی" | "ي" | "ې"): T.GenderNumber[] {
  switch (ending) {
    case "ی":
      return [{ gender: "masc", number: "singular" }];
    case "ي":
      return [{ gender: "masc", number: "plural" }];
    default:
      // the only remaining ending is ې
      return [
        { gender: "fem", number: "singular" },
        { gender: "fem", number: "plural" },
      ];
  }
}
|
||||
|
|
|
@ -4,10 +4,6 @@ export function isKedulStatEntry(v: T.VerbDictionaryEntry): boolean {
|
|||
return v.p === "کېدل" && v.e === "to become _____";
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the possible people for stem and root endings
|
||||
* but DOES NOT INCLUDE short third pers masc sing
|
||||
*/
|
||||
export function getVerbEnding(e: string): {
|
||||
stem: T.Person[];
|
||||
root: T.Person[];
|
||||
|
@ -38,11 +34,7 @@ export function getVerbEnding(e: string): {
|
|||
};
|
||||
} else if (e === "و") {
|
||||
return {
|
||||
root: [
|
||||
T.Person.FirstPlurMale,
|
||||
T.Person.FirstPlurFemale,
|
||||
T.Person.ThirdSingMale,
|
||||
],
|
||||
root: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
stem: [T.Person.FirstPlurMale, T.Person.FirstPlurFemale],
|
||||
};
|
||||
} else if (e === "ئ") {
|
||||
|
|
|
@ -24,6 +24,7 @@ import {
|
|||
import { parseBlocks } from "./parse-blocks";
|
||||
import { makePronounSelection } from "../phrase-building/make-selections";
|
||||
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
||||
import { LookupFunction } from "./lookup";
|
||||
import { isSecondPerson, personToGenNum } from "../misc-helpers";
|
||||
import { equals, zip } from "rambda";
|
||||
import { isImperativeTense } from "../type-predicates";
|
||||
|
@ -40,12 +41,12 @@ import { isImperativeTense } from "../type-predicates";
|
|||
|
||||
export function parseVP(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
dictionary: T.DictionaryAPI
|
||||
lookup: LookupFunction
|
||||
): T.ParseResult<T.VPSelectionComplete>[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const blocks = parseBlocks(tokens, dictionary, [], []);
|
||||
const blocks = parseBlocks(tokens, lookup, [], []);
|
||||
return bindParseResult(
|
||||
createPossesivePossibilities(blocks),
|
||||
(tokens, { blocks, kids }) => {
|
||||
|
@ -891,7 +892,7 @@ function getMiniPronouns(kids: T.ParsedKid[]): T.ParsedMiniPronoun[] {
|
|||
|
||||
function getPeopleFromMiniPronouns(kids: T.ParsedKid[]): T.Person[] {
|
||||
const p: T.Person[] = [];
|
||||
for (const k of kids) {
|
||||
for (let k of kids) {
|
||||
if (k === "me") {
|
||||
p.push(T.Person.FirstSingMale);
|
||||
p.push(T.Person.FirstSingFemale);
|
||||
|
|
|
@ -163,38 +163,6 @@ export function parserCombMany<R>(parser: Parser<R>): Parser<R[]> {
|
|||
return r;
|
||||
}
|
||||
|
||||
/**
 * Sequences two parsers: the second runs on whatever tokens the first
 * left over, and both bodies are returned together as a pair.
 *
 * @param parsers - the two parsers, in the order they should run
 * @returns a parser producing `[a, b]` for every way both succeed in turn
 */
export function parserCombSucc2<A, B>(
  parsers: [Parser<A>, Parser<B>]
): Parser<[A, B]> {
  return (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B]>[] => {
    const [first, second] = parsers;
    const firstResults = first(tokens, dictionary);
    return bindParseResult(firstResults, (afterFirst, a) => {
      const secondResults = second(afterFirst, dictionary);
      return bindParseResult(secondResults, (afterSecond, b) =>
        returnParseResult(afterSecond, [a, b])
      );
    });
  };
}
|
||||
|
||||
/**
 * Sequences three parsers: each one runs on the tokens left over by the
 * one before it, and the three bodies are returned together as a triple.
 *
 * @param parsers - the three parsers, in the order they should run
 * @returns a parser producing `[a, b, c]` for every way all three succeed
 *   in turn
 */
export function parserCombSucc3<A, B, C>(
  parsers: [Parser<A>, Parser<B>, Parser<C>]
): Parser<[A, B, C]> {
  return (
    tokens: Readonly<T.Token[]>,
    dictionary: T.DictionaryAPI
  ): T.ParseResult<[A, B, C]>[] => {
    const [first, second, third] = parsers;
    const firstResults = first(tokens, dictionary);
    return bindParseResult(firstResults, (afterFirst, a) => {
      const secondResults = second(afterFirst, dictionary);
      return bindParseResult(secondResults, (afterSecond, b) => {
        const thirdResults = third(afterSecond, dictionary);
        return bindParseResult(thirdResults, (afterThird, c) =>
          returnParseResult(afterThird, [a, b, c])
        );
      });
    });
  };
}
|
||||
|
||||
export function isCompleteResult<C extends object>(
|
||||
r: T.ParseResult<C>
|
||||
): boolean {
|
||||
|
|
|
@ -214,7 +214,7 @@ function addArticlesAndAdjs(
|
|||
? np.determiners.determiners
|
||||
// @ts-ignore - weird, ts is not recognizing this as rendered
|
||||
.map((x) => (moreThanOneDet ? `(${x.e})` : x.e))
|
||||
.join(" ") + " "
|
||||
.join(" ")
|
||||
: "";
|
||||
const detsWithoutNoun = np.determiners && !np.determiners.withNoun;
|
||||
return `${np.determiners ? "" : articles}${determiners}${
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
import * as T from "../../../types";
|
||||
import { compileVP } from "./compile";
|
||||
import { renderVP } from "./render-vp";
|
||||
|
||||
/**
 * Drops verb phrase selections that compile to the same output as an
 * earlier selection in the list (as judged by `isDuplicate`).
 *
 * The first of each group of duplicates is kept and the original order is
 * preserved.
 *
 * @param vs - the selections to de-duplicate
 * @returns the selections that are not duplicates of an earlier one
 */
export function removeRedundantVPSs(
  vs: T.VPSelectionComplete[]
): T.VPSelectionComplete[] {
  const versions = vs.map((x) => compileVP(renderVP(x), x.form));
  const toRemove = new Set<number>();
  versions.forEach((a, i) => {
    // an item already marked as a duplicate does not mark others
    if (toRemove.has(i)) {
      return;
    }
    versions.forEach((b, j) => {
      if (j > i && isDuplicate(a, b)) {
        toRemove.add(j);
      }
    });
  });
  // filter instead of rebuilding the accumulator array on every step
  return vs.filter((_, i) => !toRemove.has(i));
}
|
||||
|
||||
/**
 * Decides whether two compiled phrases are duplicates: their `ps` must
 * serialise identically and their `e` lists must match item by item once
 * gender glosses are stripped.
 *
 * A phrase without an `e` list is never a duplicate of anything.
 *
 * @param a - the first compiled phrase
 * @param b - the second compiled phrase
 * @returns `true` when the two are duplicates
 */
function isDuplicate(
  a: {
    ps: T.SingleOrLengthOpts<T.PsString[]>;
    e?: string[];
  },
  b: { ps: T.SingleOrLengthOpts<T.PsString[]>; e?: string[] }
): boolean {
  if (!a.e || !b.e) {
    return false;
  }
  if (a.e.length !== b.e.length) {
    return false;
  }
  // Compare `ps` once, outside the per-gloss loop. Inside `every` it was
  // re-serialised on each iteration and skipped entirely for empty `e`
  // lists, which made phrases with different `ps` look like duplicates.
  if (JSON.stringify(a.ps) !== JSON.stringify(b.ps)) {
    return false;
  }
  const otherGlosses = b.e;
  return a.e.every(
    (x, i) => removeGenderGloss(x) === removeGenderGloss(otherGlosses[i] ?? "")
  );
}
|
||||
|
||||
/**
 * Strips the gender glosses "(m.)", "(f.)", "(m. pl.)" and "(f. pl.)"
 * from a string. Surrounding whitespace is left as it is.
 *
 * @param s - the text to clean
 * @returns the text without those glosses
 */
function removeGenderGloss(s: string): string {
  // one pass with an optional " pl." part; `replace` with the g flag
  // avoids the ES2021-only `replaceAll`
  return s.replace(/\([mf]\.(?: pl\.)?\)/g, "");
}
|
||||
|
||||
/**
 * Collects the positions of every element that satisfies a predicate.
 *
 * @param arr - the array to scan
 * @param f - predicate applied to each element
 * @returns the matching indices in ascending order
 */
function findAllIndices<N>(arr: N[], f: (x: N) => boolean): number[] {
  return arr.reduce<number[]>((found, item, position) => {
    if (f(item)) {
      found.push(position);
    }
    return found;
  }, []);
}
|
|
@ -178,7 +178,7 @@ function renderDeterminer({
|
|||
? number === "plural"
|
||||
? { p: "دغو", f: "dágho" }
|
||||
: gender === "masc"
|
||||
? { p: "دغه", f: "dágha" }
|
||||
? { p: "دغه", f: "dághu" }
|
||||
: { p: "دغې", f: "dághe" }
|
||||
: { p: "دغه", f: "dágha" };
|
||||
return {
|
||||
|
@ -196,7 +196,7 @@ function renderDeterminer({
|
|||
? number === "plural"
|
||||
? { p: "هغو", f: "hágho" }
|
||||
: gender === "masc"
|
||||
? { p: "هغه", f: "hágha" }
|
||||
? { p: "هغه", f: "hághu" }
|
||||
: { p: "هغې", f: "hághe" }
|
||||
: { p: "هغه", f: "hágha" };
|
||||
return {
|
||||
|
|
|
@ -1259,13 +1259,8 @@ export type DictionaryAPI = {
|
|||
queryP: (p: string) => DictionaryEntry[];
|
||||
adjLookup: (p: string) => AdjectiveEntry[];
|
||||
nounLookup: (p: string) => NounEntry[];
|
||||
otherLookup: (
|
||||
key: keyof DictionaryEntry,
|
||||
p: string,
|
||||
regex?: boolean
|
||||
) => DictionaryEntry[];
|
||||
otherLookup: (key: keyof DictionaryEntry, p: string) => DictionaryEntry[];
|
||||
specialPluralLookup: (p: string) => NounEntry[];
|
||||
verbEntryLookup: (p: string) => VerbEntry[];
|
||||
};
|
||||
|
||||
export type Parser<R> = (
|
||||
|
|
|
@ -18,5 +18,5 @@
|
|||
"noUnusedParameters": true,
|
||||
"noFallthroughCasesInSwitch": true
|
||||
},
|
||||
"include": ["vite.config.ts", "get-mini-dict-and-split-verbs.ts"]
|
||||
"include": ["vite.config.ts", "get-mini-dict.ts"]
|
||||
}
|
||||
|
|
|
@ -34,7 +34,6 @@ export const entries: T.DictionaryEntry["ts"][] = [
|
|||
1527812908, // مېلمه
|
||||
1575924767041, // شپون
|
||||
1527815333, // نتور
|
||||
1527812881, // ماشوم
|
||||
|
||||
// fem nouns
|
||||
1527811877, // دوستي
|
||||
|
@ -51,28 +50,4 @@ export const entries: T.DictionaryEntry["ts"][] = [
|
|||
1589023873660, // فتح - fatha
|
||||
1527814342, // نفع - nafa
|
||||
1527815329, // تجربه
|
||||
|
||||
// verbs
|
||||
1527815399, // وهل
|
||||
1527817298, // اخیستل
|
||||
1527812275, // لیدل
|
||||
1527812856, // لیکل
|
||||
1527815085, // منل
|
||||
1527817661, // ګالل
|
||||
1527813573, // رسېدل
|
||||
1527812790, // خوړل
|
||||
1527812759, // کېناستل
|
||||
1527812758, // کښېناستل
|
||||
1527815190, // پرېښودل
|
||||
1527811293, // ښودل
|
||||
1527812284, // کېښودل
|
||||
1527812751, // کتل
|
||||
1527823376, // وتل
|
||||
1527816865, // وړل
|
||||
1527813473, // الوتل
|
||||
1585228551150, // درتلل
|
||||
1527817577, // کښېښودل
|
||||
1527814012, // اوښتل
|
||||
1577390597820, // پرېښوول
|
||||
1527815191, // پرېښول
|
||||
];
|
||||
|
|
Loading…
Reference in New Issue