starting on parser
This commit is contained in:
parent
742a1acf61
commit
c38e0645d3
|
@ -60,6 +60,11 @@ This is published on [a private NPM proxy registry](https://npm.lingdocs.com) as
|
|||
The Pashto Verb Explorer website can be used to view and play with the verb conjugations and various components.
|
||||
|
||||
```
|
||||
cd src/components
|
||||
yarn install
|
||||
cd ../lib
|
||||
yarn install
|
||||
cd ../..
|
||||
yarn install
|
||||
yarn start
|
||||
```
|
||||
|
|
|
@ -19,6 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
|
|||
import { Hider } from "./components/library";
|
||||
import InflectionDemo from "./demo-components/InflectionDemo";
|
||||
import SpellingDemo from "./demo-components/SpellingDemo";
|
||||
import ParserDemo from "./demo-components/ParserDemo";
|
||||
|
||||
function App() {
|
||||
const [showingTextOptions, setShowingTextOptions] = useStickyState<boolean>(
|
||||
|
@ -132,6 +133,14 @@ function App() {
|
|||
>
|
||||
<SpellingDemo opts={textOptions} onChange={setTextOptions} />
|
||||
</Hider>
|
||||
<Hider
|
||||
label="Parser (IN PROGRESS)"
|
||||
hLevel={3}
|
||||
showing={showing === "parser"}
|
||||
handleChange={() => handleHiderClick("parser")}
|
||||
>
|
||||
<ParserDemo opts={textOptions} />
|
||||
</Hider>
|
||||
</div>
|
||||
</main>
|
||||
<Modal
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
import { useState } from "react";
|
||||
import * as T from "../types";
|
||||
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
|
||||
import { lookup } from "../lib/src/parsing/lookup";
|
||||
import { tokenizer } from "../lib/src/parsing/tokenizer";
|
||||
|
||||
function ParserDemo({ opts }: { opts: T.TextOptions }) {
|
||||
const [text, setText] = useState<string>("");
|
||||
const [result, setResult] = useState<string>("");
|
||||
function handleChange(e: React.ChangeEvent<HTMLInputElement>) {
|
||||
const value = e.target.value;
|
||||
if (!value) {
|
||||
setText("");
|
||||
setResult("");
|
||||
return;
|
||||
}
|
||||
const r = parsePhrase(tokenizer(value), lookup);
|
||||
setText(value);
|
||||
setResult(JSON.stringify(r, null, " "));
|
||||
}
|
||||
return (
|
||||
<div className="mt-3" style={{ marginBottom: "1000px" }}>
|
||||
<p>Type an adjective or noun (w or without adjs) to parse it</p>
|
||||
<div className="form-group mb-2">
|
||||
<input
|
||||
dir="rtl"
|
||||
className={`form-control ${
|
||||
text && result === "[]" ? "is-invalid" : text ? "is-valid" : ""
|
||||
}`}
|
||||
type="text"
|
||||
value={text}
|
||||
onChange={handleChange}
|
||||
/>
|
||||
</div>
|
||||
<samp>
|
||||
<pre>{result}</pre>
|
||||
</samp>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default ParserDemo;
|
|
@ -0,0 +1,199 @@
|
|||
import * as T from "../../../types";
|
||||
import {
|
||||
isPattern1Entry,
|
||||
isPattern2Entry,
|
||||
isPattern3Entry,
|
||||
isPattern,
|
||||
isPattern5Entry,
|
||||
isPattern4Entry,
|
||||
} from "../type-predicates";
|
||||
import { equals } from "rambda";
|
||||
|
||||
export function getInflectionQueries(
|
||||
s: string,
|
||||
includeNouns: boolean
|
||||
): {
|
||||
search: Partial<T.DictionaryEntry>;
|
||||
details: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean;
|
||||
}[];
|
||||
}[] {
|
||||
const queries: {
|
||||
search: Partial<T.DictionaryEntry>;
|
||||
details: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
predicate: (e: T.NounEntry | T.AdjectiveEntry) => boolean;
|
||||
};
|
||||
}[] = [];
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0, 1, 2],
|
||||
gender: ["masc", "fem"],
|
||||
predicate: isPattern(0),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0, 1],
|
||||
gender: ["masc"],
|
||||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["masc"],
|
||||
predicate: (e) =>
|
||||
isPattern2Entry(e) ||
|
||||
isPattern3Entry(e) ||
|
||||
isPattern4Entry(e) ||
|
||||
isPattern5Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { infap: s },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e),
|
||||
},
|
||||
});
|
||||
if (s.endsWith("ه")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
if (includeNouns) {
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
}
|
||||
queries.push({
|
||||
search: { infbp: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e),
|
||||
},
|
||||
});
|
||||
} else if (s.endsWith("ې")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
if (includeNouns) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ه" },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
}
|
||||
queries.push({
|
||||
search: { infbp: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["fem"],
|
||||
predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ی" },
|
||||
details: {
|
||||
inflection: [0, 1],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern2Entry,
|
||||
},
|
||||
});
|
||||
} else if (s.endsWith("ي")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ی" },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e),
|
||||
},
|
||||
});
|
||||
} else if (s.endsWith("و")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
predicate: (e) => isPattern1Entry(e) || isPattern5Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { infbp: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ی" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e),
|
||||
},
|
||||
});
|
||||
if (s.endsWith("یو")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -2) + "ی" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
} else if (s.endsWith("ۍ")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ی" },
|
||||
details: {
|
||||
inflection: [0, 1],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern3Entry,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const coallated: ReturnType<typeof getInflectionQueries> = [];
|
||||
|
||||
for (let q of queries) {
|
||||
const existing = coallated.find((x) => equals(x.search, q.search));
|
||||
if (existing) {
|
||||
existing.details.push(q.details);
|
||||
} else {
|
||||
coallated.push({
|
||||
search: q.search,
|
||||
details: [q.details],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return coallated;
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
import nounsAdjs from "../../../nouns-adjs";
|
||||
import * as T from "../../../types";
|
||||
|
||||
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
|
||||
const [key, value] = Object.entries(s)[0];
|
||||
// @ts-ignore
|
||||
return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
|
||||
}
|
|
@ -0,0 +1,320 @@
|
|||
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
|
||||
import * as T from "../../../types";
|
||||
import { lookup } from "./lookup";
|
||||
import { parseAdjective } from "./parse-adjective";
|
||||
import { tokenizer } from "./tokenizer";
|
||||
|
||||
const ghut = {
|
||||
ts: 1527812625,
|
||||
i: 9561,
|
||||
p: "غټ",
|
||||
f: "ghuT, ghaT",
|
||||
g: "ghuT,ghaT",
|
||||
e: "big, fat",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
} as T.AdjectiveEntry;
|
||||
const sturey = {
|
||||
ts: 1527815306,
|
||||
i: 7933,
|
||||
p: "ستړی",
|
||||
f: "stúRay",
|
||||
g: "stuRay",
|
||||
e: "tired",
|
||||
r: 4,
|
||||
c: "adj. / adv.",
|
||||
} as T.AdjectiveEntry;
|
||||
const narey = {
|
||||
ts: 1527819320,
|
||||
i: 14027,
|
||||
p: "نری",
|
||||
f: "naráy",
|
||||
g: "naray",
|
||||
e: "thin; mild; high (pitch)",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
} as T.AdjectiveEntry;
|
||||
const zor = {
|
||||
ts: 1527815451,
|
||||
i: 7570,
|
||||
p: "زوړ",
|
||||
f: "zoR",
|
||||
g: "zoR",
|
||||
e: "old",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
infap: "زاړه",
|
||||
infaf: "zaaRu",
|
||||
infbp: "زړ",
|
||||
infbf: "zaR",
|
||||
} as T.AdjectiveEntry;
|
||||
const sheen = {
|
||||
ts: 1527815265,
|
||||
i: 8979,
|
||||
p: "شین",
|
||||
f: "sheen",
|
||||
g: "sheen",
|
||||
e: "green, blue; unripe, immature; bright, sunny",
|
||||
r: 4,
|
||||
c: "adj.",
|
||||
infap: "شنه",
|
||||
infaf: "shnu",
|
||||
infbp: "شن",
|
||||
infbf: "shn",
|
||||
} as T.AdjectiveEntry;
|
||||
|
||||
const tests: {
|
||||
category: string;
|
||||
cases: {
|
||||
input: string;
|
||||
output: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
selection: T.AdjectiveSelection;
|
||||
}[];
|
||||
}[];
|
||||
}[] = [
|
||||
{
|
||||
category: "pattern 1",
|
||||
cases: [
|
||||
{
|
||||
input: "غټ",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(ghut),
|
||||
inflection: [0, 1],
|
||||
gender: ["masc"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "غټه",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(ghut),
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "غټې",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(ghut),
|
||||
inflection: [1],
|
||||
gender: ["fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "غټو",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(ghut),
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
category: "pattern 2",
|
||||
cases: [
|
||||
{
|
||||
input: "ستړی",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(sturey),
|
||||
inflection: [0],
|
||||
gender: ["masc"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ستړې",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(sturey),
|
||||
inflection: [0, 1],
|
||||
gender: ["fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ستړو",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(sturey),
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
category: "pattern 3",
|
||||
cases: [
|
||||
{
|
||||
input: "نری",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(narey),
|
||||
inflection: [0],
|
||||
gender: ["masc"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "نري",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(narey),
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "نرۍ",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(narey),
|
||||
inflection: [0, 1],
|
||||
gender: ["fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "نرو",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(narey),
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "نریو",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(narey),
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
category: "pattern 4",
|
||||
cases: [
|
||||
{
|
||||
input: "زوړ",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(zor),
|
||||
inflection: [0],
|
||||
gender: ["masc"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "زاړه",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(zor),
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "زړه",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(zor),
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "زړې",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(zor),
|
||||
inflection: [1],
|
||||
gender: ["fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "زړو",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(zor),
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
category: "pattern 5",
|
||||
cases: [
|
||||
{
|
||||
input: "شین",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(sheen),
|
||||
inflection: [0],
|
||||
gender: ["masc"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "شنه",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(sheen),
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
},
|
||||
{
|
||||
selection: makeAdjectiveSelection(sheen),
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "شنو",
|
||||
output: [
|
||||
{
|
||||
selection: makeAdjectiveSelection(sheen),
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
describe("parsing adjectives", () => {
|
||||
tests.forEach(({ category, cases }) => {
|
||||
// eslint-disable-next-line jest/valid-title
|
||||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const possibilities = parseAdjective(tokens, lookup).map((x) => x[1]);
|
||||
expect(possibilities).toEqual(output);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
|
@ -0,0 +1,42 @@
|
|||
import * as T from "../../../types";
|
||||
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
|
||||
import { isAdjectiveEntry } from "../type-predicates";
|
||||
import { getInflectionQueries } from "./inflection-query";
|
||||
|
||||
export function parseAdjective(
|
||||
tokens: Readonly<string[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
|
||||
): [
|
||||
string[],
|
||||
{
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
selection: T.AdjectiveSelection;
|
||||
}
|
||||
][] {
|
||||
const w: ReturnType<typeof parseAdjective> = [];
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const queries = getInflectionQueries(first, false);
|
||||
queries.forEach(({ search, details }) => {
|
||||
const wideMatches = lookup(search).filter(isAdjectiveEntry);
|
||||
details.forEach((deets) => {
|
||||
const matches = wideMatches.filter(deets.predicate);
|
||||
matches.forEach((m) => {
|
||||
const selection = makeAdjectiveSelection(m);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
selection,
|
||||
inflection: deets.inflection,
|
||||
gender: deets.gender,
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
return w;
|
||||
}
|
|
@ -0,0 +1,291 @@
|
|||
import { makeNounSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseNoun } from "./parse-noun";
import { tokenizer } from "./tokenizer";
|
||||
|
||||
const sarey = {
|
||||
ts: 1527815251,
|
||||
i: 8163,
|
||||
p: "سړی",
|
||||
f: "saRáy",
|
||||
g: "saRay",
|
||||
e: "man",
|
||||
r: 4,
|
||||
c: "n. m.",
|
||||
ec: "man",
|
||||
ep: "men",
|
||||
} as T.NounEntry;
|
||||
const dostee = {
|
||||
ts: 1527811877,
|
||||
i: 6627,
|
||||
p: "دوستي",
|
||||
f: "dostee",
|
||||
g: "dostee",
|
||||
e: "friendship",
|
||||
r: 3,
|
||||
c: "n. f.",
|
||||
} as T.NounEntry;
|
||||
const wreejze = {
|
||||
ts: 1586551382412,
|
||||
i: 14985,
|
||||
p: "وریژې",
|
||||
f: "wreejze",
|
||||
g: "wreejze",
|
||||
e: "rice",
|
||||
r: 4,
|
||||
c: "n. f. pl.",
|
||||
} as T.NounEntry;
|
||||
const xudza = {
|
||||
ts: 1527812797,
|
||||
i: 9018,
|
||||
p: "ښځه",
|
||||
f: "xúdza",
|
||||
g: "xudza",
|
||||
e: "woman, wife",
|
||||
r: 4,
|
||||
c: "n. f.",
|
||||
ec: "woman",
|
||||
ep: "women",
|
||||
} as T.NounEntry;
|
||||
const kursuy = {
|
||||
ts: 1527814203,
|
||||
i: 10573,
|
||||
p: "کرسۍ",
|
||||
f: "kUrsúy",
|
||||
g: "kUrsuy",
|
||||
e: "chair, seat, stool",
|
||||
r: 3,
|
||||
c: "n. f.",
|
||||
} as T.NounEntry;
|
||||
const kor = {
|
||||
ts: 1527812828,
|
||||
i: 11022,
|
||||
p: "کور",
|
||||
f: "kor",
|
||||
g: "kor",
|
||||
e: "house, home",
|
||||
r: 4,
|
||||
c: "n. m.",
|
||||
} as T.NounEntry;
|
||||
const daktar = {
|
||||
ts: 1527816747,
|
||||
i: 6709,
|
||||
p: "ډاکټر",
|
||||
f: "DaakTar",
|
||||
g: "DaakTar",
|
||||
e: "doctor",
|
||||
r: 4,
|
||||
c: "n. m. anim. unisex",
|
||||
} as T.NounEntry;
|
||||
|
||||
// TODO: test unisex ملګری etc
|
||||
|
||||
const tests: {
|
||||
category: string;
|
||||
cases: {
|
||||
input: string;
|
||||
output: {
|
||||
inflected: boolean;
|
||||
selection: T.NounSelection;
|
||||
}[];
|
||||
}[];
|
||||
}[] = [
|
||||
{
|
||||
category: "pattern 1 nouns",
|
||||
cases: [
|
||||
{
|
||||
input: "کور",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: makeNounSelection(kor, undefined),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کورو",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeNounSelection(kor, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ډاکټره",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: {
|
||||
...makeNounSelection(daktar, undefined),
|
||||
gender: "fem",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ډاکټرې",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeNounSelection(daktar, undefined),
|
||||
gender: "fem",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
// {
|
||||
// input: "سړی",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(sarey, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "سړي",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(sarey, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "سړو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(sarey, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "سړیو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(sarey, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "دوستي",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(dostee, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "دوستۍ",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(dostee, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "دوستیو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(dostee, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "وریژې",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(wreejze, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "ښځه",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(xudza, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "ښځې",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(xudza, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "ښځو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(xudza, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "کرسۍ",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(kursuy, undefined),
|
||||
// },
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(kursuy, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "کرسیو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(kursuy, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// ];
|
||||
|
||||
describe("parsing nouns", () => {
|
||||
tests.forEach(({ category, cases }) => {
|
||||
// eslint-disable-next-line jest/valid-title
|
||||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
expect(parseNoun(input, lookup)).toEqual(output);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
|
@ -0,0 +1,97 @@
|
|||
import * as T from "../../../types";
|
||||
import { makeNounSelection } from "../phrase-building/make-selections";
|
||||
import {
|
||||
isFemNounEntry,
|
||||
isMascNounEntry,
|
||||
isNounEntry,
|
||||
isUnisexNounEntry,
|
||||
} from "../type-predicates";
|
||||
import { getInflectionQueries } from "./inflection-query";
|
||||
import { parseAdjective } from "./parse-adjective";
|
||||
|
||||
export function parseNoun(
|
||||
tokens: Readonly<string[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||
adjectives: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
selection: T.AdjectiveSelection;
|
||||
}[]
|
||||
): [string[], { inflection: (0 | 1 | 2)[]; selection: T.NounSelection }][] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const adjRes = parseAdjective(tokens, lookup);
|
||||
const withAdj = adjRes.flatMap(([tkns, adj]) =>
|
||||
parseNoun(tkns, lookup, [...adjectives, adj])
|
||||
);
|
||||
const w: ReturnType<typeof parseNoun> = [];
|
||||
const [first, ...rest] = tokens;
|
||||
|
||||
const searches = getInflectionQueries(first, true);
|
||||
searches.forEach(({ search, details }) => {
|
||||
const nounEntries = lookup(search).filter(isNounEntry);
|
||||
details.forEach((deets) => {
|
||||
const fittingEntries = nounEntries.filter(deets.predicate);
|
||||
fittingEntries.forEach((entry) => {
|
||||
console.log({ entry, deets });
|
||||
if (isUnisexNounEntry(entry)) {
|
||||
deets.gender.forEach((gender) => {
|
||||
if (adjsMatch(adjectives, gender, deets.inflection)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflection: deets.inflection,
|
||||
selection: {
|
||||
...makeNounSelection(entry, undefined),
|
||||
gender,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
});
|
||||
} else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
|
||||
if (adjsMatch(adjectives, "masc", deets.inflection)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflection: deets.inflection,
|
||||
selection: {
|
||||
...makeNounSelection(entry, undefined),
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
} else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
|
||||
if (adjsMatch(adjectives, "fem", deets.inflection)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflection: deets.inflection,
|
||||
selection: {
|
||||
...makeNounSelection(entry, undefined),
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
return [...withAdj, ...w];
|
||||
}
|
||||
|
||||
function adjsMatch(
|
||||
adjectives: Parameters<typeof parseNoun>[2],
|
||||
gender: T.Gender,
|
||||
inflection: (0 | 1 | 2)[]
|
||||
): boolean {
|
||||
return adjectives.every(
|
||||
(adj) =>
|
||||
adj.gender.includes(gender) &&
|
||||
adj.inflection.some((i) => inflection.includes(i))
|
||||
);
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
import { parseAdjective } from "./parse-adjective";
|
||||
import * as T from "../../../types";
|
||||
import { parsePronoun } from "./parse-pronoun";
|
||||
import { parseNoun } from "./parse-noun";
|
||||
|
||||
export function parsePhrase(
|
||||
s: string[],
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
|
||||
): any[] {
|
||||
const adjsRes = parseAdjective(s, lookup);
|
||||
const prnsRes = parsePronoun(s);
|
||||
const nounsRes = parseNoun(s, lookup, []);
|
||||
|
||||
const correct = [...adjsRes, ...prnsRes, ...nounsRes]
|
||||
.filter(([tkns]) => tkns.length === 0)
|
||||
.map((x) => x[1]);
|
||||
return correct;
|
||||
}
|
|
@ -0,0 +1,235 @@
|
|||
import * as T from "../../../types";
|
||||
|
||||
export function parsePronoun(tokens: Readonly<string[]>): [
|
||||
string[],
|
||||
{
|
||||
inflected: boolean[];
|
||||
selection: T.PronounSelection;
|
||||
}
|
||||
][] {
|
||||
const [first, ...rest] = tokens;
|
||||
const w: ReturnType<typeof parsePronoun> = [];
|
||||
if (first === "زه") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 0,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 1,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "ته") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 2,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 3,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "هغه") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 4,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: 5,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "هغې") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdSingFemale,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "دی") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdSingMale,
|
||||
distance: "near",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "ده") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdSingMale,
|
||||
distance: "near",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "دا") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdSingFemale,
|
||||
distance: "near",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "دې") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdSingFemale,
|
||||
distance: "near",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (["مونږ", "موږ"].includes(first)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.FirstPlurMale,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.FirstPlurFemale,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (["تاسو", "تاسې"].includes(first)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.SecondPlurMale,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.SecondPlurFemale,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (["هغوي", "هغوی"].includes(first)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdPlurMale,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdPlurFemale,
|
||||
distance: "far",
|
||||
},
|
||||
},
|
||||
]);
|
||||
} else if (["دوي", "دوی"].includes(first)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdPlurMale,
|
||||
distance: "near",
|
||||
},
|
||||
},
|
||||
]);
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
inflected: [false, true],
|
||||
selection: {
|
||||
type: "pronoun",
|
||||
person: T.Person.ThirdPlurFemale,
|
||||
distance: "near",
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
return w;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
export function tokenizer(s: string): string[] {
|
||||
return s.trim().split(" ");
|
||||
}
|
|
@ -121,9 +121,7 @@ function compileVPPs(
|
|||
kids,
|
||||
!!blankOut?.ba
|
||||
);
|
||||
return removeDuplicates(
|
||||
combineIntoText(blocksWKids, subjectPerson, blankOut)
|
||||
);
|
||||
return combineIntoText(blocksWKids, subjectPerson, blankOut);
|
||||
}
|
||||
|
||||
function compileEPPs(
|
||||
|
@ -217,15 +215,17 @@ export function combineIntoText(
|
|||
subjectPerson: T.Person,
|
||||
blankOut?: BlankoutOptions
|
||||
): T.PsString[] {
|
||||
return piecesWVars
|
||||
.map((pieces) => {
|
||||
const psVarsBlocks = getPsVarsBlocks(
|
||||
applyBlankOut(pieces, blankOut),
|
||||
subjectPerson
|
||||
);
|
||||
return concatAll(monoidPsStringWVars)(psVarsBlocks);
|
||||
})
|
||||
.flat();
|
||||
return removeDuplicates(
|
||||
piecesWVars
|
||||
.map((pieces) => {
|
||||
const psVarsBlocks = getPsVarsBlocks(
|
||||
applyBlankOut(pieces, blankOut),
|
||||
subjectPerson
|
||||
);
|
||||
return concatAll(monoidPsStringWVars)(psVarsBlocks);
|
||||
})
|
||||
.flat()
|
||||
);
|
||||
}
|
||||
|
||||
function getPsVarsBlocks(
|
||||
|
|
Loading…
Reference in New Issue