starting on parser

This commit is contained in:
adueck 2023-07-30 15:54:27 +04:00
parent 742a1acf61
commit c38e0645d3
13 changed files with 1281 additions and 12 deletions

View File

@ -60,6 +60,11 @@ This is published on [a private NPM proxy registry](https://npm.lingdocs.com) as
The Pashto Verb Explorer website can be used to view and play with the verb conjugations and various components.
```
cd src/components
yarn install
cd ../lib
yarn install
cd ../..
yarn install
yarn start
```

View File

@ -19,6 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
import { Hider } from "./components/library"; import { Hider } from "./components/library";
import InflectionDemo from "./demo-components/InflectionDemo"; import InflectionDemo from "./demo-components/InflectionDemo";
import SpellingDemo from "./demo-components/SpellingDemo"; import SpellingDemo from "./demo-components/SpellingDemo";
import ParserDemo from "./demo-components/ParserDemo";
function App() { function App() {
const [showingTextOptions, setShowingTextOptions] = useStickyState<boolean>( const [showingTextOptions, setShowingTextOptions] = useStickyState<boolean>(
@ -132,6 +133,14 @@ function App() {
> >
<SpellingDemo opts={textOptions} onChange={setTextOptions} /> <SpellingDemo opts={textOptions} onChange={setTextOptions} />
</Hider> </Hider>
<Hider
label="Parser (IN PROGRESS)"
hLevel={3}
showing={showing === "parser"}
handleChange={() => handleHiderClick("parser")}
>
<ParserDemo opts={textOptions} />
</Hider>
</div> </div>
</main> </main>
<Modal <Modal

View File

@ -0,0 +1,42 @@
import { useState } from "react";
import * as T from "../types";
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { lookup } from "../lib/src/parsing/lookup";
import { tokenizer } from "../lib/src/parsing/tokenizer";
/**
 * Demo widget for the work-in-progress phrase parser.
 *
 * Every change to the text box is tokenized and parsed immediately; the
 * parse result is shown below the input as pretty-printed JSON. The input is
 * flagged invalid while the parser returns no results for non-empty text.
 *
 * @param opts - text options passed in by the host page (not read here)
 */
function ParserDemo({ opts }: { opts: T.TextOptions }) {
  const [text, setText] = useState<string>("");
  const [result, setResult] = useState<string>("");

  const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const value = e.target.value;
    // parse before touching state so a throwing parser leaves the UI as it was
    const rendered = value
      ? JSON.stringify(parsePhrase(tokenizer(value), lookup), null, " ")
      : "";
    setText(value);
    setResult(rendered);
  };

  // an empty box gets no validation styling at all
  let validity = "";
  if (text) {
    validity = result === "[]" ? "is-invalid" : "is-valid";
  }

  return (
    <div className="mt-3" style={{ marginBottom: "1000px" }}>
      <p>Type an adjective or noun (w or without adjs) to parse it</p>
      <div className="form-group mb-2">
        <input
          dir="rtl"
          className={`form-control ${validity}`}
          type="text"
          value={text}
          onChange={handleChange}
        />
      </div>
      <samp>
        <pre>{result}</pre>
      </samp>
    </div>
  );
}
export default ParserDemo;

View File

@ -0,0 +1,199 @@
import * as T from "../../../types";
import {
isPattern1Entry,
isPattern2Entry,
isPattern3Entry,
isPattern,
isPattern5Entry,
isPattern4Entry,
} from "../type-predicates";
import { equals } from "rambda";
/**
 * Builds the dictionary searches that could explain the word `s` as a
 * (possibly inflected) adjective or noun.
 *
 * Each returned item pairs one dictionary search (on `p`, `infap` or `infbp`)
 * with every reading that search would justify: the inflection slots and
 * genders the surface form could stand for, plus a predicate that an entry
 * found by the search must satisfy for that reading to apply.
 *
 * Readings that share an identical search are grouped under a single item, in
 * the order they were generated.
 *
 * @param s - the word as written
 * @param includeNouns - also generate the searches that only apply to nouns
 * @returns the grouped searches, in generation order
 */
export function getInflectionQueries(
  s: string,
  includeNouns: boolean
): {
  search: Partial<T.DictionaryEntry>;
  details: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean;
  }[];
}[] {
  type Inflectable = T.AdjectiveEntry | T.NounEntry;

  const grouped: ReturnType<typeof getInflectionQueries> = [];

  // Registers one reading, merging it into an earlier item with an equal search.
  const add = (
    search: Partial<T.DictionaryEntry>,
    inflection: (0 | 1 | 2)[],
    gender: T.Gender[],
    predicate: (e: Inflectable) => boolean
  ): void => {
    const details = { inflection, gender, predicate };
    const sameSearch = grouped.find((g) => equals(g.search, search));
    if (sameSearch) {
      sameSearch.details.push(details);
    } else {
      grouped.push({ search, details: [details] });
    }
  };

  const pattern2or3 = (e: Inflectable): boolean =>
    isPattern2Entry(e) || isPattern3Entry(e);
  const pattern4or5 = (e: Inflectable): boolean =>
    isPattern4Entry(e) || isPattern5Entry(e);

  // the word minus its final letter
  const stem = s.slice(0, -1);

  // readings where the word is spelled exactly like a stored form
  add({ p: s }, [0, 1, 2], ["masc", "fem"], isPattern(0));
  add({ p: s }, [0, 1], ["masc"], isPattern1Entry);
  add(
    { p: s },
    [0],
    ["masc"],
    (e) =>
      isPattern2Entry(e) ||
      isPattern3Entry(e) ||
      isPattern4Entry(e) ||
      isPattern5Entry(e)
  );
  add({ infap: s }, [1], ["masc"], pattern4or5);

  // readings that depend on the final letter
  switch (s.slice(-1)) {
    case "ه":
      add({ p: stem }, [0], ["fem"], isPattern1Entry);
      if (includeNouns) {
        add({ p: s }, [0], ["fem"], isPattern1Entry);
      }
      add({ infbp: stem }, [0], ["fem"], pattern4or5);
      break;
    case "ې":
      add({ p: stem }, [1], ["fem"], isPattern1Entry);
      if (includeNouns) {
        add({ p: stem + "ه" }, [1], ["fem"], isPattern1Entry);
      }
      add({ infbp: stem }, [1], ["fem"], pattern4or5);
      add({ p: stem + "ی" }, [0, 1], ["fem"], isPattern2Entry);
      break;
    case "ي":
      add({ p: stem + "ی" }, [1], ["masc"], pattern2or3);
      break;
    case "و":
      add(
        { p: stem },
        [2],
        ["masc", "fem"],
        (e) => isPattern1Entry(e) || isPattern5Entry(e)
      );
      add({ infbp: stem }, [2], ["masc", "fem"], pattern4or5);
      add({ p: stem + "ی" }, [2], ["masc", "fem"], pattern2or3);
      if (s.endsWith("یو")) {
        add({ p: s.slice(0, -2) + "ی" }, [2], ["masc", "fem"], pattern2or3);
      }
      break;
    case "ۍ":
      add({ p: stem + "ی" }, [0, 1], ["fem"], isPattern3Entry);
      break;
    default:
      break;
  }

  return grouped;
}

View File

@ -0,0 +1,8 @@
import nounsAdjs from "../../../nouns-adjs";
import * as T from "../../../types";
/**
 * Looks up entries in the bundled nouns/adjectives list.
 *
 * Only the first field of `s` is used as the search criterion: an entry
 * matches when its value for that field is strictly equal to the given one.
 *
 * @param s - search object; its first key/value pair is the criterion
 * @returns all matching entries, or an empty array when `s` has no fields
 */
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
  const criterion = Object.entries(s)[0];
  // An empty search object has no criterion; destructuring `undefined`
  // would throw, so report "no matches" instead.
  if (criterion === undefined) {
    return [];
  }
  const [key, value] = criterion;
  // @ts-ignore
  return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
}

View File

@ -0,0 +1,320 @@
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseAdjective } from "./parse-adjective";
import { tokenizer } from "./tokenizer";
// Dictionary entries used as expected values by the test table below.
// They are written with `as T.AdjectiveEntry`, so the compiler does not
// verify that every field of the entry type is present.

// Used by the "pattern 1" cases.
const ghut = {
ts: 1527812625,
i: 9561,
p: "غټ",
f: "ghuT, ghaT",
g: "ghuT,ghaT",
e: "big, fat",
r: 4,
c: "adj.",
} as T.AdjectiveEntry;
// Used by the "pattern 2" cases.
const sturey = {
ts: 1527815306,
i: 7933,
p: "ستړی",
f: "stúRay",
g: "stuRay",
e: "tired",
r: 4,
c: "adj. / adv.",
} as T.AdjectiveEntry;
// Used by the "pattern 3" cases.
const narey = {
ts: 1527819320,
i: 14027,
p: "نری",
f: "naráy",
g: "naray",
e: "thin; mild; high (pitch)",
r: 4,
c: "adj.",
} as T.AdjectiveEntry;
// Used by the "pattern 4" cases; carries the extra infap/infbp stem fields.
const zor = {
ts: 1527815451,
i: 7570,
p: "زوړ",
f: "zoR",
g: "zoR",
e: "old",
r: 4,
c: "adj.",
infap: "زاړه",
infaf: "zaaRu",
infbp: "زړ",
infbf: "zaR",
} as T.AdjectiveEntry;
// Used by the "pattern 5" cases; carries the extra infap/infbp stem fields.
const sheen = {
ts: 1527815265,
i: 8979,
p: "شین",
f: "sheen",
g: "sheen",
e: "green, blue; unripe, immature; bright, sunny",
r: 4,
c: "adj.",
infap: "شنه",
infaf: "shnu",
infbp: "شن",
infbf: "shn",
} as T.AdjectiveEntry;
/** One expected reading of a parsed adjective. */
type ExpectedAdjective = {
  inflection: (0 | 1 | 2)[];
  gender: T.Gender[];
  selection: T.AdjectiveSelection;
};

/** Builds the expected reading of `entry` for the given inflections and genders. */
const reading = (
  entry: T.AdjectiveEntry,
  inflection: (0 | 1 | 2)[],
  gender: T.Gender[]
): ExpectedAdjective => ({
  selection: makeAdjectiveSelection(entry),
  inflection,
  gender,
});

// Table of inputs and the exact readings parseAdjective should give for them,
// grouped by inflection pattern (one jest test per category).
const tests: {
  category: string;
  cases: {
    input: string;
    output: ExpectedAdjective[];
  }[];
}[] = [
  {
    category: "pattern 1",
    cases: [
      { input: "غټ", output: [reading(ghut, [0, 1], ["masc"])] },
      { input: "غټه", output: [reading(ghut, [0], ["fem"])] },
      { input: "غټې", output: [reading(ghut, [1], ["fem"])] },
      { input: "غټو", output: [reading(ghut, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 2",
    cases: [
      { input: "ستړی", output: [reading(sturey, [0], ["masc"])] },
      { input: "ستړې", output: [reading(sturey, [0, 1], ["fem"])] },
      { input: "ستړو", output: [reading(sturey, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 3",
    cases: [
      { input: "نری", output: [reading(narey, [0], ["masc"])] },
      { input: "نري", output: [reading(narey, [1], ["masc"])] },
      { input: "نرۍ", output: [reading(narey, [0, 1], ["fem"])] },
      { input: "نرو", output: [reading(narey, [2], ["masc", "fem"])] },
      { input: "نریو", output: [reading(narey, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 4",
    cases: [
      { input: "زوړ", output: [reading(zor, [0], ["masc"])] },
      { input: "زاړه", output: [reading(zor, [1], ["masc"])] },
      { input: "زړه", output: [reading(zor, [0], ["fem"])] },
      { input: "زړې", output: [reading(zor, [1], ["fem"])] },
      { input: "زړو", output: [reading(zor, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 5",
    cases: [
      { input: "شین", output: [reading(sheen, [0], ["masc"])] },
      {
        // one written form, two readings
        input: "شنه",
        output: [reading(sheen, [1], ["masc"]), reading(sheen, [0], ["fem"])],
      },
      { input: "شنو", output: [reading(sheen, [2], ["masc", "fem"])] },
    ],
  },
];
// Registers one jest test per category; every case of the category is
// checked inside that single test.
describe("parsing adjectives", () => {
  for (const { category, cases } of tests) {
    // eslint-disable-next-line jest/valid-title
    test(category, () => {
      for (const { input, output } of cases) {
        const parsed = parseAdjective(tokenizer(input), lookup);
        // only the reading matters here, not the leftover tokens
        const possibilities = parsed.map(([, found]) => found);
        expect(possibilities).toEqual(output);
      }
    });
  }
});

View File

@ -0,0 +1,42 @@
import * as T from "../../../types";
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import { isAdjectiveEntry } from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
/**
 * Tries to read the first token as an adjective.
 *
 * Every dictionary search suggested by `getInflectionQueries` is run through
 * `lookup`; each adjective entry that satisfies a reading's predicate yields
 * one result.
 *
 * @param tokens - the tokens still to be parsed
 * @param lookup - dictionary search function
 * @returns one `[remaining tokens, reading]` pair per possible reading; empty
 *   when there are no tokens or nothing matches
 */
export function parseAdjective(
  tokens: Readonly<string[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): [
  string[],
  {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    selection: T.AdjectiveSelection;
  }
][] {
  type Found = ReturnType<typeof parseAdjective>[number];

  if (tokens.length === 0) {
    return [];
  }
  const [head, ...remaining] = tokens;
  return getInflectionQueries(head, false).flatMap(({ search, details }) => {
    // the search may also return non-adjectives; drop those up front
    const candidates = lookup(search).filter(isAdjectiveEntry);
    return details.flatMap((reading) =>
      candidates.filter(reading.predicate).map(
        (entry): Found => [
          remaining,
          {
            selection: makeAdjectiveSelection(entry),
            inflection: reading.inflection,
            gender: reading.gender,
          },
        ]
      )
    );
  });
}

View File

@ -0,0 +1,291 @@
import { makeNounSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseNoun } from "./parse-noun";
// Dictionary entries used as expected values by the noun test table.
// They are written with `as T.NounEntry`, so the compiler does not verify
// that every field of the entry type is present.
// Only `kor` and `daktar` appear in the active cases; the other fixtures are
// referenced only by the commented-out cases further down.
const sarey = {
ts: 1527815251,
i: 8163,
p: "سړی",
f: "saRáy",
g: "saRay",
e: "man",
r: 4,
c: "n. m.",
ec: "man",
ep: "men",
} as T.NounEntry;
const dostee = {
ts: 1527811877,
i: 6627,
p: "دوستي",
f: "dostee",
g: "dostee",
e: "friendship",
r: 3,
c: "n. f.",
} as T.NounEntry;
const wreejze = {
ts: 1586551382412,
i: 14985,
p: "وریژې",
f: "wreejze",
g: "wreejze",
e: "rice",
r: 4,
c: "n. f. pl.",
} as T.NounEntry;
const xudza = {
ts: 1527812797,
i: 9018,
p: "ښځه",
f: "xúdza",
g: "xudza",
e: "woman, wife",
r: 4,
c: "n. f.",
ec: "woman",
ep: "women",
} as T.NounEntry;
const kursuy = {
ts: 1527814203,
i: 10573,
p: "کرسۍ",
f: "kUrsúy",
g: "kUrsuy",
e: "chair, seat, stool",
r: 3,
c: "n. f.",
} as T.NounEntry;
// Used by the active "pattern 1 nouns" cases.
const kor = {
ts: 1527812828,
i: 11022,
p: "کور",
f: "kor",
g: "kor",
e: "house, home",
r: 4,
c: "n. m.",
} as T.NounEntry;
// Used by the active "pattern 1 nouns" cases, with the gender overridden to "fem".
const daktar = {
ts: 1527816747,
i: 6709,
p: "ډاکټر",
f: "DaakTar",
g: "DaakTar",
e: "doctor",
r: 4,
c: "n. m. anim. unisex",
} as T.NounEntry;
// TODO: test unisex ملګری etc
/** One input word together with the readings expected for it. */
type NounCase = {
  input: string;
  output: {
    inflected: boolean;
    selection: T.NounSelection;
  }[];
};

/** Builds a case that expects exactly one reading for `input`. */
const singleReading = (
  input: string,
  inflected: boolean,
  selection: T.NounSelection
): NounCase => ({
  input,
  output: [{ inflected, selection }],
});

// Table of inputs and expected readings, grouped by category
// (one jest test per category).
const tests: {
  category: string;
  cases: NounCase[];
}[] = [
  {
    category: "pattern 1 nouns",
    cases: [
      singleReading("کور", false, makeNounSelection(kor, undefined)),
      singleReading("کورو", true, {
        ...makeNounSelection(kor, undefined),
        number: "plural",
      }),
      singleReading("ډاکټره", false, {
        ...makeNounSelection(daktar, undefined),
        gender: "fem",
      }),
      singleReading("ډاکټرې", true, {
        ...makeNounSelection(daktar, undefined),
        gender: "fem",
      }),
    ],
  },
];
// {
// input: "سړی",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(sarey, undefined),
// },
// ],
// },
// {
// input: "سړي",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(sarey, undefined),
// },
// ],
// },
// {
// input: "سړو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(sarey, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "سړیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(sarey, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "دوستي",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(dostee, undefined),
// },
// ],
// },
// {
// input: "دوستۍ",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(dostee, undefined),
// },
// ],
// },
// {
// input: "دوستیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(dostee, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "وریژې",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(wreejze, undefined),
// },
// ],
// },
// {
// input: "ښځه",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(xudza, undefined),
// },
// ],
// },
// {
// input: "ښځې",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(xudza, undefined),
// },
// ],
// },
// {
// input: "ښځو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(xudza, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "کرسۍ",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(kursuy, undefined),
// },
// {
// inflected: true,
// selection: makeNounSelection(kursuy, undefined),
// },
// ],
// },
// {
// input: "کرسیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(kursuy, undefined),
// number: "plural",
// },
// },
// ],
// },
// ];
// Registers one jest test per category; every case of the category is
// checked inside that single test.
//
// NOTE(review): this call looks out of sync with parseNoun as declared in
// parse-noun.ts, which takes (tokens: string[], lookup, adjectives) and
// returns [remaining tokens, { inflection, selection }] pairs. Here it is
// given a bare string and two arguments, and the expected `output` items use
// `inflected: boolean` rather than `inflection`. Presumably the test predates
// the current signature -- confirm and update the call and expected values.
describe("parsing nouns", () => {
tests.forEach(({ category, cases }) => {
// eslint-disable-next-line jest/valid-title
test(category, () => {
cases.forEach(({ input, output }) => {
expect(parseNoun(input, lookup)).toEqual(output);
});
});
});
});

View File

@ -0,0 +1,97 @@
import * as T from "../../../types";
import { makeNounSelection } from "../phrase-building/make-selections";
import {
isFemNounEntry,
isMascNounEntry,
isNounEntry,
isUnisexNounEntry,
} from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { parseAdjective } from "./parse-adjective";
/**
 * Tries to read a noun, optionally preceded by adjectives, from the front of
 * `tokens`.
 *
 * Two things are attempted for the first token:
 *  - read it as an adjective and recurse on the remaining tokens with that
 *    adjective added to `adjectives`;
 *  - read it as a noun, keeping only readings for which every adjective
 *    collected so far is compatible (see `adjsMatch`).
 *
 * @param tokens - the tokens still to be parsed
 * @param lookup - dictionary search function
 * @param adjectives - adjective readings already consumed in front of the
 *   noun (pass `[]` when starting)
 * @returns one `[remaining tokens, reading]` pair per possible reading, the
 *   readings that consumed a further adjective first; empty when there are no
 *   tokens or nothing matches
 */
export function parseNoun(
  tokens: Readonly<string[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  adjectives: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    selection: T.AdjectiveSelection;
  }[]
): [string[], { inflection: (0 | 1 | 2)[]; selection: T.NounSelection }][] {
  if (tokens.length === 0) {
    return [];
  }
  // readings in which the first token is one more adjective before the noun
  const adjRes = parseAdjective(tokens, lookup);
  const withAdj = adjRes.flatMap(([tkns, adj]) =>
    parseNoun(tkns, lookup, [...adjectives, adj])
  );
  // readings in which the first token is the noun itself
  const w: ReturnType<typeof parseNoun> = [];
  const [first, ...rest] = tokens;
  const searches = getInflectionQueries(first, true);
  searches.forEach(({ search, details }) => {
    const nounEntries = lookup(search).filter(isNounEntry);
    details.forEach((deets) => {
      const fittingEntries = nounEntries.filter(deets.predicate);
      fittingEntries.forEach((entry) => {
        const unisex = isUnisexNounEntry(entry);
        // Genders this entry can take for this reading: a unisex entry may be
        // any gender the reading allows, any other entry only its own gender
        // (and only if the reading allows it).
        let genders: T.Gender[] = [];
        if (unisex) {
          genders = deets.gender;
        } else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
          genders = ["masc"];
        } else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
          genders = ["fem"];
        }
        genders.forEach((gender) => {
          if (!adjsMatch(adjectives, gender, deets.inflection)) {
            return;
          }
          w.push([
            rest,
            {
              inflection: deets.inflection,
              selection: {
                ...makeNounSelection(entry, undefined),
                // only unisex entries get their gender overridden
                ...(unisex ? { gender } : {}),
                adjectives: adjectives.map((a) => a.selection),
              },
            },
          ]);
        });
      });
    });
  });
  return [...withAdj, ...w];
}
/**
 * Checks that every collected adjective is compatible with a noun reading.
 *
 * An adjective is compatible when its possible genders include `gender` and
 * it shares at least one inflection with `inflection`.
 *
 * @param adjectives - adjective readings collected in front of the noun
 * @param gender - gender of the noun reading
 * @param inflection - inflections the noun reading allows
 * @returns `true` when all adjectives are compatible (also when there are none)
 */
function adjsMatch(
  adjectives: Parameters<typeof parseNoun>[2],
  gender: T.Gender,
  inflection: (0 | 1 | 2)[]
): boolean {
  for (const adj of adjectives) {
    const genderAgrees = adj.gender.includes(gender);
    const inflectionAgrees =
      genderAgrees && adj.inflection.some((i) => inflection.includes(i));
    if (!inflectionAgrees) {
      return false;
    }
  }
  return true;
}

View File

@ -0,0 +1,18 @@
import { parseAdjective } from "./parse-adjective";
import * as T from "../../../types";
import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun";
/**
 * Parses a whole tokenized phrase as an adjective, a pronoun, or a noun
 * (with any leading adjectives).
 *
 * All three parsers are run on the same tokens; only readings that consumed
 * every token are kept.
 *
 * @param s - the tokens of the phrase
 * @param lookup - dictionary search function
 * @returns the complete readings: adjectives first, then pronouns, then nouns
 */
export function parsePhrase(
  s: string[],
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): any[] {
  const candidates = [
    ...parseAdjective(s, lookup),
    ...parsePronoun(s),
    ...parseNoun(s, lookup, []),
  ];
  const complete: any[] = [];
  for (const [leftover, parsed] of candidates) {
    // a reading counts only if no tokens are left over
    if (leftover.length === 0) {
      complete.push(parsed);
    }
  }
  return complete;
}

View File

@ -0,0 +1,235 @@
import * as T from "../../../types";
/**
 * Tries to read the first token as a pronoun.
 *
 * The token is compared against a fixed list of written forms. A form that
 * can stand for more than one person gives one reading per person, and
 * `inflected` lists whether the form can be the plain form (`false`), the
 * inflected form (`true`), or both.
 *
 * @param tokens - the tokens still to be parsed
 * @returns one `[remaining tokens, reading]` pair per possible reading; empty
 *   when the first token is not a listed pronoun or there are no tokens
 */
export function parsePronoun(tokens: Readonly<string[]>): [
  string[],
  {
    inflected: boolean[];
    selection: T.PronounSelection;
  }
][] {
  type Found = ReturnType<typeof parsePronoun>[number];
  type Reading = Found[1];

  const [first, ...rest] = tokens;

  // Builds a single pronoun reading.
  const reading = (
    inflected: boolean[],
    person: T.PronounSelection["person"],
    distance: T.PronounSelection["distance"]
  ): Reading => ({
    inflected,
    selection: {
      type: "pronoun",
      person,
      distance,
    },
  });

  // All readings of the first token, in the order they are reported.
  const readingsOf = (form: string | undefined): Reading[] => {
    switch (form) {
      case "زه":
        return [reading([false], 0, "far"), reading([false], 1, "far")];
      case "ته":
        return [reading([false], 2, "far"), reading([false], 3, "far")];
      case "هغه":
        return [reading([false, true], 4, "far"), reading([false], 5, "far")];
      case "هغې":
        return [reading([true], T.Person.ThirdSingFemale, "far")];
      case "دی":
        return [reading([false], T.Person.ThirdSingMale, "near")];
      case "ده":
        return [reading([true], T.Person.ThirdSingMale, "near")];
      case "دا":
        return [reading([false], T.Person.ThirdSingFemale, "near")];
      case "دې":
        return [reading([true], T.Person.ThirdSingFemale, "near")];
      case "مونږ":
      case "موږ":
        return [
          reading([false, true], T.Person.FirstPlurMale, "far"),
          reading([false, true], T.Person.FirstPlurFemale, "far"),
        ];
      case "تاسو":
      case "تاسې":
        return [
          reading([false, true], T.Person.SecondPlurMale, "far"),
          reading([false, true], T.Person.SecondPlurFemale, "far"),
        ];
      case "هغوي":
      case "هغوی":
        return [
          reading([false, true], T.Person.ThirdPlurMale, "far"),
          reading([false, true], T.Person.ThirdPlurFemale, "far"),
        ];
      case "دوي":
      case "دوی":
        return [
          reading([false, true], T.Person.ThirdPlurMale, "near"),
          reading([false, true], T.Person.ThirdPlurFemale, "near"),
        ];
      default:
        return [];
    }
  };

  return readingsOf(first).map((r): Found => [rest, r]);
}

View File

@ -0,0 +1,3 @@
/**
 * Splits a phrase into word tokens.
 *
 * Words are separated by any run of whitespace (spaces, tabs, newlines).
 * Leading and trailing whitespace is ignored and no empty tokens are
 * produced, so an empty or all-whitespace string gives `[]`.
 *
 * @param s - the text to split
 * @returns the words of `s`, in order
 */
export function tokenizer(s: string): string[] {
  // Splitting on a single " " would yield "" tokens for doubled spaces and
  // [""] for empty input; split on whitespace runs and drop empties instead.
  return s.split(/\s+/).filter((token) => token.length > 0);
}

View File

@ -121,9 +121,7 @@ function compileVPPs(
kids, kids,
!!blankOut?.ba !!blankOut?.ba
); );
return removeDuplicates( return combineIntoText(blocksWKids, subjectPerson, blankOut);
combineIntoText(blocksWKids, subjectPerson, blankOut)
);
} }
function compileEPPs( function compileEPPs(
@ -217,15 +215,17 @@ export function combineIntoText(
subjectPerson: T.Person, subjectPerson: T.Person,
blankOut?: BlankoutOptions blankOut?: BlankoutOptions
): T.PsString[] { ): T.PsString[] {
return piecesWVars return removeDuplicates(
.map((pieces) => { piecesWVars
const psVarsBlocks = getPsVarsBlocks( .map((pieces) => {
applyBlankOut(pieces, blankOut), const psVarsBlocks = getPsVarsBlocks(
subjectPerson applyBlankOut(pieces, blankOut),
); subjectPerson
return concatAll(monoidPsStringWVars)(psVarsBlocks); );
}) return concatAll(monoidPsStringWVars)(psVarsBlocks);
.flat(); })
.flat()
);
} }
function getPsVarsBlocks( function getPsVarsBlocks(