starting on parser

This commit is contained in:
adueck 2023-07-30 15:54:27 +04:00
parent 742a1acf61
commit c38e0645d3
13 changed files with 1281 additions and 12 deletions

View File

@ -60,6 +60,11 @@ This is published on [a private NPM proxy registry](https://npm.lingdocs.com) as
The Pashto Verb Explorer website can be used to view and play with the verb conjugations and various components.
```
cd src/components
yarn install
cd ../lib
yarn install
cd ../..
yarn install
yarn start
```

View File

@ -19,6 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder";
import { Hider } from "./components/library";
import InflectionDemo from "./demo-components/InflectionDemo";
import SpellingDemo from "./demo-components/SpellingDemo";
import ParserDemo from "./demo-components/ParserDemo";
function App() {
const [showingTextOptions, setShowingTextOptions] = useStickyState<boolean>(
@ -132,6 +133,14 @@ function App() {
>
<SpellingDemo opts={textOptions} onChange={setTextOptions} />
</Hider>
<Hider
label="Parser (IN PROGRESS)"
hLevel={3}
showing={showing === "parser"}
handleChange={() => handleHiderClick("parser")}
>
<ParserDemo opts={textOptions} />
</Hider>
</div>
</main>
<Modal

View File

@ -0,0 +1,42 @@
import { useState } from "react";
import * as T from "../types";
import { parsePhrase } from "../lib/src/parsing/parse-phrase";
import { lookup } from "../lib/src/parsing/lookup";
import { tokenizer } from "../lib/src/parsing/tokenizer";
/**
 * Demo widget for the work-in-progress parser: every change to the text box
 * is tokenized, parsed with `parsePhrase`, and the parse results are shown
 * below the input as formatted JSON.
 *
 * @param opts text options handed in by the host app (not read by this component yet)
 */
function ParserDemo({ opts }: { opts: T.TextOptions }) {
  const [text, setText] = useState<string>("");
  const [result, setResult] = useState<string>("");

  const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const typed = e.target.value;
    if (typed) {
      // parse first, then commit both pieces of state together
      const parsed = parsePhrase(tokenizer(typed), lookup);
      setText(typed);
      setResult(JSON.stringify(parsed, null, " "));
      return;
    }
    // cleared input: reset everything
    setText("");
    setResult("");
  };

  // An empty result list serializes to "[]", which marks the input as unparseable.
  let validity = "";
  if (text) {
    validity = result === "[]" ? "is-invalid" : "is-valid";
  }

  return (
    <div className="mt-3" style={{ marginBottom: "1000px" }}>
      <p>Type an adjective or noun (w or without adjs) to parse it</p>
      <div className="form-group mb-2">
        <input
          dir="rtl"
          className={`form-control ${validity}`}
          type="text"
          value={text}
          onChange={handleChange}
        />
      </div>
      <samp>
        <pre>{result}</pre>
      </samp>
    </div>
  );
}
export default ParserDemo;

View File

@ -0,0 +1,199 @@
import * as T from "../../../types";
import {
isPattern1Entry,
isPattern2Entry,
isPattern3Entry,
isPattern,
isPattern5Entry,
isPattern4Entry,
} from "../type-predicates";
import { equals } from "rambda";
/**
 * Works out which dictionary searches could lead to the word form `s`.
 *
 * Every candidate pairs a dictionary search (on `p`, `infap` or `infbp`) with
 * the inflection numbers and genders that `s` would represent if an entry
 * found by that search also satisfies `predicate`. Candidates sharing an equal
 * search object are grouped, so each distinct search appears once, in order of
 * first appearance, with its details in the order they were generated.
 *
 * @param s the word form to analyse
 * @param includeNouns when true, adds the extra searches that keep a final
 *   "ه" on the dictionary form (only added for words ending in "ه" or "ې")
 * @returns the grouped searches together with their possible readings
 */
export function getInflectionQueries(
  s: string,
  includeNouns: boolean
): {
  search: Partial<T.DictionaryEntry>;
  details: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean;
  }[];
}[] {
  type Predicate = (e: T.AdjectiveEntry | T.NounEntry) => boolean;

  const grouped: ReturnType<typeof getInflectionQueries> = [];

  // Registers one candidate, merging it into the group with an equal search.
  const add = (
    search: Partial<T.DictionaryEntry>,
    inflection: (0 | 1 | 2)[],
    gender: T.Gender[],
    predicate: Predicate
  ): void => {
    const details = { inflection, gender, predicate };
    const bucket = grouped.find((g) => equals(g.search, search));
    if (bucket) {
      bucket.details.push(details);
    } else {
      grouped.push({ search, details: [details] });
    }
  };

  const pattern2or3: Predicate = (e) => isPattern2Entry(e) || isPattern3Entry(e);
  const pattern4or5: Predicate = (e) => isPattern4Entry(e) || isPattern5Entry(e);

  // the word with its final letter removed
  const stem = s.slice(0, -1);

  // Readings that apply whatever the ending: the word as written may itself
  // be a dictionary form (or the stored `infap` form).
  add({ p: s }, [0, 1, 2], ["masc", "fem"], isPattern(0));
  add({ p: s }, [0, 1], ["masc"], isPattern1Entry);
  add(
    { p: s },
    [0],
    ["masc"],
    (e) =>
      isPattern2Entry(e) ||
      isPattern3Entry(e) ||
      isPattern4Entry(e) ||
      isPattern5Entry(e)
  );
  add({ infap: s }, [1], ["masc"], pattern4or5);

  // Ending-specific readings.
  if (s.endsWith("ه")) {
    add({ p: stem }, [0], ["fem"], isPattern1Entry);
    if (includeNouns) {
      add({ p: s }, [0], ["fem"], isPattern1Entry);
    }
    add({ infbp: stem }, [0], ["fem"], pattern4or5);
  } else if (s.endsWith("ې")) {
    add({ p: stem }, [1], ["fem"], isPattern1Entry);
    if (includeNouns) {
      add({ p: stem + "ه" }, [1], ["fem"], isPattern1Entry);
    }
    add({ infbp: stem }, [1], ["fem"], pattern4or5);
    add({ p: stem + "ی" }, [0, 1], ["fem"], isPattern2Entry);
  } else if (s.endsWith("ي")) {
    add({ p: stem + "ی" }, [1], ["masc"], pattern2or3);
  } else if (s.endsWith("و")) {
    add(
      { p: stem },
      [2],
      ["masc", "fem"],
      (e) => isPattern1Entry(e) || isPattern5Entry(e)
    );
    add({ infbp: stem }, [2], ["masc", "fem"], pattern4or5);
    add({ p: stem + "ی" }, [2], ["masc", "fem"], pattern2or3);
    if (s.endsWith("یو")) {
      // the two-letter ending is also tried against the dictionary form in "ی"
      add({ p: s.slice(0, -2) + "ی" }, [2], ["masc", "fem"], pattern2or3);
    }
  } else if (s.endsWith("ۍ")) {
    add({ p: stem + "ی" }, [0, 1], ["fem"], isPattern3Entry);
  }

  return grouped;
}

View File

@ -0,0 +1,8 @@
import nounsAdjs from "../../../nouns-adjs";
import * as T from "../../../types";
/**
 * Simple in-memory dictionary lookup over the bundled nouns/adjectives list.
 *
 * Only the FIRST property of `s` is used as the search criterion; entries
 * whose value for that property is strictly equal are returned.
 *
 * @param s search object, e.g. `{ p: "..." }`
 * @returns every matching entry, or an empty array when `s` has no properties
 */
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
  const criteria = Object.entries(s) as [keyof T.DictionaryEntry, unknown][];
  if (criteria.length === 0) {
    // nothing to search by (previously this crashed while destructuring undefined)
    return [];
  }
  const [key, value] = criteria[0];
  // Treat the bundled data as dictionary entries up front so the property
  // access below is type-checked instead of being silenced with @ts-ignore.
  const dictionary = nounsAdjs as T.DictionaryEntry[];
  return dictionary.filter((e) => e[key] === value);
}

View File

@ -0,0 +1,320 @@
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseAdjective } from "./parse-adjective";
import { tokenizer } from "./tokenizer";
// Dictionary-entry fixtures used to build the expected selections in the test
// table below. Each one is cast to T.AdjectiveEntry rather than declared with
// that type.

// "big, fat" — used by the "pattern 1" cases
const ghut = {
ts: 1527812625,
i: 9561,
p: "غټ",
f: "ghuT, ghaT",
g: "ghuT,ghaT",
e: "big, fat",
r: 4,
c: "adj.",
} as T.AdjectiveEntry;
// "tired" — used by the "pattern 2" cases
const sturey = {
ts: 1527815306,
i: 7933,
p: "ستړی",
f: "stúRay",
g: "stuRay",
e: "tired",
r: 4,
c: "adj. / adv.",
} as T.AdjectiveEntry;
// "thin; mild; high (pitch)" — used by the "pattern 3" cases
const narey = {
ts: 1527819320,
i: 14027,
p: "نری",
f: "naráy",
g: "naray",
e: "thin; mild; high (pitch)",
r: 4,
c: "adj.",
} as T.AdjectiveEntry;
// "old" — used by the "pattern 4" cases; carries the extra infap/infbp forms
const zor = {
ts: 1527815451,
i: 7570,
p: "زوړ",
f: "zoR",
g: "zoR",
e: "old",
r: 4,
c: "adj.",
infap: "زاړه",
infaf: "zaaRu",
infbp: "زړ",
infbf: "zaR",
} as T.AdjectiveEntry;
// "green, blue; ..." — used by the "pattern 5" cases; carries the extra infap/infbp forms
const sheen = {
ts: 1527815265,
i: 8979,
p: "شین",
f: "sheen",
g: "sheen",
e: "green, blue; unripe, immature; bright, sunny",
r: 4,
c: "adj.",
infap: "شنه",
infaf: "shnu",
infbp: "شن",
infbf: "shn",
} as T.AdjectiveEntry;
/**
 * Builds one expected parse result: the selection made from `entry` plus the
 * inflection numbers and genders the input form is expected to stand for.
 */
function expectedParse(
  entry: T.AdjectiveEntry,
  inflection: (0 | 1 | 2)[],
  gender: T.Gender[]
) {
  return { selection: makeAdjectiveSelection(entry), inflection, gender };
}

// Table of adjective-parsing cases, grouped by inflection pattern. Each case
// gives an input word and the full, ordered list of results expected for it.
const tests: {
  category: string;
  cases: {
    input: string;
    output: {
      inflection: (0 | 1 | 2)[];
      gender: T.Gender[];
      selection: T.AdjectiveSelection;
    }[];
  }[];
}[] = [
  {
    category: "pattern 1",
    cases: [
      { input: "غټ", output: [expectedParse(ghut, [0, 1], ["masc"])] },
      { input: "غټه", output: [expectedParse(ghut, [0], ["fem"])] },
      { input: "غټې", output: [expectedParse(ghut, [1], ["fem"])] },
      { input: "غټو", output: [expectedParse(ghut, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 2",
    cases: [
      { input: "ستړی", output: [expectedParse(sturey, [0], ["masc"])] },
      { input: "ستړې", output: [expectedParse(sturey, [0, 1], ["fem"])] },
      { input: "ستړو", output: [expectedParse(sturey, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 3",
    cases: [
      { input: "نری", output: [expectedParse(narey, [0], ["masc"])] },
      { input: "نري", output: [expectedParse(narey, [1], ["masc"])] },
      { input: "نرۍ", output: [expectedParse(narey, [0, 1], ["fem"])] },
      { input: "نرو", output: [expectedParse(narey, [2], ["masc", "fem"])] },
      { input: "نریو", output: [expectedParse(narey, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 4",
    cases: [
      { input: "زوړ", output: [expectedParse(zor, [0], ["masc"])] },
      { input: "زاړه", output: [expectedParse(zor, [1], ["masc"])] },
      { input: "زړه", output: [expectedParse(zor, [0], ["fem"])] },
      { input: "زړې", output: [expectedParse(zor, [1], ["fem"])] },
      { input: "زړو", output: [expectedParse(zor, [2], ["masc", "fem"])] },
    ],
  },
  {
    category: "pattern 5",
    cases: [
      { input: "شین", output: [expectedParse(sheen, [0], ["masc"])] },
      {
        // one written form, two readings
        input: "شنه",
        output: [
          expectedParse(sheen, [1], ["masc"]),
          expectedParse(sheen, [0], ["fem"]),
        ],
      },
      { input: "شنو", output: [expectedParse(sheen, [2], ["masc", "fem"])] },
    ],
  },
];
// Runs the table above: one jest test per category, each checking every case
// in that category against the parser's results (leftover tokens dropped).
describe("parsing adjectives", () => {
  for (const { category, cases } of tests) {
    // eslint-disable-next-line jest/valid-title
    test(category, () => {
      for (const { input, output } of cases) {
        const results = parseAdjective(tokenizer(input), lookup);
        const possibilities = results.map(([, parsed]) => parsed);
        expect(possibilities).toEqual(output);
      }
    });
  }
});

View File

@ -0,0 +1,42 @@
import * as T from "../../../types";
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import { isAdjectiveEntry } from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
/**
 * Tries to read the first token as an adjective.
 *
 * The inflection queries for the token are run through `lookup`; every
 * adjective entry that satisfies a query's predicate produces one result.
 *
 * @param tokens the remaining input tokens
 * @param lookup dictionary search function
 * @returns one tuple per possible reading: the tokens left after the first
 *   one, and the adjective selection with the inflections/genders it may be.
 *   Empty when there are no tokens or nothing matches.
 */
export function parseAdjective(
  tokens: Readonly<string[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): [
  string[],
  {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    selection: T.AdjectiveSelection;
  }
][] {
  if (tokens.length === 0) {
    return [];
  }
  type Reading = ReturnType<typeof parseAdjective>[number];
  const [first, ...rest] = tokens;
  return getInflectionQueries(first, false).flatMap(({ search, details }) => {
    // one dictionary hit list per search, shared by all of its detail sets
    const adjectives = lookup(search).filter(isAdjectiveEntry);
    return details.flatMap(({ predicate, inflection, gender }) =>
      adjectives.filter(predicate).map(
        (entry): Reading => [
          rest,
          {
            selection: makeAdjectiveSelection(entry),
            inflection,
            gender,
          },
        ]
      )
    );
  });
}

View File

@ -0,0 +1,291 @@
import { makeNounSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseNoun } from "./parse-noun";
// Dictionary-entry fixtures for the noun parsing tests, each cast to
// T.NounEntry. Only `kor` and `daktar` are used by the active cases below;
// the others are referenced solely from the commented-out cases further down.

// "man" (n. m.)
const sarey = {
ts: 1527815251,
i: 8163,
p: "سړی",
f: "saRáy",
g: "saRay",
e: "man",
r: 4,
c: "n. m.",
ec: "man",
ep: "men",
} as T.NounEntry;
// "friendship" (n. f.)
const dostee = {
ts: 1527811877,
i: 6627,
p: "دوستي",
f: "dostee",
g: "dostee",
e: "friendship",
r: 3,
c: "n. f.",
} as T.NounEntry;
// "rice" (n. f. pl.)
const wreejze = {
ts: 1586551382412,
i: 14985,
p: "وریژې",
f: "wreejze",
g: "wreejze",
e: "rice",
r: 4,
c: "n. f. pl.",
} as T.NounEntry;
// "woman, wife" (n. f.)
const xudza = {
ts: 1527812797,
i: 9018,
p: "ښځه",
f: "xúdza",
g: "xudza",
e: "woman, wife",
r: 4,
c: "n. f.",
ec: "woman",
ep: "women",
} as T.NounEntry;
// "chair, seat, stool" (n. f.)
const kursuy = {
ts: 1527814203,
i: 10573,
p: "کرسۍ",
f: "kUrsúy",
g: "kUrsuy",
e: "chair, seat, stool",
r: 3,
c: "n. f.",
} as T.NounEntry;
// "house, home" (n. m.) — used by the active "pattern 1 nouns" cases
const kor = {
ts: 1527812828,
i: 11022,
p: "کور",
f: "kor",
g: "kor",
e: "house, home",
r: 4,
c: "n. m.",
} as T.NounEntry;
// "doctor" (n. m. anim. unisex) — used by the active cases with gender overridden to "fem"
const daktar = {
ts: 1527816747,
i: 6709,
p: "ډاکټر",
f: "DaakTar",
g: "DaakTar",
e: "doctor",
r: 4,
c: "n. m. anim. unisex",
} as T.NounEntry;
// TODO: test unisex ملګری etc
// Table of noun-parsing cases: each case gives an input word and the list of
// results expected for it.
// NOTE(review): the expected results use `inflected: boolean`, whereas
// parseNoun in parse-noun.ts is declared to return tuples whose second element
// has `inflection: (0 | 1 | 2)[]` and a selection that includes `adjectives`.
// This table looks like it predates that signature — confirm and update.
const tests: {
category: string;
cases: {
input: string;
output: {
inflected: boolean;
selection: T.NounSelection;
}[];
}[];
}[] = [
{
category: "pattern 1 nouns",
cases: [
{
input: "کور",
output: [
{
inflected: false,
selection: makeNounSelection(kor, undefined),
},
],
},
{
input: "کورو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(kor, undefined),
number: "plural",
},
},
],
},
{
// unisex entry read as feminine
input: "ډاکټره",
output: [
{
inflected: false,
selection: {
...makeNounSelection(daktar, undefined),
gender: "fem",
},
},
],
},
{
input: "ډاکټرې",
output: [
{
inflected: true,
selection: {
...makeNounSelection(daktar, undefined),
gender: "fem",
},
},
],
},
],
},
];
// {
// input: "سړی",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(sarey, undefined),
// },
// ],
// },
// {
// input: "سړي",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(sarey, undefined),
// },
// ],
// },
// {
// input: "سړو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(sarey, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "سړیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(sarey, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "دوستي",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(dostee, undefined),
// },
// ],
// },
// {
// input: "دوستۍ",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(dostee, undefined),
// },
// ],
// },
// {
// input: "دوستیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(dostee, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "وریژې",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(wreejze, undefined),
// },
// ],
// },
// {
// input: "ښځه",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(xudza, undefined),
// },
// ],
// },
// {
// input: "ښځې",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(xudza, undefined),
// },
// ],
// },
// {
// input: "ښځو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(xudza, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "کرسۍ",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(kursuy, undefined),
// },
// {
// inflected: true,
// selection: makeNounSelection(kursuy, undefined),
// },
// ],
// },
// {
// input: "کرسیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(kursuy, undefined),
// number: "plural",
// },
// },
// ],
// },
// ];
// Runs the noun table: one jest test per category, comparing the parser's
// return value for each input directly with the expected output.
// NOTE(review): parseNoun in parse-noun.ts is declared as
// (tokens: Readonly<string[]>, lookup, adjectives) and returns
// [remainingTokens, result] tuples, but it is called here with a raw string,
// without the `adjectives` argument, and its return value is compared without
// unwrapping the tuples. This call needs updating to the current signature.
describe("parsing nouns", () => {
tests.forEach(({ category, cases }) => {
// eslint-disable-next-line jest/valid-title
test(category, () => {
cases.forEach(({ input, output }) => {
expect(parseNoun(input, lookup)).toEqual(output);
});
});
});
});

View File

@ -0,0 +1,97 @@
import * as T from "../../../types";
import { makeNounSelection } from "../phrase-building/make-selections";
import {
isFemNounEntry,
isMascNounEntry,
isNounEntry,
isUnisexNounEntry,
} from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { parseAdjective } from "./parse-adjective";
/**
 * Tries to read a noun, optionally preceded by adjectives, from the front of
 * `tokens`.
 *
 * Two possibilities are explored for the first token:
 *  1. it is an adjective — it is appended to `adjectives` and the rest of the
 *     tokens are parsed recursively;
 *  2. it is a noun — each dictionary entry matching one of the inflection
 *     queries yields a result, provided every adjective collected so far
 *     agrees with it (see `adjsMatch`).
 *
 * For unisex entries every gender allowed by the query is tried and written
 * onto the selection; for masculine / feminine entries the selection made by
 * `makeNounSelection` is used as is.
 *
 * @param tokens the remaining input tokens
 * @param lookup dictionary search function
 * @param adjectives adjectives already parsed in front of the noun
 * @returns one tuple per reading: the tokens left over, and the noun
 *   selection (with the adjectives attached) plus its possible inflections.
 *   Results that consumed further adjectives come first.
 */
export function parseNoun(
  tokens: Readonly<string[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  adjectives: {
    inflection: (0 | 1 | 2)[];
    gender: T.Gender[];
    selection: T.AdjectiveSelection;
  }[]
): [string[], { inflection: (0 | 1 | 2)[]; selection: T.NounSelection }][] {
  if (tokens.length === 0) {
    return [];
  }
  // Possibility 1: the first token is another adjective in front of the noun.
  const adjRes = parseAdjective(tokens, lookup);
  const withAdj = adjRes.flatMap(([tkns, adj]) =>
    parseNoun(tkns, lookup, [...adjectives, adj])
  );

  // Possibility 2: the first token is the noun itself.
  const w: ReturnType<typeof parseNoun> = [];
  const [first, ...rest] = tokens;
  const searches = getInflectionQueries(first, true);
  searches.forEach(({ search, details }) => {
    const nounEntries = lookup(search).filter(isNounEntry);
    details.forEach((deets) => {
      nounEntries.filter(deets.predicate).forEach((entry) => {
        const unisex = isUnisexNounEntry(entry);
        // Genders this entry may take for this reading of the word form.
        const genders: T.Gender[] = unisex
          ? deets.gender
          : isMascNounEntry(entry) && deets.gender.includes("masc")
          ? ["masc"]
          : isFemNounEntry(entry) && deets.gender.includes("fem")
          ? ["fem"]
          : [];
        genders.forEach((gender) => {
          if (!adjsMatch(adjectives, gender, deets.inflection)) {
            return;
          }
          const selection: T.NounSelection = {
            ...makeNounSelection(entry, undefined),
            // only unisex nouns need their gender set explicitly
            ...(unisex ? { gender } : {}),
            adjectives: adjectives.map((a) => a.selection),
          };
          w.push([rest, { inflection: deets.inflection, selection }]);
        });
      });
    });
  });
  return [...withAdj, ...w];
}
/**
 * Checks that every adjective agrees with a noun reading: the adjective must
 * allow `gender` and share at least one inflection number with `inflection`.
 * An empty adjective list always matches.
 */
function adjsMatch(
  adjectives: Parameters<typeof parseNoun>[2],
  gender: T.Gender,
  inflection: (0 | 1 | 2)[]
): boolean {
  for (const adj of adjectives) {
    if (!adj.gender.includes(gender)) {
      return false;
    }
    const sharesInflection = adj.inflection.some((i) => inflection.includes(i));
    if (!sharesInflection) {
      return false;
    }
  }
  return true;
}

View File

@ -0,0 +1,18 @@
import { parseAdjective } from "./parse-adjective";
import * as T from "../../../types";
import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun";
/**
 * Parses a whole token list as a single adjective, pronoun, or noun phrase.
 *
 * All three parsers are run on the same tokens; only readings that consumed
 * every token are kept.
 *
 * @param s the tokens to parse
 * @param lookup dictionary search function
 * @returns the parsed values (adjective results first, then pronouns, then
 *   nouns) without their leftover-token lists
 */
export function parsePhrase(
  s: string[],
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): any[] {
  const candidates = [
    ...parseAdjective(s, lookup),
    ...parsePronoun(s),
    ...parseNoun(s, lookup, []),
  ];
  const complete: any[] = [];
  for (const [remaining, parsed] of candidates) {
    // a reading only counts when nothing is left unparsed
    if (remaining.length === 0) {
      complete.push(parsed);
    }
  }
  return complete;
}

View File

@ -0,0 +1,235 @@
import * as T from "../../../types";
/**
 * Tries to read the first token as a pronoun by comparing it with a fixed
 * list of written forms.
 *
 * @param tokens the remaining input tokens
 * @returns one tuple per possible reading: the tokens left after the first
 *   one, and the pronoun selection together with its `inflected` flags
 *   (presumably which of the plain / inflected forms the word can be —
 *   confirm against the consumers). Empty when the token is not a listed
 *   pronoun or there are no tokens.
 */
export function parsePronoun(tokens: Readonly<string[]>): [
  string[],
  {
    inflected: boolean[];
    selection: T.PronounSelection;
  }
][] {
  const [first, ...rest] = tokens;
  type Reading = ReturnType<typeof parsePronoun>[number];

  // Builds one reading that consumes the first token.
  const reading = (
    person: T.Person,
    distance: T.PronounSelection["distance"],
    inflected: boolean[]
  ): Reading => [
    rest,
    {
      inflected,
      selection: {
        type: "pronoun",
        person,
        distance,
      },
    },
  ];

  switch (first) {
    case "زه":
      return [reading(0, "far", [false]), reading(1, "far", [false])];
    case "ته":
      return [reading(2, "far", [false]), reading(3, "far", [false])];
    case "هغه":
      return [reading(4, "far", [false, true]), reading(5, "far", [false])];
    case "هغې":
      return [reading(T.Person.ThirdSingFemale, "far", [true])];
    case "دی":
      return [reading(T.Person.ThirdSingMale, "near", [false])];
    case "ده":
      return [reading(T.Person.ThirdSingMale, "near", [true])];
    case "دا":
      return [reading(T.Person.ThirdSingFemale, "near", [false])];
    case "دې":
      return [reading(T.Person.ThirdSingFemale, "near", [true])];
    // the plural pronouns each have two accepted spellings
    case "مونږ":
    case "موږ":
      return [
        reading(T.Person.FirstPlurMale, "far", [false, true]),
        reading(T.Person.FirstPlurFemale, "far", [false, true]),
      ];
    case "تاسو":
    case "تاسې":
      return [
        reading(T.Person.SecondPlurMale, "far", [false, true]),
        reading(T.Person.SecondPlurFemale, "far", [false, true]),
      ];
    case "هغوي":
    case "هغوی":
      return [
        reading(T.Person.ThirdPlurMale, "far", [false, true]),
        reading(T.Person.ThirdPlurFemale, "far", [false, true]),
      ];
    case "دوي":
    case "دوی":
      return [
        reading(T.Person.ThirdPlurMale, "near", [false, true]),
        reading(T.Person.ThirdPlurFemale, "near", [false, true]),
      ];
    default:
      return [];
  }
}

View File

@ -0,0 +1,3 @@
/**
 * Splits input text into word tokens.
 *
 * Tokens are separated by any run of whitespace (spaces, tabs, newlines), so
 * repeated or leading/trailing whitespace never produces empty tokens.
 *
 * @param s the raw input text
 * @returns the words in order; an empty array for empty or all-whitespace input
 */
export function tokenizer(s: string): string[] {
  return s.split(/\s+/).filter((token) => token.length > 0);
}

View File

@ -121,9 +121,7 @@ function compileVPPs(
kids,
!!blankOut?.ba
);
return removeDuplicates(
combineIntoText(blocksWKids, subjectPerson, blankOut)
);
return combineIntoText(blocksWKids, subjectPerson, blankOut);
}
function compileEPPs(
@ -217,7 +215,8 @@ export function combineIntoText(
subjectPerson: T.Person,
blankOut?: BlankoutOptions
): T.PsString[] {
return piecesWVars
return removeDuplicates(
piecesWVars
.map((pieces) => {
const psVarsBlocks = getPsVarsBlocks(
applyBlankOut(pieces, blankOut),
@ -225,7 +224,8 @@ export function combineIntoText(
);
return concatAll(monoidPsStringWVars)(psVarsBlocks);
})
.flat();
.flat()
);
}
function getPsVarsBlocks(