diff --git a/README.md b/README.md index 2996a35..38ec88b 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,11 @@ This is published on [a private NPM proxy registry](https://npm.lingdocs.com) as The Pashto Verb Explorer website can be used to view and play with the verb conjugations and various components. ``` +cd src/components +yarn install +cd ../lib +yarn install +cd ../.. yarn install yarn start ``` diff --git a/src/App.tsx b/src/App.tsx index 1d47127..f652da6 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -19,6 +19,7 @@ import { entryFeeder } from "./demo-components/entryFeeder"; import { Hider } from "./components/library"; import InflectionDemo from "./demo-components/InflectionDemo"; import SpellingDemo from "./demo-components/SpellingDemo"; +import ParserDemo from "./demo-components/ParserDemo"; function App() { const [showingTextOptions, setShowingTextOptions] = useStickyState( @@ -132,6 +133,14 @@ function App() { > + handleHiderClick("parser")} + > + + (""); + const [result, setResult] = useState(""); + function handleChange(e: React.ChangeEvent) { + const value = e.target.value; + if (!value) { + setText(""); + setResult(""); + return; + } + const r = parsePhrase(tokenizer(value), lookup); + setText(value); + setResult(JSON.stringify(r, null, " ")); + } + return ( +
+

Type an adjective or noun (w or without adjs) to parse it

+
+ +
+ +
{result}
+
+
+ ); +} + +export default ParserDemo; diff --git a/src/lib/src/parsing/inflection-query.ts b/src/lib/src/parsing/inflection-query.ts new file mode 100644 index 0000000..9a29ca7 --- /dev/null +++ b/src/lib/src/parsing/inflection-query.ts @@ -0,0 +1,199 @@ +import * as T from "../../../types"; +import { + isPattern1Entry, + isPattern2Entry, + isPattern3Entry, + isPattern, + isPattern5Entry, + isPattern4Entry, +} from "../type-predicates"; +import { equals } from "rambda"; + +export function getInflectionQueries( + s: string, + includeNouns: boolean +): { + search: Partial; + details: { + inflection: (0 | 1 | 2)[]; + gender: T.Gender[]; + predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean; + }[]; +}[] { + const queries: { + search: Partial; + details: { + inflection: (0 | 1 | 2)[]; + gender: T.Gender[]; + predicate: (e: T.NounEntry | T.AdjectiveEntry) => boolean; + }; + }[] = []; + queries.push({ + search: { p: s }, + details: { + inflection: [0, 1, 2], + gender: ["masc", "fem"], + predicate: isPattern(0), + }, + }); + queries.push({ + search: { p: s }, + details: { + inflection: [0, 1], + gender: ["masc"], + predicate: isPattern1Entry, + }, + }); + queries.push({ + search: { p: s }, + details: { + inflection: [0], + gender: ["masc"], + predicate: (e) => + isPattern2Entry(e) || + isPattern3Entry(e) || + isPattern4Entry(e) || + isPattern5Entry(e), + }, + }); + queries.push({ + search: { infap: s }, + details: { + inflection: [1], + gender: ["masc"], + predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e), + }, + }); + if (s.endsWith("ه")) { + queries.push({ + search: { p: s.slice(0, -1) }, + details: { + inflection: [0], + gender: ["fem"], + predicate: isPattern1Entry, + }, + }); + if (includeNouns) { + queries.push({ + search: { p: s }, + details: { + inflection: [0], + gender: ["fem"], + predicate: isPattern1Entry, + }, + }); + } + queries.push({ + search: { infbp: s.slice(0, -1) }, + details: { + inflection: [0], + gender: ["fem"], + predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e), + }, + }); + } else if (s.endsWith("ې")) { + queries.push({ + search: { p: s.slice(0, -1) }, + details: { + inflection: [1], + gender: ["fem"], + predicate: isPattern1Entry, + }, + }); + if (includeNouns) { + queries.push({ + search: { p: s.slice(0, -1) + "ه" }, + details: { + inflection: [1], + gender: ["fem"], + predicate: isPattern1Entry, + }, + }); + } + queries.push({ + search: { infbp: s.slice(0, -1) }, + details: { + inflection: [1], + gender: ["fem"], + predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -1) + "ی" }, + details: { + inflection: [0, 1], + gender: ["fem"], + predicate: isPattern2Entry, + }, + }); + } else if (s.endsWith("ي")) { + queries.push({ + search: { p: s.slice(0, -1) + "ی" }, + details: { + inflection: [1], + gender: ["masc"], + predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e), + }, + }); + } else if (s.endsWith("و")) { + queries.push({ + search: { p: s.slice(0, -1) }, + details: { + inflection: [2], + gender: ["masc", "fem"], + predicate: (e) => isPattern1Entry(e) || isPattern5Entry(e), + }, + }); + queries.push({ + search: { infbp: s.slice(0, -1) }, + details: { + inflection: [2], + gender: ["masc", "fem"], + predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -1) + "ی" }, + details: { + inflection: [2], + gender: ["masc", "fem"], + predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e), + }, + }); + if (s.endsWith("یو")) { + queries.push({ + search: { p: s.slice(0, -2) + "ی" }, + details: { + inflection: [2], + gender: ["masc", "fem"], + predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e), + }, + }); + } + } else if (s.endsWith("ۍ")) { + queries.push({ + search: { p: s.slice(0, -1) + "ی" }, + details: { + inflection: [0, 1], + gender: ["fem"], + predicate: isPattern3Entry, + }, + }); + } + + const coallated: ReturnType = []; + + for (let q of queries) { + const existing = coallated.find((x) => equals(x.search, q.search)); + if (existing) { + existing.details.push(q.details); + } else { + coallated.push({ + search: q.search, + details: [q.details], + }); + } + } + + return coallated; +} diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx new file mode 100644 index 0000000..bd031a0 --- /dev/null +++ b/src/lib/src/parsing/lookup.tsx @@ -0,0 +1,8 @@ +import nounsAdjs from "../../../nouns-adjs"; +import * as T from "../../../types"; + +export function lookup(s: Partial): T.DictionaryEntry[] { + const [key, value] = Object.entries(s)[0]; + // @ts-ignore + return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[]; +} diff --git a/src/lib/src/parsing/parse-adjective.test.ts b/src/lib/src/parsing/parse-adjective.test.ts new file mode 100644 index 0000000..c9b7c92 --- /dev/null +++ b/src/lib/src/parsing/parse-adjective.test.ts @@ -0,0 +1,320 @@ +import { makeAdjectiveSelection } from "../phrase-building/make-selections"; +import * as T from "../../../types"; +import { lookup } from "./lookup"; +import { parseAdjective } from "./parse-adjective"; +import { tokenizer } from "./tokenizer"; + +const ghut = { + ts: 1527812625, + i: 9561, + p: "غټ", + f: "ghuT, ghaT", + g: "ghuT,ghaT", + e: "big, fat", + r: 4, + c: "adj.", +} as T.AdjectiveEntry; +const sturey = { + ts: 1527815306, + i: 7933, + p: "ستړی", + f: "stúRay", + g: "stuRay", + e: "tired", + r: 4, + c: "adj. / adv.", +} as T.AdjectiveEntry; +const narey = { + ts: 1527819320, + i: 14027, + p: "نری", + f: "naráy", + g: "naray", + e: "thin; mild; high (pitch)", + r: 4, + c: "adj.", +} as T.AdjectiveEntry; +const zor = { + ts: 1527815451, + i: 7570, + p: "زوړ", + f: "zoR", + g: "zoR", + e: "old", + r: 4, + c: "adj.", + infap: "زاړه", + infaf: "zaaRu", + infbp: "زړ", + infbf: "zaR", +} as T.AdjectiveEntry; +const sheen = { + ts: 1527815265, + i: 8979, + p: "شین", + f: "sheen", + g: "sheen", + e: "green, blue; unripe, immature; bright, sunny", + r: 4, + c: "adj.", + infap: "شنه", + infaf: "shnu", + infbp: "شن", + infbf: "shn", +} as T.AdjectiveEntry; + +const tests: { + category: string; + cases: { + input: string; + output: { + inflection: (0 | 1 | 2)[]; + gender: T.Gender[]; + selection: T.AdjectiveSelection; + }[]; + }[]; +}[] = [ + { + category: "pattern 1", + cases: [ + { + input: "غټ", + output: [ + { + selection: makeAdjectiveSelection(ghut), + inflection: [0, 1], + gender: ["masc"], + }, + ], + }, + { + input: "غټه", + output: [ + { + selection: makeAdjectiveSelection(ghut), + inflection: [0], + gender: ["fem"], + }, + ], + }, + { + input: "غټې", + output: [ + { + selection: makeAdjectiveSelection(ghut), + inflection: [1], + gender: ["fem"], + }, + ], + }, + { + input: "غټو", + output: [ + { + selection: makeAdjectiveSelection(ghut), + inflection: [2], + gender: ["masc", "fem"], + }, + ], + }, + ], + }, + { + category: "pattern 2", + cases: [ + { + input: "ستړی", + output: [ + { + selection: makeAdjectiveSelection(sturey), + inflection: [0], + gender: ["masc"], + }, + ], + }, + { + input: "ستړې", + output: [ + { + selection: makeAdjectiveSelection(sturey), + inflection: [0, 1], + gender: ["fem"], + }, + ], + }, + { + input: "ستړو", + output: [ + { + selection: makeAdjectiveSelection(sturey), + inflection: [2], + gender: ["masc", "fem"], + }, + ], + }, + ], + }, + { + category: "pattern 3", + cases: [ + { + input: "نری", + output: [ + { + selection: makeAdjectiveSelection(narey), + inflection: [0], + gender: ["masc"], + }, + ], + }, + { + input: "نري", + output: [ + { + selection: makeAdjectiveSelection(narey), + inflection: [1], + gender: ["masc"], + }, + ], + }, + { + input: "نرۍ", + output: [ + { + selection: makeAdjectiveSelection(narey), + inflection: [0, 1], + gender: ["fem"], + }, + ], + }, + { + input: "نرو", + output: [ + { + selection: makeAdjectiveSelection(narey), + inflection: [2], + gender: ["masc", "fem"], + }, + ], + }, + { + input: "نریو", + output: [ + { + selection: makeAdjectiveSelection(narey), + inflection: [2], + gender: ["masc", "fem"], + }, + ], + }, + ], + }, + { + category: "pattern 4", + cases: [ + { + input: "زوړ", + output: [ + { + selection: makeAdjectiveSelection(zor), + inflection: [0], + gender: ["masc"], + }, + ], + }, + { + input: "زاړه", + output: [ + { + selection: makeAdjectiveSelection(zor), + inflection: [1], + gender: ["masc"], + }, + ], + }, + { + input: "زړه", + output: [ + { + selection: makeAdjectiveSelection(zor), + inflection: [0], + gender: ["fem"], + }, + ], + }, + { + input: "زړې", + output: [ + { + selection: makeAdjectiveSelection(zor), + inflection: [1], + gender: ["fem"], + }, + ], + }, + { + input: "زړو", + output: [ + { + selection: makeAdjectiveSelection(zor), + inflection: [2], + gender: ["masc", "fem"], + }, + ], + }, + ], + }, + { + category: "pattern 5", + cases: [ + { + input: "شین", + output: [ + { + selection: makeAdjectiveSelection(sheen), + inflection: [0], + gender: ["masc"], + }, + ], + }, + { + input: "شنه", + output: [ + { + selection: makeAdjectiveSelection(sheen), + inflection: [1], + gender: ["masc"], + }, + { + selection: makeAdjectiveSelection(sheen), + inflection: [0], + gender: ["fem"], + }, + ], + }, + { + input: "شنو", + output: [ + { + selection: makeAdjectiveSelection(sheen), + inflection: [2], + gender: ["masc", "fem"], + }, + ], + }, + ], + }, +]; + +describe("parsing adjectives", () => { + tests.forEach(({ category, cases }) => { + // eslint-disable-next-line jest/valid-title + test(category, () => { + cases.forEach(({ input, output }) => { + const tokens = tokenizer(input); + const possibilities = parseAdjective(tokens, lookup).map((x) => x[1]); + expect(possibilities).toEqual(output); + }); + }); + }); +}); diff --git a/src/lib/src/parsing/parse-adjective.ts b/src/lib/src/parsing/parse-adjective.ts new file mode 100644 index 0000000..d057111 --- /dev/null +++ b/src/lib/src/parsing/parse-adjective.ts @@ -0,0 +1,42 @@ +import * as T from "../../../types"; +import { makeAdjectiveSelection } from "../phrase-building/make-selections"; +import { isAdjectiveEntry } from "../type-predicates"; +import { getInflectionQueries } from "./inflection-query"; + +export function parseAdjective( + tokens: Readonly, + lookup: (s: Partial) => T.DictionaryEntry[] +): [ + string[], + { + inflection: (0 | 1 | 2)[]; + gender: T.Gender[]; + selection: T.AdjectiveSelection; + } +][] { + const w: ReturnType = []; + if (tokens.length === 0) { + return []; + } + const [first, ...rest] = tokens; + const queries = getInflectionQueries(first, false); + queries.forEach(({ search, details }) => { + const wideMatches = lookup(search).filter(isAdjectiveEntry); + details.forEach((deets) => { + const matches = wideMatches.filter(deets.predicate); + matches.forEach((m) => { + const selection = makeAdjectiveSelection(m); + w.push([ + rest, + { + selection, + inflection: deets.inflection, + gender: deets.gender, + }, + ]); + }); + }); + }); + + return w; +} diff --git a/src/lib/src/parsing/parse-noun.test.ts.txt b/src/lib/src/parsing/parse-noun.test.ts.txt new file mode 100644 index 0000000..88f60bb --- /dev/null +++ b/src/lib/src/parsing/parse-noun.test.ts.txt @@ -0,0 +1,291 @@ +import { makeNounSelection } from "../phrase-building/make-selections"; +import * as T from "../../../types"; +import { lookup } from "./lookup"; +import { parseNoun } from "./parse-noun"; + +const sarey = { + ts: 1527815251, + i: 8163, + p: "سړی", + f: "saRáy", + g: "saRay", + e: "man", + r: 4, + c: "n. m.", + ec: "man", + ep: "men", +} as T.NounEntry; +const dostee = { + ts: 1527811877, + i: 6627, + p: "دوستي", + f: "dostee", + g: "dostee", + e: "friendship", + r: 3, + c: "n. f.", +} as T.NounEntry; +const wreejze = { + ts: 1586551382412, + i: 14985, + p: "وریژې", + f: "wreejze", + g: "wreejze", + e: "rice", + r: 4, + c: "n. f. pl.", +} as T.NounEntry; +const xudza = { + ts: 1527812797, + i: 9018, + p: "ښځه", + f: "xúdza", + g: "xudza", + e: "woman, wife", + r: 4, + c: "n. f.", + ec: "woman", + ep: "women", +} as T.NounEntry; +const kursuy = { + ts: 1527814203, + i: 10573, + p: "کرسۍ", + f: "kUrsúy", + g: "kUrsuy", + e: "chair, seat, stool", + r: 3, + c: "n. f.", +} as T.NounEntry; +const kor = { + ts: 1527812828, + i: 11022, + p: "کور", + f: "kor", + g: "kor", + e: "house, home", + r: 4, + c: "n. m.", +} as T.NounEntry; +const daktar = { + ts: 1527816747, + i: 6709, + p: "ډاکټر", + f: "DaakTar", + g: "DaakTar", + e: "doctor", + r: 4, + c: "n. m. anim. unisex", +} as T.NounEntry; + +// TODO: test unisex ملګری etc + +const tests: { + category: string; + cases: { + input: string; + output: { + inflected: boolean; + selection: T.NounSelection; + }[]; + }[]; +}[] = [ + { + category: "pattern 1 nouns", + cases: [ + { + input: "کور", + output: [ + { + inflected: false, + selection: makeNounSelection(kor, undefined), + }, + ], + }, + { + input: "کورو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(kor, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ډاکټره", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(daktar, undefined), + gender: "fem", + }, + }, + ], + }, + { + input: "ډاکټرې", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(daktar, undefined), + gender: "fem", + }, + }, + ], + }, + ], + }, +]; +// { +// input: "سړی", +// output: [ +// { +// inflected: false, +// selection: makeNounSelection(sarey, undefined), +// }, +// ], +// }, +// { +// input: "سړي", +// output: [ +// { +// inflected: true, +// selection: makeNounSelection(sarey, undefined), +// }, +// ], +// }, +// { +// input: "سړو", +// output: [ +// { +// inflected: true, +// selection: { +// ...makeNounSelection(sarey, undefined), +// number: "plural", +// }, +// }, +// ], +// }, +// { +// input: "سړیو", +// output: [ +// { +// inflected: true, +// selection: { +// ...makeNounSelection(sarey, undefined), +// number: "plural", +// }, +// }, +// ], +// }, +// { +// input: "دوستي", +// output: [ +// { +// inflected: false, +// selection: makeNounSelection(dostee, undefined), +// }, +// ], +// }, +// { +// input: "دوستۍ", +// output: [ +// { +// inflected: true, +// selection: makeNounSelection(dostee, undefined), +// }, +// ], +// }, +// { +// input: "دوستیو", +// output: [ +// { +// inflected: true, +// selection: { +// ...makeNounSelection(dostee, undefined), +// number: "plural", +// }, +// }, +// ], +// }, +// { +// input: "وریژې", +// output: [ +// { +// inflected: false, +// selection: makeNounSelection(wreejze, undefined), +// }, +// ], +// }, +// { +// input: "ښځه", +// output: [ +// { +// inflected: false, +// selection: makeNounSelection(xudza, undefined), +// }, +// ], +// }, +// { +// input: "ښځې", +// output: [ +// { +// inflected: true, +// selection: makeNounSelection(xudza, undefined), +// }, +// ], +// }, +// { +// input: "ښځو", +// output: [ +// { +// inflected: true, +// selection: { +// ...makeNounSelection(xudza, undefined), +// number: "plural", +// }, +// }, +// ], +// }, +// { +// input: "کرسۍ", +// output: [ +// { +// inflected: false, +// selection: makeNounSelection(kursuy, undefined), +// }, +// { +// inflected: true, +// selection: makeNounSelection(kursuy, undefined), +// }, +// ], +// }, +// { +// input: "کرسیو", +// output: [ +// { +// inflected: true, +// selection: { +// ...makeNounSelection(kursuy, undefined), +// number: "plural", +// }, +// }, +// ], +// }, +// ]; + +describe("parsing nouns", () => { + tests.forEach(({ category, cases }) => { + // eslint-disable-next-line jest/valid-title + test(category, () => { + cases.forEach(({ input, output }) => { + expect(parseNoun(input, lookup)).toEqual(output); + }); + }); + }); +}); diff --git a/src/lib/src/parsing/parse-noun.ts b/src/lib/src/parsing/parse-noun.ts new file mode 100644 index 0000000..c9ab49d --- /dev/null +++ b/src/lib/src/parsing/parse-noun.ts @@ -0,0 +1,97 @@ +import * as T from "../../../types"; +import { makeNounSelection } from "../phrase-building/make-selections"; +import { + isFemNounEntry, + isMascNounEntry, + isNounEntry, + isUnisexNounEntry, +} from "../type-predicates"; +import { getInflectionQueries } from "./inflection-query"; +import { parseAdjective } from "./parse-adjective"; + +export function parseNoun( + tokens: Readonly, + lookup: (s: Partial) => T.DictionaryEntry[], + adjectives: { + inflection: (0 | 1 | 2)[]; + gender: T.Gender[]; + selection: T.AdjectiveSelection; + }[] +): [string[], { inflection: (0 | 1 | 2)[]; selection: T.NounSelection }][] { + if (tokens.length === 0) { + return []; + } + const adjRes = parseAdjective(tokens, lookup); + const withAdj = adjRes.flatMap(([tkns, adj]) => + parseNoun(tkns, lookup, [...adjectives, adj]) + ); + const w: ReturnType = []; + const [first, ...rest] = tokens; + + const searches = getInflectionQueries(first, true); + searches.forEach(({ search, details }) => { + const nounEntries = lookup(search).filter(isNounEntry); + details.forEach((deets) => { + const fittingEntries = nounEntries.filter(deets.predicate); + fittingEntries.forEach((entry) => { + console.log({ entry, deets }); + if (isUnisexNounEntry(entry)) { + deets.gender.forEach((gender) => { + if (adjsMatch(adjectives, gender, deets.inflection)) { + w.push([ + rest, + { + inflection: deets.inflection, + selection: { + ...makeNounSelection(entry, undefined), + gender, + adjectives: adjectives.map((a) => a.selection), + }, + }, + ]); + } + }); + } else if (isMascNounEntry(entry) && deets.gender.includes("masc")) { + if (adjsMatch(adjectives, "masc", deets.inflection)) { + w.push([ + rest, + { + inflection: deets.inflection, + selection: { + ...makeNounSelection(entry, undefined), + adjectives: adjectives.map((a) => a.selection), + }, + }, + ]); + } + } else if (isFemNounEntry(entry) && deets.gender.includes("fem")) { + if (adjsMatch(adjectives, "fem", deets.inflection)) { + w.push([ + rest, + { + inflection: deets.inflection, + selection: { + ...makeNounSelection(entry, undefined), + adjectives: adjectives.map((a) => a.selection), + }, + }, + ]); + } + } + }); + }); + }); + return [...withAdj, ...w]; +} + +function adjsMatch( + adjectives: Parameters[2], + gender: T.Gender, + inflection: (0 | 1 | 2)[] +): boolean { + return adjectives.every( + (adj) => + adj.gender.includes(gender) && + adj.inflection.some((i) => inflection.includes(i)) + ); +} diff --git a/src/lib/src/parsing/parse-phrase.ts b/src/lib/src/parsing/parse-phrase.ts new file mode 100644 index 0000000..d6021d9 --- /dev/null +++ b/src/lib/src/parsing/parse-phrase.ts @@ -0,0 +1,18 @@ +import { parseAdjective } from "./parse-adjective"; +import * as T from "../../../types"; +import { parsePronoun } from "./parse-pronoun"; +import { parseNoun } from "./parse-noun"; + +export function parsePhrase( + s: string[], + lookup: (s: Partial) => T.DictionaryEntry[] +): any[] { + const adjsRes = parseAdjective(s, lookup); + const prnsRes = parsePronoun(s); + const nounsRes = parseNoun(s, lookup, []); + + const correct = [...adjsRes, ...prnsRes, ...nounsRes] + .filter(([tkns]) => tkns.length === 0) + .map((x) => x[1]); + return correct; +} diff --git a/src/lib/src/parsing/parse-pronoun.ts b/src/lib/src/parsing/parse-pronoun.ts new file mode 100644 index 0000000..60137d8 --- /dev/null +++ b/src/lib/src/parsing/parse-pronoun.ts @@ -0,0 +1,235 @@ +import * as T from "../../../types"; + +export function parsePronoun(tokens: Readonly): [ + string[], + { + inflected: boolean[]; + selection: T.PronounSelection; + } +][] { + const [first, ...rest] = tokens; + const w: ReturnType = []; + if (first === "زه") { + w.push([ + rest, + { + inflected: [false], + selection: { + type: "pronoun", + person: 0, + distance: "far", + }, + }, + ]); + w.push([ + rest, + { + inflected: [false], + selection: { + type: "pronoun", + person: 1, + distance: "far", + }, + }, + ]); + } else if (first === "ته") { + w.push([ + rest, + { + inflected: [false], + selection: { + type: "pronoun", + person: 2, + distance: "far", + }, + }, + ]); + w.push([ + rest, + { + inflected: [false], + selection: { + type: "pronoun", + person: 3, + distance: "far", + }, + }, + ]); + } else if (first === "هغه") { + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: 4, + distance: "far", + }, + }, + ]); + w.push([ + rest, + { + inflected: [false], + selection: { + type: "pronoun", + person: 5, + distance: "far", + }, + }, + ]); + } else if (first === "هغې") { + w.push([ + rest, + { + inflected: [true], + selection: { + type: "pronoun", + person: T.Person.ThirdSingFemale, + distance: "far", + }, + }, + ]); + } else if (first === "دی") { + w.push([ + rest, + { + inflected: [false], + selection: { + type: "pronoun", + person: T.Person.ThirdSingMale, + distance: "near", + }, + }, + ]); + } else if (first === "ده") { + w.push([ + rest, + { + inflected: [true], + selection: { + type: "pronoun", + person: T.Person.ThirdSingMale, + distance: "near", + }, + }, + ]); + } else if (first === "دا") { + w.push([ + rest, + { + inflected: [false], + selection: { + type: "pronoun", + person: T.Person.ThirdSingFemale, + distance: "near", + }, + }, + ]); + } else if (first === "دې") { + w.push([ + rest, + { + inflected: [true], + selection: { + type: "pronoun", + person: T.Person.ThirdSingFemale, + distance: "near", + }, + }, + ]); + } else if (["مونږ", "موږ"].includes(first)) { + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.FirstPlurMale, + distance: "far", + }, + }, + ]); + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.FirstPlurFemale, + distance: "far", + }, + }, + ]); + } else if (["تاسو", "تاسې"].includes(first)) { + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.SecondPlurMale, + distance: "far", + }, + }, + ]); + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.SecondPlurFemale, + distance: "far", + }, + }, + ]); + } else if (["هغوي", "هغوی"].includes(first)) { + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.ThirdPlurMale, + distance: "far", + }, + }, + ]); + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.ThirdPlurFemale, + distance: "far", + }, + }, + ]); + } else if (["دوي", "دوی"].includes(first)) { + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.ThirdPlurMale, + distance: "near", + }, + }, + ]); + w.push([ + rest, + { + inflected: [false, true], + selection: { + type: "pronoun", + person: T.Person.ThirdPlurFemale, + distance: "near", + }, + }, + ]); + } + return w; +} diff --git a/src/lib/src/parsing/tokenizer.ts b/src/lib/src/parsing/tokenizer.ts new file mode 100644 index 0000000..c067ea4 --- /dev/null +++ b/src/lib/src/parsing/tokenizer.ts @@ -0,0 +1,3 @@ +export function tokenizer(s: string): string[] { + return s.trim().split(" "); +} diff --git a/src/lib/src/phrase-building/compile.ts b/src/lib/src/phrase-building/compile.ts index 52c030f..e1141af 100644 --- a/src/lib/src/phrase-building/compile.ts +++ b/src/lib/src/phrase-building/compile.ts @@ -121,9 +121,7 @@ function compileVPPs( kids, !!blankOut?.ba ); - return removeDuplicates( - combineIntoText(blocksWKids, subjectPerson, blankOut) - ); + return combineIntoText(blocksWKids, subjectPerson, blankOut); } function compileEPPs( @@ -217,15 +215,17 @@ export function combineIntoText( subjectPerson: T.Person, blankOut?: BlankoutOptions ): T.PsString[] { - return piecesWVars - .map((pieces) => { - const psVarsBlocks = getPsVarsBlocks( - applyBlankOut(pieces, blankOut), - subjectPerson - ); - return concatAll(monoidPsStringWVars)(psVarsBlocks); - }) - .flat(); + return removeDuplicates( + piecesWVars + .map((pieces) => { + const psVarsBlocks = getPsVarsBlocks( + applyBlankOut(pieces, blankOut), + subjectPerson + ); + return concatAll(monoidPsStringWVars)(psVarsBlocks); + }) + .flat() + ); } function getPsVarsBlocks(