From b672e19c1afe0c56120c3f5be0a88e2d012622c8 Mon Sep 17 00:00:00 2001 From: adueck Date: Tue, 1 Aug 2023 16:11:10 +0400 Subject: [PATCH] pretty full noun recognition - plural suffixes just started --- src/components/src/np-picker/NPNounPicker.tsx | 1 - src/lib/src/parsing/inflection-query.ts | 199 +- src/lib/src/parsing/lookup.tsx | 22 + src/lib/src/parsing/parse-adjective.ts | 8 +- src/lib/src/parsing/parse-noun.test.ts | 1172 +++++++ src/lib/src/parsing/parse-noun.test.ts.txt | 291 -- src/lib/src/parsing/parse-noun.ts | 239 +- src/lib/src/parsing/parse-phrase.ts | 2 +- src/lib/src/parsing/parse-pronoun.ts | 30 +- src/lib/src/parsing/tokenizer.ts | 11 +- src/lib/src/pashto-inflector.test.ts | 2774 +++++++++-------- src/lib/src/pashto-inflector.ts | 17 +- src/lib/src/verb-info.test.ts | 5 +- src/types.ts | 5 + vocab/nouns-adjs/aanu-masc.js | 1 + vocab/nouns-adjs/basic-unisex.js | 237 +- vocab/nouns-adjs/irreg-nouns.js | 22 + vocab/nouns-adjs/short-squish-masc.js | 4 + 18 files changed, 3284 insertions(+), 1756 deletions(-) create mode 100644 src/lib/src/parsing/parse-noun.test.ts delete mode 100644 src/lib/src/parsing/parse-noun.test.ts.txt create mode 100644 vocab/nouns-adjs/aanu-masc.js create mode 100644 vocab/nouns-adjs/irreg-nouns.js create mode 100644 vocab/nouns-adjs/short-squish-masc.js diff --git a/src/components/src/np-picker/NPNounPicker.tsx b/src/components/src/np-picker/NPNounPicker.tsx index d06e57b..1a531e2 100644 --- a/src/components/src/np-picker/NPNounPicker.tsx +++ b/src/components/src/np-picker/NPNounPicker.tsx @@ -61,7 +61,6 @@ function NPNounPicker(props: { opts: T.TextOptions; phraseIsComplete: boolean; }) { - console.log({ noun: props.noun }); // const [patternFilter, setPatternFilter] = useState(undefined); // const [showFilter, setShowFilter] = useState(false) // const nounsFiltered = props.nouns diff --git a/src/lib/src/parsing/inflection-query.ts b/src/lib/src/parsing/inflection-query.ts index 5353c5b..e4264ae 100644 --- a/src/lib/src/parsing/inflection-query.ts +++ b/src/lib/src/parsing/inflection-query.ts @@ -1,4 +1,5 @@ import * as T from "../../../types"; +import { endsInConsonant } from "../p-text-helpers"; import { isPattern1Entry, isPattern2Entry, @@ -7,18 +8,26 @@ import { isPattern5Entry, isPattern4Entry, isPattern6FemEntry, + isFemNounEntry, + isAdjectiveEntry, + isUnisexNounEntry, + isPluralNounEntry, + isNounEntry, + isAnimNounEntry, + isMascNounEntry, } from "../type-predicates"; import { equals } from "rambda"; export function getInflectionQueries( s: string, - includeNouns: boolean + noun: boolean ): { search: Partial; details: { inflection: (0 | 1 | 2)[]; gender: T.Gender[]; predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean; + plural?: boolean; }[]; }[] { const queries: { @@ -26,6 +35,7 @@ export function getInflectionQueries( details: { inflection: (0 | 1 | 2)[]; gender: T.Gender[]; + plural?: boolean; predicate: (e: T.NounEntry | T.AdjectiveEntry) => boolean; }; }[] = []; @@ -34,15 +44,111 @@ export function getInflectionQueries( details: { inflection: [0, 1, 2], gender: ["masc", "fem"], - predicate: isPattern(0), + predicate: (e) => + !(isNounEntry(e) && isPluralNounEntry(e)) && + isPattern(0)(e) && + isAdjectiveEntry(e), }, }); + if (noun) { + if (s.endsWith("ونه")) { + queries.push({ + search: { p: s.slice(0, -3) }, + details: { + inflection: [0], + gender: ["masc"], + plural: true, + predicate: (e) => + isNounEntry(e) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -3) + "ه" }, + details: { + inflection: [0], + gender: ["masc"], + plural: true, + predicate: (e) => + isNounEntry(e) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("ونو")) { + queries.push({ + search: { p: s.slice(0, -3) }, + details: { + inflection: [1], + gender: ["masc"], + plural: true, + predicate: (e) => + isNounEntry(e) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -3) + "ه" }, + details: { + inflection: [1], + gender: ["masc"], + plural: true, + predicate: (e) => + isNounEntry(e) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("و")) { + queries.push({ + search: { p: s.slice(0, -1) }, + details: { + inflection: [2], + gender: ["fem"], + predicate: (e) => + isNounEntry(e) && isAnimNounEntry(e) && isFemNounEntry(e), + }, + }); + } + queries.push({ + search: { p: s }, + details: { + inflection: [0], + gender: ["fem"], + predicate: (e) => + isNounEntry(e) && isFemNounEntry(e) && isPattern1Entry(e), + }, + }); + queries.push({ + search: { p: s }, + details: { + inflection: [0, 1], + gender: ["fem"], + predicate: (e) => + isNounEntry(e) && isAnimNounEntry(e) && isFemNounEntry(e), + }, + }); + } queries.push({ search: { p: s }, details: { inflection: [0, 1], gender: ["masc"], - predicate: isPattern1Entry, + predicate: (e) => + !(isNounEntry(e) && isPluralNounEntry(e)) && + (isPattern1Entry(e) || isPattern(0)(e)), }, }); queries.push({ @@ -65,6 +171,17 @@ export function getInflectionQueries( predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e), }, }); + if (noun) { + queries.push({ + search: { p: s }, + details: { + inflection: [0], + plural: true, + gender: ["masc", "fem"], + predicate: (e) => isNounEntry(e) && isPluralNounEntry(e), + }, + }); + } if (s.endsWith("ه")) { queries.push({ search: { p: s.slice(0, -1) }, @@ -74,16 +191,6 @@ export function getInflectionQueries( predicate: isPattern1Entry, }, }); - if (includeNouns) { - queries.push({ - search: { p: s }, - details: { - inflection: [0], - gender: ["fem"], - predicate: isPattern1Entry, - }, - }); - } queries.push({ search: { infbp: s.slice(0, -1) }, details: { @@ -101,7 +208,7 @@ export function getInflectionQueries( predicate: isPattern1Entry, }, }); - if (includeNouns) { + if (noun) { queries.push({ search: { p: s.slice(0, -1) + "ه" }, details: { @@ -150,7 +257,7 @@ export function getInflectionQueries( details: { inflection: [2], gender: ["masc", "fem"], - predicate: (e) => isPattern1Entry(e) || isPattern5Entry(e), + predicate: (e) => isPattern1Entry(e), }, }); queries.push({ @@ -169,6 +276,48 @@ export function getInflectionQueries( predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e), }, }); + if (noun) { + queries.push({ + search: { p: s.slice(0, -1) + "ه" }, + details: { + inflection: [2], + gender: ["fem"], + predicate: (e) => isPattern1Entry(e) || isFemNounEntry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -1) + "ه" }, + details: { + inflection: [2], + gender: ["masc"], + predicate: isMascNounEntry, + }, + }); + queries.push({ + search: { p: s.slice(0, -1) + "ې" }, + details: { + inflection: [2], + gender: ["fem"], + predicate: (e) => isNounEntry(e) || isFemNounEntry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -1) + "ۍ" }, + details: { + inflection: [2], + gender: ["fem"], + predicate: (e) => isFemNounEntry(e) && isPattern3Entry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -1) + "ي" }, + details: { + inflection: [2], + gender: ["fem"], + predicate: isPattern6FemEntry, + }, + }); + } if (s.endsWith("یو")) { queries.push({ search: { p: s.slice(0, -2) + "ی" }, @@ -178,6 +327,24 @@ export function getInflectionQueries( predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e), }, }); + if (noun) { + queries.push({ + search: { p: s.slice(0, -2) + "ۍ" }, + details: { + inflection: [2], + gender: ["fem"], + predicate: (e) => isPattern3Entry(e) && isFemNounEntry(e), + }, + }); + queries.push({ + search: { p: s.slice(0, -2) + "ي" }, + details: { + inflection: [2], + gender: ["fem"], + predicate: isPattern6FemEntry, + }, + }); + } } } else if (s.endsWith("ۍ")) { queries.push({ @@ -188,7 +355,7 @@ export function getInflectionQueries( predicate: isPattern3Entry, }, }); - if (includeNouns) { + if (noun) { queries.push({ search: { p: s.slice(0, -1) + "ي" }, details: { diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index bd031a0..0b9bcb9 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -1,8 +1,30 @@ import nounsAdjs from "../../../nouns-adjs"; import * as T from "../../../types"; +import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; export function lookup(s: Partial): T.DictionaryEntry[] { const [key, value] = Object.entries(s)[0]; // @ts-ignore return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[]; } + +export function wordQuery(word: string, type: "adj"): T.AdjectiveEntry; +export function wordQuery(word: string, type: "noun"): T.NounEntry; +export function wordQuery( + word: string, + type: "noun" | "adj" +): T.NounEntry | T.AdjectiveEntry { + const entry = nounsAdjs.find( + (x) => x.p === word || x.f === word || x.g === word + ); + if (!entry) { + throw new Error(`missing ${word} in word query`); + } + if (type === "noun" && !isNounEntry(entry)) { + throw new Error(`${word} is not a noun`); + } + if (type === "adj" && !isAdjectiveEntry(entry)) { + throw new Error(`${word} is not an adjective`); + } + return entry as T.NounEntry | T.AdjectiveEntry; +} diff --git a/src/lib/src/parsing/parse-adjective.ts b/src/lib/src/parsing/parse-adjective.ts index 2ff681b..5e91cba 100644 --- a/src/lib/src/parsing/parse-adjective.ts +++ b/src/lib/src/parsing/parse-adjective.ts @@ -4,10 +4,10 @@ import { isAdjectiveEntry } from "../type-predicates"; import { getInflectionQueries } from "./inflection-query"; export function parseAdjective( - tokens: Readonly, + tokens: Readonly, lookup: (s: Partial) => T.DictionaryEntry[] ): [ - string[], + T.Token[], { inflection: (0 | 1 | 2)[]; gender: T.Gender[]; @@ -20,7 +20,7 @@ export function parseAdjective( return []; } const [first, ...rest] = tokens; - const queries = getInflectionQueries(first, false); + const queries = getInflectionQueries(first.s, false); queries.forEach(({ search, details }) => { const wideMatches = lookup(search).filter(isAdjectiveEntry); details.forEach((deets) => { @@ -33,7 +33,7 @@ export function parseAdjective( selection, inflection: deets.inflection, gender: deets.gender, - given: first, + given: first.s, }, ]); }); diff --git a/src/lib/src/parsing/parse-noun.test.ts b/src/lib/src/parsing/parse-noun.test.ts new file mode 100644 index 0000000..a08ff92 --- /dev/null +++ b/src/lib/src/parsing/parse-noun.test.ts @@ -0,0 +1,1172 @@ +import { + makeAdjectiveSelection, + makeNounSelection, +} from "../phrase-building/make-selections"; +import * as T from "../../../types"; +import { lookup, wordQuery } from "./lookup"; +import { parseNoun } from "./parse-noun"; +import { tokenizer } from "./tokenizer"; + +const sor = wordQuery("سوړ", "adj"); +const zor = wordQuery("زوړ", "adj"); +const sturey = wordQuery("ستړی", "adj"); +const ghut = wordQuery("غټ", "adj"); +const sarey = wordQuery("سړی", "noun"); +const dostee = wordQuery("دوستي", "noun"); +const wreejze = wordQuery("وریژې", "noun"); +const xudza = wordQuery("ښځه", "noun"); +const kursuy = wordQuery("کرسۍ", "noun"); +const daktar = wordQuery("ډاکټر", "noun"); +const malguray = wordQuery("ملګری", "noun"); +const lmasay = wordQuery("لمسی", "noun"); +const melma = wordQuery("مېلمه", "noun"); +const shpoon = wordQuery("شپون", "noun"); +const tanoor = wordQuery("تنور", "noun"); +const kor = wordQuery("کور", "noun"); +const khur = wordQuery("خر", "noun"); +const ghur = wordQuery("غر", "noun"); +const mor = wordQuery("مور", "noun"); +const plaar = wordQuery("پلار", "noun"); +const oobu = wordQuery("اوبه", "noun"); +const ghanum = wordQuery("غنم", "noun"); +const laar = wordQuery("لار", "noun"); +const qaazee = wordQuery("قاضي", "noun"); +const waadu = wordQuery("واده", "noun"); + +const tests: { + category: string; + cases: { + input: string; + output: { + inflected: boolean; + selection: T.NounSelection; + }[]; + }[]; +}[] = [ + { + category: "pattern 1 nouns", + cases: [ + { + input: "کور", + output: [ + { + inflected: false, + selection: makeNounSelection(kor, undefined), + }, + { + inflected: true, + selection: makeNounSelection(kor, undefined), + }, + ], + }, + { + input: "واده", + output: [ + { + inflected: false, + selection: makeNounSelection(waadu, undefined), + }, + { + inflected: true, + selection: makeNounSelection(waadu, undefined), + }, + ], + }, + { + input: "وادو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(waadu, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "کورو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(kor, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ډاکټر", + output: [ + { + inflected: false, + selection: makeNounSelection(daktar, undefined), + }, + { + inflected: true, + selection: makeNounSelection(daktar, undefined), + }, + ], + }, + { + input: "ډاکټره", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(daktar, undefined), + gender: "fem", + }, + }, + ], + }, + { + input: "ډاکټرې", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(daktar, undefined), + gender: "fem", + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(daktar, undefined), + number: "plural", + gender: "fem", + }, + }, + ], + }, + { + input: "ډاکټرو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(daktar, undefined), + number: "plural", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(daktar, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + { + input: "ښځه", + output: [ + { + inflected: false, + selection: makeNounSelection(xudza, undefined), + }, + ], + }, + { + input: "ښځې", + output: [ + { + inflected: true, + selection: makeNounSelection(xudza, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(xudza, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ښځو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(xudza, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "وریژې", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(wreejze, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "وریژو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(wreejze, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + // fem nouns missing ه + { + input: "لار", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(laar, undefined), + gender: "fem", + }, + }, + ], + }, + { + input: "لارې", + output: [ + { + inflected: true, + selection: makeNounSelection(laar, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(laar, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "لارو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(laar, undefined), + number: "plural", + }, + }, + ], + }, + ], + }, + { + category: "pattern 2 nouns", + cases: [ + { + input: "ملګری", + output: [ + { + inflected: false, + selection: makeNounSelection(malguray, undefined), + }, + ], + }, + { + input: "ملګري", + output: [ + { + inflected: true, + selection: makeNounSelection(malguray, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(malguray, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ملګرې", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(malguray, undefined), + gender: "fem", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(malguray, undefined), + gender: "fem", + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(malguray, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + { + input: "ملګرو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(malguray, undefined), + number: "plural", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(malguray, undefined), + number: "plural", + gender: "fem", + }, + }, + ], + }, + { + input: "ملګریو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(malguray, undefined), + number: "plural", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(malguray, undefined), + number: "plural", + gender: "fem", + }, + }, + ], + }, + ], + }, + { + category: "pattern 3 nouns", + cases: [ + { + input: "سړی", + output: [ + { + inflected: false, + selection: makeNounSelection(sarey, undefined), + }, + ], + }, + { + input: "سړي", + output: [ + { + inflected: true, + selection: makeNounSelection(sarey, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(sarey, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "سړیو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(sarey, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "سړو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(sarey, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "کرسۍ", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(kursuy, undefined), + number: "singular", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(kursuy, undefined), + number: "singular", + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(kursuy, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "کرسو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(kursuy, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "کرسیو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(kursuy, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "لمسی", + output: [ + { + inflected: false, + selection: makeNounSelection(lmasay, undefined), + }, + ], + }, + { + input: "لمسي", + output: [ + { + inflected: true, + selection: makeNounSelection(lmasay, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(lmasay, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "لمسۍ", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(lmasay, undefined), + gender: "fem", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(lmasay, undefined), + gender: "fem", + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(lmasay, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + { + input: "لمسیو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(lmasay, undefined), + gender: "masc", + number: "plural", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(lmasay, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + { + input: "لمسو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(lmasay, undefined), + gender: "masc", + number: "plural", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(lmasay, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + ], + }, + { + category: "pattern 4", + cases: [ + { + input: "مېلمه", + output: [ + { + inflected: false, + selection: makeNounSelection(melma, undefined), + }, + ], + }, + // pattern 4 ending in 'a' - 1st inflection is only plural + { + input: "مېلمانه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(melma, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "شپانه", + output: [ + { + inflected: true, + selection: makeNounSelection(shpoon, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(shpoon, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "مېلمنه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(melma, undefined), + gender: "fem", + }, + }, + ], + }, + { + input: "مېلمنې", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(melma, undefined), + gender: "fem", + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(melma, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + { + input: "مېلمنو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(melma, undefined), + gender: "masc", + number: "plural", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(melma, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + { + input: "تنرو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(tanoor, undefined), + gender: "masc", + number: "plural", + }, + }, + ], + }, + // pseudeo masc version shouldn't be recognized + { + input: "تنره", + output: [], + }, + ], + }, + { + category: "pattern 5", + // TODO: should غر also be considered inflected? + cases: [ + { + input: "غر", + output: [ + { + inflected: false, + selection: makeNounSelection(ghur, undefined), + }, + ], + }, + { + input: "غره", + output: [ + { + inflected: true, + selection: makeNounSelection(ghur, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(ghur, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "غرو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(ghur, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "خر", + output: [ + { + inflected: false, + selection: makeNounSelection(khur, undefined), + }, + ], + }, + { + input: "خره", + output: [ + { + inflected: true, + selection: makeNounSelection(khur, undefined), + }, + { + inflected: false, + selection: { + ...makeNounSelection(khur, undefined), + number: "plural", + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(khur, undefined), + gender: "fem", + number: "singular", + }, + }, + ], + }, + ], + }, + { + category: "pattern 6 fem", + cases: [ + { + input: "دوستي", + output: [ + { + inflected: false, + selection: makeNounSelection(dostee, undefined), + }, + ], + }, + { + input: "دوستۍ", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(dostee, undefined), + number: "singular", + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(dostee, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "دوستو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(dostee, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "دوستیو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(dostee, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "دوستی", + output: [], + }, + ], + }, + { + category: "irregular words", + cases: [ + { + input: "اوبه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(oobu, undefined), + number: "plural", + gender: "fem", + }, + }, + ], + }, + { + input: "اوبو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(oobu, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "غنم", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(ghanum, undefined), + number: "plural", + gender: "masc", + }, + }, + ], + }, + { + input: "غنمو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(ghanum, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "پلار", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(plaar, undefined), + gender: "masc", + }, + }, + { + inflected: true, + selection: makeNounSelection(plaar, undefined), + }, + ], + }, + { + input: "پلارو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(plaar, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "مور", + output: [ + { + inflected: false, + selection: makeNounSelection(mor, undefined), + }, + { + inflected: true, + selection: makeNounSelection(mor, undefined), + }, + ], + }, + { + input: "مورو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(mor, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "قاضي", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(qaazee, undefined), + gender: "masc", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(qaazee, undefined), + gender: "masc", + }, + }, + ], + }, + ], + }, + { + category: "plurals with -oona", + cases: [ + { + input: "کورونه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(kor, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "کورونو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(kor, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "وادونه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(waadu, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "وادونو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(waadu, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "غرونه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(ghur, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "غرونو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(ghur, undefined), + number: "plural", + }, + }, + ], + }, + // can't add oona to a pattern 2 noun + { + input: "ملګریونه", + output: [], + }, + // can't add oona to a pattern 3 noun + { + input: "سړیونه", + output: [], + }, + // can't add oona to a pattern 4 noun + { + input: "تنرونه", + output: [], + }, + { + input: "تنرونو", + output: [], + }, + ], + }, +]; + +// PROBLEM WITH غټې وریژې +// ]; + +describe("parsing nouns", () => { + tests.forEach(({ category, cases }) => { + // eslint-disable-next-line jest/valid-title + test(category, () => { + cases.forEach(({ input, output }) => { + const tokens = tokenizer(input); + const { success } = parseNoun(tokens, lookup, []); + const res = success.map(([tkns, r]) => r); + expect(res).toEqual(output); + }); + }); + }); +}); + +const adjsTests: { + category: string; + cases: { + input: string; + output: { inflected: boolean; selection: T.NounSelection }[]; + }[]; +}[] = [ + { + category: "agreement with regular nouns", + cases: [ + { + input: "زاړه سړي", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(sarey, undefined), + adjectives: [makeAdjectiveSelection(zor)], + }, + }, + { + inflected: false, + selection: { + ...makeNounSelection(sarey, undefined), + adjectives: [makeAdjectiveSelection(zor)], + number: "plural", + }, + }, + ], + }, + { + input: "غټو ستړو ښځو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(xudza, undefined), + adjectives: [ + makeAdjectiveSelection(ghut), + makeAdjectiveSelection(sturey), + ], + number: "plural", + }, + }, + ], + }, + { + input: "غټو ستړې ښځو", + output: [], + }, + ], + }, + { + category: "agreement with plural nouns", + cases: [ + { + input: "سړې اوبه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(oobu, undefined), + gender: "fem", + number: "plural", + adjectives: [makeAdjectiveSelection(sor)], + }, + }, + ], + }, + { + input: "زاړه غنم", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(ghanum, undefined), + number: "plural", + adjectives: [makeAdjectiveSelection(zor)], + }, + }, + ], + }, + { + input: "زوړ غنم", + output: [], + }, + { + input: "زاړه کورونه", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(kor, undefined), + number: "plural", + adjectives: [makeAdjectiveSelection(zor)], + }, + }, + ], + }, + ], + }, +]; + +describe("parsing nouns with adjectives", () => { + adjsTests.forEach(({ category, cases }) => { + // eslint-disable-next-line jest/valid-title + test(category, () => { + cases.forEach(({ input, output }) => { + const tokens = tokenizer(input); + expect(parseNoun(tokens, lookup, []).success.map((x) => x[1])).toEqual( + output + ); + }); + }); + }); +}); diff --git a/src/lib/src/parsing/parse-noun.test.ts.txt b/src/lib/src/parsing/parse-noun.test.ts.txt deleted file mode 100644 index 88f60bb..0000000 --- a/src/lib/src/parsing/parse-noun.test.ts.txt +++ /dev/null @@ -1,291 +0,0 @@ -import { makeNounSelection } from "../phrase-building/make-selections"; -import * as T from "../../../types"; -import { lookup } from "./lookup"; -import { parseNoun } from "./parse-noun"; - -const sarey = { - ts: 1527815251, - i: 8163, - p: "سړی", - f: "saRáy", - g: "saRay", - e: "man", - r: 4, - c: "n. m.", - ec: "man", - ep: "men", -} as T.NounEntry; -const dostee = { - ts: 1527811877, - i: 6627, - p: "دوستي", - f: "dostee", - g: "dostee", - e: "friendship", - r: 3, - c: "n. f.", -} as T.NounEntry; -const wreejze = { - ts: 1586551382412, - i: 14985, - p: "وریژې", - f: "wreejze", - g: "wreejze", - e: "rice", - r: 4, - c: "n. f. pl.", -} as T.NounEntry; -const xudza = { - ts: 1527812797, - i: 9018, - p: "ښځه", - f: "xúdza", - g: "xudza", - e: "woman, wife", - r: 4, - c: "n. f.", - ec: "woman", - ep: "women", -} as T.NounEntry; -const kursuy = { - ts: 1527814203, - i: 10573, - p: "کرسۍ", - f: "kUrsúy", - g: "kUrsuy", - e: "chair, seat, stool", - r: 3, - c: "n. f.", -} as T.NounEntry; -const kor = { - ts: 1527812828, - i: 11022, - p: "کور", - f: "kor", - g: "kor", - e: "house, home", - r: 4, - c: "n. m.", -} as T.NounEntry; -const daktar = { - ts: 1527816747, - i: 6709, - p: "ډاکټر", - f: "DaakTar", - g: "DaakTar", - e: "doctor", - r: 4, - c: "n. m. anim. unisex", -} as T.NounEntry; - -// TODO: test unisex ملګری etc - -const tests: { - category: string; - cases: { - input: string; - output: { - inflected: boolean; - selection: T.NounSelection; - }[]; - }[]; -}[] = [ - { - category: "pattern 1 nouns", - cases: [ - { - input: "کور", - output: [ - { - inflected: false, - selection: makeNounSelection(kor, undefined), - }, - ], - }, - { - input: "کورو", - output: [ - { - inflected: true, - selection: { - ...makeNounSelection(kor, undefined), - number: "plural", - }, - }, - ], - }, - { - input: "ډاکټره", - output: [ - { - inflected: false, - selection: { - ...makeNounSelection(daktar, undefined), - gender: "fem", - }, - }, - ], - }, - { - input: "ډاکټرې", - output: [ - { - inflected: true, - selection: { - ...makeNounSelection(daktar, undefined), - gender: "fem", - }, - }, - ], - }, - ], - }, -]; -// { -// input: "سړی", -// output: [ -// { -// inflected: false, -// selection: makeNounSelection(sarey, undefined), -// }, -// ], -// }, -// { -// input: "سړي", -// output: [ -// { -// inflected: true, -// selection: makeNounSelection(sarey, undefined), -// }, -// ], -// }, -// { -// input: "سړو", -// output: [ -// { -// inflected: true, -// selection: { -// ...makeNounSelection(sarey, undefined), -// number: "plural", -// }, -// }, -// ], -// }, -// { -// input: "سړیو", -// output: [ -// { -// inflected: true, -// selection: { -// ...makeNounSelection(sarey, undefined), -// number: "plural", -// }, -// }, -// ], -// }, -// { -// input: "دوستي", -// output: [ -// { -// inflected: false, -// selection: makeNounSelection(dostee, undefined), -// }, -// ], -// }, -// { -// input: "دوستۍ", -// output: [ -// { -// inflected: true, -// selection: makeNounSelection(dostee, undefined), -// }, -// ], -// }, -// { -// input: "دوستیو", -// output: [ -// { -// inflected: true, -// selection: { -// ...makeNounSelection(dostee, undefined), -// number: "plural", -// }, -// }, -// ], -// }, -// { -// input: "وریژې", -// output: [ -// { -// inflected: false, -// selection: makeNounSelection(wreejze, undefined), -// }, -// ], -// }, -// { -// input: "ښځه", -// output: [ -// { -// inflected: false, -// selection: makeNounSelection(xudza, undefined), -// }, -// ], -// }, -// { -// input: "ښځې", -// output: [ -// { -// inflected: true, -// selection: makeNounSelection(xudza, undefined), -// }, -// ], -// }, -// { -// input: "ښځو", -// output: [ -// { -// inflected: true, -// selection: { -// ...makeNounSelection(xudza, undefined), -// number: "plural", -// }, -// }, -// ], -// }, -// { -// input: "کرسۍ", -// output: [ -// { -// inflected: false, -// selection: makeNounSelection(kursuy, undefined), -// }, -// { -// inflected: true, -// selection: makeNounSelection(kursuy, undefined), -// }, -// ], -// }, -// { -// input: "کرسیو", -// output: [ -// { -// inflected: true, -// selection: { -// ...makeNounSelection(kursuy, undefined), -// number: "plural", -// }, -// }, -// ], -// }, -// ]; - -describe("parsing nouns", () => { - tests.forEach(({ category, cases }) => { - // eslint-disable-next-line jest/valid-title - test(category, () => { - cases.forEach(({ input, output }) => { - expect(parseNoun(input, lookup)).toEqual(output); - }); - }); - }); -}); diff --git a/src/lib/src/parsing/parse-noun.ts b/src/lib/src/parsing/parse-noun.ts index 47c1479..48a41e5 100644 --- a/src/lib/src/parsing/parse-noun.ts +++ b/src/lib/src/parsing/parse-noun.ts @@ -1,16 +1,22 @@ import * as T from "../../../types"; +import { getInflectionPattern } from "../inflection-pattern"; import { makeNounSelection } from "../phrase-building/make-selections"; import { isFemNounEntry, isMascNounEntry, isNounEntry, + isPluralNounEntry, isUnisexNounEntry, } from "../type-predicates"; import { getInflectionQueries } from "./inflection-query"; import { parseAdjective } from "./parse-adjective"; +// TODO: +// - cleanup the workflow and make sure all nouns are covered and test +// - add possesive parsing + export function parseNoun( - tokens: Readonly, + tokens: Readonly, lookup: (s: Partial) => T.DictionaryEntry[], adjectives: { inflection: (0 | 1 | 2)[]; @@ -19,10 +25,7 @@ export function parseNoun( selection: T.AdjectiveSelection; }[] ): { - success: [ - string[], - { inflection: (0 | 1 | 2)[]; selection: T.NounSelection } - ][]; + success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][]; errors: string[]; } { if (tokens.length === 0) { @@ -31,15 +34,19 @@ export function parseNoun( errors: [], }; } + const [first, ...rest] = tokens; + // TODO: add recognition of او between adjectives const adjRes = parseAdjective(tokens, lookup); const withAdj = adjRes.map(([tkns, adj]) => parseNoun(tkns, lookup, [...adjectives, adj]) ); const success: ReturnType["success"] = []; const errors: string[] = []; - const [first, ...rest] = tokens; + // const possesor = + // first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined; + + const searches = getInflectionQueries(first.s, true); - const searches = getInflectionQueries(first, true); searches.forEach(({ search, details }) => { const nounEntries = lookup(search).filter(isNounEntry); details.forEach((deets) => { @@ -47,65 +54,108 @@ export function parseNoun( fittingEntries.forEach((entry) => { if (isUnisexNounEntry(entry)) { deets.gender.forEach((gender) => { + deets.inflection.forEach((inf) => { + const { ok, error } = adjsMatch( + adjectives, + gender, + inf, + deets.plural + ); + if (ok) { + convertInflection(inf, entry, gender, deets.plural).forEach( + ({ inflected, number }) => { + const selection = makeNounSelection(entry, undefined); + success.push([ + rest, + { + inflected, + selection: { + ...selection, + gender: selection.genderCanChange + ? gender + : selection.gender, + number: selection.numberCanChange + ? number + : selection.number, + adjectives: adjectives.map((a) => a.selection), + }, + }, + ]); + } + ); + } else { + error.forEach((e) => { + errors.push(e); + }); + } + }); + }); + } else if (isMascNounEntry(entry) && deets.gender.includes("masc")) { + deets.inflection.forEach((inf) => { const { ok, error } = adjsMatch( adjectives, - gender, - deets.inflection + "masc", + inf, + deets.plural ); if (ok) { - success.push([ - rest, - { - inflection: deets.inflection, - selection: { - ...makeNounSelection(entry, undefined), - gender, - adjectives: adjectives.map((a) => a.selection), - }, - }, - ]); + convertInflection(inf, entry, "masc", deets.plural).forEach( + ({ inflected, number }) => { + const selection = makeNounSelection(entry, undefined); + success.push([ + rest, + { + inflected, + selection: { + ...selection, + number: selection.numberCanChange + ? number + : selection.number, + adjectives: adjectives.map((a) => a.selection), + }, + }, + ]); + } + ); } else { error.forEach((e) => { errors.push(e); }); } }); - } else if (isMascNounEntry(entry) && deets.gender.includes("masc")) { - const { ok, error } = adjsMatch(adjectives, "masc", deets.inflection); - if (ok) { - success.push([ - rest, - { - inflection: deets.inflection, - selection: { - ...makeNounSelection(entry, undefined), - adjectives: adjectives.map((a) => a.selection), - }, - }, - ]); - } else { - error.forEach((e) => { - errors.push(e); - }); - } } else if (isFemNounEntry(entry) && deets.gender.includes("fem")) { - const { ok, error } = adjsMatch(adjectives, "fem", deets.inflection); - if (ok) { - success.push([ - rest, - { - inflection: deets.inflection, - selection: { - ...makeNounSelection(entry, undefined), - adjectives: adjectives.map((a) => a.selection), - }, - }, - ]); - } else { - error.forEach((e) => { - errors.push(e); - }); - } + deets.inflection.forEach((inf) => { + const { ok, error } = adjsMatch( + adjectives, + "fem", + inf, + deets.plural + ); + if (ok) { + convertInflection(inf, entry, "fem", deets.plural).forEach( + ({ inflected, number }) => { + const selection = makeNounSelection(entry, undefined); + success.push([ + rest, + { + inflected, + selection: { + ...selection, + number: selection.numberCanChange + ? number + : selection.number, + adjectives: adjectives.map((a) => a.selection), + }, + }, + ]); + } + ); + } else { + error.forEach((e) => { + errors.push(e); + }); + } + }); } }); }); @@ -119,12 +169,14 @@ export function parseNoun( function adjsMatch( adjectives: Parameters[2], gender: T.Gender, - inflection: (0 | 1 | 2)[] + inf: 0 | 1 | 2, + plural: boolean | undefined ): { ok: boolean; error: string[] } { + const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2; const unmatching = adjectives.filter( (adj) => !adj.gender.includes(gender) || - !adj.inflection.some((i) => inflection.includes(i)) + !adj.inflection.some((i) => i === inflection) ); if (unmatching.length) { return { @@ -134,9 +186,7 @@ function adjsMatch( x.given === x.selection.entry.p ? x.given : `${x.given} (${x.selection.entry.p})`; - const inflectionIssue = !x.inflection.some((x) => - inflection.includes(x) - ) + const inflectionIssue = !x.inflection.some((x) => x === inflection) ? ` should be ${showInflection(inflection)}` : ``; return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`; @@ -150,14 +200,63 @@ function adjsMatch( } } -function showInflection(inf: (0 | 1 | 2)[]): string { - const [last, ...rest] = inf.reverse(); - const template = rest.length - ? `${rest.join(", ")}, or ${last}` - : last.toString(); - console.log(template); - return template - .replace("0", "plain") - .replace("1", "first inflection") - .replace("2", "second inflection"); +function convertInflection( + inflection: 0 | 1 | 2, + entry: T.NounEntry | T.AdjectiveEntry, + gender: T.Gender, + plural: boolean | undefined +): { + inflected: boolean; + number: T.NounNumber; +}[] { + const pattern = getInflectionPattern(entry); + const inf = (plural && inflection < 2 ? inflection + 1 : inflection) as + | 0 + | 1 + | 2; + if (inf === 0) { + return [ + { + inflected: false, + number: "singular", + }, + ]; + } else if (inf === 1) { + return [ + ...(!((isNounEntry(entry) && isPluralNounEntry(entry)) || plural) && + !(pattern === 4 && entry.p.endsWith("ه") && gender === "masc") + ? [ + { + inflected: true, + number: "singular" as T.NounNumber, + }, + ] + : []), + ...(pattern > 1 || + (pattern > 0 && gender === "fem") || + (isNounEntry(entry) && isPluralNounEntry(entry)) || + plural + ? [ + { + inflected: false, + number: "plural" as T.NounNumber, + }, + ] + : []), + ]; + } + return [ + { + inflected: true, + number: "plural", + }, + ]; +} + +function showInflection(inf: 0 | 1 | 2): string { + return inf === 0 + ? "plain" + : inf === 1 + ? "first inflection" + : "second inflection"; } diff --git a/src/lib/src/parsing/parse-phrase.ts b/src/lib/src/parsing/parse-phrase.ts index f72711a..ff5082b 100644 --- a/src/lib/src/parsing/parse-phrase.ts +++ b/src/lib/src/parsing/parse-phrase.ts @@ -4,7 +4,7 @@ import { parsePronoun } from "./parse-pronoun"; import { parseNoun } from "./parse-noun"; export function parsePhrase( - s: string[], + s: T.Token[], lookup: (s: Partial) => T.DictionaryEntry[] ): { success: any[]; diff --git a/src/lib/src/parsing/parse-pronoun.ts b/src/lib/src/parsing/parse-pronoun.ts index 60137d8..04f3d9f 100644 --- a/src/lib/src/parsing/parse-pronoun.ts +++ b/src/lib/src/parsing/parse-pronoun.ts @@ -1,15 +1,15 @@ import * as T from "../../../types"; -export function parsePronoun(tokens: Readonly): [ - string[], +export function parsePronoun(tokens: Readonly): [ + T.Token[], { inflected: boolean[]; selection: T.PronounSelection; } ][] { - const [first, ...rest] = tokens; + const [{ s }, ...rest] = tokens; const w: ReturnType = []; - if (first === "زه") { + if (s === "زه") { w.push([ rest, { @@ -32,7 +32,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (first === "ته") { + } else if (s === "ته") { w.push([ rest, { @@ -55,7 +55,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (first === "هغه") { + } else if (s === "هغه") { w.push([ rest, { @@ -78,7 +78,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (first === "هغې") { + } else if (s === "هغې") { w.push([ rest, { @@ -90,7 +90,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (first === "دی") { + } else if (s === "دی") { w.push([ rest, { @@ -102,7 +102,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (first === "ده") { + } else if (s === "ده") { w.push([ rest, { @@ -114,7 +114,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (first === "دا") { + } else if (s === "دا") { w.push([ rest, { @@ -126,7 +126,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (first === "دې") { + } else if (s === "دې") { w.push([ rest, { @@ -138,7 +138,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (["مونږ", "موږ"].includes(first)) { + } else if (["مونږ", "موږ"].includes(s)) { w.push([ rest, { @@ -161,7 +161,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (["تاسو", "تاسې"].includes(first)) { + } else if (["تاسو", "تاسې"].includes(s)) { w.push([ rest, { @@ -184,7 +184,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (["هغوي", "هغوی"].includes(first)) { + } else if (["هغوي", "هغوی"].includes(s)) { w.push([ rest, { @@ -207,7 +207,7 @@ export function parsePronoun(tokens: Readonly): [ }, }, ]); - } else if (["دوي", "دوی"].includes(first)) { + } else if (["دوي", "دوی"].includes(s)) { w.push([ rest, { diff --git a/src/lib/src/parsing/tokenizer.ts b/src/lib/src/parsing/tokenizer.ts index c067ea4..a569a2c 100644 --- a/src/lib/src/parsing/tokenizer.ts +++ b/src/lib/src/parsing/tokenizer.ts @@ -1,3 +1,10 @@ -export function tokenizer(s: string): string[] { - return s.trim().split(" "); +import { Token } from "../../../types"; + +export function tokenizer(s: string): Token[] { + const words = s.trim().split(" "); + const indexed: { i: number; s: string }[] = []; + for (let i = 0; i < words.length; i++) { + indexed.push({ i, s: words[i] }); + } + return indexed; } diff --git a/src/lib/src/pashto-inflector.test.ts b/src/lib/src/pashto-inflector.test.ts index 062b355..56c7669 100644 --- a/src/lib/src/pashto-inflector.test.ts +++ b/src/lib/src/pashto-inflector.test.ts @@ -8,1318 +8,1632 @@ // TODO: See if there are animate feminine words ending in ي and test -import { - inflectRegularYayUnisex, - inflectWord, -} from "./pashto-inflector"; +import { inflectRegularYayUnisex, inflectWord } from "./pashto-inflector"; import * as T from "../../types"; const adjectives: { - in: T.DictionaryEntry, - out: T.InflectorOutput, + in: T.DictionaryEntry; + out: T.InflectorOutput; }[] = [ - // irregular adj. - { - in: { - ts: 1527815451, - p: "زوړ", - f: "zoR", - g: "", - e: "old", - c: "adj. irreg.", - i: 6264, - infap: "زاړه", - infaf: "zaaRu", - infbp: "زړ", - infbf: "zaR", - }, - out: { - inflections:{ - masc: [ - [{p: "زوړ", f: "zoR"}], - [{p: "زاړه", f: "zaaRú"}], - [{p: "زړو", f: "zaRó"}], - ], - fem: [ - [{p: "زړه", f: "zaRá"}], - [{p: "زړې", f: "zaRé"}], - [{p: "زړو", f: "zaRó"}], - ], - }, - }, + // irregular adj. + { + in: { + ts: 1527815451, + p: "زوړ", + f: "zoR", + g: "", + e: "old", + c: "adj. irreg.", + i: 6264, + infap: "زاړه", + infaf: "zaaRu", + infbp: "زړ", + infbf: "zaR", }, - // regular adjective ending in ی - { - in: { - ts: 1527815306, - p: "ستړی", - f: "stúRay", - g: "", - e: "tired", - c: "adj.", - i: 6564, - }, - out: { - inflections: { - masc: [ - [{p: "ستړی", f: "stúRay"}], - [{p: "ستړي", f: "stúRee"}], - [{p: "ستړیو", f: "stúRiyo"}, {p: "ستړو", f: "stúRo"}], - ], - fem: [ - [{p: "ستړې", f: "stúRe"}], - [{p: "ستړې", f: "stúRe"}], - [{p: "ستړو", f: "stúRo"}], - ], - } - }, + out: { + inflections: { + masc: [ + [{ p: "زوړ", f: "zoR" }], + [{ p: "زاړه", f: "zaaRú" }], + [{ p: "زړو", f: "zaRó" }], + ], + fem: [ + [{ p: "زړه", f: "zaRá" }], + [{ p: "زړې", f: "zaRé" }], + [{ p: "زړو", f: "zaRó" }], + ], + }, }, - // regular adjective ending in ی with stress on the end - { - in: { - ts: 1527813636, - p: "وروستی", - f: "wroostáy", - g: "", - e: "last, latest, recent", - c: "adj.", - i: 12026, - }, - out: { - inflections: { - masc: [ - [{p: "وروستی", f: "wroostáy"}], - [{p: "وروستي", f: "wroostée"}], - [{p: "وروستیو", f: "wroostiyo"}, {p: "وروستو", f: "wroostó"}], - ], - fem: [ - [{p: "وروستۍ", f: "wroostúy"}], - [{p: "وروستۍ", f: "wroostúy"}], - [{p: "وروستیو", f: "wroostúyo"}, {p: "وروستو", f: "wroostó"}], - ], - } - }, + }, + // regular adjective ending in ی + { + in: { + ts: 1527815306, + p: "ستړی", + f: "stúRay", + g: "", + e: "tired", + c: "adj.", + i: 6564, }, - // regular adjective ending in a consonant - { - in: { - ts: 1527813498, - p: "سپک", - f: "spuk", - g: "", - e: "light; dishonorable, not respectable", - c: "adj.", - i: 6502, - }, - out: { - inflections: { - masc: [ - [{p: "سپک", f: "spuk"}], - [{p: "سپک", f: "spuk"}], - [{p: "سپکو", f: "spúko"}], - ], - fem: [ - [{p: "سپکه", f: "spúka"}], - [{p: "سپکې", f: "spúke"}], - [{p: "سپکو", f: "spúko"}], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "ستړی", f: "stúRay" }], + [{ p: "ستړي", f: "stúRee" }], + [ + { p: "ستړیو", f: "stúRiyo" }, + { p: "ستړو", f: "stúRo" }, + ], + ], + fem: [ + [{ p: "ستړې", f: "stúRe" }], + [{ p: "ستړې", f: "stúRe" }], + [ + { p: "ستړیو", f: "stúRiyo" }, + { p: "ستړو", f: "stúRo" }, + ], + ], + }, }, - // regular adjective ending in a consonant with an accent already - { - in: {"ts":1527818704,"i":352,"p":"ارت","f":"arát","g":"arat","e":"wide, spacious, extensive","c":"adj."}, - out: { - inflections: { - masc: [ - [{p: "ارت", f: "arát"}], - [{p: "ارت", f: "arát"}], - [{p: "ارتو", f: "aráto"}], - ], - fem: [ - [{p: "ارته", f: "aráta"}], - [{p: "ارتې", f: "aráte"}], - [{p: "ارتو", f: "aráto"}], - ], - }, - }, + }, + // regular adjective ending in ی with stress on the end + { + in: { + ts: 1527813636, + p: "وروستی", + f: "wroostáy", + g: "", + e: "last, latest, recent", + c: "adj.", + i: 12026, }, - { - in: { - ts: 1527812862, - p: "لوی", - f: "looy", - g: "", - e: "big, great, large", - c: "adj.", - i: 9945, - }, - out: { - inflections: { - masc: [ - [{p: "لوی", f: "looy"}], - [{p: "لوی", f: "looy"}], - [{p: "لویو", f: "lóoyo"}], - ], - fem: [ - [{p: "لویه", f: "lóoya"}], - [{p: "لویې", f: "lóoye"}], - [{p: "لویو", f: "lóoyo"}], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "وروستی", f: "wroostáy" }], + [{ p: "وروستي", f: "wroostée" }], + [ + { p: "وروستیو", f: "wroostíyo" }, + { p: "وروستو", f: "wroostó" }, + ], + ], + fem: [ + [{ p: "وروستۍ", f: "wroostúy" }], + [{ p: "وروستۍ", f: "wroostúy" }], + [ + { p: "وروستیو", f: "wroostíyo" }, + { p: "وروستو", f: "wroostó" }, + ], + ], + }, }, - { - in: { - ts: 1527811469, - p: "پوه", - f: "poh", - g: "", - e: "understanding, having understood; intelligent, quick, wise, clever; expert", - c: "adj.", - i: 2430, - }, - out: { - inflections: { - masc: [ - [{p: "پوه", f: "poh"}], - [{p: "پوه", f: "poh"}], - [{p: "پوهو", f: "póho"}], - ], - fem: [ - [{p: "پوهه", f: "póha"}], - [{p: "پوهې", f: "póhe"}], - [{p: "پوهو", f: "póho"}], - ], - }, - }, + }, + // regular adjective ending in a consonant + { + in: { + ts: 1527813498, + p: "سپک", + f: "spuk", + g: "", + e: "light; dishonorable, not respectable", + c: "adj.", + i: 6502, }, - // adjective ending in u - { - in: { - ts: 1527812791, - p: "ویده", - f: "weedú", - g: "weedu", - e: "asleep", - c: "adj.", - i: 1, - }, - out: { - inflections: { - masc: [ - [{p: "ویده", f: "weedú"}], - [{p: "ویده", f: "weedú"}], - [{p: "ویدو", f: "weedó"}], - ], - fem: [ - [{p: "ویده", f: "weedá"}], - [{p: "ویدې", f: "weedé"}], - [{p: "ویدو", f: "weedó"}], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "سپک", f: "spuk" }], + [{ p: "سپک", f: "spuk" }], + [{ p: "سپکو", f: "spúko" }], + ], + fem: [ + [{ p: "سپکه", f: "spúka" }], + [{ p: "سپکې", f: "spúke" }], + [{ p: "سپکو", f: "spúko" }], + ], + }, }, - { - in: { - ts: 1527812792, - p: "ښایسته", - f: "xaaystu", - g: "xaaystu", - e: "beautiful", - c: "adj.", - i: 1, - }, - out: { - inflections: { - masc: [ - [{p: "ښایسته", f: "xaaystu"}], - [{p: "ښایسته", f: "xaaystu"}], - [{p: "ښایستو", f: "xaaysto"}], - ], - fem: [ - [{p: "ښایسته", f: "xaaysta"}], - [{p: "ښایستې", f: "xaayste"}], - [{p: "ښایستو", f: "xaaysto"}], - ], - }, - }, + }, + // regular adjective ending in a consonant with an accent already + { + in: { + ts: 1527818704, + i: 352, + p: "ارت", + f: "arát", + g: "arat", + e: "wide, spacious, extensive", + c: "adj.", }, - // numbers should inflect just like adjectives - { - in: {"ts":1588688995113,"i":8176,"p":"شپږ","f":"shpuG","g":"shpug","e":"six","c":"num."}, - out: { - inflections: { - masc: [ - [{ p: "شپږ", f: "shpuG" }], - [{ p: "شپږ", f: "shpuG" }], - [{ p: "شپږو", f: "shpúGo" }], - ], - fem: [ - [{ p: "شپږه", f: "shpúGa" }], - [{ p: "شپږې", f: "shpúGe" }], - [{ p: "شپږو", f: "shpúGo" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "ارت", f: "arát" }], + [{ p: "ارت", f: "arát" }], + [{ p: "ارتو", f: "aráto" }], + ], + fem: [ + [{ p: "ارته", f: "aráta" }], + [{ p: "ارتې", f: "aráte" }], + [{ p: "ارتو", f: "aráto" }], + ], + }, }, - // without accents - { - in: {"ts":1527812796,"i":8574,"p":"ښه","f":"xu","g":"xu","e":"good","c":"adj."}, - out: { - inflections: { - masc: [ - [{ p: "ښه", f: "xu" }], - [{ p: "ښه", f: "xu" }], - [{ p: "ښو", f: "xo" }], - ], - fem: [ - [{ p: "ښه", f: "xa" }], - [{ p: "ښې", f: "xe" }], - [{ p: "ښو", f: "xo" }], - ], - }, - }, + }, + { + in: { + ts: 1527812862, + p: "لوی", + f: "looy", + g: "", + e: "big, great, large", + c: "adj.", + i: 9945, }, - // adjective non-inflecting - { - in: { - ts: 1527812798, - p: "خفه", - f: "khufa", - g: "", - e: "sad, upset, angry; choked, suffocated", - c: "adj.", - i: 4631, - }, - out: false, + out: { + inflections: { + masc: [ + [{ p: "لوی", f: "looy" }], + [{ p: "لوی", f: "looy" }], + [{ p: "لویو", f: "lóoyo" }], + ], + fem: [ + [{ p: "لویه", f: "lóoya" }], + [{ p: "لویې", f: "lóoye" }], + [{ p: "لویو", f: "lóoyo" }], + ], + }, }, - { - in: { - ts: 1527814727, - p: "اجباري", - f: "ijbaaree", - g: "", - e: "compulsory, obligatory", - c: "adj.", - i: 167, - }, - out: false, + }, + { + in: { + ts: 1527811469, + p: "پوه", + f: "poh", + g: "", + e: "understanding, having understood; intelligent, quick, wise, clever; expert", + c: "adj.", + i: 2430, }, - // double adjective - { - in: { - ts: 123, - p: "ګډ وډ", - f: "guD wuD", - g: "guDwuD", - e: "mixed up", - c: "adj. doub.", - i: 1, - }, - out: { - inflections: { - masc: [ - [{ p: "ګډ وډ", f: "guD wuD" }], - [{ p: "ګډ وډ", f: "guD wuD" }], - [{ p: "ګډو وډو", f: "gúDo wúDo" }], - ], - fem: [ - [{ p: "ګډه وډه", f: "gúDa wúDa" }], - [{ p: "ګډې وډې", f: "gúDe wúDe" }], - [{ p: "ګډو وډو", f: "gúDo wúDo" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "پوه", f: "poh" }], + [{ p: "پوه", f: "poh" }], + [{ p: "پوهو", f: "póho" }], + ], + fem: [ + [{ p: "پوهه", f: "póha" }], + [{ p: "پوهې", f: "póhe" }], + [{ p: "پوهو", f: "póho" }], + ], + }, }, + }, + // adjective ending in u + { + in: { + ts: 1527812791, + p: "ویده", + f: "weedú", + g: "weedu", + e: "asleep", + c: "adj.", + i: 1, + }, + out: { + inflections: { + masc: [ + [{ p: "ویده", f: "weedú" }], + [{ p: "ویده", f: "weedú" }], + [{ p: "ویدو", f: "weedó" }], + ], + fem: [ + [{ p: "ویده", f: "weedá" }], + [{ p: "ویدې", f: "weedé" }], + [{ p: "ویدو", f: "weedó" }], + ], + }, + }, + }, + { + in: { + ts: 1527812792, + p: "ښایسته", + f: "xaaystu", + g: "xaaystu", + e: "beautiful", + c: "adj.", + i: 1, + }, + out: { + inflections: { + masc: [ + [{ p: "ښایسته", f: "xaaystu" }], + [{ p: "ښایسته", f: "xaaystu" }], + [{ p: "ښایستو", f: "xaaysto" }], + ], + fem: [ + [{ p: "ښایسته", f: "xaaysta" }], + [{ p: "ښایستې", f: "xaayste" }], + [{ p: "ښایستو", f: "xaaysto" }], + ], + }, + }, + }, + // numbers should inflect just like adjectives + { + in: { + ts: 1588688995113, + i: 8176, + p: "شپږ", + f: "shpuG", + g: "shpug", + e: "six", + c: "num.", + }, + out: { + inflections: { + masc: [ + [{ p: "شپږ", f: "shpuG" }], + [{ p: "شپږ", f: "shpuG" }], + [{ p: "شپږو", f: "shpúGo" }], + ], + fem: [ + [{ p: "شپږه", f: "shpúGa" }], + [{ p: "شپږې", f: "shpúGe" }], + [{ p: "شپږو", f: "shpúGo" }], + ], + }, + }, + }, + // without accents + { + in: { + ts: 1527812796, + i: 8574, + p: "ښه", + f: "xu", + g: "xu", + e: "good", + c: "adj.", + }, + out: { + inflections: { + masc: [ + [{ p: "ښه", f: "xu" }], + [{ p: "ښه", f: "xu" }], + [{ p: "ښو", f: "xo" }], + ], + fem: [ + [{ p: "ښه", f: "xa" }], + [{ p: "ښې", f: "xe" }], + [{ p: "ښو", f: "xo" }], + ], + }, + }, + }, + // adjective non-inflecting + { + in: { + ts: 1527812798, + p: "خفه", + f: "khufa", + g: "", + e: "sad, upset, angry; choked, suffocated", + c: "adj.", + i: 4631, + }, + out: false, + }, + { + in: { + ts: 1527814727, + p: "اجباري", + f: "ijbaaree", + g: "", + e: "compulsory, obligatory", + c: "adj.", + i: 167, + }, + out: false, + }, + // double adjective + { + in: { + ts: 123, + p: "ګډ وډ", + f: "guD wuD", + g: "guDwuD", + e: "mixed up", + c: "adj. doub.", + i: 1, + }, + out: { + inflections: { + masc: [ + [{ p: "ګډ وډ", f: "guD wuD" }], + [{ p: "ګډ وډ", f: "guD wuD" }], + [{ p: "ګډو وډو", f: "gúDo wúDo" }], + ], + fem: [ + [{ p: "ګډه وډه", f: "gúDa wúDa" }], + [{ p: "ګډې وډې", f: "gúDe wúDe" }], + [{ p: "ګډو وډو", f: "gúDo wúDo" }], + ], + }, + }, + }, ]; const nouns: { - in: T.DictionaryEntry, - out: T.InflectorOutput, + in: T.DictionaryEntry; + out: T.InflectorOutput; }[] = [ - // ## UNISEX - // Unisex noun irregular - { - in: { - ts: 1527812908, - p: "مېلمه", - f: "melmá", - e: "guest", - g: "", - c: "n. m. irreg. unisex", - i: 11244, - infap: "مېلمانه", - infaf: "melmaanu", - infbp: "مېلمن", - infbf: "melman", - }, - out: { - inflections: { - masc: [ - [{p: "مېلمه", f: "melmá"}], - [{p: "مېلمانه", f: "melmaanú"}], - [{p: "مېلمنو", f: "melmanó"}], - ], - fem: [ - [{p: "مېلمنه", f: "melmaná"}], - [{p: "مېلمنې", f: "melmané"}], - [{p: "مېلمنو", f: "melmanó"}], - ], - }, - }, + // ## UNISEX + // Unisex noun irregular + { + in: { + ts: 1527812908, + p: "مېلمه", + f: "melmá", + e: "guest", + g: "", + c: "n. m. irreg. unisex", + i: 11244, + infap: "مېلمانه", + infaf: "melmaanu", + infbp: "مېلمن", + infbf: "melman", }, - // Unisex noun ending with ی - { - in: { - ts: 1527814159, - p: "ملګری", - f: "malgúray", - g: "", - e: "friend, companion", - c: "n. m. unisex", - i: 10943, - }, - out: { - inflections: { - masc: [ - [{p: "ملګری", f: "malgúray"}], - [{p: "ملګري", f: "malgúree"}], - [{p: "ملګریو", f: "malgúriyo"}, {p: "ملګرو", f: "malgúro"}], - ], - fem: [ - [{p: "ملګرې", f: "malgúre"}], - [{p: "ملګرې", f: "malgúre"}], - [{p: "ملګرو", f: "malgúro"}], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "مېلمه", f: "melmá" }], + [{ p: "مېلمانه", f: "melmaanú" }], + [{ p: "مېلمنو", f: "melmanó" }], + ], + fem: [ + [{ p: "مېلمنه", f: "melmaná" }], + [{ p: "مېلمنې", f: "melmané" }], + [{ p: "مېلمنو", f: "melmanó" }], + ], + }, }, - // Unisex noun ending on ی with emphasis on the end - { - in: {"i":3319,"ts":1527816431,"p":"ترورزی","f":"trorzáy","g":"trorzay","e":"cousin (of paternal aunt)","c":"n. m. unisex","ppp":"ترورزامن","ppf":"trorzaamun"}, - out: { - inflections: { - masc: [ - [{p: "ترورزی", f: "trorzáy"}], - [{p: "ترورزي", f: "trorzée"}], - [{p: "ترورزیو", f: "trorziyo"}, {p: "ترورزو", f: "trorzó"}], - ], - fem: [ - [{p: "ترورزۍ", f: "trorzúy"}], - [{p: "ترورزۍ", f: "trorzúy"}], - [{p: "ترورزیو", f: "trorzúyo"}, {p: "ترورزو", f: "trorzó"}], - ], - }, - plural: { - masc: [ - [{ p: "ترورزامن", f: "trorzaamun" }], - [{ p: "ترورزامنو", f: "trorzaamuno" }], - ], - }, - }, + }, + // Unisex noun ending with ی + { + in: { + ts: 1527814159, + p: "ملګری", + f: "malgúray", + g: "", + e: "friend, companion", + c: "n. m. unisex", + i: 10943, }, - // Unisex noun ending with a consanant - { - in: { - ts: 1527820043, - p: "چرګ", - f: "churg", - g: "", - e: "rooster, cock; chicken, poultry", - c: "n. m. unisex anim.", - i: 4101, - }, - out: { - inflections: { - masc: [ - [{p: "چرګ", f: "churg"}], - [{p: "چرګ", f: "churg"}], - [{p: "چرګو", f: "chúrgo"}], - ], - fem: [ - [{p: "چرګه", f: "chúrga"}], - [{p: "چرګې", f: "chúrge"}], - [{p: "چرګو", f: "chúrgo"}], - ], - }, - plural: { - masc: [ - [{p: "چرګان", f: "churgáan"}], - [{p: "چرګانو", f: "churgáano"}], - ], - fem: [ - [{p: "چرګانې", f: "churgáane"}], - [{p: "چرګانو", f: "churgáano"}], - ], - }, - bundledPlural: { - masc: [ - [{ p: "چرګه", f: "chúrga" }], - [{ p: "چرګو", f: "chúrgo" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "ملګری", f: "malgúray" }], + [{ p: "ملګري", f: "malgúree" }], + [ + { p: "ملګریو", f: "malgúriyo" }, + { p: "ملګرو", f: "malgúro" }, + ], + ], + fem: [ + [{ p: "ملګرې", f: "malgúre" }], + [{ p: "ملګرې", f: "malgúre" }], + [ + { p: "ملګریو", f: "malgúriyo" }, + { p: "ملګرو", f: "malgúro" }, + ], + ], + }, }, - // with #3 pattern anim unisex - { - in: {"ts":1527820130,"i":2561,"p":"پلوی","f":"palawáy","g":"palaway","e":"adherent, supporter; the outside or further ox in a team of oxes grinding or threshing","c":"n. m. anim. unisex"}, - out: { - inflections: { - masc: [ - [{ p: "پلوی", f: "palawáy" }], - [{ p: "پلوي", f: "palawée" }], - [{ p: "پلویو", f: "palawiyo" }, { p: "پلوو", f: "palawó" }], - ], - fem: [ - [{ p: "پلوۍ", f: "palawúy" }], - [{ p: "پلوۍ", f: "palawúy" }], - [{ p: "پلویو", f: "palawúyo" }, { p: "پلوو", f: "palawó" }], - ], - }, - plural: { - masc: [ - [{ p: "پلویان", f: "palawiyáan" }], - [{ p: "پلویانو", f: "palawiyáano" }], - ], - fem: [ - [{ p: "پلویانې", f: "palawiyáane" }], - [{ p: "پلویانو", f: "palawiyáano" }], - ], - }, - }, + }, + // Unisex noun ending on ی with emphasis on the end + { + in: { + i: 3319, + ts: 1527816431, + p: "ترورزی", + f: "trorzáy", + g: "trorzay", + e: "cousin (of paternal aunt)", + c: "n. m. unisex", + ppp: "ترورزامن", + ppf: "trorzaamun", }, - // ## MASCULINE - // Masculine regular ending in ی - { - in: { - ts: 1527815251, - p: "سړی", - f: "saRáy", - g: "", - e: "man", - c: "n. m.", - i: 6750, - }, - out: { - inflections: { - masc: [ - [{p: "سړی", f: "saRáy"}], - [{p: "سړي", f: "saRée"}], - [{p: "سړیو", f: "saRiyo"}, {p: "سړو", f: "saRo"}], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "ترورزی", f: "trorzáy" }], + [{ p: "ترورزي", f: "trorzée" }], + [ + { p: "ترورزیو", f: "trorzíyo" }, + { p: "ترورزو", f: "trorzó" }, + ], + ], + fem: [ + [{ p: "ترورزۍ", f: "trorzúy" }], + [{ p: "ترورزۍ", f: "trorzúy" }], + [ + { p: "ترورزیو", f: "trorzíyo" }, + { p: "ترورزو", f: "trorzó" }, + ], + ], + }, + plural: { + masc: [ + [{ p: "ترورزامن", f: "trorzaamun" }], + [{ p: "ترورزامنو", f: "trorzaamuno" }], + ], + }, }, - // Masculine #3 anim - // TODO: Also do Fem #3 anim! - { - in: {"ts":1527819801,"i":8082,"p":"سیلانی","f":"saylaanáy","g":"saylaanay","e":"tourist, sightseer, visitor","c":"n. m. anim."}, - out: { - inflections: { - masc: [ - [{ p: "سیلانی", f: "saylaanáy" }], - [{ p: "سیلاني", f: "saylaanée" }], - [{ p: "سیلانیو", f: "saylaaniyo" }, { p: "سیلانو", f: "saylaano" }], - ], - }, - plural: { - masc: [ - [{ p: "سیلانیان", f: "saylaaniyáan" }], - [{ p: "سیلانیانو", f: "saylaaniyáano" }], - ], - }, - }, + }, + // Unisex noun ending with a consanant + { + in: { + ts: 1527820043, + p: "چرګ", + f: "churg", + g: "", + e: "rooster, cock; chicken, poultry", + c: "n. m. unisex anim.", + i: 4101, }, - // Masculine regular ending in ی with emphasis on end - { - in: { - ts: 1527818511, - p: "ترېلی", - f: "treláy", - g: "", - e: "pool, reservoir", - c: "n. m.", - i: 2931, - }, - out: { - inflections: { - masc: [ - [{p: "ترېلی", f: "treláy"}], - [{p: "ترېلي", f: "trelée"}], - [{p: "ترېلیو", f: "treliyo"}, {p: "ترېلو", f: "trelo"}], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "چرګ", f: "churg" }], + [{ p: "چرګ", f: "churg" }], + [{ p: "چرګو", f: "chúrgo" }], + ], + fem: [ + [{ p: "چرګه", f: "chúrga" }], + [{ p: "چرګې", f: "chúrge" }], + [{ p: "چرګو", f: "chúrgo" }], + ], + }, + plural: { + masc: [ + [{ p: "چرګان", f: "churgáan" }], + [{ p: "چرګانو", f: "churgáano" }], + ], + fem: [ + [{ p: "چرګانې", f: "churgáane" }], + [{ p: "چرګانو", f: "churgáano" }], + ], + }, + bundledPlural: { + masc: [[{ p: "چرګه", f: "chúrga" }], [{ p: "چرګو", f: "chúrgo" }]], + }, }, - // Masculine ending in tob - { - in: { - i: 11998, - ts: 1586760783536, - p: "مشرتوب", - f: "mushurtob", - g: "", - e: "leadership, authority, presidency", - c: "n. m.", - }, - out: { - inflections: { - masc: [ - [{p: "مشرتوب", f: "mushurtob"}], - [{p: "مشرتابه", f: "mushurtaabu"}], - [{p: "مشرتبو", f: "mushurtabo"}], - ], - }, - }, + }, + // with #3 pattern anim unisex + { + in: { + ts: 1527820130, + i: 2561, + p: "پلوی", + f: "palawáy", + g: "palaway", + e: "adherent, supporter; the outside or further ox in a team of oxes grinding or threshing", + c: "n. m. anim. unisex", }, - // Masculine irregular - { - in: {"ts":1527813809,"i":11318,"p":"لمونځ","f":"lamoondz","g":"lamoondz","e":"Muslim ritual prayers (namaz, salah, salat)","c":"n. m. irreg.","infap":"لمانځه","infaf":"lamaandzu","infbp":"لمنځ","infbf":"lamandz","ppp":"لمونځونه","ppf":"lamoondzóona"}, - out: { - inflections: { - masc: [ - [{p: "لمونځ", f: "lamoondz"}], - [{p: "لمانځه", f: "lamaandzú"}], - [{p: "لمنځو", f: "lamandzó"}], - ], - }, - plural: { - masc: [ - [{ p: "لمونځونه", f: "lamoondzóona" }], - [{ p: "لمونځونو", f: "lamoondzóono" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "پلوی", f: "palawáy" }], + [{ p: "پلوي", f: "palawée" }], + [ + { p: "پلویو", f: "palawíyo" }, + { p: "پلوو", f: "palawó" }, + ], + ], + fem: [ + [{ p: "پلوۍ", f: "palawúy" }], + [{ p: "پلوۍ", f: "palawúy" }], + [ + { p: "پلویو", f: "palawíyo" }, + { p: "پلوو", f: "palawó" }, + ], + ], + }, + plural: { + masc: [ + [{ p: "پلویان", f: "palawiyáan" }], + [{ p: "پلویانو", f: "palawiyáano" }], + ], + fem: [ + [{ p: "پلویانې", f: "palawiyáane" }], + [{ p: "پلویانو", f: "palawiyáano" }], + ], + }, }, - // Masculine short squish - { - in: {"i":9049,"ts":1527813593,"p":"غر","f":"ghar, ghur","g":"ghar,ghur","e":"mountain","c":"n. m.","infap":"غره","infaf":"ghru","infbp":"غر","infbf":"ghr"}, - out: { - inflections: { - masc: [ - [{ p: "غر", f: "ghar" }], - [{ p: "غره", f: "ghru" }], - [{ p: "غرو", f: "ghro" }], - ], - }, - plural: { - masc: [ - [{ p: "غرونه", f: "ghróona" }], - [{ p: "غرونو", f: "ghróono" }], - ], - }, - bundledPlural: { - masc: [ - [{ p: "غره", f: "ghára" }], - [{ p: "غرو", f: "gháro" }], - ], - } - }, + }, + // ## MASCULINE + // Masculine regular ending in ی + { + in: { + ts: 1527815251, + p: "سړی", + f: "saRáy", + g: "", + e: "man", + c: "n. m.", + i: 6750, }, - // should NOT do the oona plural with the squish nouns, when thay're animate - { - in: {"i":5465,"ts":1527812802,"p":"خر","f":"khur","g":"khur","e":"donkay","c":"n. m. anim. unisex irreg.","infap":"خره","infaf":"khru","infbp":"خر","infbf":"khr"}, - out: { - inflections: { - // TODO: use smarter system using new isType5Entry predicates, to allow for not using the redundant one syllable accents with these - masc: [ - [{ p: "خر", f: "khur" }], - [{ p: "خره", f: "khru" }], - [{ p: "خرو", f: "khro" }], - ], - fem: [ - [{ p: "خره", f: "khra" }], - [{ p: "خرې", f: "khre" }], - [{ p: "خرو", f: "khro" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "سړی", f: "saRáy" }], + [{ p: "سړي", f: "saRée" }], + [ + { p: "سړیو", f: "saRíyo" }, + { p: "سړو", f: "saRó" }, + ], + ], + }, }, - // masc plural - { - in: {"i":6063,"ts":1527815739,"p":"دروغ","f":"drogh, darwagh","g":"drogh,darwagh","e":"lie, falsehood","c":"n. m. pl."}, - out: { - plural: { - masc: [ - [{ p: "دروغ", f: "drogh" }], - [{ p: "دروغو", f: "drogho" }], - ], - }, - }, + }, + // Masculine #3 anim + // TODO: Also do Fem #3 anim! + { + in: { + ts: 1527819801, + i: 8082, + p: "سیلانی", + f: "saylaanáy", + g: "saylaanay", + e: "tourist, sightseer, visitor", + c: "n. m. anim.", }, - { - in: {"i":9191,"ts":1527817330,"p":"غنم","f":"ghanúm","g":"ghanum","e":"wheat","c":"n. m. pl."}, - out: { - plural: { - masc: [ - [{ p: "غنم", f: "ghanúm" }], - [{ p: "غنمو", f: "ghanúmo" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "سیلانی", f: "saylaanáy" }], + [{ p: "سیلاني", f: "saylaanée" }], + [ + { p: "سیلانیو", f: "saylaaníyo" }, + { p: "سیلانو", f: "saylaanó" }, + ], + ], + }, + plural: { + masc: [ + [{ p: "سیلانیان", f: "saylaaniyáan" }], + [{ p: "سیلانیانو", f: "saylaaniyáano" }], + ], + }, }, - { - in: {"ts":1527813508,"i":7058,"p":"زړه","f":"zRu","g":"zRu","e":"heart","c":"n. m.","noInf":true}, - out: { - plural: { - masc: [ - [{ p: "زړونه", f: "zRóona" }], - [{ p: "زړونو", f: "zRóono" }], - ], - }, - }, + }, + // Masculine regular ending in ی with emphasis on end + { + in: { + ts: 1527818511, + p: "ترېلی", + f: "treláy", + g: "", + e: "pool, reservoir", + c: "n. m.", + i: 2931, }, - // fem plural - { - in: {"ts":1527815129,"i":1013,"p":"اوبه","f":"oobú","g":"oobu","e":"water","c":"n. f. pl."}, - out: { - plural: { - fem: [ - [{ p: "اوبه", f: "oobú" }], - [{ p: "اوبو", f: "oobó" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "ترېلی", f: "treláy" }], + [{ p: "ترېلي", f: "trelée" }], + [ + { p: "ترېلیو", f: "trelíyo" }, + { p: "ترېلو", f: "treló" }, + ], + ], + }, }, - { - in: {"ts":1527815008,"i":8421,"p":"شودې","f":"shoodé","g":"shoode","e":"milk","c":"n. f. pl."}, - out: { - plural: { - fem: [ - [{ p: "شودې", f: "shoodé" }], - [{ p: "شودو", f: "shoodó" }], - ] - } - } + }, + // Masculine ending in tob + { + in: { + i: 11998, + ts: 1586760783536, + p: "مشرتوب", + f: "mushurtob", + g: "", + e: "leadership, authority, presidency", + c: "n. m.", }, - { - in: {"ts":1527815008,"i":8421,"p":"شودې","f":"shoode","g":"shoode","e":"milk","c":"n. f. pl."}, - out: { - plural: { - fem: [ - [{ p: "شودې", f: "shoode" }], - [{ p: "شودو", f: "shoodo" }], - ] - } - } + out: { + inflections: { + masc: [ + [{ p: "مشرتوب", f: "mushurtob" }], + [{ p: "مشرتابه", f: "mushurtaabu" }], + [{ p: "مشرتبو", f: "mushurtabo" }], + ], + }, }, - // masculine ending in a vowel - { - in: {"ts":1527815484,"i":13069,"p":"ملا","f":"mUllaa","g":"mUllaa","e":"mullah, priest","r":4,"c":"n. m."}, - out: { - plural: { - masc: [ - [ - { p: "ملایان", f: "mUllaayáan" }, - { p: "ملاګان", f: "mUllaagáan" }, - ], - [ - { p: "ملایانو", f: "mUllaayáano" }, - { p: "ملاګانو", f: "mUllaagáano" }, - ], - ], - }, - }, + }, + // Masculine irregular + { + in: { + ts: 1527813809, + i: 11318, + p: "لمونځ", + f: "lamoondz", + g: "lamoondz", + e: "Muslim ritual prayers (namaz, salah, salat)", + c: "n. m. irreg.", + infap: "لمانځه", + infaf: "lamaandzu", + infbp: "لمنځ", + infbf: "lamandz", + ppp: "لمونځونه", + ppf: "lamoondzóona", }, - // TODO: uncomment this - // { - // in: {"ts":1527812591,"i":6286,"p":"دواړه","f":"dwáaRu","g":"dwaaRu","e":"both","c":"n. m. pl. unisex / adj."}, - // out: { - // plural: { - // masc: [ - // [{ p: "دواړه", f: "dwáaRu" }], - // [{ p: "دواړو", f: "dwáaRo" }], - // ], - // fem: [ - // [{ p: "دواړې", f: "dwáaRe" }], - // [{ p: "دواړو", f: "dwáaRo" }], - // ], - // } - // } - // }, - // Masculine non-inflecting - { - in: { - ts: 1527812817, - p: "کتاب", - f: "kitaab", - g: "", - e: "book", - c: "n. m.", - i: 8640, - }, - out: { - plural: { - masc: [ - [{ p: "کتابونه", f: "kitaabóona" }], - [{ p: "کتابونو", f: "kitaabóono" }], - ], - }, - bundledPlural: { - masc: [ - [{ p: "کتابه", f: "kitaaba" }], - [{ p: "کتابو", f: "kitaabo" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "لمونځ", f: "lamoondz" }], + [{ p: "لمانځه", f: "lamaandzú" }], + [{ p: "لمنځو", f: "lamandzó" }], + ], + }, + plural: { + masc: [ + [{ p: "لمونځونه", f: "lamoondzóona" }], + [{ p: "لمونځونو", f: "lamoondzóono" }], + ], + }, }, - { - in: {"ts":1527816746,"i":9017,"p":"غاښ","f":"ghaax","g":"ghaax","e":"tooth","c":"n. m.","ec":"tooth","ep":"teeth"}, - out: { - plural: { - masc: [ - [{ p: "غاښونه", f: "ghaaxóona" }], - [{ p: "غاښونو", f: "ghaaxóono" }], - ], - }, - bundledPlural: { - masc: [ - [{ p: "غاښه", f: "gháaxa" }], - [{ p: "غاښو", f: "gháaxo" }], - ], - }, - }, + }, + // Masculine short squish + { + in: { + i: 9049, + ts: 1527813593, + p: "غر", + f: "ghar, ghur", + g: "ghar,ghur", + e: "mountain", + c: "n. m.", + infap: "غره", + infaf: "ghru", + infbp: "غر", + infbf: "ghr", }, - { - in: {"ts":1527815394,"i":13991,"p":"واده","f":"waadú","g":"waadu","e":"wedding, marriage","c":"n. m.","ppp":"ودونه","ppf":"wadóona"}, - out: { - plural: { - masc: [ - [{ p: "ودونه", f: "wadóona" }], - [{ p: "ودونو", f: "wadóono" }], - ], - }, - }, + out: { + inflections: { + masc: [ + [{ p: "غر", f: "ghar" }], + [{ p: "غره", f: "ghru" }], + [{ p: "غرو", f: "ghro" }], + ], + }, + plural: { + masc: [[{ p: "غرونه", f: "ghróona" }], [{ p: "غرونو", f: "ghróono" }]], + }, + bundledPlural: { + masc: [[{ p: "غره", f: "ghára" }], [{ p: "غرو", f: "gháro" }]], + }, }, - { - in: {"ts":1527817768,"i":9791,"p":"کارګه","f":"kaargu","g":"kaargu","e":"raven, crow","c":"n. m. anim."}, - out: { - plural: { - masc: [ - [{ p: "کارګان", f: "kaargáan" }], - [{ p: "کارګانو", f: "kaargáano" }], - ], - }, - }, + }, + // should NOT do the oona plural with the squish nouns, when thay're animate + { + in: { + i: 5465, + ts: 1527812802, + p: "خر", + f: "khur", + g: "khur", + e: "donkay", + c: "n. m. anim. unisex irreg.", + infap: "خره", + infaf: "khru", + infbp: "خر", + infbf: "khr", }, - { - in: {"i":11352,"ts":1527813995,"p":"لو","f":"law, lau","g":"law,lau","e":"harvesting, reaping, hay-making; mowed, reaped, harvested","c":"n. m."}, - out: { - plural: { - masc: [ - [{ p: "لوونه", f: "lawóona" }], - [{ p: "لوونو", f: "lawóono" }], - ], - }, - bundledPlural: { - masc: [ - [{ p: "لوه", f: "láwa" }], - [{ p: "لوو", f: "láwo" }], - ], - } - }, + out: { + inflections: { + // TODO: use smarter system using new isType5Entry predicates, to allow for not using the redundant one syllable accents with these + masc: [ + [{ p: "خر", f: "khur" }], + [{ p: "خره", f: "khru" }], + [{ p: "خرو", f: "khro" }], + ], + fem: [ + [{ p: "خره", f: "khra" }], + [{ p: "خرې", f: "khre" }], + [{ p: "خرو", f: "khro" }], + ], + }, }, - // ## FEMININE - // Feminine regular ending in ه - { - in: { - ts: 1527812797, - p: "ښځه", - f: "xudza", - g: "", - e: "woman, wife", - c: "n. f.", - i: 7444, - }, - out: { - inflections: { - fem: [ - [{p: "ښځه", f: "xudza"}], - [{p: "ښځې", f: "xudze"}], - [{p: "ښځو", f: "xudzo"}], - ], - }, - }, + }, + // masc plural + { + in: { + i: 6063, + ts: 1527815739, + p: "دروغ", + f: "drogh, darwagh", + g: "drogh,darwagh", + e: "lie, falsehood", + c: "n. m. pl.", }, - { - in: { - ts: 1527821380, - p: "اره", - f: "ará", - g: "", - e: "saw (the tool)", - c: "n. f.", - i: 365, - }, - out: { - inflections: { - fem: [ - [{p: "اره", f: "ará"}], - [{p: "ارې", f: "aré"}], - [{p: "ارو", f: "aró"}], - ], - }, - }, + out: { + plural: { + masc: [[{ p: "دروغ", f: "drogh" }], [{ p: "دروغو", f: "drogho" }]], + }, }, - // Feminine regular ending in ع - a' - { - in: { - ts: 1527820693, - p: "مرجع", - f: "marja'", - g: "", - e: "reference, authority, body, place to go (for help, shelter, etc.)", - c: "n. f.", - i: 10661, - app: "مراجع", - apf: "maraají'", - }, - out: { - inflections: { - fem: [ - [{p: "مرجع", f: "marja'"}], - [{p: "مرجعې", f: "marje"}], - [{p: "مرجعو", f: "marjo"}], - ], - }, - arabicPlural: { - fem: [ - [{ p: "مراجع", f: "maraají'" }], - [{ p: "مراجو", f: "maraajó" }], - ], - }, - }, + }, + { + in: { + i: 9191, + ts: 1527817330, + p: "غنم", + f: "ghanúm", + g: "ghanum", + e: "wheat", + c: "n. m. pl.", }, - { - in: { - ts: 1527820212, - p: "منبع", - f: "manbá", - g: "", - e: "source, origin, resource, cause", - c: "n. f.", - i: 11201, - app: "منابع", - apf: "manaabí", - }, - out: { - inflections: { - fem: [ - [{p: "منبع", f: "manbá"}], - [{p: "منبعې", f: "manbé"}], - [{p: "منبعو", f: "manbó"}], - ], - }, - arabicPlural: { - fem: [ - [{ p: "منابع", f: "manaabí" }], - [{ p: "منابو", f: "manaabó" }], - ], - }, - }, + out: { + plural: { + masc: [[{ p: "غنم", f: "ghanúm" }], [{ p: "غنمو", f: "ghanúmo" }]], + }, }, - { - in: {"ts":1527823093,"i":13207,"p":"نبي","f":"nabee","g":"nabee","e":"prophet","c":"n. m. anim.","app":"انبیا","apf":"ambiyáa"}, - out: { - plural: { - masc: [ - [{ p: "نبیان", f: "nabiyáan" }], - [{ p: "نبیانو", f: "nabiyáano" }], - ], - }, - arabicPlural: { - masc: [ - [{ p: "انبیا", f: "ambiyáa" }], - [{ p: "انبیاوو", f: "ambiyáawo" }], - ], - }, - } + }, + { + in: { + ts: 1527813508, + i: 7058, + p: "زړه", + f: "zRu", + g: "zRu", + e: "heart", + c: "n. m.", + noInf: true, }, - { - in: {"ts":1527819536,"i":3063,"p":"تبع","f":"taba'","g":"taba","e":"follower, adherent, supporter, subject, national","c":"n. m. unisex anim.","app":"اتباع","apf":"atbaa"}, - out: { - arabicPlural: { - masc: [ - [{ p: "اتباع", f: "atbaa" }], - [{ p: "اتباعوو", f: "atbaawo" }], - ], - }, - }, + out: { + plural: { + masc: [[{ p: "زړونه", f: "zRóona" }], [{ p: "زړونو", f: "zRóono" }]], + }, }, - { - in: {"ts":1527816113,"i":3072,"p":"تبلیغ","f":"tableegh","g":"tableegh","e":"propaganda; preaching, evangelism","c":"n. m.","app":"تبلیغات","apf":"tableegháat"}, - out: { - plural: { - masc: [ - [{ p: "تبلیغونه", f: "tableeghóona" }], - [{ p: "تبلیغونو", f: "tableeghóono" }], - ], - }, - bundledPlural: { - masc: [ - [{ p: "تبلیغه", f: "tableegha" }], - [{ p: "تبلیغو", f: "tableegho" }], - ], - }, - arabicPlural: { - masc: [ - [{ p: "تبلیغات", f: "tableegháat" }], - [{ p: "تبلیغاتو", f: "tableegháato" }], - ], - }, - }, + }, + // fem plural + { + in: { + ts: 1527815129, + i: 1013, + p: "اوبه", + f: "oobú", + g: "oobu", + e: "water", + c: "n. f. pl.", }, - { - in: {"ts":1527815921,"i":3844,"p":"توقع","f":"tawaqqU","g":"tawakkU","e":"expectation, hope, anticipation","c":"n. f.","app":"توقعات","apf":"tawaqqUaat"}, - out: { - arabicPlural: { - masc: [ - [{ p: "توقعات", f: "tawaqqUaat" }], - [{ p: "توقعاتو", f: "tawaqqUaato" }], - ], - }, - }, + out: { + plural: { + fem: [[{ p: "اوبه", f: "oobú" }], [{ p: "اوبو", f: "oobó" }]], + }, }, - { - in: {"ts":1527815820,"i":5177,"p":"حادثه","f":"haadisá","g":"haadisa","e":"accident, event","c":"n. f.","app":"حوادث, حادثات","apf":"hawaadis, haadisaat"}, - out: { - inflections: { - fem: [ - [{ p: "حادثه", f: "haadisá" }], - [{ p: "حادثې", f: "haadisé" }], - [{ p: "حادثو", f: "haadisó" }], - ], - }, - arabicPlural: { - masc: [ - [{ p: "حوادث", f: "hawaadis"}, { p: "حادثات", f: "haadisaat" }], - [{ p: "حوادثو", f: "hawaadiso"}, { p: "حادثاتو", f: "haadisaato" }], - ], - }, - }, + }, + { + in: { + ts: 1527815008, + i: 8421, + p: "شودې", + f: "shoodé", + g: "shoode", + e: "milk", + c: "n. f. pl.", }, - { - in: {"ts":1527815329,"i":3097,"p":"تجربه","f":"tajrabá, tajribá","g":"tajraba,tajriba","e":"experience","c":"n. f.","app":"تجارب","apf":"tajaarib"}, - out: { - inflections: { - fem: [ - [{ p: "تجربه", f: "tajrabá" }], - [{ p: "تجربې", f: "tajrabé" }], - [{ p: "تجربو", f: "tajrabó" }], - ], - }, - arabicPlural: { - masc: [ - [{ p: "تجارب", f: "tajaarib"}], - [{ p: "تجاربو", f: "tajaaribo"}], - ], - }, - }, + out: { + plural: { + fem: [[{ p: "شودې", f: "shoodé" }], [{ p: "شودو", f: "shoodó" }]], + }, }, - { - in: {"ts":1527814069,"i":5194,"p":"حال","f":"haal","g":"haal","e":"state, condition, circumstance","c":"n. m.","app":"احوال","apf":"ahwáal"}, - out: { - plural: { - masc: [ - [{ p: "حالونه", f: "haalóona" }], - [{ p: "حالونو", f: "haalóono" }], - ], - }, - bundledPlural: { - masc: [ - [{ p: "حاله", f: "háala" }], - [{ p: "حالو", f: "háalo" }], - ], - }, - arabicPlural: { - masc: [ - [{ p: "احوال", f: "ahwáal" }], - [{ p: "احوالو", f: "ahwáalo" }], - ], - }, - }, + }, + { + in: { + ts: 1527815008, + i: 8421, + p: "شودې", + f: "shoode", + g: "shoode", + e: "milk", + c: "n. f. pl.", }, - { - in: {"ts":1527819536,"i":3063,"p":"تبع","f":"taba'","g":"taba","e":"follower, adherent, supporter, subject, national","c":"n. m. unisex anim.","app":"اتباع","apf":"atbáa'"}, - out: { - arabicPlural: { - masc: [ - [{ p: "اتباع", f: "atbáa'" }], - [{ p: "اتباعوو", f: "atbáawo" }], - ], - }, - }, + out: { + plural: { + fem: [[{ p: "شودې", f: "shoode" }], [{ p: "شودو", f: "shoodo" }]], + }, }, - // Feminine regular ending in ح - a - { - in: { - ts: 1527815506, - p: "ذبح", - f: "zabha", - g: "", - e: "slaughter, killing, butchering", - c: "n. f.", - i: 5813, - }, - out: { - inflections: { - fem: [ - [{p: "ذبح", f: "zabha"}], - [{p: "ذبحې", f: "zabhe"}], - [{p: "ذبحو", f: "zabho"}], - ], - }, - }, + }, + // masculine ending in a vowel + { + in: { + ts: 1527815484, + i: 13069, + p: "ملا", + f: "mUllaa", + g: "mUllaa", + e: "mullah, priest", + r: 4, + c: "n. m.", }, - // Feminine inanimate regular with missing ه - { - in: { - ts: 1527814150, - p: "لار", - f: "laar", - g: "", - e: "road, way, path", - c: "n. f.", - i: 9593, - }, - out: { - inflections: { - fem: [ - [{p: "لار", f: "laar"}], - [{p: "لارې", f: "láare"}], - [{p: "لارو", f: "láaro"}], - ], - }, - }, + out: { + plural: { + masc: [ + [ + { p: "ملایان", f: "mUllaayáan" }, + { p: "ملاګان", f: "mUllaagáan" }, + ], + [ + { p: "ملایانو", f: "mUllaayáano" }, + { p: "ملاګانو", f: "mUllaagáano" }, + ], + ], + }, }, - // Feminine animate ending in a consonant - // TODO: ALLOW FOR MULTIPLE PLURAL POSSIBILITIES میندې, میېنې etc. - { - in: { - ts: 1527812928, - p: "مور", - f: "mor", - g: "", - e: "mother, mom", - c: "n. f. anim.", - ppp: "میندې", - ppf: "maynde", - i: 11113, - }, - out: { - plural: { - fem: [ - [{ p: "میندې", f: "maynde" }], - [{ p: "میندو", f: "mayndo" }], - ], - }, - }, + }, + // TODO: uncomment this + // { + // in: {"ts":1527812591,"i":6286,"p":"دواړه","f":"dwáaRu","g":"dwaaRu","e":"both","c":"n. m. pl. unisex / adj."}, + // out: { + // plural: { + // masc: [ + // [{ p: "دواړه", f: "dwáaRu" }], + // [{ p: "دواړو", f: "dwáaRo" }], + // ], + // fem: [ + // [{ p: "دواړې", f: "dwáaRe" }], + // [{ p: "دواړو", f: "dwáaRo" }], + // ], + // } + // } + // }, + // Masculine non-inflecting + { + in: { + ts: 1527812817, + p: "کتاب", + f: "kitaab", + g: "", + e: "book", + c: "n. m.", + i: 8640, }, - // Feminine regular inanimate ending in ي - { - in: { - ts: 1527811877, - p: "دوستي", - f: "dostee", - g: "", - e: "friendship", - c: "n. f.", - i: 5503, - }, - out: { - plural: { - fem: [ - [{ p: "دوستیانې", f: "dostiyáane" }, { p: "دوستیګانې", f: "dosteegáane" }], - [{ p: "دوستیانو", f: "dostiyáano" }, { p: "دوستیګانو", f: "dosteegáano" }], - ], - }, - inflections: { - fem: [ - [{p: "دوستي", f: "dostee"}], - [{p: "دوستۍ", f: "dostúy"}], - [{p: "دوستیو", f: "dostúyo"}], - ], - }, - }, + out: { + plural: { + masc: [ + [{ p: "کتابونه", f: "kitaabóona" }], + [{ p: "کتابونو", f: "kitaabóono" }], + ], + }, + bundledPlural: { + masc: [[{ p: "کتابه", f: "kitaaba" }], [{ p: "کتابو", f: "kitaabo" }]], + }, }, - // Feminine regular ending in ۍ - { - in: { - ts: 1527814203, - p: "کرسۍ", - f: "kUrsuy", - g: "", - e: "chair, seat, stool", - c: "n. f.", - i: 8718, - }, - out: { - inflections: { - fem: [ - [{p: "کرسۍ", f: "kUrsúy"}], - [{p: "کرسۍ", f: "kUrsúy"}], - [{p: "کرسیو", f: "kUrsúyo"}, { p: "کرسو", f: "kUrso"}], - ], - }, - }, + }, + { + in: { + ts: 1527816746, + i: 9017, + p: "غاښ", + f: "ghaax", + g: "ghaax", + e: "tooth", + c: "n. m.", + ec: "tooth", + ep: "teeth", }, - // Feminine regular ending in ا - { - in: { - ts: 1527812456, - p: "اړتیا", - f: "aRtiyáa, aRtyáa", - g: "", - e: "need, necessity", - c: "n. f.", - i: 376, - }, - out: { - plural: { - fem: [ - [{p: "اړتیاوې", f: "aRtiyáawe"}, { p: "اړتیاګانې", f:"aRtiyaagáane"}], - [{p: "اړتیاوو", f: "aRtiyáawo"}, { p: "اړتیاګانو", f:"aRtiyaagáano"}], - ], - }, - }, + out: { + plural: { + masc: [ + [{ p: "غاښونه", f: "ghaaxóona" }], + [{ p: "غاښونو", f: "ghaaxóono" }], + ], + }, + bundledPlural: { + masc: [[{ p: "غاښه", f: "gháaxa" }], [{ p: "غاښو", f: "gháaxo" }]], + }, }, - // Feminine regular ending in و - { - in: {"i":2899,"ts":1527815163,"p":"پیشو","f":"peeshó","g":"peesho","e":"cat","c":"n. f. anim."}, - out: { - plural: { - fem: [ - [{ p: "پیشووې", f: "peeshówe" }, { p: "پیشوګانې", f: "peeshogáane" }], - [{ p: "پیشووو", f: "peeshówo" }, { p: "پیشوګانو", f: "peeshogáano" }], - ], - }, - }, + }, + { + in: { + ts: 1527815394, + i: 13991, + p: "واده", + f: "waadú", + g: "waadu", + e: "wedding, marriage", + c: "n. m.", + ppp: "ودونه", + ppf: "wadóona", }, - // Feminine regular ending in اع - { - in: { - ts: 1527821388, - p: "وداع", - f: "widáa'", - g: "", - e: "farewell, goodbye", - c: "n. f.", - i: 12205, - }, - out: { - plural: { - fem: [ - [{p: "وداع وې", f: "widáa we"}, {p: "وداع ګانې", f: "widaa gáane"}], - [{p: "وداع وو", f: "widáa wo"}, {p: "وداع ګانو", f: "widaa gáano"}], - ], - }, - }, + out: { + plural: { + masc: [[{ p: "ودونه", f: "wadóona" }], [{ p: "ودونو", f: "wadóono" }]], + }, }, - // with variations on Pashto plural - { - in: {"ts":1527815268,"i":8475,"p":"شی","f":"shay","g":"shay","ppp":"شیان، شیونه", "ppf": "shayáan, shayóona","e":"thing","c":"n. m."}, - out: { - inflections: { - masc: [ - [{ p: "شی", f: "shay" }], - [{ p: "شي", f: "shee" }], - [{ p: "شیو", f: "shiyo" }, { p: "شو", f: "sho" }], - ], - }, - plural: { - masc: [ - [{ p: "شیان", f: "shayáan" }, { p: "شیونه", f: "shayóona" }], - [{ p: "شیانو", f: "shayáano" }, { p: "شیونو", f: "shayóono" }], - ], - }, - }, + }, + { + in: { + ts: 1527817768, + i: 9791, + p: "کارګه", + f: "kaargu", + g: "kaargu", + e: "raven, crow", + c: "n. m. anim.", }, - // TODO: Plaar plaroona paaraan - wrooNa - // Word with no inflections - { - in: { - ts: 1527815402, - p: "وړ", - f: "waR", - g: "", - e: "worthy of, deserving, -able", - c: "suff. / adj.", - i: 12045, - noInf: true, - }, - out: false, + out: { + plural: { + masc: [ + [{ p: "کارګان", f: "kaargáan" }], + [{ p: "کارګانو", f: "kaargáano" }], + ], + }, }, - { - in: {"ts":1610795367898,"i":6978,"p":"رشوت خور","f":"rishwat khór","g":"rishwatkhor","e":"bribe-taker, corrupt","r":4,"c":"n. m. anim. unisex / adj.","infap":"رشوت خواره","infaf":"rishwat khwaaru","infbp":"رشوت خور","infbf":"rishwat khwar"}, - out: { - inflections: { - masc: [ - [{p: "رشوت خور", f: "rishwat khór"}], - [{p: "رشوت خواره", f: "rishwat khwaarú"}], - [{p: "رشوت خورو", f: "rishwat khwaró"}], - ], - fem: [ - [{p: "رشوت خوره", f: "rishwat khwará"}], - [{p: "رشوت خورې", f: "rishwat khwaré"}], - [{p: "رشوت خورو", f: "rishwat khwaró"}], - ], - }, - }, + }, + { + in: { + i: 11352, + ts: 1527813995, + p: "لو", + f: "law, lau", + g: "law,lau", + e: "harvesting, reaping, hay-making; mowed, reaped, harvested", + c: "n. m.", }, + out: { + plural: { + masc: [[{ p: "لوونه", f: "lawóona" }], [{ p: "لوونو", f: "lawóono" }]], + }, + bundledPlural: { + masc: [[{ p: "لوه", f: "láwa" }], [{ p: "لوو", f: "láwo" }]], + }, + }, + }, + // ## FEMININE + // Feminine regular ending in ه + { + in: { + ts: 1527812797, + p: "ښځه", + f: "xudza", + g: "", + e: "woman, wife", + c: "n. f.", + i: 7444, + }, + out: { + inflections: { + fem: [ + [{ p: "ښځه", f: "xudza" }], + [{ p: "ښځې", f: "xudze" }], + [{ p: "ښځو", f: "xudzo" }], + ], + }, + }, + }, + { + in: { + ts: 1527821380, + p: "اره", + f: "ará", + g: "", + e: "saw (the tool)", + c: "n. f.", + i: 365, + }, + out: { + inflections: { + fem: [ + [{ p: "اره", f: "ará" }], + [{ p: "ارې", f: "aré" }], + [{ p: "ارو", f: "aró" }], + ], + }, + }, + }, + // Feminine regular ending in ع - a' + { + in: { + ts: 1527820693, + p: "مرجع", + f: "marja'", + g: "", + e: "reference, authority, body, place to go (for help, shelter, etc.)", + c: "n. f.", + i: 10661, + app: "مراجع", + apf: "maraají'", + }, + out: { + inflections: { + fem: [ + [{ p: "مرجع", f: "marja'" }], + [{ p: "مرجعې", f: "marje" }], + [{ p: "مرجعو", f: "marjo" }], + ], + }, + arabicPlural: { + fem: [[{ p: "مراجع", f: "maraají'" }], [{ p: "مراجو", f: "maraajó" }]], + }, + }, + }, + { + in: { + ts: 1527820212, + p: "منبع", + f: "manbá", + g: "", + e: "source, origin, resource, cause", + c: "n. f.", + i: 11201, + app: "منابع", + apf: "manaabí", + }, + out: { + inflections: { + fem: [ + [{ p: "منبع", f: "manbá" }], + [{ p: "منبعې", f: "manbé" }], + [{ p: "منبعو", f: "manbó" }], + ], + }, + arabicPlural: { + fem: [[{ p: "منابع", f: "manaabí" }], [{ p: "منابو", f: "manaabó" }]], + }, + }, + }, + { + in: { + ts: 1527823093, + i: 13207, + p: "نبي", + f: "nabee", + g: "nabee", + e: "prophet", + c: "n. m. anim.", + app: "انبیا", + apf: "ambiyáa", + }, + out: { + plural: { + masc: [ + [{ p: "نبیان", f: "nabiyáan" }], + [{ p: "نبیانو", f: "nabiyáano" }], + ], + }, + arabicPlural: { + masc: [ + [{ p: "انبیا", f: "ambiyáa" }], + [{ p: "انبیاوو", f: "ambiyáawo" }], + ], + }, + }, + }, + { + in: { + ts: 1527819536, + i: 3063, + p: "تبع", + f: "taba'", + g: "taba", + e: "follower, adherent, supporter, subject, national", + c: "n. m. unisex anim.", + app: "اتباع", + apf: "atbaa", + }, + out: { + arabicPlural: { + masc: [[{ p: "اتباع", f: "atbaa" }], [{ p: "اتباعوو", f: "atbaawo" }]], + }, + }, + }, + { + in: { + ts: 1527816113, + i: 3072, + p: "تبلیغ", + f: "tableegh", + g: "tableegh", + e: "propaganda; preaching, evangelism", + c: "n. m.", + app: "تبلیغات", + apf: "tableegháat", + }, + out: { + plural: { + masc: [ + [{ p: "تبلیغونه", f: "tableeghóona" }], + [{ p: "تبلیغونو", f: "tableeghóono" }], + ], + }, + bundledPlural: { + masc: [ + [{ p: "تبلیغه", f: "tableegha" }], + [{ p: "تبلیغو", f: "tableegho" }], + ], + }, + arabicPlural: { + masc: [ + [{ p: "تبلیغات", f: "tableegháat" }], + [{ p: "تبلیغاتو", f: "tableegháato" }], + ], + }, + }, + }, + { + in: { + ts: 1527815921, + i: 3844, + p: "توقع", + f: "tawaqqU", + g: "tawakkU", + e: "expectation, hope, anticipation", + c: "n. f.", + app: "توقعات", + apf: "tawaqqUaat", + }, + out: { + arabicPlural: { + masc: [ + [{ p: "توقعات", f: "tawaqqUaat" }], + [{ p: "توقعاتو", f: "tawaqqUaato" }], + ], + }, + }, + }, + { + in: { + ts: 1527815820, + i: 5177, + p: "حادثه", + f: "haadisá", + g: "haadisa", + e: "accident, event", + c: "n. f.", + app: "حوادث, حادثات", + apf: "hawaadis, haadisaat", + }, + out: { + inflections: { + fem: [ + [{ p: "حادثه", f: "haadisá" }], + [{ p: "حادثې", f: "haadisé" }], + [{ p: "حادثو", f: "haadisó" }], + ], + }, + arabicPlural: { + masc: [ + [ + { p: "حوادث", f: "hawaadis" }, + { p: "حادثات", f: "haadisaat" }, + ], + [ + { p: "حوادثو", f: "hawaadiso" }, + { p: "حادثاتو", f: "haadisaato" }, + ], + ], + }, + }, + }, + { + in: { + ts: 1527815329, + i: 3097, + p: "تجربه", + f: "tajrabá, tajribá", + g: "tajraba,tajriba", + e: "experience", + c: "n. f.", + app: "تجارب", + apf: "tajaarib", + }, + out: { + inflections: { + fem: [ + [{ p: "تجربه", f: "tajrabá" }], + [{ p: "تجربې", f: "tajrabé" }], + [{ p: "تجربو", f: "tajrabó" }], + ], + }, + arabicPlural: { + masc: [ + [{ p: "تجارب", f: "tajaarib" }], + [{ p: "تجاربو", f: "tajaaribo" }], + ], + }, + }, + }, + { + in: { + ts: 1527814069, + i: 5194, + p: "حال", + f: "haal", + g: "haal", + e: "state, condition, circumstance", + c: "n. m.", + app: "احوال", + apf: "ahwáal", + }, + out: { + plural: { + masc: [ + [{ p: "حالونه", f: "haalóona" }], + [{ p: "حالونو", f: "haalóono" }], + ], + }, + bundledPlural: { + masc: [[{ p: "حاله", f: "háala" }], [{ p: "حالو", f: "háalo" }]], + }, + arabicPlural: { + masc: [[{ p: "احوال", f: "ahwáal" }], [{ p: "احوالو", f: "ahwáalo" }]], + }, + }, + }, + { + in: { + ts: 1527819536, + i: 3063, + p: "تبع", + f: "taba'", + g: "taba", + e: "follower, adherent, supporter, subject, national", + c: "n. m. unisex anim.", + app: "اتباع", + apf: "atbáa'", + }, + out: { + arabicPlural: { + masc: [[{ p: "اتباع", f: "atbáa'" }], [{ p: "اتباعوو", f: "atbáawo" }]], + }, + }, + }, + // Feminine regular ending in ح - a + { + in: { + ts: 1527815506, + p: "ذبح", + f: "zabha", + g: "", + e: "slaughter, killing, butchering", + c: "n. f.", + i: 5813, + }, + out: { + inflections: { + fem: [ + [{ p: "ذبح", f: "zabha" }], + [{ p: "ذبحې", f: "zabhe" }], + [{ p: "ذبحو", f: "zabho" }], + ], + }, + }, + }, + // Feminine inanimate regular with missing ه + { + in: { + ts: 1527814150, + p: "لار", + f: "laar", + g: "", + e: "road, way, path", + c: "n. f.", + i: 9593, + }, + out: { + inflections: { + fem: [ + [{ p: "لار", f: "laar" }], + [{ p: "لارې", f: "láare" }], + [{ p: "لارو", f: "láaro" }], + ], + }, + }, + }, + // Feminine animate ending in a consonant + // TODO: ALLOW FOR MULTIPLE PLURAL POSSIBILITIES میندې, میېنې etc. + { + in: { + ts: 1527812928, + p: "مور", + f: "mor", + g: "", + e: "mother, mom", + c: "n. f. anim.", + ppp: "میندې", + ppf: "maynde", + i: 11113, + }, + out: { + plural: { + fem: [[{ p: "میندې", f: "maynde" }], [{ p: "میندو", f: "mayndo" }]], + }, + }, + }, + // Feminine regular inanimate ending in ي + { + in: { + ts: 1527811877, + p: "دوستي", + f: "dostee", + g: "", + e: "friendship", + c: "n. f.", + i: 5503, + }, + out: { + plural: { + fem: [ + [ + { p: "دوستیانې", f: "dostiyáane" }, + { p: "دوستیګانې", f: "dosteegáane" }, + ], + [ + { p: "دوستیانو", f: "dostiyáano" }, + { p: "دوستیګانو", f: "dosteegáano" }, + ], + ], + }, + inflections: { + fem: [ + [{ p: "دوستي", f: "dostee" }], + [{ p: "دوستۍ", f: "dostúy" }], + [{ p: "دوستیو", f: "dostúyo" }], + ], + }, + }, + }, + // Feminine regular ending in ۍ + { + in: { + ts: 1527814203, + p: "کرسۍ", + f: "kUrsuy", + g: "", + e: "chair, seat, stool", + c: "n. f.", + i: 8718, + }, + out: { + inflections: { + fem: [ + [{ p: "کرسۍ", f: "kUrsúy" }], + [{ p: "کرسۍ", f: "kUrsúy" }], + [ + { p: "کرسیو", f: "kUrsíyo" }, + { p: "کرسو", f: "kUrsó" }, + ], + ], + }, + }, + }, + // Feminine regular ending in ا + { + in: { + ts: 1527812456, + p: "اړتیا", + f: "aRtiyáa, aRtyáa", + g: "", + e: "need, necessity", + c: "n. f.", + i: 376, + }, + out: { + plural: { + fem: [ + [ + { p: "اړتیاوې", f: "aRtiyáawe" }, + { p: "اړتیاګانې", f: "aRtiyaagáane" }, + ], + [ + { p: "اړتیاوو", f: "aRtiyáawo" }, + { p: "اړتیاګانو", f: "aRtiyaagáano" }, + ], + ], + }, + }, + }, + // Feminine regular ending in و + { + in: { + i: 2899, + ts: 1527815163, + p: "پیشو", + f: "peeshó", + g: "peesho", + e: "cat", + c: "n. f. anim.", + }, + out: { + plural: { + fem: [ + [ + { p: "پیشووې", f: "peeshówe" }, + { p: "پیشوګانې", f: "peeshogáane" }, + ], + [ + { p: "پیشووو", f: "peeshówo" }, + { p: "پیشوګانو", f: "peeshogáano" }, + ], + ], + }, + }, + }, + // Feminine regular ending in اع + { + in: { + ts: 1527821388, + p: "وداع", + f: "widáa'", + g: "", + e: "farewell, goodbye", + c: "n. f.", + i: 12205, + }, + out: { + plural: { + fem: [ + [ + { p: "وداع وې", f: "widáa we" }, + { p: "وداع ګانې", f: "widaa gáane" }, + ], + [ + { p: "وداع وو", f: "widáa wo" }, + { p: "وداع ګانو", f: "widaa gáano" }, + ], + ], + }, + }, + }, + // with variations on Pashto plural + { + in: { + ts: 1527815268, + i: 8475, + p: "شی", + f: "shay", + g: "shay", + ppp: "شیان، شیونه", + ppf: "shayáan, shayóona", + e: "thing", + c: "n. m.", + }, + out: { + inflections: { + masc: [ + [{ p: "شی", f: "shay" }], + [{ p: "شي", f: "shee" }], + [ + { p: "شیو", f: "shiyo" }, + { p: "شو", f: "sho" }, + ], + ], + }, + plural: { + masc: [ + [ + { p: "شیان", f: "shayáan" }, + { p: "شیونه", f: "shayóona" }, + ], + [ + { p: "شیانو", f: "shayáano" }, + { p: "شیونو", f: "shayóono" }, + ], + ], + }, + }, + }, + // TODO: Plaar plaroona paaraan - wrooNa + // Word with no inflections + { + in: { + ts: 1527815402, + p: "وړ", + f: "waR", + g: "", + e: "worthy of, deserving, -able", + c: "suff. / adj.", + i: 12045, + noInf: true, + }, + out: false, + }, + { + in: { + ts: 1610795367898, + i: 6978, + p: "رشوت خور", + f: "rishwat khór", + g: "rishwatkhor", + e: "bribe-taker, corrupt", + r: 4, + c: "n. m. anim. unisex / adj.", + infap: "رشوت خواره", + infaf: "rishwat khwaaru", + infbp: "رشوت خور", + infbf: "rishwat khwar", + }, + out: { + inflections: { + masc: [ + [{ p: "رشوت خور", f: "rishwat khór" }], + [{ p: "رشوت خواره", f: "rishwat khwaarú" }], + [{ p: "رشوت خورو", f: "rishwat khwaró" }], + ], + fem: [ + [{ p: "رشوت خوره", f: "rishwat khwará" }], + [{ p: "رشوت خورې", f: "rishwat khwaré" }], + [{ p: "رشوت خورو", f: "rishwat khwaró" }], + ], + }, + }, + }, ]; const others: T.DictionaryEntry[] = [ - { - ts: 1527812612, - p: "ګنډل", - f: "ganDul", - g: "", - e: "to sew, mend, make, knit", - c: "v. trans.", - i: 9448, - }, - { - ts: 1527812457, - p: "اصلاً", - f: "aslan", - g: "", - e: "actually", - c: "adv.", - i: 550, - }, + { + ts: 1527812612, + p: "ګنډل", + f: "ganDul", + g: "", + e: "to sew, mend, make, knit", + c: "v. trans.", + i: 9448, + }, + { + ts: 1527812457, + p: "اصلاً", + f: "aslan", + g: "", + e: "actually", + c: "adv.", + i: 550, + }, ]; adjectives.forEach((word) => { - test(`${word.in.p} should inflect properly`, () => { - expect(inflectWord(word.in)).toEqual(word.out); - }); + test(`${word.in.p} should inflect properly`, () => { + expect(inflectWord(word.in)).toEqual(word.out); + }); }); nouns.forEach((word) => { - // if (word.in.p !== "نبي") return; - test(`${word.in.p} should inflect properly`, () => { - expect(inflectWord(word.in)).toEqual(word.out); - }); + // if (word.in.p !== "نبي") return; + test(`${word.in.p} should inflect properly`, () => { + expect(inflectWord(word.in)).toEqual(word.out); + }); }); others.forEach((word) => { - test(`${word.p} should return false`, () => { - expect(inflectWord(word)).toEqual(false); - }); + test(`${word.p} should return false`, () => { + expect(inflectWord(word)).toEqual(false); + }); }); test(`inflectRegularYayUnisex should work`, () => { - expect(inflectRegularYayUnisex("لیدونکی", "leedóonkay")).toEqual({ - masc: [ - [{p: "لیدونکی", f: "leedóonkay" }], - [{p: "لیدونکي", f: "leedóonkee" }], - [{p: "لیدونکیو", f: "leedóonkiyo" }, {p: "لیدونکو", f: "leedóonko"}], - ], - fem: [ - [{p: "لیدونکې", f: "leedóonke" }], - [{p: "لیدونکې", f: "leedóonke" }], - [{p: "لیدونکو", f: "leedóonko"}], - ], - }); + expect(inflectRegularYayUnisex("لیدونکی", "leedóonkay")).toEqual({ + masc: [ + [{ p: "لیدونکی", f: "leedóonkay" }], + [{ p: "لیدونکي", f: "leedóonkee" }], + [ + { p: "لیدونکیو", f: "leedóonkiyo" }, + { p: "لیدونکو", f: "leedóonko" }, + ], + ], + fem: [ + [{ p: "لیدونکې", f: "leedóonke" }], + [{ p: "لیدونکې", f: "leedóonke" }], + [ + { p: "لیدونکیو", f: "leedóonkiyo" }, + { p: "لیدونکو", f: "leedóonko" }, + ], + ], + }); }); diff --git a/src/lib/src/pashto-inflector.ts b/src/lib/src/pashto-inflector.ts index e46a6a9..6358416 100644 --- a/src/lib/src/pashto-inflector.ts +++ b/src/lib/src/pashto-inflector.ts @@ -256,7 +256,10 @@ export function inflectRegularYayUnisex( fem: [ [{ p: `${baseP}ې`, f: `${baseF}e` }], [{ p: `${baseP}ې`, f: `${baseF}e` }], - [{ p: `${baseP}و`, f: `${baseF}o` }], + [ + { p: `${baseP}یو`, f: `${baseF}iyo` }, + { p: `${baseP}و`, f: `${baseF}o` }, + ], ], }; } @@ -291,7 +294,7 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections { [{ p, f }], [{ p: `${baseP}ي`, f: `${baseF}ée` }], [ - { p: `${baseP}یو`, f: `${baseF}iyo` }, + { p: `${baseP}یو`, f: `${baseF}íyo` }, { p: `${baseP}و`, f: `${baseF}ó` }, ], ], @@ -299,7 +302,7 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections { [{ p: `${baseP}ۍ`, f: `${baseF}úy` }], [{ p: `${baseP}ۍ`, f: `${baseF}úy` }], [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, + { p: `${baseP}یو`, f: `${baseF}íyo` }, { p: `${baseP}و`, f: `${baseF}ó` }, ], ], @@ -360,8 +363,8 @@ function inflectRegularEmphasizedYayMasc(p: string, f: string): T.Inflections { [{ p, f }], [{ p: `${baseP}ي`, f: `${baseF}ée` }], [ - { p: `${baseP}یو`, f: `${baseF}iyo` }, - { p: `${baseP}و`, f: `${baseF}o` }, + { p: `${baseP}یو`, f: `${baseF}íyo` }, + { p: `${baseP}و`, f: `${baseF}ó` }, ], ], }; @@ -453,8 +456,8 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections { [{ p, f: `${baseF}úy` }], [{ p, f: `${baseF}úy` }], [ - { p: `${baseP}یو`, f: `${baseF}úyo` }, - { p: `${baseP}و`, f: `${baseF}o` }, + { p: `${baseP}یو`, f: `${baseF}íyo` }, + { p: `${baseP}و`, f: `${baseF}ó` }, ], ], }; diff --git a/src/lib/src/verb-info.test.ts b/src/lib/src/verb-info.test.ts index 94a3383..6c20727 100644 --- a/src/lib/src/verb-info.test.ts +++ b/src/lib/src/verb-info.test.ts @@ -1896,7 +1896,10 @@ const toTest = [ fem: [ [{ p: "ستړې", f: "stúRe" }], [{ p: "ستړې", f: "stúRe" }], - [{ p: "ستړو", f: "stúRo" }], + [ + { p: "ستړیو", f: "stúRiyo" }, + { p: "ستړو", f: "stúRo" }, + ], ], }, }, diff --git a/src/types.ts b/src/types.ts index 8fac6e2..b736a5f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1269,3 +1269,8 @@ export type OtherComp = { type: "Comp"; ps: PsString; }; + +export type Token = { + i: number; + s: string; +}; diff --git a/vocab/nouns-adjs/aanu-masc.js b/vocab/nouns-adjs/aanu-masc.js new file mode 100644 index 0000000..8aa08bd --- /dev/null +++ b/vocab/nouns-adjs/aanu-masc.js @@ -0,0 +1 @@ +module.exports = [{ ts: 1527815333, e: "oven" }]; diff --git a/vocab/nouns-adjs/basic-unisex.js b/vocab/nouns-adjs/basic-unisex.js index b672029..a60c573 100644 --- a/vocab/nouns-adjs/basic-unisex.js +++ b/vocab/nouns-adjs/basic-unisex.js @@ -7,121 +7,122 @@ */ module.exports = [ - { ts: 1527815408, e: "asleep" }, // ویده - weedú - { ts: 1527812796, e: "good" }, // ښه - xu - { ts: 1527821744, e: "cook, chef" }, // آشپز - aashpáz - { ts: 1527812461, e: "hero, brave" }, // اتل - atul - { ts: 1527821649, e: "impressive, effective, influencing" }, // اثرناک - asarnáak - { ts: 1527818704, e: "wide, spacious, extensive" }, // ارت - arát - { ts: 1578340121962, e: "free, independant" }, // ازاد - azáad - { ts: 1527819418, e: "independant, autonomous" }, // خپلواک - khpulwaak - { ts: 1527817146, e: "resident; settled" }, // استوګن - astogan - { ts: 1527813713, e: "hopeful, pregnant" }, // امیدوار - Umeedwaar - { ts: 1527819451, e: "Englishman, English (adjective)" }, // انګرېز - angréz - { ts: 1527820346, e: "on-line" }, // انلاین - anlaayn - { ts: 1527813667, e: "important" }, // اهم - aham - { ts: 1598724912198, e: "dry" }, // اوچ - ooch - { ts: 1527815138, e: "insurgent" }, // اورپک - orpak - { ts: 1586452587974, e: "free, available" }, // اوزګار - oozgáar - { ts: 1527816489, e: "faithful, believer" }, // ایماندار - eemaandaar - { ts: 1527820433, e: "valiant" }, // باتور - baatóor - { ts: 1527813425, e: "stingy" }, // بخیل - bakheel - { ts: 1527812511, e: "bad" }, // بد - bud, bad - { ts: 1527812518, e: "equal, even, set up" }, // برابر - buraabur - { ts: 1527811861, e: "naked" }, // بربنډ - barbunD - { ts: 1527811511, e: "full, complete" }, // بشپړ - bushpuR - { ts: 1527812515, e: "other, next" }, // بل - bul - { ts: 1527815725, e: "knowledgeable, accustomed" }, // بلد - balad - { ts: 1577301753727, e: "closed" }, // بند - band - { ts: 1527812490, e: "useless" }, // بې کار - be kaar - { ts: 1527812031, e: "separate, different" }, // بېل - bel - { ts: 1527815144, e: "clean, pure" }, // پاک - paak - { ts: 1527815201, e: "hidden" }, // پټ - puT - { ts: 1527815179, e: "wide" }, // پلن - plun - { ts: 1527819059, e: "thick, fat" }, // پنډ - punD - { ts: 1611767359178, e: "compassionate" }, // ترسناک - tarsnáak - { ts: 1527813270, e: "sour" }, // تروش - troosh - { ts: 1527813817, e: "narrow, cramped" }, // تنګ - tang - { ts: 1527816354, e: "ready" }, // تیار - tayaar - { ts: 1527817056, e: "sharp, fast" }, // تېز - tez - { ts: 1527814076, e: "societal, social" }, // ټولنیز - Toluneez - { ts: 1527819864, e: "low" }, // ټیټ - TeeT - { ts: 1527811894, e: "firm, tough, rigid" }, // ټینګ - Teeng - { ts: 1527812943, e: "constant, stable, proven" }, // ثابت - saabit - { ts: 1527813085, e: "heavy, difficult" }, // ثقیل - saqeel - { ts: 1527820479, e: "ignorant" }, // جاهل - jaahíl - { ts: 1588160800930, e: "surgeon" }, // جراح - jarráah - { ts: 1527812707, e: "high, tall" }, // جګ - jig, jug - { ts: 1527816944, e: "clear, evident" }, // جوت - jawat - { ts: 1527822996, e: "alongside, adjoining" }, // جوخت - jokht - { ts: 1527812711, e: "well, healthy" }, // جوړ - joR - { ts: 1527816323, e: "shining, sparkling" }, // ځلاند - dzalaand - { ts: 1527812291, e: "young, youthful" }, // ځوان - dzwaan - { ts: 1527820112, e: "hanging" }, // ځوړند - dzwáRund - { ts: 1527819672, e: "crafty" }, // چالاک - chaaláak - { ts: 1527811230, e: "quick, fast" }, // چټک - chaTak - { ts: 1527812524, e: "started, in motion" }, // چلان - chalaan - { ts: 1527815370, e: "clear, apparent" }, // څرګند - tsărgund - { ts: 1576366107077, e: "straight, upright" }, // څک - tsak - { ts: 1527812113, e: "present, on hand, ready" }, // حاضر - haazir, haazur - { ts: 1527820699, e: "pregnant, carrying" }, // حامل - haamíl - { ts: 1527819824, e: "greedy" }, // حریص - harées - { ts: 1527812669, e: "sensitive" }, // حساس - hasaas - { ts: 1527812057, e: "raw, unripe" }, // خام - khaam - { ts: 1527811523, e: "traitor, treacherous" }, // خاین - khaayin - { ts: 1527814219, e: "relative, one's own" }, // خپل - khpul - { ts: 1527812795, e: "relative" }, // خپلوان - khpulwaan - { ts: 1527812808, e: "poor, miserable" }, // خوار - khwaar - { ts: 1527814880, e: "tall" }, // دنګ - dung - { ts: 1527812537, e: "assured" }, // ډاډمن - DaaDmun - { ts: 1527812583, e: "full" }, // ډک - Duk - { ts: 1527822674, e: "gaunt" }, // ډنګر - Dungár, Dangár - { ts: 1527817256, e: "sunk" }, // ډوب - Doob - { ts: 1527814277, e: "healthy" }, // روغ - rogh - { ts: 1609780006604, e: "fruitful" }, // زرخېز - zarkhéz - { ts: 1527817116, e: "green, flourishing" }, // زرغون - zarghoon - { ts: 1527814026, e: "golden" }, // زرین - zareen - { ts: 1527815848, e: "committed" }, // ژمن - jzman - { ts: 1527813498, e: "light" }, // سپک - spuk - { ts: 1578329248464, e: "white" }, // سپین - speen - { ts: 1527811860, e: "great" }, // ستر - stur - { ts: 1527820178, e: "problematic" }, // ستونزمن - stoonzmán - { ts: 1527815246, e: "difficult" }, // سخت - sakht - { ts: 1527817262, e: "barren" }, // شنډ - shanD - { ts: 1527813426, e: "stingy" }, // شوم - shoom - { ts: 1527812625, e: "big" }, // غټ - ghuT, ghaT - { ts: 1527811846, e: "successful" }, // کامیاب - kaamyaab - { ts: 1527823678, e: "lazy" }, // کاهل - kaahíl - { ts: 1527814896, e: "proud, arrogant" }, // کبرجن - kaburjun - { ts: 1527813117, e: "firm, solid" }, // کلک - klak, kluk - { ts: 1578769492475, e: "few, little" }, // کم - kam - // { ts: 1527814253, e: "mixed up" }, // ګډ وډ // TODO: FIX INFLECTION MACHINE FOR DOUBLES! - { ts: 1578769409512, e: "weak" }, // کمزور - kamzór - { ts: 1527812639, e: "dear, difficult" }, // ګران - graan - { ts: 1527816786, e: "all" }, // ګرد - gurd - { ts: 1527814811, e: "warm, hot" }, // ګرم - garm, garum - { ts: 1527817662, e: "guilty" }, // ګرم - gram - { ts: 1527812308, e: "thick, lots" }, // ګڼ - gaN - { ts: 1527813848, e: "desiring, eager" }, // لېوال - lewaal - { ts: 1527816011, e: "broken" }, // مات - maat - { ts: 1527812881, e: "child" }, // ماشوم - maashoom - { ts: 1527817007, e: "known" }, // مالوم - maaloom - { ts: 1527814321, e: "positive" }, // مثبت - mUsbat - { ts: 1527811264, e: "condemned" }, // محکوم - mahkoom - { ts: 1527814802, e: "foul" }, // مردار - mUrdáar - { ts: 1527821812, e: "arrogant" }, // مغرور - maghróor - { ts: 1527820222, e: "lying down" }, // ملاست - mlaast - { ts: 1527814344, e: "important" }, // مهم - mUhím - { ts: 1527816033, e: "uncommon" }, // نادر - naadir - { ts: 1527815106, e: "sitting, seated" }, // ناست - naast - { ts: 1527815127, e: "nurse" }, // نرس - nurs - { ts: 1527821673, e: "moist, damp, wet" }, // نمجن - namjún - { ts: 1527815130, e: "dry, land, ground" }, // وچ - wuch, wUch - { ts: 1527817486, e: "ruined, destroyed; destructive, bad, naughty" }, // وران - wraan - { ts: 1527814373, e: "lost" }, // ورک - wruk - { ts: 1527822838, e: "decayed, spoiled, rotten" }, // وروست - wrost - { ts: 1609949334478, e: "roasted" }, // وریت - wreet - { ts: 1527811544, e: "standing" }, // ولاړ - waláaR, wuláaR - { ts: 1527815498, e: "aforementioned" }, // یاد - yaad - { ts: 1527815434, e: "cold" }, // یخ - yakh, yukh - ]; \ No newline at end of file + { ts: 1527816747, e: "doctor" }, // ډاکټر + { ts: 1527815408, e: "asleep" }, // ویده - weedú + { ts: 1527812796, e: "good" }, // ښه - xu + { ts: 1527821744, e: "cook, chef" }, // آشپز - aashpáz + { ts: 1527812461, e: "hero, brave" }, // اتل - atul + { ts: 1527821649, e: "impressive, effective, influencing" }, // اثرناک - asarnáak + { ts: 1527818704, e: "wide, spacious, extensive" }, // ارت - arát + { ts: 1578340121962, e: "free, independant" }, // ازاد - azáad + { ts: 1527819418, e: "independant, autonomous" }, // خپلواک - khpulwaak + { ts: 1527817146, e: "resident; settled" }, // استوګن - astogan + { ts: 1527813713, e: "hopeful, pregnant" }, // امیدوار - Umeedwaar + { ts: 1527819451, e: "Englishman, English (adjective)" }, // انګرېز - angréz + { ts: 1527820346, e: "on-line" }, // انلاین - anlaayn + { ts: 1527813667, e: "important" }, // اهم - aham + { ts: 1598724912198, e: "dry" }, // اوچ - ooch + { ts: 1527815138, e: "insurgent" }, // اورپک - orpak + { ts: 1586452587974, e: "free, available" }, // اوزګار - oozgáar + { ts: 1527816489, e: "faithful, believer" }, // ایماندار - eemaandaar + { ts: 1527820433, e: "valiant" }, // باتور - baatóor + { ts: 1527813425, e: "stingy" }, // بخیل - bakheel + { ts: 1527812511, e: "bad" }, // بد - bud, bad + { ts: 1527812518, e: "equal, even, set up" }, // برابر - buraabur + { ts: 1527811861, e: "naked" }, // بربنډ - barbunD + { ts: 1527811511, e: "full, complete" }, // بشپړ - bushpuR + { ts: 1527812515, e: "other, next" }, // بل - bul + { ts: 1527815725, e: "knowledgeable, accustomed" }, // بلد - balad + { ts: 1577301753727, e: "closed" }, // بند - band + { ts: 1527812490, e: "useless" }, // بې کار - be kaar + { ts: 1527812031, e: "separate, different" }, // بېل - bel + { ts: 1527815144, e: "clean, pure" }, // پاک - paak + { ts: 1527815201, e: "hidden" }, // پټ - puT + { ts: 1527815179, e: "wide" }, // پلن - plun + { ts: 1527819059, e: "thick, fat" }, // پنډ - punD + { ts: 1611767359178, e: "compassionate" }, // ترسناک - tarsnáak + { ts: 1527813270, e: "sour" }, // تروش - troosh + { ts: 1527813817, e: "narrow, cramped" }, // تنګ - tang + { ts: 1527816354, e: "ready" }, // تیار - tayaar + { ts: 1527817056, e: "sharp, fast" }, // تېز - tez + { ts: 1527814076, e: "societal, social" }, // ټولنیز - Toluneez + { ts: 1527819864, e: "low" }, // ټیټ - TeeT + { ts: 1527811894, e: "firm, tough, rigid" }, // ټینګ - Teeng + { ts: 1527812943, e: "constant, stable, proven" }, // ثابت - saabit + { ts: 1527813085, e: "heavy, difficult" }, // ثقیل - saqeel + { ts: 1527820479, e: "ignorant" }, // جاهل - jaahíl + { ts: 1588160800930, e: "surgeon" }, // جراح - jarráah + { ts: 1527812707, e: "high, tall" }, // جګ - jig, jug + { ts: 1527816944, e: "clear, evident" }, // جوت - jawat + { ts: 1527822996, e: "alongside, adjoining" }, // جوخت - jokht + { ts: 1527812711, e: "well, healthy" }, // جوړ - joR + { ts: 1527816323, e: "shining, sparkling" }, // ځلاند - dzalaand + { ts: 1527812291, e: "young, youthful" }, // ځوان - dzwaan + { ts: 1527820112, e: "hanging" }, // ځوړند - dzwáRund + { ts: 1527819672, e: "crafty" }, // چالاک - chaaláak + { ts: 1527811230, e: "quick, fast" }, // چټک - chaTak + { ts: 1527812524, e: "started, in motion" }, // چلان - chalaan + { ts: 1527815370, e: "clear, apparent" }, // څرګند - tsărgund + { ts: 1576366107077, e: "straight, upright" }, // څک - tsak + { ts: 1527812113, e: "present, on hand, ready" }, // حاضر - haazir, haazur + { ts: 1527820699, e: "pregnant, carrying" }, // حامل - haamíl + { ts: 1527819824, e: "greedy" }, // حریص - harées + { ts: 1527812669, e: "sensitive" }, // حساس - hasaas + { ts: 1527812057, e: "raw, unripe" }, // خام - khaam + { ts: 1527811523, e: "traitor, treacherous" }, // خاین - khaayin + { ts: 1527814219, e: "relative, one's own" }, // خپل - khpul + { ts: 1527812795, e: "relative" }, // خپلوان - khpulwaan + { ts: 1527812808, e: "poor, miserable" }, // خوار - khwaar + { ts: 1527814880, e: "tall" }, // دنګ - dung + { ts: 1527812537, e: "assured" }, // ډاډمن - DaaDmun + { ts: 1527812583, e: "full" }, // ډک - Duk + { ts: 1527822674, e: "gaunt" }, // ډنګر - Dungár, Dangár + { ts: 1527817256, e: "sunk" }, // ډوب - Doob + { ts: 1527814277, e: "healthy" }, // روغ - rogh + { ts: 1609780006604, e: "fruitful" }, // زرخېز - zarkhéz + { ts: 1527817116, e: "green, flourishing" }, // زرغون - zarghoon + { ts: 1527814026, e: "golden" }, // زرین - zareen + { ts: 1527815848, e: "committed" }, // ژمن - jzman + { ts: 1527813498, e: "light" }, // سپک - spuk + { ts: 1578329248464, e: "white" }, // سپین - speen + { ts: 1527811860, e: "great" }, // ستر - stur + { ts: 1527820178, e: "problematic" }, // ستونزمن - stoonzmán + { ts: 1527815246, e: "difficult" }, // سخت - sakht + { ts: 1527817262, e: "barren" }, // شنډ - shanD + { ts: 1527813426, e: "stingy" }, // شوم - shoom + { ts: 1527812625, e: "big" }, // غټ - ghuT, ghaT + { ts: 1527811846, e: "successful" }, // کامیاب - kaamyaab + { ts: 1527823678, e: "lazy" }, // کاهل - kaahíl + { ts: 1527814896, e: "proud, arrogant" }, // کبرجن - kaburjun + { ts: 1527813117, e: "firm, solid" }, // کلک - klak, kluk + { ts: 1578769492475, e: "few, little" }, // کم - kam + // { ts: 1527814253, e: "mixed up" }, // ګډ وډ // TODO: FIX INFLECTION MACHINE FOR DOUBLES! + { ts: 1578769409512, e: "weak" }, // کمزور - kamzór + { ts: 1527812639, e: "dear, difficult" }, // ګران - graan + { ts: 1527816786, e: "all" }, // ګرد - gurd + { ts: 1527814811, e: "warm, hot" }, // ګرم - garm, garum + { ts: 1527817662, e: "guilty" }, // ګرم - gram + { ts: 1527812308, e: "thick, lots" }, // ګڼ - gaN + { ts: 1527813848, e: "desiring, eager" }, // لېوال - lewaal + { ts: 1527816011, e: "broken" }, // مات - maat + { ts: 1527812881, e: "child" }, // ماشوم - maashoom + { ts: 1527817007, e: "known" }, // مالوم - maaloom + { ts: 1527814321, e: "positive" }, // مثبت - mUsbat + { ts: 1527811264, e: "condemned" }, // محکوم - mahkoom + { ts: 1527814802, e: "foul" }, // مردار - mUrdáar + { ts: 1527821812, e: "arrogant" }, // مغرور - maghróor + { ts: 1527820222, e: "lying down" }, // ملاست - mlaast + { ts: 1527814344, e: "important" }, // مهم - mUhím + { ts: 1527816033, e: "uncommon" }, // نادر - naadir + { ts: 1527815106, e: "sitting, seated" }, // ناست - naast + { ts: 1527815127, e: "nurse" }, // نرس - nurs + { ts: 1527821673, e: "moist, damp, wet" }, // نمجن - namjún + { ts: 1527815130, e: "dry, land, ground" }, // وچ - wuch, wUch + { ts: 1527817486, e: "ruined, destroyed; destructive, bad, naughty" }, // وران - wraan + { ts: 1527814373, e: "lost" }, // ورک - wruk + { ts: 1527822838, e: "decayed, spoiled, rotten" }, // وروست - wrost + { ts: 1609949334478, e: "roasted" }, // وریت - wreet + { ts: 1527811544, e: "standing" }, // ولاړ - waláaR, wuláaR + { ts: 1527815498, e: "aforementioned" }, // یاد - yaad + { ts: 1527815434, e: "cold" }, // یخ - yakh, yukh +]; diff --git a/vocab/nouns-adjs/irreg-nouns.js b/vocab/nouns-adjs/irreg-nouns.js new file mode 100644 index 0000000..a252e11 --- /dev/null +++ b/vocab/nouns-adjs/irreg-nouns.js @@ -0,0 +1,22 @@ +module.exports = [ + { + ts: 1527815177, + e: "father", + }, + { + ts: 1527815129, + e: "water", + }, + { + ts: 1527817330, + e: "wheat", + }, + { + ts: 1527815206, + e: "judge", + }, + { + ts: 1527812342, + e: "people", // خلک + }, +]; diff --git a/vocab/nouns-adjs/short-squish-masc.js b/vocab/nouns-adjs/short-squish-masc.js new file mode 100644 index 0000000..555f015 --- /dev/null +++ b/vocab/nouns-adjs/short-squish-masc.js @@ -0,0 +1,4 @@ +module.exports = [ + { ts: 1527811441, e: "door" }, // ور + { ts: 1527813593, e: "mountain" }, // غر +];