From f0624252bc7babe66c2c596389829706fd715a5b Mon Sep 17 00:00:00 2001 From: adueck Date: Tue, 1 Aug 2023 18:49:11 +0400 Subject: [PATCH] mostly done plural recognition --- .vscode/settings.json | 3 + src/lib/src/parsing/inflection-query.ts | 159 ++++++++++++++ src/lib/src/parsing/lookup.tsx | 10 + src/lib/src/parsing/parse-noun.test.ts | 272 ++++++++++++++++++++++++ vocab/nouns-adjs/irreg-nouns.js | 8 + 5 files changed, 452 insertions(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index d949d6d..8478167 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,4 +2,7 @@ "typescript.preferences.autoImportFileExcludePatterns": [ "../../library.ts" ], + "cSpell.words": [ + "کارخانه" + ], } \ No newline at end of file diff --git a/src/lib/src/parsing/inflection-query.ts b/src/lib/src/parsing/inflection-query.ts index e4264ae..0cf2aff 100644 --- a/src/lib/src/parsing/inflection-query.ts +++ b/src/lib/src/parsing/inflection-query.ts @@ -51,6 +51,35 @@ export function getInflectionQueries( }, }); if (noun) { + queries.push({ + search: { ppp: s }, + details: { + inflection: [0], + gender: ["masc", "fem"], + plural: true, + predicate: isNounEntry, + }, + }); + if (s.endsWith("و")) { + queries.push({ + search: { ppp: s.slice(0, -1) }, + details: { + inflection: [1], + gender: ["masc"], + plural: true, + predicate: isMascNounEntry, + }, + }); + queries.push({ + search: { ppp: s.slice(0, -1) + "ې" }, + details: { + inflection: [1], + gender: ["fem"], + plural: true, + predicate: isFemNounEntry, + }, + }); + } if (s.endsWith("ونه")) { queries.push({ search: { p: s.slice(0, -3) }, @@ -81,6 +110,136 @@ export function getInflectionQueries( }, }); } + if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) { + queries.push({ + search: { p: s.slice(0, -2) }, + details: { + inflection: [0], + gender: ["masc"], + plural: true, + predicate: (e) => + isNounEntry(e) && + (isMascNounEntry(e) || isUnisexNounEntry(e)) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) { + queries.push({ + search: { p: s.slice(0, -3) }, + details: { + inflection: [0], + gender: ["fem"], + plural: true, + predicate: (e) => + isNounEntry(e) && + (isFemNounEntry(e) || isUnisexNounEntry(e)) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) { + queries.push({ + search: { p: s.slice(0, -3) }, + details: { + inflection: [0], + gender: ["masc"], + plural: true, + predicate: (e) => + isNounEntry(e) && + (isMascNounEntry(e) || isUnisexNounEntry(e)) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) { + queries.push({ + search: { p: s.slice(0, -4) }, + details: { + inflection: [0], + gender: ["fem"], + plural: true, + predicate: (e) => + isNounEntry(e) && + (isFemNounEntry(e) || isUnisexNounEntry(e)) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) { + queries.push({ + search: { p: s.slice(0, -2) }, + details: { + inflection: [0], + gender: ["fem"], + plural: true, + predicate: (e) => + isNounEntry(e) && + (isFemNounEntry(e) || isUnisexNounEntry(e)) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) { + queries.push({ + search: { p: s.slice(0, -2) }, + details: { + inflection: [1], + gender: ["fem"], + plural: true, + predicate: (e) => + isNounEntry(e) && + (isFemNounEntry(e) || isUnisexNounEntry(e)) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) { + queries.push({ + search: { p: s.slice(0, -4) }, + details: { + inflection: [1], + gender: ["masc", "fem"], + plural: true, + predicate: (e) => + isNounEntry(e) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } + if (s.endsWith("انو")) { + queries.push({ + search: { p: s.slice(0, -3) }, + details: { + inflection: [1], + gender: ["masc", "fem"], + plural: true, + predicate: (e) => + isNounEntry(e) && + !isPluralNounEntry(e) && + !isPattern2Entry(e) && + !isPattern3Entry(e) && + !isPattern4Entry(e), + }, + }); + } if (s.endsWith("ونو")) { queries.push({ search: { p: s.slice(0, -3) }, diff --git a/src/lib/src/parsing/lookup.tsx b/src/lib/src/parsing/lookup.tsx index 0b9bcb9..c109686 100644 --- a/src/lib/src/parsing/lookup.tsx +++ b/src/lib/src/parsing/lookup.tsx @@ -4,6 +4,16 @@ import { isAdjectiveEntry, isNounEntry } from "../type-predicates"; export function lookup(s: Partial): T.DictionaryEntry[] { const [key, value] = Object.entries(s)[0]; + if (key === "ppp") { + return nounsAdjs.filter( + (e) => + e.ppp && + e.ppp + .split(",") + .map((w) => w.trim()) + .includes(value as string) + ); + } // @ts-ignore return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[]; } diff --git a/src/lib/src/parsing/parse-noun.test.ts b/src/lib/src/parsing/parse-noun.test.ts index a08ff92..3ee197f 100644 --- a/src/lib/src/parsing/parse-noun.test.ts +++ b/src/lib/src/parsing/parse-noun.test.ts @@ -32,6 +32,10 @@ const ghanum = wordQuery("غنم", "noun"); const laar = wordQuery("لار", "noun"); const qaazee = wordQuery("قاضي", "noun"); const waadu = wordQuery("واده", "noun"); +const maamaa = wordQuery("ماما", "noun"); +const peesho = wordQuery("پیشو", "noun"); +const duaa = wordQuery("دعا", "noun"); +const zooy = wordQuery("زوی", "noun"); const tests: { category: string; @@ -937,6 +941,38 @@ const tests: { }, ], }, + { + input: "ماما", + output: [ + { + inflected: false, + selection: makeNounSelection(maamaa, undefined), + }, + { + inflected: true, + selection: makeNounSelection(maamaa, undefined), + }, + ], + }, + { + input: "پیشو", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(peesho, undefined), + gender: "fem", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(peesho, undefined), + gender: "fem", + }, + }, + ], + }, ], }, { @@ -1035,6 +1071,225 @@ const tests: { }, ], }, + { + category: "plurals with -aan", + cases: [ + { + input: "پلاران", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(plaar, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "پلارانې", + output: [], + }, + { + input: "پلارګان", + output: [], + }, + { + input: "پلارګانو", + output: [], + }, + { + input: "پلارانو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(plaar, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "دعاګانې", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(duaa, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "دعاګانو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(duaa, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ماماګان", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(maamaa, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ماماګانو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(maamaa, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ډاکټران", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(daktar, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "ډاکټرانې", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(daktar, undefined), + number: "plural", + gender: "fem", + }, + }, + ], + }, + { + input: "ډاکټرانو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(daktar, undefined), + number: "plural", + gender: "masc", + }, + }, + { + inflected: true, + selection: { + ...makeNounSelection(daktar, undefined), + number: "plural", + gender: "fem", + }, + }, + ], + }, + ], + }, + { + category: "plurals with -we", + cases: [ + { + input: "دعاوې", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(duaa, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + { + input: "دعاوو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(duaa, undefined), + gender: "fem", + number: "plural", + }, + }, + ], + }, + ], + }, + { + category: "irregular plurals", + cases: [ + { + input: "میندې", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(mor, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "میندو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(mor, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "زامن", + output: [ + { + inflected: false, + selection: { + ...makeNounSelection(zooy, undefined), + number: "plural", + }, + }, + ], + }, + { + input: "زامنو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(zooy, undefined), + number: "plural", + }, + }, + ], + }, + ], + }, ]; // PROBLEM WITH غټې وریژې @@ -1153,6 +1408,23 @@ const adjsTests: { }, ], }, + // TODO: WHY DOES ADDING زړو break this ??? + { + input: "غټو کورونو", + output: [ + { + inflected: true, + selection: { + ...makeNounSelection(kor, undefined), + number: "plural", + adjectives: [ + makeAdjectiveSelection(ghut), + // makeAdjectiveSelection(zor), + ], + }, + }, + ], + }, ], }, ]; diff --git a/vocab/nouns-adjs/irreg-nouns.js b/vocab/nouns-adjs/irreg-nouns.js index a252e11..bbe7f42 100644 --- a/vocab/nouns-adjs/irreg-nouns.js +++ b/vocab/nouns-adjs/irreg-nouns.js @@ -19,4 +19,12 @@ module.exports = [ ts: 1527812342, e: "people", // خلک }, + { + ts: 1527815163, + e: "cat", // پیشو + }, + { + ts: 1527815450, + e: "son", // زوی + }, ];