mostly done plural recognition

This commit is contained in:
adueck 2023-08-01 18:49:11 +04:00
parent b672e19c1a
commit f0624252bc
5 changed files with 452 additions and 0 deletions

View File

@ -2,4 +2,7 @@
"typescript.preferences.autoImportFileExcludePatterns": [
"../../library.ts"
],
"cSpell.words": [
"کارخانه"
],
}

View File

@ -51,6 +51,35 @@ export function getInflectionQueries(
},
});
if (noun) {
queries.push({
search: { ppp: s },
details: {
inflection: [0],
gender: ["masc", "fem"],
plural: true,
predicate: isNounEntry,
},
});
if (s.endsWith("و")) {
queries.push({
search: { ppp: s.slice(0, -1) },
details: {
inflection: [1],
gender: ["masc"],
plural: true,
predicate: isMascNounEntry,
},
});
queries.push({
search: { ppp: s.slice(0, -1) + "ې" },
details: {
inflection: [1],
gender: ["fem"],
plural: true,
predicate: isFemNounEntry,
},
});
}
if (s.endsWith("ونه")) {
queries.push({
search: { p: s.slice(0, -3) },
@ -81,6 +110,136 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) {
queries.push({
search: { p: s.slice(0, -2) },
details: {
inflection: [0],
gender: ["masc"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
(isMascNounEntry(e) || isUnisexNounEntry(e)) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) {
queries.push({
search: { p: s.slice(0, -3) },
details: {
inflection: [0],
gender: ["fem"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
(isFemNounEntry(e) || isUnisexNounEntry(e)) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) {
queries.push({
search: { p: s.slice(0, -3) },
details: {
inflection: [0],
gender: ["masc"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
(isMascNounEntry(e) || isUnisexNounEntry(e)) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) {
queries.push({
search: { p: s.slice(0, -4) },
details: {
inflection: [0],
gender: ["fem"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
(isFemNounEntry(e) || isUnisexNounEntry(e)) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) {
queries.push({
search: { p: s.slice(0, -2) },
details: {
inflection: [0],
gender: ["fem"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
(isFemNounEntry(e) || isUnisexNounEntry(e)) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) {
queries.push({
search: { p: s.slice(0, -2) },
details: {
inflection: [1],
gender: ["fem"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
(isFemNounEntry(e) || isUnisexNounEntry(e)) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) {
queries.push({
search: { p: s.slice(0, -4) },
details: {
inflection: [1],
gender: ["masc", "fem"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("انو")) {
queries.push({
search: { p: s.slice(0, -3) },
details: {
inflection: [1],
gender: ["masc", "fem"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("ونو")) {
queries.push({
search: { p: s.slice(0, -3) },

View File

@ -4,6 +4,16 @@ import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
/**
 * Finds the entries of `nounsAdjs` that match a single-field query.
 *
 * Only the first key/value pair of `s` is used; any further fields are ignored.
 * - `ppp`: an entry matches when the queried value equals one of the
 *   comma-separated, whitespace-trimmed items in the entry's `ppp` string.
 * - any other field: an entry matches when its value for that field is
 *   strictly equal (`===`) to the queried value.
 *
 * @param s - query object whose first own enumerable property is the search
 *   field and value
 * @returns the matching entries; an empty array when `s` has no properties
 */
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
  const firstField = Object.entries(s)[0];
  // An empty query used to crash while destructuring `undefined`;
  // with nothing to search for there is nothing to match.
  if (firstField === undefined) {
    return [];
  }
  const [key, value] = firstField;
  if (key === "ppp") {
    // `ppp` may hold several comma-separated forms, so compare against each
    // trimmed item rather than the raw string.
    return nounsAdjs.filter(
      (e) =>
        e.ppp &&
        e.ppp
          .split(",")
          .map((w) => w.trim())
          .includes(value as string)
    );
  }
  // NOTE(review): the dynamic `e[key]` access is not type-checked; the
  // suppression is kept because the element type of `nounsAdjs` is not
  // visible from this block — confirm whether a `keyof` cast would suffice.
  // @ts-ignore
  return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
}

View File

@ -32,6 +32,10 @@ const ghanum = wordQuery("غنم", "noun");
const laar = wordQuery("لار", "noun");
const qaazee = wordQuery("قاضي", "noun");
const waadu = wordQuery("واده", "noun");
const maamaa = wordQuery("ماما", "noun");
const peesho = wordQuery("پیشو", "noun");
const duaa = wordQuery("دعا", "noun");
const zooy = wordQuery("زوی", "noun");
const tests: {
category: string;
@ -937,6 +941,38 @@ const tests: {
},
],
},
{
input: "ماما",
output: [
{
inflected: false,
selection: makeNounSelection(maamaa, undefined),
},
{
inflected: true,
selection: makeNounSelection(maamaa, undefined),
},
],
},
{
input: "پیشو",
output: [
{
inflected: false,
selection: {
...makeNounSelection(peesho, undefined),
gender: "fem",
},
},
{
inflected: true,
selection: {
...makeNounSelection(peesho, undefined),
gender: "fem",
},
},
],
},
],
},
{
@ -1035,6 +1071,225 @@ const tests: {
},
],
},
{
category: "plurals with -aan",
cases: [
{
input: "پلاران",
output: [
{
inflected: false,
selection: {
...makeNounSelection(plaar, undefined),
number: "plural",
},
},
],
},
{
input: "پلارانې",
output: [],
},
{
input: "پلارګان",
output: [],
},
{
input: "پلارګانو",
output: [],
},
{
input: "پلارانو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(plaar, undefined),
number: "plural",
},
},
],
},
{
input: "دعاګانې",
output: [
{
inflected: false,
selection: {
...makeNounSelection(duaa, undefined),
number: "plural",
},
},
],
},
{
input: "دعاګانو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(duaa, undefined),
number: "plural",
},
},
],
},
{
input: "ماماګان",
output: [
{
inflected: false,
selection: {
...makeNounSelection(maamaa, undefined),
number: "plural",
},
},
],
},
{
input: "ماماګانو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(maamaa, undefined),
number: "plural",
},
},
],
},
{
input: "ډاکټران",
output: [
{
inflected: false,
selection: {
...makeNounSelection(daktar, undefined),
number: "plural",
},
},
],
},
{
input: "ډاکټرانې",
output: [
{
inflected: false,
selection: {
...makeNounSelection(daktar, undefined),
number: "plural",
gender: "fem",
},
},
],
},
{
input: "ډاکټرانو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(daktar, undefined),
number: "plural",
gender: "masc",
},
},
{
inflected: true,
selection: {
...makeNounSelection(daktar, undefined),
number: "plural",
gender: "fem",
},
},
],
},
],
},
{
category: "plurals with -we",
cases: [
{
input: "دعاوې",
output: [
{
inflected: false,
selection: {
...makeNounSelection(duaa, undefined),
gender: "fem",
number: "plural",
},
},
],
},
{
input: "دعاوو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(duaa, undefined),
gender: "fem",
number: "plural",
},
},
],
},
],
},
{
category: "irregular plurals",
cases: [
{
input: "میندې",
output: [
{
inflected: false,
selection: {
...makeNounSelection(mor, undefined),
number: "plural",
},
},
],
},
{
input: "میندو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(mor, undefined),
number: "plural",
},
},
],
},
{
input: "زامن",
output: [
{
inflected: false,
selection: {
...makeNounSelection(zooy, undefined),
number: "plural",
},
},
],
},
{
input: "زامنو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(zooy, undefined),
number: "plural",
},
},
],
},
],
},
];
// PROBLEM WITH غټې وریژې
@ -1153,6 +1408,23 @@ const adjsTests: {
},
],
},
// TODO: investigate why adding زړو (the commented-out `zor` adjective below) breaks this case
{
input: "غټو کورونو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(kor, undefined),
number: "plural",
adjectives: [
makeAdjectiveSelection(ghut),
// makeAdjectiveSelection(zor),
],
},
},
],
},
],
},
];

View File

@ -19,4 +19,12 @@ module.exports = [
ts: 1527812342,
e: "people", // خلک
},
{
ts: 1527815163,
e: "cat", // پیشو
},
{
ts: 1527815450,
e: "son", // زوی
},
];