// pashto-inflector/src/lib/pashto-inflector.test.ts
/**
* Copyright (c) 2021 lingdocs.com
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
// TODO: See if there are animate feminine words ending in ي and test
import {
inflectRegularYeyUnisex,
inflectWord,
} from "./pashto-inflector";
import * as T from "../types";
/**
 * Adjective fixtures for `inflectWord`.
 *
 * Each fixture pairs a dictionary entry (`in`) with the exact value
 * `inflectWord(in)` is expected to produce (`out`). `out: false` is used for
 * the entries listed as non-inflecting.
 *
 * NOTE(review): each gender lists three rows — presumably plain form, first
 * inflection, second inflection — and each row may hold several accepted
 * variants; confirm against the T.InflectorOutput definition.
 */
const adjectives: Array<{
    in: T.DictionaryEntry,
    out: T.InflectorOutput,
}> = [
    // irregular adj.
    {
        in: {
            ts: 1527815451,
            p: "زوړ",
            f: "zoR",
            g: "",
            e: "old",
            c: "adj. irreg.",
            i: 6264,
            infap: "زاړه",
            infaf: "zaaRu",
            infbp: "زړ",
            infbf: "zaR",
        },
        out: {
            inflections:{
                masc: [
                    [{p: "زوړ", f: "zoR"}],
                    [{p: "زاړه", f: "zaaRu"}],
                    [{p: "زړو", f: "zaRo"}],
                ],
                fem: [
                    [{p: "زړه", f: "zaRa"}],
                    [{p: "زړې", f: "zaRe"}],
                    [{p: "زړو", f: "zaRo"}],
                ],
            },
        },
    },
    // regular adjective ending in ی
    {
        in: {
            ts: 1527815306,
            p: "ستړی",
            f: "stúRey",
            g: "",
            e: "tired",
            c: "adj.",
            i: 6564,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "ستړی", f: "stúRey"}],
                    [{p: "ستړي", f: "stúRee"}],
                    [{p: "ستړیو", f: "stúRiyo"}, {p: "ستړو", f: "stúRo"}],
                ],
                fem: [
                    [{p: "ستړې", f: "stúRe"}],
                    [{p: "ستړې", f: "stúRe"}],
                    [{p: "ستړو", f: "stúRo"}],
                ],
            }
        },
    },
    // regular adjective ending in ی with stress on the end
    {
        in: {
            ts: 1527813636,
            p: "وروستی",
            f: "wroostéy",
            g: "",
            e: "last, latest, recent",
            c: "adj.",
            i: 12026,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "وروستی", f: "wroostéy"}],
                    [{p: "وروستي", f: "wroostée"}],
                    [{p: "وروستیو", f: "wroostiyo"}, {p: "وروستو", f: "wroostó"}],
                ],
                fem: [
                    [{p: "وروستۍ", f: "wroostúy"}],
                    [{p: "وروستۍ", f: "wroostúy"}],
                    [{p: "وروستیو", f: "wroostúyo"}, {p: "وروستو", f: "wroostó"}],
                ],
            }
        },
    },
    // regular adjective ending in a consonant
    {
        in: {
            ts: 1527813498,
            p: "سپک",
            f: "spuk",
            g: "",
            e: "light; dishonorable, not respectable",
            c: "adj.",
            i: 6502,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "سپک", f: "spuk"}],
                    [{p: "سپک", f: "spuk"}],
                    [{p: "سپکو", f: "spuko"}],
                ],
                fem: [
                    [{p: "سپکه", f: "spuka"}],
                    [{p: "سپکې", f: "spuke"}],
                    [{p: "سپکو", f: "spuko"}],
                ],
            },
        },
    },
    {
        in: {
            ts: 1527812862,
            p: "لوی",
            f: "looy",
            g: "",
            e: "big, great, large",
            c: "adj.",
            i: 9945,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "لوی", f: "looy"}],
                    [{p: "لوی", f: "looy"}],
                    [{p: "لویو", f: "looyo"}],
                ],
                fem: [
                    [{p: "لویه", f: "looya"}],
                    [{p: "لویې", f: "looye"}],
                    [{p: "لویو", f: "looyo"}],
                ],
            },
        },
    },
    {
        in: {
            ts: 1527811469,
            p: "پوه",
            f: "poh",
            g: "",
            e: "understanding, having understood; intelligent, quick, wise, clever; expert",
            c: "adj.",
            i: 2430,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "پوه", f: "poh"}],
                    [{p: "پوه", f: "poh"}],
                    [{p: "پوهو", f: "poho"}],
                ],
                fem: [
                    [{p: "پوهه", f: "poha"}],
                    [{p: "پوهې", f: "pohe"}],
                    [{p: "پوهو", f: "poho"}],
                ],
            },
        },
    },
    // adjective ending in u
    {
        in: {
            ts: 1527812791,
            p: "ویده",
            f: "weedú",
            g: "weedu",
            e: "asleep",
            c: "adj.",
            i: 1,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "ویده", f: "weedú"}],
                    [{p: "ویده", f: "weedú"}],
                    [{p: "ویدو", f: "weedó"}],
                ],
                fem: [
                    [{p: "ویده", f: "weedá"}],
                    [{p: "ویدې", f: "weedé"}],
                    [{p: "ویدو", f: "weedó"}],
                ],
            },
        },
    },
    // adjective non-inflecting
    {
        in: {
            ts: 1527812798,
            p: "خفه",
            f: "khufa",
            g: "",
            e: "sad, upset, angry; choked, suffocated",
            c: "adj.",
            i: 4631,
        },
        out: false,
    },
    {
        in: {
            ts: 1527814727,
            p: "اجباري",
            f: "ijbaaree",
            g: "",
            e: "compulsory, obligatory",
            c: "adj.",
            i: 167,
        },
        out: false,
    },
    // double adjective
    {
        in: {
            ts: 123,
            p: "ګډ وډ",
            f: "guD wuD",
            g: "guDwuD",
            e: "mixed up",
            c: "adj. doub.",
            i: 1,
        },
        out: {
            inflections: {
                masc: [
                    [{ p: "ګډ وډ", f: "guD wuD" }],
                    [{ p: "ګډ وډ", f: "guD wuD" }],
                    [{ p: "ګډو وډو", f: "guDo wuDo" }],
                ],
                fem: [
                    [{ p: "ګډه وډه", f: "guDa wuDa" }],
                    [{ p: "ګډې وډې", f: "guDe wuDe" }],
                    [{ p: "ګډو وډو", f: "guDo wuDo" }],
                ],
            },
        },
    },
];
/**
 * Noun fixtures for `inflectWord`, grouped as unisex, masculine and feminine.
 *
 * Each fixture pairs a dictionary entry (`in`) with the exact value
 * `inflectWord(in)` is expected to produce (`out`). Depending on the word the
 * expected output carries `inflections`, `plural`, or both; `out: false` is
 * used where no output is expected.
 *
 * Commented-out `plural` / `out` sections are expectations that are currently
 * disabled; they are kept for reference.
 */
const nouns: Array<{
    in: T.DictionaryEntry,
    out: T.InflectorOutput,
}> = [
    // ## UNISEX
    // Unisex noun irregular
    {
        in: {
            ts: 1527812908,
            p: "مېلمه",
            f: "melmá",
            e: "guest",
            g: "",
            c: "n. m. irreg. unisex",
            i: 11244,
            infap: "مېلمانه",
            infaf: "melmaanu",
            infbp: "مېلمن",
            infbf: "melman",
        },
        out: {
            inflections: {
                masc: [
                    [{p: "مېلمه", f: "melmá"}],
                    [{p: "مېلمانه", f: "melmaanu"}],
                    [{p: "مېلمنو", f: "melmano"}],
                ],
                fem: [
                    [{p: "مېلمنه", f: "melmana"}],
                    [{p: "مېلمنې", f: "melmane"}],
                    [{p: "مېلمنو", f: "melmano"}],
                ],
            },
        },
    },
    // Unisex noun ending with ی
    {
        in: {
            ts: 1527814159,
            p: "ملګری",
            f: "malgúrey",
            g: "",
            e: "friend, companion",
            c: "n. m. unisex",
            i: 10943,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "ملګری", f: "malgúrey"}],
                    [{p: "ملګري", f: "malgúree"}],
                    [{p: "ملګریو", f: "malgúriyo"}, {p: "ملګرو", f: "malgúro"}],
                ],
                fem: [
                    [{p: "ملګرې", f: "malgúre"}],
                    [{p: "ملګرې", f: "malgúre"}],
                    [{p: "ملګرو", f: "malgúro"}],
                ],
            },
        },
    },
    // Unisex noun ending on ی with emphasis on the end
    {
        in: {
            ts: 1527816431,
            p: "ترورزی",
            f: "trorzéy",
            g: "",
            e: "cousin (son of paternal aunt)",
            c: "n. m. unisex",
            i: 2900,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "ترورزی", f: "trorzéy"}],
                    [{p: "ترورزي", f: "trorzée"}],
                    [{p: "ترورزیو", f: "trorziyo"}, {p: "ترورزو", f: "trorzó"}],
                ],
                fem: [
                    [{p: "ترورزۍ", f: "trorzúy"}],
                    [{p: "ترورزۍ", f: "trorzúy"}],
                    [{p: "ترورزیو", f: "trorzúyo"}, {p: "ترورزو", f: "trorzó"}],
                ],
            },
            // plural: {
            //     masc: [
            //         [{ p: "ترورزامن", f: "trorzaamun" }],
            //         [{ p: "ترورزامنو", f: "trorzaamuno" }],
            //     ],
            // },
        },
    },
    // Unisex noun ending with a consonant
    {
        in: {
            ts: 1527820043,
            p: "چرګ",
            f: "churg",
            g: "",
            e: "rooster, cock; chicken, poultry",
            c: "n. m. unisex anim.",
            i: 4101,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "چرګ", f: "churg"}],
                    [{p: "چرګ", f: "churg"}],
                    [{p: "چرګو", f: "churgo"}],
                ],
                fem: [
                    [{p: "چرګه", f: "churga"}],
                    [{p: "چرګې", f: "churge"}],
                    [{p: "چرګو", f: "churgo"}],
                ],
            },
            plural: {
                masc: [
                    [{p: "چرګان", f: "churgáan"}],
                    [{p: "چرګانو", f: "churgáano"}],
                ],
                fem: [
                    [{p: "چرګانې", f: "churgáane"}],
                    [{p: "چرګانو", f: "churgáano"}],
                ],
            },
        },
    },
    // ## MASCULINE
    // Masculine regular ending in ی
    {
        in: {
            ts: 1527815251,
            p: "سړی",
            f: "saRey",
            g: "",
            e: "man",
            c: "n. m.",
            i: 6750,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "سړی", f: "saRey"}],
                    [{p: "سړي", f: "saRee"}],
                    [{p: "سړیو", f: "saRiyo"}, {p: "سړو", f: "saRo"}],
                ],
            }
        },
    },
    // Masculine regular ending in ی with emphasis on end
    {
        in: {
            ts: 1527818511,
            p: "ترېلی",
            f: "treléy",
            g: "",
            e: "pool, reservoir",
            c: "n. m.",
            i: 2931,
        },
        out: {
            inflections: {
                masc: [
                    [{p: "ترېلی", f: "treléy"}],
                    [{p: "ترېلي", f: "trelée"}],
                    [{p: "ترېلیو", f: "treliyo"}, {p: "ترېلو", f: "trelo"}],
                ],
            },
        },
    },
    // Masculine ending in tob
    {
        in: {
            i: 11998,
            ts: 1586760783536,
            p: "مشرتوب",
            f: "mushurtob",
            g: "",
            e: "leadership, authority, presidency",
            c: "n. m.",
        },
        out: {
            inflections: {
                masc: [
                    [{p: "مشرتوب", f: "mushurtob"}],
                    [{p: "مشرتابه", f: "mushurtaabu"}],
                    [{p: "مشرتبو", f: "mushurtabo"}],
                ],
            },
        },
    },
    // Masculine irregular
    {
        in: {
            ts: 1527813809,
            p: "لمونځ",
            f: "lamoondz",
            g: "",
            e: "Muslim ritual prayers (namaz, salah, salat)",
            c: "n. m. irreg.",
            i: 9835,
            infap: "لمانځه",
            infaf: "lamaandzu",
            infbp: "لمنځ",
            infbf: "lamandz",
        },
        out: {
            inflections: {
                masc: [
                    [{p: "لمونځ", f: "lamoondz"}],
                    [{p: "لمانځه", f: "lamaandzu"}],
                    [{p: "لمنځو", f: "lamandzo"}],
                ],
            },
            // plural: {
            //     masc: [
            //         [{ p: "لمونځونه", f: "lamoondzóona" }],
            //         [{ p: "لمونځونو", f: "lamoondzóono" }],
            //     ],
            // },
        },
    },
    // Masculine non-inflecting
    {
        in: {
            ts: 1527812817,
            p: "کتاب",
            f: "kitaab",
            g: "",
            e: "book",
            c: "n. m.",
            i: 8640,
        },
        out: {
            plural: {
                masc: [
                    [{ p: "کتابونه", f: "kitaabóona" }],
                    [{ p: "کتابونو", f: "kitaabóono" }],
                ],
            },
        },
    },
    // ## FEMININE
    // Feminine regular ending in ه
    {
        in: {
            ts: 1527812797,
            p: "ښځه",
            f: "xudza",
            g: "",
            e: "woman, wife",
            c: "n. f.",
            i: 7444,
        },
        out: {
            inflections: {
                fem: [
                    [{p: "ښځه", f: "xudza"}],
                    [{p: "ښځې", f: "xudze"}],
                    [{p: "ښځو", f: "xudzo"}],
                ],
            },
        },
    },
    {
        in: {
            ts: 1527821380,
            p: "اره",
            f: "ará",
            g: "",
            e: "saw (the tool)",
            c: "n. f.",
            i: 365,
        },
        out: {
            inflections: {
                fem: [
                    [{p: "اره", f: "ará"}],
                    [{p: "ارې", f: "are"}],
                    [{p: "ارو", f: "aro"}],
                ],
            },
        },
    },
    // Feminine regular ending in ع - a'
    {
        in: {
            ts: 1527820693,
            p: "مرجع",
            f: "marja'",
            g: "",
            e: "reference, authority, body, place to go (for help, shelter, etc.)",
            c: "n. f.",
            i: 10661,
            app: "مراجع",
            apf: "maraají",
        },
        out: {
            inflections: {
                fem: [
                    [{p: "مرجع", f: "marja'"}],
                    [{p: "مرجعې", f: "marje"}],
                    [{p: "مرجعو", f: "marjo"}],
                ],
            },
        },
    },
    {
        in: {
            ts: 1527820212,
            p: "منبع",
            f: "manbá",
            g: "",
            e: "source, origin, resource, cause",
            c: "n. f.",
            i: 11201,
            app: "منابع",
            apf: "manaabí",
        },
        out: {
            inflections: {
                fem: [
                    [{p: "منبع", f: "manbá"}],
                    [{p: "منبعې", f: "manbe"}],
                    [{p: "منبعو", f: "manbo"}],
                ],
            },
        },
    },
    // Feminine regular ending in ح - a
    {
        in: {
            ts: 1527815506,
            p: "ذبح",
            f: "zabha",
            g: "",
            e: "slaughter, killing, butchering",
            c: "n. f.",
            i: 5813,
        },
        out: {
            inflections: {
                fem: [
                    [{p: "ذبح", f: "zabha"}],
                    [{p: "ذبحې", f: "zabhe"}],
                    [{p: "ذبحو", f: "zabho"}],
                ],
            },
        },
    },
    // Feminine inanimate regular with missing ه
    {
        in: {
            ts: 1527814150,
            p: "لار",
            f: "laar",
            g: "",
            e: "road, way, path",
            c: "n. f.",
            i: 9593,
        },
        out: {
            inflections: {
                fem: [
                    [{p: "لار", f: "laar"}],
                    [{p: "لارې", f: "laare"}],
                    [{p: "لارو", f: "laaro"}],
                ],
            },
        },
    },
    // Feminine animate ending in a consonant
    // TODO: ALLOW FOR MULTIPLE PLURAL POSSIBILITIES میندې, میېنې etc.
    {
        in: {
            ts: 1527812928,
            p: "مور",
            f: "mor",
            g: "",
            e: "mother, mom",
            c: "n. f. anim.",
            ppp: "میندې",
            ppf: "meynde",
            i: 11113,
        },
        out: {
            plural: {
                fem: [
                    [{ p: "میندې", f: "meynde" }],
                    [{ p: "میندو", f: "meyndo" }],
                ],
            },
        },
    },
    // Feminine regular inanimate ending in ي
    {
        in: {
            ts: 1527811877,
            p: "دوستي",
            f: "dostee",
            g: "",
            e: "friendship",
            c: "n. f.",
            i: 5503,
        },
        out: {
            inflections: {
                fem: [
                    [{p: "دوستي", f: "dostee"}],
                    [{p: "دوستۍ", f: "dostuy"}],
                    [{p: "دوستیو", f: "dostuyo"}],
                ],
            },
        },
    },
    // Feminine regular ending in ۍ
    {
        in: {
            ts: 1527814203,
            p: "کرسۍ",
            f: "kUrsuy",
            g: "",
            e: "chair, seat, stool",
            c: "n. f.",
            i: 8718,
        },
        out: {
            inflections: {
                fem: [
                    [{p: "کرسۍ", f: "kUrsuy"}],
                    [{p: "کرسۍ", f: "kUrsuy"}],
                    [{p: "کرسیو", f: "kUrsuyo"}, { p: "کرسو", f: "kUrso"}],
                ],
            },
        },
    },
    // Feminine regular ending in ا
    {
        in: {
            ts: 1527812456,
            p: "اړتیا",
            f: "aRtiyáa, aRtyáa",
            g: "",
            e: "need, necessity",
            c: "n. f.",
            i: 376,
        },
        out: {
            plural: {
                fem: [
                    [{p: "اړتیاوې", f: "aRtiyáawe"}, { p: "اړتیاګانې", f:"aRtiyaagáane"}],
                    [{p: "اړتیاوو", f: "aRtiyáawo"}, { p: "اړتیاګانو", f:"aRtiyaagáano"}],
                ],
            },
        },
    },
    // Feminine regular ending in اع
    {
        in: {
            ts: 1527821388,
            p: "وداع",
            f: "widáa'",
            g: "",
            e: "farewell, goodbye",
            c: "n. f.",
            i: 12205,
        },
        out: false,
        // out: {
        //     plural: {
        //         fem: [
        //             [{p: "وداع وې", f: "widáawe"}, {p: "وداع ګانې", f: "widaagáane"}],
        //             [{p: "وداع وو", f: "widáawo"}, {p: "وداع ګانو", f: "widaagáano"}],
        //         ],
        //     },
        // },
    },
    // Word with no inflections
    {
        in: {
            ts: 1527815402,
            p: "وړ",
            f: "waR",
            g: "",
            e: "worthy of, deserving, -able",
            c: "suff. / adj.",
            i: 12045,
            noInf: true,
        },
        out: false,
    },
    // TODO: WORDS THAT ARE ALREADY PLURAL!
];
/**
 * Dictionary entries of other word classes (a transitive verb and an adverb)
 * for which `inflectWord` is expected to return `false`.
 */
const others: T.DictionaryEntry[] = [
    {
        ts: 1527812612,
        p: "ګنډل",
        f: "ganDul",
        g: "",
        e: "to sew, mend, make, knit",
        c: "v. trans.",
        i: 9448,
    },
    {
        ts: 1527812457,
        p: "اصلاً",
        f: "aslan",
        g: "",
        e: "actually",
        c: "adv.",
        i: 550,
    },
];
// One test per adjective and noun fixture (adjectives first, then nouns):
// inflectWord(fixture.in) must deep-equal the fixture's expected output.
for (const fixture of [...adjectives, ...nouns]) {
    test(`${fixture.in.p} should inflect properly`, () => {
        const actual = inflectWord(fixture.in);
        expect(actual).toEqual(fixture.out);
    });
}
// Entries from `others` must all come back as `false`.
for (const entry of others) {
    test(`${entry.p} should return false`, () => {
        const actual = inflectWord(entry);
        expect(actual).toEqual(false);
    });
}
// Direct check of inflectRegularYeyUnisex on one word: the returned object
// must contain exactly these masculine and feminine rows.
test(`inflectRegularYeyUnisex should work`, () => {
    const expected = {
        masc: [
            [{p: "لیدونکی", f: "leedóonkey" }],
            [{p: "لیدونکي", f: "leedóonkee" }],
            [{p: "لیدونکیو", f: "leedóonkiyo" }, {p: "لیدونکو", f: "leedóonko"}],
        ],
        fem: [
            [{p: "لیدونکې", f: "leedóonke" }],
            [{p: "لیدونکې", f: "leedóonke" }],
            [{p: "لیدونکو", f: "leedóonko"}],
        ],
    };
    const actual = inflectRegularYeyUnisex("لیدونکی", "leedóonkey");
    expect(actual).toEqual(expected);
});