From 4a2408f197b7f6b72225b5dcb5498d50b28a1930 Mon Sep 17 00:00:00 2001 From: lingdocs <71590811+lingdocs@users.noreply.github.com> Date: Mon, 4 Oct 2021 21:15:13 -0400 Subject: [PATCH] get english word function --- package.json | 2 +- src/lib/get-english-word.test.ts | 56 ++++++++++++++++++++++++++++++ src/lib/get-english-word.ts | 58 ++++++++++++++++++++++++++++++++ src/library.ts | 4 +++ 4 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 src/lib/get-english-word.test.ts create mode 100644 src/lib/get-english-word.ts diff --git a/package.json b/package.json index 6fb7154..33ad4dd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/pashto-inflector", - "version": "1.0.7", + "version": "1.0.8", "author": "lingdocs.com", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "homepage": "https://verbs.lingdocs.com", diff --git a/src/lib/get-english-word.test.ts b/src/lib/get-english-word.test.ts new file mode 100644 index 0000000..86011ca --- /dev/null +++ b/src/lib/get-english-word.test.ts @@ -0,0 +1,56 @@ +import { getEnglishWord } from "./get-english-word"; + +test("getEnglishWord", () => { + const tests = [ + // irreg + { + in: {"ts":1527815251,"i":7737,"p":"سړی","f":"saRéy","g":"saRey","e":"man","c":"n. m.","ec":"man","ep":"men"}, + out: { singular: "man", plural: "men" }, + }, + // ch, sh, x, s, z - es ending + { + in: {"ts":1527823620,"i":1589,"p":"بزه","f":"bza","g":"bza","e":"patch (in a garment)","c":"n. f."}, + out: { singular: "patch", plural: "patches" }, + }, + { + in: {"ts":1527823172,"i":2066,"p":"بېله","f":"belá","g":"bela","e":"marsh, swamp","c":"n. f."}, + out: { singular: "marsh", plural: "marshes" }, + }, + { + in: {"ts":1589885085444,"i":11290,"p":"لمبړ","f":"lUmbáR","g":"lUmbaR","e":"fox","c":"n. m. 
unisex"}, + out: { singular: "fox", plural: "foxes" }, + }, + { + in: {"ts":1592642858843,"i":13644,"p":"نېک","f":"nek","g":"nek","e":"boss, master","c":"n. m. anim. unisex"}, + out: { singular: "boss", plural: "bosses" }, + }, + // (consonant) w/ y - ies + { + in: {"ts":1527812677,"i":13994,"p":"هېواد","f":"hewaad","g":"hewaad","e":"country, homeland","c":"n. m."}, + out: { singular: "country", plural: "countries" }, + }, + // (vowel) w/ y - ys + { + in: {"ts":1527815417,"i":14123,"p":"ورځ","f":"wradz","g":"wradz","e":"day","c":"n. f."}, + out: { singular: "day", plural: "days" }, + }, + // ends in o + { + in: {"ts":1527820648,"i":788,"p":"الو","f":"aloo","g":"aloo","e":"potato","c":"n. m.","ppp":"الوګان","ppf":"aloogáan"}, + out: { singular: "potato", plural: "potatoes" }, + }, + // ends in is + { + in: {"ts":1527814761,"i":3124,"p":"تحلیل","f":"tahleel","g":"tahleel","e":"analysis","c":"n. m."}, + out: { singular: "analysis", plural: "analyses" }, + }, + // only plural + { + in: {"ts":1527815008,"i":8433,"p":"شودې","f":"shoodé","g":"shoode","e":"milk","c":"n. f. 
/**
 * Returns the singular and plural English word for a Pashto noun entry,
 * if one can be derived.
 *
 * NOTE: only works with nouns. `undefined` is returned when the entry has no
 * `c` value, when `c` does not contain "n.", or when `c` contains "adj.".
 *
 * Resolution order:
 *  1. If both `ec` and `ep` are present they are returned as-is
 *     (this is how irregular forms such as "man" / "men" are supplied).
 *  2. Otherwise the first gloss of `e` is used: the text before the first
 *     ",", ";" or "(", trimmed. If nothing is left, `undefined` is returned.
 *  3. If `c` contains "pl." the word is returned as a plural only.
 *  4. Otherwise a plural is built with regular English spelling rules.
 *
 * The parameter is typed as just the fields this function reads, so a full
 * dictionary entry can still be passed in.
 *
 * @param entry - the entry to read the English word from
 * @returns the singular (when there is one) and plural forms, or `undefined`
 */
export function getEnglishWord(entry: {
    c?: string;
    e: string;
    ec?: string;
    ep?: string;
}): {
    singular?: string,
    plural: string,
} | undefined {
    const { c, e, ec, ep } = entry;
    if (!c || !c.includes("n.") || c.includes("adj.")) {
        return undefined;
    }
    // explicitly supplied forms always win over the spelling rules
    if (ec && ep) {
        return { singular: ec, plural: ep };
    }
    // keep only the first gloss, dropping alternatives and parenthetical notes
    const base = e.split(/[,;(]/)[0].trim();
    if (!base) {
        // e.g. an empty gloss or one that starts with "(": nothing to pluralize
        return undefined;
    }
    if (c.includes("pl.")) {
        return { plural: base };
    }
    return { singular: base, plural: pluralizeRegular(base) };
}

/** Builds the plural of an English noun using regular spelling rules. */
function pluralizeRegular(word: string): string {
    const last = word.slice(-1);
    const beforeLast = word.slice(-2, -1);
    // analysis -> analyses
    if (word.endsWith("sis")) {
        return `${word.slice(0, -2)}es`;
    }
    // patch -> patches, marsh -> marshes, fox -> foxes, boss -> bosses
    if (
        word.endsWith("sh") ||
        word.endsWith("ch") ||
        ["s", "x", "z"].includes(last)
    ) {
        return `${word}es`;
    }
    // potato -> potatoes, but radio -> radios (vowel before the "o")
    if (last === "o") {
        return isVowel(beforeLast) ? `${word}s` : `${word}es`;
    }
    // country -> countries, but day -> days (vowel before the "y")
    if (last === "y" && !isVowel(beforeLast)) {
        return `${word.slice(0, -1)}ies`;
    }
    return `${word}s`;
}

/** Tells whether the given single lowercase letter is one of a, e, i, o, u. */
function isVowel(l: string): boolean {
    return ["a", "e", "i", "o", "u"].includes(l);
}
protobuf helpers readDictionary, writeDictionary,