get english word function

This commit is contained in:
lingdocs 2021-10-04 21:15:13 -04:00
parent 5cae22bde7
commit 4a2408f197
4 changed files with 119 additions and 1 deletions

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/pashto-inflector",
"version": "1.0.7",
"version": "1.0.8",
"author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com",

View File

@ -0,0 +1,56 @@
import { getEnglishWord } from "./get-english-word";
// Checks the English singular/plural derived from a sample of noun entries,
// one entry per pluralization rule.
test("getEnglishWord", () => {
    const cases = [
        // irregular plural supplied explicitly through ec / ep
        {
            entry: {"ts":1527815251,"i":7737,"p":"سړی","f":"saRéy","g":"saRey","e":"man","c":"n. m.","ec":"man","ep":"men"},
            expected: { singular: "man", plural: "men" },
        },
        // endings ch, sh, x, s, z take "es"
        {
            entry: {"ts":1527823620,"i":1589,"p":"بزه","f":"bza","g":"bza","e":"patch (in a garment)","c":"n. f."},
            expected: { singular: "patch", plural: "patches" },
        },
        {
            entry: {"ts":1527823172,"i":2066,"p":"بېله","f":"belá","g":"bela","e":"marsh, swamp","c":"n. f."},
            expected: { singular: "marsh", plural: "marshes" },
        },
        {
            entry: {"ts":1589885085444,"i":11290,"p":"لمبړ","f":"lUmbáR","g":"lUmbaR","e":"fox","c":"n. m. unisex"},
            expected: { singular: "fox", plural: "foxes" },
        },
        {
            entry: {"ts":1592642858843,"i":13644,"p":"نېک","f":"nek","g":"nek","e":"boss, master","c":"n. m. anim. unisex"},
            expected: { singular: "boss", plural: "bosses" },
        },
        // consonant followed by y becomes "ies"
        {
            entry: {"ts":1527812677,"i":13994,"p":"هېواد","f":"hewaad","g":"hewaad","e":"country, homeland","c":"n. m."},
            expected: { singular: "country", plural: "countries" },
        },
        // vowel followed by y just takes "s"
        {
            entry: {"ts":1527815417,"i":14123,"p":"ورځ","f":"wradz","g":"wradz","e":"day","c":"n. f."},
            expected: { singular: "day", plural: "days" },
        },
        // ending in o
        {
            entry: {"ts":1527820648,"i":788,"p":"الو","f":"aloo","g":"aloo","e":"potato","c":"n. m.","ppp":"الوګان","ppf":"aloogáan"},
            expected: { singular: "potato", plural: "potatoes" },
        },
        // ending in is
        {
            entry: {"ts":1527814761,"i":3124,"p":"تحلیل","f":"tahleel","g":"tahleel","e":"analysis","c":"n. m."},
            expected: { singular: "analysis", plural: "analyses" },
        },
        // plural-only noun: no singular is returned
        {
            entry: {"ts":1527815008,"i":8433,"p":"شودې","f":"shoodé","g":"shoode","e":"milk","c":"n. f. pl."},
            expected: { plural: "milk" },
        },
    ];
    for (const { entry, expected } of cases) {
        expect(getEnglishWord(entry)).toEqual(expected);
    }
});

View File

@ -0,0 +1,58 @@
import * as T from "../types";
/**
 * Returns the singular and plural English word for a Pashto entry, if possible.
 *
 * NOTE: only noun entries are handled. An entry with no `c`, whose `c` does
 * not contain "n.", or whose `c` contains "adj." yields `undefined`.
 *
 * Resolution order:
 *  1. if both `ec` and `ep` are present they are returned as-is
 *     (this is how irregular plurals such as man/men are supplied)
 *  2. otherwise the base word is the first gloss in `e`, i.e. the text
 *     before the first `,`, `;` or `(`, trimmed
 *  3. if `c` contains "pl." only `plural` is returned, set to the base word
 *  4. otherwise the plural is built from the base word with regular
 *     English pluralization rules
 *
 * @param entry - the dictionary entry to derive the English word from
 * @returns the singular/plural pair, a plural-only result, or `undefined`
 * when the entry is not a noun or no base word can be extracted from `e`
 */
export function getEnglishWord(entry: T.DictionaryEntry): {
    singular?: string,
    plural: string,
} | undefined {
    if (!entry.c || !entry.c.includes("n.") || entry.c.includes("adj.")) {
        return undefined;
    }
    if (entry.ec && entry.ep) {
        return {
            singular: entry.ec,
            plural: entry.ep,
        };
    }
    const base = entry.e.split(",")[0].split(";")[0].split("(")[0].trim();
    if (!base) {
        // nothing usable before the first separator (e.g. the gloss starts
        // with a parenthesis) - avoid returning a bare "s" as a plural
        return undefined;
    }
    if (entry.c.includes("pl.")) {
        return {
            plural: base,
        };
    }
    return {
        singular: base,
        plural: pluralize(base),
    };
}

/**
 * Builds a regular English plural from a singular word.
 *
 * @param word - a non-empty singular English word
 * @returns the pluralized word
 */
function pluralize(word: string): string {
    const last = word.slice(-1);
    const beforeLast = word.slice(-2, -1);
    // analysis -> analyses
    if (word.slice(-3) === "sis") {
        return `${word.slice(0, -2)}es`;
    }
    // vowel + o just takes "s": radio -> radios, zoo -> zoos
    if (last === "o" && isVowel(beforeLast)) {
        return `${word}s`;
    }
    // patch -> patches, fox -> foxes, boss -> bosses, potato -> potatoes
    if (
        ["sh", "ch"].includes(word.slice(-2)) ||
        ["s", "x", "z", "o"].includes(last)
    ) {
        return `${word}es`;
    }
    // consonant + y: country -> countries (vowel + y falls through: day -> days)
    if (last === "y" && !isVowel(beforeLast)) {
        return `${word.slice(0, -1)}ies`;
    }
    return `${word}s`;
}

/**
 * Tells whether a string is exactly one lowercase English vowel letter.
 *
 * @param l - the string to check; anything other than a single
 * lowercase a/e/i/o/u (including the empty string) gives false
 * @returns true if `l` is a lowercase vowel
 */
function isVowel(l: string): boolean {
    return ["a", "e", "i", "o", "u"].includes(l);
}

View File

@ -36,6 +36,9 @@ import {
isUnisexSet,
isInflectionSet,
} from "./lib/p-text-helpers";
import {
getEnglishWord,
} from "./lib/get-english-word";
import {
standardizePashto,
standardizePhonetics,
@ -118,6 +121,7 @@ export {
personFromVerbBlockPos,
removeAccents,
hasAccents,
getEnglishWord,
// protobuf helpers
readDictionary,
writeDictionary,