From f25ebaaf62008dffe6ebbd0e51e0ba1f13fbe6aa Mon Sep 17 00:00:00 2001 From: adueck Date: Thu, 26 Jan 2023 17:19:59 +0500 Subject: [PATCH] better inflection on irregulars --- package-lock.json | 4 ++-- package.json | 2 +- src/components/package.json | 2 +- src/lib/package.json | 2 +- src/lib/src/pashto-inflector.test.ts | 29 ++++++++++++++++++++++------ src/lib/src/pashto-inflector.ts | 25 ++++++++++++------------ 6 files changed, 41 insertions(+), 23 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7e093b2..7f30b89 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "pashto-inflector", - "version": "5.7.4", + "version": "5.7.5", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "pashto-inflector", - "version": "5.7.4", + "version": "5.7.5", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index e23590b..aa7b15b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pashto-inflector", - "version": "5.7.4", + "version": "5.7.5", "author": "lingdocs.com", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "homepage": "https://verbs.lingdocs.com", diff --git a/src/components/package.json b/src/components/package.json index b98a5c8..19b5760 100644 --- a/src/components/package.json +++ b/src/components/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/ps-react", - "version": "5.7.4", + "version": "5.7.5", "description": "Pashto inflector library module with React components", "main": "dist/components/library.js", "module": "dist/components/library.js", diff --git a/src/lib/package.json b/src/lib/package.json index 1ba10cd..6fe6749 100644 --- a/src/lib/package.json +++ b/src/lib/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/inflect", - "version": "5.7.4", + "version": "5.7.5", "description": "Pashto inflector library", "main": "dist/index.js", "types": "dist/lib/library.d.ts", diff --git a/src/lib/src/pashto-inflector.test.ts b/src/lib/src/pashto-inflector.test.ts index 02f91f5..fccd9e8 100644 --- a/src/lib/src/pashto-inflector.test.ts +++ b/src/lib/src/pashto-inflector.test.ts @@ -600,7 +600,7 @@ const nouns: { masc: [ [{ p: "غر", f: "ghar" }], [{ p: "غره", f: "ghru" }], - [{ p: "غرو", f: "ghró" }], + [{ p: "غرو", f: "ghro" }], ], }, plural: { @@ -625,13 +625,13 @@ const nouns: { // TODO: use smarter system using new isType5Entry predicates, to allow for not using the redundant one syllable accents with these masc: [ [{ p: "خر", f: "khur" }], - [{ p: "خره", f: "khrú" }], - [{ p: "خرو", f: "khró" }], + [{ p: "خره", f: "khru" }], + [{ p: "خرو", f: "khro" }], ], fem: [ - [{ p: "خره", f: "khrá" }], - [{ p: "خرې", f: "khré" }], - [{ p: "خرو", f: "khró" }], + [{ p: "خره", f: "khra" }], + [{ p: "خرې", f: "khre" }], + [{ p: "خرو", f: "khro" }], ], }, }, @@ -1232,6 +1232,23 @@ const nouns: { }, out: false, }, + { + in: {"ts":1610795367898,"i":6978,"p":"رشوت خور","f":"rishwat khór","g":"rishwatkhor","e":"bribe-taker, corrupt","r":4,"c":"n. m. anim. unisex / adj.","infap":"رشوت خواره","infaf":"rishwat khwaaru","infbp":"رشوت خور","infbf":"rishwat khwar"}, + out: { + inflections: { + masc: [ + [{p: "رشوت خور", f: "rishwat khór"}], + [{p: "رشوت خواره", f: "rishwat khwaarú"}], + [{p: "رشوت خورو", f: "rishwat khwaró"}], + ], + fem: [ + [{p: "رشوت خوره", f: "rishwat khwará"}], + [{p: "رشوت خورې", f: "rishwat khwaré"}], + [{p: "رشوت خورو", f: "rishwat khwaró"}], + ], + }, + }, + }, ]; const others: T.DictionaryEntry[] = [ diff --git a/src/lib/src/pashto-inflector.ts b/src/lib/src/pashto-inflector.ts index 915f770..f4b9519 100644 --- a/src/lib/src/pashto-inflector.ts +++ b/src/lib/src/pashto-inflector.ts @@ -31,6 +31,7 @@ import { } from "./accent-helpers"; import * as T from "../../types"; import { splitFIntoPhonemes } from "./phonetics-to-diacritics"; +import { splitPsString } from "./splitPsString"; const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; const endingInHeyOrAynRegex = /[^ا][هع]$/; @@ -184,18 +185,18 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput { // LEVEL 3 FUNCTIONS function inflectIrregularUnisex(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections { const inf1 = removeAccents(inflections[1]); - const inf0syls = splitFIntoPhonemes(inflections[0].f); - const inf0f = accentFSylsOnNFromEnd(inf0syls, 0); + const inf0 = removeAccents(inflections[0]); + const inf0fSyls = splitUpSyllables(inf0.f).length; return { masc: [ [{p, f}], - [{p: inflections[0].p, f: inf0f, }], - [{p: `${inf1.p}و`, f: `${inf1.f}ó`}], + [{p: inflections[0].p, f: `${inf0.f.slice(0, -1)}${inf0fSyls === 1 ? "u" : "ú"}` }], + [{p: `${inf1.p}و`, f: `${inf1.f}${inf0fSyls === 1 ? "o" : "ó"}`}], ], fem: [ - [{p: `${inf1.p}ه`, f: `${inf1.f}á`}], - [{p: `${inf1.p}ې`, f: `${inf1.f}é`}], - [{p: `${inf1.p}و`, f: `${inf1.f}ó`}], + [{p: `${inf1.p}ه`, f: `${inf1.f}${inf0fSyls === 1 ? "a" : "á"}`}], + [{p: `${inf1.p}ې`, f: `${inf1.f}${inf0fSyls === 1 ? "e" : "é"}`}], + [{p: `${inf1.p}و`, f: `${inf1.f}${inf0fSyls === 1 ? "o" : "ó"}`}], ], }; } @@ -324,14 +325,14 @@ function inflectRegularEmphasizedYeyMasc(p: string, f: string): T.Inflections { } function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections { - const inf0f = splitUpSyllables(inflections[0].f).length > 1 - ? accentFSylsOnNFromEnd(inflections[0].f, 0) - : inflections[0].f + let inf0f = removeAccents(inflections[0].f); + const inf0syls = splitUpSyllables(f).length; + const inf1f = removeAccents(inflections[1].f); return { masc: [ [{p, f}], - [{p: inflections[0].p, f: inf0f}], - [{p: `${inflections[1].p}و`, f: `${removeAccents(inflections[1].f)}ó`}], + [{p: inflections[0].p, f: `${inf0f.slice(0, -1)}${inf0syls === 1 ? "u" : "ú"}`}], + [{p: `${inflections[1].p}و`, f: `${inf1f}${inf0syls === 1 ? "o" : "ó"}`}], ], }; }