From 3cab8827ca2df92177cea07690e5b64e16cb679d Mon Sep 17 00:00:00 2001 From: lingdocs <71590811+lingdocs@users.noreply.github.com> Date: Mon, 1 Nov 2021 17:10:23 -0400 Subject: [PATCH] better accenting on inflections, will need to refactor with the cleaner isPatternXEntry predicates to do a fully proper job --- package.json | 2 +- src/lib/accent-helpers.ts | 11 +++- src/lib/p-text-helpers.ts | 2 +- src/lib/pashto-inflector.test.ts | 89 +++++++++++++++-------------- src/lib/pashto-inflector.ts | 45 ++++++++++----- src/lib/verb-info.test.ts | 98 ++++++++++++++++---------------- 6 files changed, 134 insertions(+), 113 deletions(-) diff --git a/package.json b/package.json index 0dd8273..0935898 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/pashto-inflector", - "version": "1.3.5", + "version": "1.3.6", "author": "lingdocs.com", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "homepage": "https://verbs.lingdocs.com", diff --git a/src/lib/accent-helpers.ts b/src/lib/accent-helpers.ts index 3ac16bd..019f661 100644 --- a/src/lib/accent-helpers.ts +++ b/src/lib/accent-helpers.ts @@ -15,11 +15,13 @@ import * as T from "../types"; * * @param s */ +export function accentOnFront(s: T.PsString): T.PsString; +export function accentOnFront(s: T.LengthOptions): T.LengthOptions; export function accentOnFront(s: T.SingleOrLengthOpts): T.SingleOrLengthOpts { if ("long" in s) { return { - short: accentOnFront(s.short) as T.PsString, - long: accentOnFront(s.long) as T.PsString, + short: accentOnFront(s.short), + long: accentOnFront(s.long), }; } return { @@ -69,7 +71,10 @@ export function countSyllables(f: T.PsString | string): number { * @param syls - an array of syllables in phonetic strings without accents (including spaces as extra items) * @param n - the number of syllables from the end to put the accent */ -export function accentFSylsOnNFromEnd(syls: string[], n: number): string { +export function accentFSylsOnNFromEnd(syls: string[] | string, n: number): string { + if (typeof syls === "string") { + return accentFSylsOnNFromEnd(splitUpSyllables(syls), n); + } return [ ...syls.slice(0, syls.length-(n+1)), // before accent accentSyllable(syls[syls.length-(n+1)]), // syllable to be accented diff --git a/src/lib/p-text-helpers.ts b/src/lib/p-text-helpers.ts index 095e7d8..cd886d0 100644 --- a/src/lib/p-text-helpers.ts +++ b/src/lib/p-text-helpers.ts @@ -15,7 +15,7 @@ import { getPersonInflectionsKey, } from "./misc-helpers"; import * as T from "../types"; -import { hasAccents, removeAccents } from "./accent-helpers"; +import { accentFSylsOnNFromEnd, accentOnFront, hasAccents, removeAccents, splitUpSyllables } from "./accent-helpers"; import { phoneticsConsonants } from "./pashto-consonants"; import { simplifyPhonetics } from "./simplify-phonetics"; diff --git a/src/lib/pashto-inflector.test.ts b/src/lib/pashto-inflector.test.ts index 2acf19d..3a8515d 100644 --- a/src/lib/pashto-inflector.test.ts +++ b/src/lib/pashto-inflector.test.ts @@ -37,13 +37,13 @@ const adjectives: Array<{ inflections:{ masc: [ [{p: "زوړ", f: "zoR"}], - [{p: "زاړه", f: "zaaRu"}], - [{p: "زړو", f: "zaRo"}], + [{p: "زاړه", f: "zaaRú"}], + [{p: "زړو", f: "zaRó"}], ], fem: [ - [{p: "زړه", f: "zaRa"}], - [{p: "زړې", f: "zaRe"}], - [{p: "زړو", f: "zaRo"}], + [{p: "زړه", f: "zaRá"}], + [{p: "زړې", f: "zaRé"}], + [{p: "زړو", f: "zaRó"}], ], }, }, @@ -116,12 +116,12 @@ const adjectives: Array<{ masc: [ [{p: "سپک", f: "spuk"}], [{p: "سپک", f: "spuk"}], - [{p: "سپکو", f: "spuko"}], + [{p: "سپکو", f: "spúko"}], ], fem: [ - [{p: "سپکه", f: "spuka"}], - [{p: "سپکې", f: "spuke"}], - [{p: "سپکو", f: "spuko"}], + [{p: "سپکه", f: "spúka"}], + [{p: "سپکې", f: "spúke"}], + [{p: "سپکو", f: "spúko"}], ], }, }, @@ -141,12 +141,12 @@ const adjectives: Array<{ masc: [ [{p: "لوی", f: "looy"}], [{p: "لوی", f: "looy"}], - [{p: "لویو", f: "looyo"}], + [{p: "لویو", f: "lóoyo"}], ], fem: [ - [{p: "لویه", f: "looya"}], - [{p: "لویې", f: "looye"}], - [{p: "لویو", f: "looyo"}], + [{p: "لویه", f: "lóoya"}], + [{p: "لویې", f: "lóoye"}], + [{p: "لویو", f: "lóoyo"}], ], }, }, @@ -166,12 +166,12 @@ const adjectives: Array<{ masc: [ [{p: "پوه", f: "poh"}], [{p: "پوه", f: "poh"}], - [{p: "پوهو", f: "poho"}], + [{p: "پوهو", f: "póho"}], ], fem: [ - [{p: "پوهه", f: "poha"}], - [{p: "پوهې", f: "pohe"}], - [{p: "پوهو", f: "poho"}], + [{p: "پوهه", f: "póha"}], + [{p: "پوهې", f: "póhe"}], + [{p: "پوهو", f: "póho"}], ], }, }, @@ -210,12 +210,12 @@ const adjectives: Array<{ masc: [ [{ p: "شپږ", f: "shpuG" }], [{ p: "شپږ", f: "shpuG" }], - [{ p: "شپږو", f: "shpuGo" }], + [{ p: "شپږو", f: "shpúGo" }], ], fem: [ - [{ p: "شپږه", f: "shpuGa" }], - [{ p: "شپږې", f: "shpuGe" }], - [{ p: "شپږو", f: "shpuGo" }], + [{ p: "شپږه", f: "shpúGa" }], + [{ p: "شپږې", f: "shpúGe" }], + [{ p: "شپږو", f: "shpúGo" }], ], }, }, @@ -279,12 +279,12 @@ const adjectives: Array<{ masc: [ [{ p: "ګډ وډ", f: "guD wuD" }], [{ p: "ګډ وډ", f: "guD wuD" }], - [{ p: "ګډو وډو", f: "guDo wuDo" }], + [{ p: "ګډو وډو", f: "gúDo wúDo" }], ], fem: [ - [{ p: "ګډه وډه", f: "guDa wuDa" }], - [{ p: "ګډې وډې", f: "guDe wuDe" }], - [{ p: "ګډو وډو", f: "guDo wuDo" }], + [{ p: "ګډه وډه", f: "gúDa wúDa" }], + [{ p: "ګډې وډې", f: "gúDe wúDe" }], + [{ p: "ګډو وډو", f: "gúDo wúDo" }], ], }, }, @@ -315,13 +315,13 @@ const nouns: Array<{ inflections: { masc: [ [{p: "مېلمه", f: "melmá"}], - [{p: "مېلمانه", f: "melmaanu"}], - [{p: "مېلمنو", f: "melmano"}], + [{p: "مېلمانه", f: "melmaanú"}], + [{p: "مېلمنو", f: "melmanó"}], ], fem: [ - [{p: "مېلمنه", f: "melmana"}], - [{p: "مېلمنې", f: "melmane"}], - [{p: "مېلمنو", f: "melmano"}], + [{p: "مېلمنه", f: "melmaná"}], + [{p: "مېلمنې", f: "melmané"}], + [{p: "مېلمنو", f: "melmanó"}], ], }, }, @@ -392,12 +392,12 @@ const nouns: Array<{ masc: [ [{p: "چرګ", f: "churg"}], [{p: "چرګ", f: "churg"}], - [{p: "چرګو", f: "churgo"}], + [{p: "چرګو", f: "chúrgo"}], ], fem: [ - [{p: "چرګه", f: "churga"}], - [{p: "چرګې", f: "churge"}], - [{p: "چرګو", f: "churgo"}], + [{p: "چرګه", f: "chúrga"}], + [{p: "چرګې", f: "chúrge"}], + [{p: "چرګو", f: "chúrgo"}], ], }, plural: { @@ -531,8 +531,8 @@ const nouns: Array<{ inflections: { masc: [ [{p: "لمونځ", f: "lamoondz"}], - [{p: "لمانځه", f: "lamaandzu"}], - [{p: "لمنځو", f: "lamandzo"}], + [{p: "لمانځه", f: "lamaandzú"}], + [{p: "لمنځو", f: "lamandzó"}], ], }, plural: { @@ -551,7 +551,7 @@ const nouns: Array<{ masc: [ [{ p: "غر", f: "ghar" }], [{ p: "غره", f: "ghru" }], - [{ p: "غرو", f: "ghro" }], + [{ p: "غرو", f: "ghró" }], ], }, plural: { @@ -567,15 +567,16 @@ const nouns: Array<{ in: {"i":5465,"ts":1527812802,"p":"خر","f":"khur","g":"khur","e":"donkey","c":"n. m. anim. unisex irreg.","infap":"خره","infaf":"khru","infbp":"خر","infbf":"khr"}, out: { inflections: { + // TODO: use smarter system using new isType5Entry predicates, to allow for not using the redundant one syllable accents with these masc: [ [{ p: "خر", f: "khur" }], - [{ p: "خره", f: "khru" }], - [{ p: "خرو", f: "khro" }], + [{ p: "خره", f: "khrú" }], + [{ p: "خرو", f: "khró" }], ], fem: [ - [{ p: "خره", f: "khra" }], - [{ p: "خرې", f: "khre" }], - [{ p: "خرو", f: "khro" }], + [{ p: "خره", f: "khrá" }], + [{ p: "خرې", f: "khré" }], + [{ p: "خرو", f: "khró" }], ], }, }, @@ -961,8 +962,8 @@ const nouns: Array<{ inflections: { fem: [ [{p: "لار", f: "laar"}], - [{p: "لارې", f: "laare"}], - [{p: "لارو", f: "laaro"}], + [{p: "لارې", f: "láare"}], + [{p: "لارو", f: "láaro"}], ], }, }, diff --git a/src/lib/pashto-inflector.ts b/src/lib/pashto-inflector.ts index b45c04c..94bb341 100644 --- a/src/lib/pashto-inflector.ts +++ b/src/lib/pashto-inflector.ts @@ -23,10 +23,13 @@ import { endsWith, } from "./p-text-helpers"; import { + accentFSylsOnNFromEnd, hasAccents, removeAccents, + splitUpSyllables, } from "./accent-helpers"; import * as T from "../types"; +import { splitFIntoPhonemes } from "./phonetics-to-diacritics"; const endingInSingleARegex = /[^a]'?’?[aá]'?’?$/; const endingInHeyOrAynRegex = /[^ا][هع]$/; @@ -179,16 +182,19 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput { // LEVEL 3 FUNCTIONS function inflectIrregularUnisex(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections { + const inf1 = removeAccents(inflections[1]); + const inf0syls = splitFIntoPhonemes(inflections[0].f); + const inf0f = accentFSylsOnNFromEnd(inf0syls, 0); return { masc: [ [{p, f}], - [{p: inflections[0].p, f: inflections[0].f}], - [{p: `${inflections[1].p}و`, f: `${inflections[1].f}o`}], + [{p: inflections[0].p, f: inf0f, }], + [{p: `${inf1.p}و`, f: `${inf1.f}ó`}], ], fem: [ - [{p: `${inflections[1].p}ه`, f: `${inflections[1].f}a`}], - [{p: `${inflections[1].p}ې`, f: `${inflections[1].f}e`}], - [{p: `${inflections[1].p}و`, f: `${inflections[1].f}o`}], + [{p: `${inf1.p}ه`, f: `${inf1.f}á`}], + [{p: `${inf1.p}ې`, f: `${inf1.f}é`}], + [{p: `${inf1.p}و`, f: `${inf1.f}ó`}], ], }; } @@ -256,16 +262,20 @@ function inflectEmphasizedYeyUnisex(p: string, f: string): T.UnisexInflections { } function inflectConsonantEndingUnisex(p: string, f: string): T.UnisexInflections { + const fSyls = splitUpSyllables(f); + const iBase = fSyls.length === 1 + ? makePsString(p, accentFSylsOnNFromEnd(fSyls, 0)) + : makePsString(p, f); return { masc: [ [{p, f}], [{p, f}], - [{p: `${p}و`, f: `${f}o`}], + [{p: `${iBase.p}و`, f: `${iBase.f}o`}], ], fem: [ - [{p: `${p}ه`, f: `${f}a`}], - [{p: `${p}ې`, f: `${f}e`}], - [{p: `${p}و`, f: `${f}o`}], + [{p: `${iBase.p}ه`, f: `${iBase.f}a`}], + [{p: `${iBase.p}ې`, f: `${iBase.f}e`}], + [{p: `${iBase.p}و`, f: `${iBase.f}o`}], ], }; } @@ -313,11 +323,14 @@ function inflectRegularEmphasizedYeyMasc(p: string, f: string): T.Inflections { } function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections { + const inf0f = splitUpSyllables(inflections[0].f).length > 1 + ? accentFSylsOnNFromEnd(inflections[0].f, 0) + : inflections[0].f return { masc: [ [{p, f}], - [{p: inflections[0].p, f: inflections[0].f}], - [{p: `${inflections[1].p}و`, f: `${inflections[1].f}o`}], + [{p: inflections[0].p, f: inf0f}], + [{p: `${inflections[1].p}و`, f: `${removeAccents(inflections[1].f)}ó`}], ], }; } @@ -348,11 +361,14 @@ function inflectRegularAWithHimPEnding(p: string, f: string): T.Inflections { } function inflectRegularInanMissingAFem(p: string, f: string): T.Inflections { - return { + const fBase = splitUpSyllables(f).length === 1 + ? accentFSylsOnNFromEnd(f, 0) + : f; + return { fem: [ [{p, f}], - [{p: `${p}ې`, f: `${f}e`}], - [{p: `${p}و`, f: `${f}o`}], + [{p: `${p}ې`, f: `${fBase}e`}], + [{p: `${p}و`, f: `${fBase}o`}], ], }; } @@ -520,7 +536,6 @@ function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections ? "fem noun" : "other"; if (pashtoPlural) return { - // TODO: add the pashto plural to words like پلویان but not to words like ترورزامن ? plural: pashtoPlural, arabicPlural, }; diff --git a/src/lib/verb-info.test.ts b/src/lib/verb-info.test.ts index 044279d..31f0eae 100644 --- a/src/lib/verb-info.test.ts +++ b/src/lib/verb-info.test.ts @@ -822,19 +822,19 @@ const toTest = [ mini: { p: "پوخ ک", f: "pokh k" }, }, mascPlur: { - long: { p: "پاخه کړل", f: "paakhu kRul" }, - short: { p: "پاخه کړ", f: "paakhu kR" }, - mini: { p: "پاخه ک", f: "paakhu k" }, + long: { p: "پاخه کړل", f: "paakhú kRul" }, + short: { p: "پاخه کړ", f: "paakhú kR" }, + mini: { p: "پاخه ک", f: "paakhú k" }, }, femSing: { - long: { p: "پخه کړل", f: "pakha kRul" }, - short: { p: "پخه کړ", f: "pakha kR" }, - mini: { p: "پخه ک", f: "pakha k" }, + long: { p: "پخه کړل", f: "pakhá kRul" }, + short: { p: "پخه کړ", f: "pakhá kR" }, + mini: { p: "پخه ک", f: "pakhá k" }, }, femPlur: { - long: { p: "پخې کړل", f: "pakhe kRul" }, - short: { p: "پخې کړ", f: "pakhe kR" }, - mini: { p: "پخې ک", f: "pakhe k" }, + long: { p: "پخې کړل", f: "pakhé kRul" }, + short: { p: "پخې کړ", f: "pakhé kR" }, + mini: { p: "پخې ک", f: "pakhé k" }, }, }, perfectiveSplit: { @@ -844,19 +844,19 @@ const toTest = [ mini: [{ p: "پوخ ", f: "pokh " }, { p: "ک", f: "k" }], }, mascPlur: { - long: [{ p: "پاخه ", f: "paakhu " }, { p: "کړل", f: "kRul" }], - short: [{ p: "پاخه ", f: "paakhu " }, { p: "کړ", f: "kR" }], - mini: [{ p: "پاخه ", f: "paakhu " }, { p: "ک", f: "k" }], + long: [{ p: "پاخه ", f: "paakhú " }, { p: "کړل", f: "kRul" }], + short: [{ p: "پاخه ", f: "paakhú " }, { p: "کړ", f: "kR" }], + mini: [{ p: "پاخه ", f: "paakhú " }, { p: "ک", f: "k" }], }, femSing: { - long: [{ p: "پخه ", f: "pakha " }, { p: "کړل", f: "kRul" }], - short: [{ p: "پخه ", f: "pakha " }, { p: "کړ", f: "kR" }], - mini: [{ p: "پخه ", f: "pakha " }, { p: "ک", f: "k" }], + long: [{ p: "پخه ", f: "pakhá " }, { p: "کړل", f: "kRul" }], + short: [{ p: "پخه ", f: "pakhá " }, { p: "کړ", f: "kR" }], + mini: [{ p: "پخه ", f: "pakhá " }, { p: "ک", f: "k" }], }, femPlur: { - long: [{ p: "پخې ", f: "pakhe " }, { p: "کړل", f: "kRul" }], - short: [{ p: "پخې ", f: "pakhe " }, { p: "کړ", f: "kR" }], - mini: [{ p: "پخې ", f: "pakhe " }, { p: "ک", f: "k" }], + long: [{ p: "پخې ", f: "pakhé " }, { p: "کړل", f: "kRul" }], + short: [{ p: "پخې ", f: "pakhé " }, { p: "کړ", f: "kR" }], + mini: [{ p: "پخې ", f: "pakhé " }, { p: "ک", f: "k" }], }, }, }, @@ -868,16 +868,16 @@ const toTest = [ short: { p: "پوخ ک", f: "pokh k" }, }, mascPlur: { - long: { p: "پاخه کړ", f: "paakhu kR" }, - short: { p: "پاخه ک", f: "paakhu k" }, + long: { p: "پاخه کړ", f: "paakhú kR" }, + short: { p: "پاخه ک", f: "paakhú k" }, }, femSing: { - long: { p: "پخه کړ", f: "pakha kR" }, - short: { p: "پخه ک", f: "pakha k" }, + long: { p: "پخه کړ", f: "pakhá kR" }, + short: { p: "پخه ک", f: "pakhá k" }, }, femPlur: { - long: { p: "پخې کړ", f: "pakhe kR" }, - short: { p: "پخې ک", f: "pakhe k" }, + long: { p: "پخې کړ", f: "pakhé kR" }, + short: { p: "پخې ک", f: "pakhé k" }, }, }, perfectiveSplit: { @@ -886,16 +886,16 @@ const toTest = [ short: [{ p: "پوخ ", f: "pokh " }, { p: "ک", f: "k" }], }, mascPlur: { - long: [{ p: "پاخه ", f: "paakhu " }, { p: "کړ", f: "kR" }], - short: [{ p: "پاخه ", f: "paakhu " }, { p: "ک", f: "k" }], + long: [{ p: "پاخه ", f: "paakhú " }, { p: "کړ", f: "kR" }], + short: [{ p: "پاخه ", f: "paakhú " }, { p: "ک", f: "k" }], }, femSing: { - long: [{ p: "پخه ", f: "pakha " }, { p: "کړ", f: "kR" }], - short: [{ p: "پخه ", f: "pakha " }, { p: "ک", f: "k" }], + long: [{ p: "پخه ", f: "pakhá " }, { p: "کړ", f: "kR" }], + short: [{ p: "پخه ", f: "pakhá " }, { p: "ک", f: "k" }], }, femPlur: { - long: [{ p: "پخې ", f: "pakhe " }, { p: "کړ", f: "kR" }], - short: [{ p: "پخې ", f: "pakhe " }, { p: "ک", f: "k" }], + long: [{ p: "پخې ", f: "pakhé " }, { p: "کړ", f: "kR" }], + short: [{ p: "پخې ", f: "pakhé " }, { p: "ک", f: "k" }], }, }, }, @@ -903,21 +903,21 @@ const toTest = [ present: { p: "پخوونکی", f: "pakhawóonkey" }, past: { mascSing: { p: "پوخ کړی", f: "pokh kúRey" }, - mascPlur: { p: "پاخه کړي", f: "paakhu kúRee" }, - femSing: { p: "پخه کړې", f: "pakha kúRe" }, - femPlur: { p: "پخې کړې", f: "pakhe kúRe" }, + mascPlur: { p: "پاخه کړي", f: "paakhú kúRee" }, + femSing: { p: "پخه کړې", f: "pakhá kúRe" }, + femPlur: { p: "پخې کړې", f: "pakhé kúRe" }, }, }, complement: { masc: [ [{ p: "پوخ", f: "pokh" }], - [{ p: "پاخه", f: "paakhu" }], - [{ p: "پخو", f: "pakho" }], + [{ p: "پاخه", f: "paakhú" }], + [{ p: "پخو", f: "pakhó" }], ], fem: [ - [{ p: "پخه", f: "pakha" }], - [{ p: "پخې", f: "pakhe" }], - [{ p: "پخو", f: "pakho" }], + [{ p: "پخه", f: "pakhá" }], + [{ p: "پخې", f: "pakhé" }], + [{ p: "پخو", f: "pakhó" }], ], }, }, @@ -1510,29 +1510,29 @@ const toTest = [ short: { p: "لاړې تېرو", f: "laaRe teraw" }, }, perfective: { - long: { p: "لاړې تېرې کړل", f: "laaRe tere kRul" }, - short: { p: "لاړې تېرې کړ", f: "laaRe tere kR" }, - mini: { p: "لاړې تېرې ک", f: "laaRe tere k" }, + long: { p: "لاړې تېرې کړل", f: "laaRe tére kRul" }, + short: { p: "لاړې تېرې کړ", f: "laaRe tére kR" }, + mini: { p: "لاړې تېرې ک", f: "laaRe tére k" }, }, perfectiveSplit: { - long: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "کړل", f: "kRul" }], - short: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "کړ", f: "kR" }], - mini: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "ک", f: "k" }], + long: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "کړل", f: "kRul" }], + short: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "کړ", f: "kR" }], + mini: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "ک", f: "k" }], }, }, stem: { imperfective: { p: "لاړې تېرو", f: "laaRe teraw" }, perfective: { - long: { p: "لاړې تېرې کړ", f: "laaRe tere kR" }, - short: { p: "لاړې تېرې ک", f: "laaRe tere k" }, + long: { p: "لاړې تېرې کړ", f: "laaRe tére kR" }, + short: { p: "لاړې تېرې ک", f: "laaRe tére k" }, }, perfectiveSplit: { - long: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "کړ", f: "kR" }], - short: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "ک", f: "k" }], + long: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "کړ", f: "kR" }], + short: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "ک", f: "k" }], }, }, participle: { - past: { p: "لاړې تېرې کړې", f: "laaRe tere kúRe" }, + past: { p: "لاړې تېرې کړې", f: "laaRe tére kúRe" }, present: { p: "لاړې تېروونکی", f: "laaRe terawóonkey" }, }, objComplement: {