Better accenting on inflections; will need to refactor with the cleaner isPatternXEntry predicates to do a fully proper job.

This commit is contained in:
lingdocs 2021-11-01 17:10:23 -04:00
parent 85f249941b
commit 3cab8827ca
6 changed files with 134 additions and 113 deletions

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/pashto-inflector",
"version": "1.3.5",
"version": "1.3.6",
"author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com",

View File

@ -15,11 +15,13 @@ import * as T from "../types";
*
* @param s
*/
export function accentOnFront(s: T.PsString): T.PsString;
export function accentOnFront(s: T.LengthOptions<T.PsString>): T.LengthOptions<T.PsString>;
export function accentOnFront(s: T.SingleOrLengthOpts<T.PsString>): T.SingleOrLengthOpts<T.PsString> {
if ("long" in s) {
return {
short: accentOnFront(s.short) as T.PsString,
long: accentOnFront(s.long) as T.PsString,
short: accentOnFront(s.short),
long: accentOnFront(s.long),
};
}
return {
@ -69,7 +71,10 @@ export function countSyllables(f: T.PsString | string): number {
* @param syls - an array of syllables in phonetic strings without accents (including spaces as extra items)
* @param n - the number of syllables from the end to put the accent
*/
export function accentFSylsOnNFromEnd(syls: string[], n: number): string {
export function accentFSylsOnNFromEnd(syls: string[] | string, n: number): string {
if (typeof syls === "string") {
return accentFSylsOnNFromEnd(splitUpSyllables(syls), n);
}
return [
...syls.slice(0, syls.length-(n+1)), // before accent
accentSyllable(syls[syls.length-(n+1)]), // syllable to be accented

View File

@ -15,7 +15,7 @@ import {
getPersonInflectionsKey,
} from "./misc-helpers";
import * as T from "../types";
import { hasAccents, removeAccents } from "./accent-helpers";
import { accentFSylsOnNFromEnd, accentOnFront, hasAccents, removeAccents, splitUpSyllables } from "./accent-helpers";
import { phoneticsConsonants } from "./pashto-consonants";
import { simplifyPhonetics } from "./simplify-phonetics";

View File

@ -37,13 +37,13 @@ const adjectives: Array<{
inflections:{
masc: [
[{p: "زوړ", f: "zoR"}],
[{p: "زاړه", f: "zaaRu"}],
[{p: "زړو", f: "zaRo"}],
[{p: "زاړه", f: "zaaRú"}],
[{p: "زړو", f: "zaRó"}],
],
fem: [
[{p: "زړه", f: "zaRa"}],
[{p: "زړې", f: "zaRe"}],
[{p: "زړو", f: "zaRo"}],
[{p: "زړه", f: "zaRá"}],
[{p: "زړې", f: "zaRé"}],
[{p: "زړو", f: "zaRó"}],
],
},
},
@ -116,12 +116,12 @@ const adjectives: Array<{
masc: [
[{p: "سپک", f: "spuk"}],
[{p: "سپک", f: "spuk"}],
[{p: "سپکو", f: "spuko"}],
[{p: "سپکو", f: "spúko"}],
],
fem: [
[{p: "سپکه", f: "spuka"}],
[{p: "سپکې", f: "spuke"}],
[{p: "سپکو", f: "spuko"}],
[{p: "سپکه", f: "spúka"}],
[{p: "سپکې", f: "spúke"}],
[{p: "سپکو", f: "spúko"}],
],
},
},
@ -141,12 +141,12 @@ const adjectives: Array<{
masc: [
[{p: "لوی", f: "looy"}],
[{p: "لوی", f: "looy"}],
[{p: "لویو", f: "looyo"}],
[{p: "لویو", f: "lóoyo"}],
],
fem: [
[{p: "لویه", f: "looya"}],
[{p: "لویې", f: "looye"}],
[{p: "لویو", f: "looyo"}],
[{p: "لویه", f: "lóoya"}],
[{p: "لویې", f: "lóoye"}],
[{p: "لویو", f: "lóoyo"}],
],
},
},
@ -166,12 +166,12 @@ const adjectives: Array<{
masc: [
[{p: "پوه", f: "poh"}],
[{p: "پوه", f: "poh"}],
[{p: "پوهو", f: "poho"}],
[{p: "پوهو", f: "póho"}],
],
fem: [
[{p: "پوهه", f: "poha"}],
[{p: "پوهې", f: "pohe"}],
[{p: "پوهو", f: "poho"}],
[{p: "پوهه", f: "póha"}],
[{p: "پوهې", f: "póhe"}],
[{p: "پوهو", f: "póho"}],
],
},
},
@ -210,12 +210,12 @@ const adjectives: Array<{
masc: [
[{ p: "شپږ", f: "shpuG" }],
[{ p: "شپږ", f: "shpuG" }],
[{ p: "شپږو", f: "shpuGo" }],
[{ p: "شپږو", f: "shpúGo" }],
],
fem: [
[{ p: "شپږه", f: "shpuGa" }],
[{ p: "شپږې", f: "shpuGe" }],
[{ p: "شپږو", f: "shpuGo" }],
[{ p: "شپږه", f: "shpúGa" }],
[{ p: "شپږې", f: "shpúGe" }],
[{ p: "شپږو", f: "shpúGo" }],
],
},
},
@ -279,12 +279,12 @@ const adjectives: Array<{
masc: [
[{ p: "ګډ وډ", f: "guD wuD" }],
[{ p: "ګډ وډ", f: "guD wuD" }],
[{ p: "ګډو وډو", f: "guDo wuDo" }],
[{ p: "ګډو وډو", f: "gúDo wúDo" }],
],
fem: [
[{ p: "ګډه وډه", f: "guDa wuDa" }],
[{ p: "ګډې وډې", f: "guDe wuDe" }],
[{ p: "ګډو وډو", f: "guDo wuDo" }],
[{ p: "ګډه وډه", f: "gúDa wúDa" }],
[{ p: "ګډې وډې", f: "gúDe wúDe" }],
[{ p: "ګډو وډو", f: "gúDo wúDo" }],
],
},
},
@ -315,13 +315,13 @@ const nouns: Array<{
inflections: {
masc: [
[{p: "مېلمه", f: "melmá"}],
[{p: "مېلمانه", f: "melmaanu"}],
[{p: "مېلمنو", f: "melmano"}],
[{p: "مېلمانه", f: "melmaanú"}],
[{p: "مېلمنو", f: "melmanó"}],
],
fem: [
[{p: "مېلمنه", f: "melmana"}],
[{p: "مېلمنې", f: "melmane"}],
[{p: "مېلمنو", f: "melmano"}],
[{p: "مېلمنه", f: "melmaná"}],
[{p: "مېلمنې", f: "melmané"}],
[{p: "مېلمنو", f: "melmanó"}],
],
},
},
@ -392,12 +392,12 @@ const nouns: Array<{
masc: [
[{p: "چرګ", f: "churg"}],
[{p: "چرګ", f: "churg"}],
[{p: "چرګو", f: "churgo"}],
[{p: "چرګو", f: "chúrgo"}],
],
fem: [
[{p: "چرګه", f: "churga"}],
[{p: "چرګې", f: "churge"}],
[{p: "چرګو", f: "churgo"}],
[{p: "چرګه", f: "chúrga"}],
[{p: "چرګې", f: "chúrge"}],
[{p: "چرګو", f: "chúrgo"}],
],
},
plural: {
@ -531,8 +531,8 @@ const nouns: Array<{
inflections: {
masc: [
[{p: "لمونځ", f: "lamoondz"}],
[{p: "لمانځه", f: "lamaandzu"}],
[{p: "لمنځو", f: "lamandzo"}],
[{p: "لمانځه", f: "lamaandzú"}],
[{p: "لمنځو", f: "lamandzó"}],
],
},
plural: {
@ -551,7 +551,7 @@ const nouns: Array<{
masc: [
[{ p: "غر", f: "ghar" }],
[{ p: "غره", f: "ghru" }],
[{ p: "غرو", f: "ghro" }],
[{ p: "غرو", f: "ghró" }],
],
},
plural: {
@ -567,15 +567,16 @@ const nouns: Array<{
in: {"i":5465,"ts":1527812802,"p":"خر","f":"khur","g":"khur","e":"donkey","c":"n. m. anim. unisex irreg.","infap":"خره","infaf":"khru","infbp":"خر","infbf":"khr"},
out: {
inflections: {
// TODO: use smarter system using new isType5Entry predicates, to allow for not using the redundant one syllable accents with these
masc: [
[{ p: "خر", f: "khur" }],
[{ p: "خره", f: "khru" }],
[{ p: "خرو", f: "khro" }],
[{ p: "خره", f: "khrú" }],
[{ p: "خرو", f: "khró" }],
],
fem: [
[{ p: "خره", f: "khra" }],
[{ p: "خرې", f: "khre" }],
[{ p: "خرو", f: "khro" }],
[{ p: "خره", f: "khrá" }],
[{ p: "خرې", f: "khré" }],
[{ p: "خرو", f: "khró" }],
],
},
},
@ -961,8 +962,8 @@ const nouns: Array<{
inflections: {
fem: [
[{p: "لار", f: "laar"}],
[{p: "لارې", f: "laare"}],
[{p: "لارو", f: "laaro"}],
[{p: "لارې", f: "láare"}],
[{p: "لارو", f: "láaro"}],
],
},
},

View File

@ -23,10 +23,13 @@ import {
endsWith,
} from "./p-text-helpers";
import {
accentFSylsOnNFromEnd,
hasAccents,
removeAccents,
splitUpSyllables,
} from "./accent-helpers";
import * as T from "../types";
import { splitFIntoPhonemes } from "./phonetics-to-diacritics";
const endingInSingleARegex = /[^a]'??[aá]'??$/;
const endingInHeyOrAynRegex = /[^ا][هع]$/;
@ -179,16 +182,19 @@ function handleFemNoun(word: T.DictionaryEntryNoFVars): T.InflectorOutput {
// LEVEL 3 FUNCTIONS
/**
 * Builds the masculine and feminine inflection sets for an irregular unisex word.
 *
 * @param p - the base form in Pashto script
 * @param f - the base form in phonetics
 * @param inflections - the two irregular forms: [0] supplies the second masculine
 * row, [1] is the stem that every remaining ending is attached to
 * @returns three rows for masc and three rows for fem
 */
function inflectIrregularUnisex(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
    // the endings added to this stem carry the accent, so the stem itself must not have one
    const inf1 = removeAccents(inflections[1]);
    // Strip any accent already present so the result carries only one, then accent the
    // final syllable. The string form of accentFSylsOnNFromEnd splits into syllables
    // itself. Note that the accent is applied even when the form has a single syllable.
    const inf0f = accentFSylsOnNFromEnd(removeAccents(inflections[0].f), 0);
    return {
        masc: [
            [{p, f}],
            [{p: inflections[0].p, f: inf0f}],
            [{p: `${inf1.p}و`, f: `${inf1.f}ó`}],
        ],
        fem: [
            [{p: `${inf1.p}ه`, f: `${inf1.f}á`}],
            [{p: `${inf1.p}ې`, f: `${inf1.f}é`}],
            [{p: `${inf1.p}و`, f: `${inf1.f}ó`}],
        ],
    };
}
@ -256,16 +262,20 @@ function inflectEmphasizedYeyUnisex(p: string, f: string): T.UnisexInflections {
}
/**
 * Builds the masculine and feminine inflection sets for a unisex word whose
 * endings are simply appended to the base form.
 *
 * @param p - the base form in Pashto script
 * @param f - the base form in phonetics
 * @returns three rows for masc (the first two are the unchanged base form) and
 * three rows for fem
 */
function inflectConsonantEndingUnisex(p: string, f: string): T.UnisexInflections {
    const fSyls = splitUpSyllables(f);
    // A one-syllable base gains a second syllable once an ending is attached, so it
    // gets an explicit accent here; longer bases are used exactly as given.
    const fBase = fSyls.length === 1
        ? accentFSylsOnNFromEnd(fSyls, 0)
        : f;
    return {
        masc: [
            [{p, f}],
            [{p, f}],
            [{p: `${p}و`, f: `${fBase}o`}],
        ],
        fem: [
            [{p: `${p}ه`, f: `${fBase}a`}],
            [{p: `${p}ې`, f: `${fBase}e`}],
            [{p: `${p}و`, f: `${fBase}o`}],
        ],
    };
}
@ -313,11 +323,14 @@ function inflectRegularEmphasizedYeyMasc(p: string, f: string): T.Inflections {
}
/**
 * Builds the masculine inflection set for an irregular masculine word.
 *
 * @param p - the base form in Pashto script
 * @param f - the base form in phonetics
 * @param inflections - the two irregular forms: [0] supplies the second row,
 * [1] is the stem that the final "و" / "ó" ending is attached to
 * @returns three masc rows
 */
function inflectIrregularMasc(p: string, f: string, inflections: Array<{p: string, f: string}>): T.Inflections {
    // work from an accent-free copy so that re-accenting cannot leave two accents behind
    const inf0Plain = removeAccents(inflections[0].f);
    // only multi-syllable forms get an accent on the last syllable;
    // a single-syllable form is passed through exactly as supplied
    const inf0f = splitUpSyllables(inf0Plain).length > 1
        ? accentFSylsOnNFromEnd(inf0Plain, 0)
        : inflections[0].f;
    return {
        masc: [
            [{p, f}],
            [{p: inflections[0].p, f: inf0f}],
            // the accent moves onto the ending, so drop any accent on the stem
            [{p: `${inflections[1].p}و`, f: `${removeAccents(inflections[1].f)}ó`}],
        ],
    };
}
@ -348,11 +361,14 @@ function inflectRegularAWithHimPEnding(p: string, f: string): T.Inflections {
}
/**
 * Builds the feminine inflection set for a word whose "ې" and "و" endings are
 * appended directly to the base form.
 *
 * @param p - the base form in Pashto script
 * @param f - the base form in phonetics
 * @returns three fem rows; the first is the unchanged base form
 */
function inflectRegularInanMissingAFem(p: string, f: string): T.Inflections {
    // A one-syllable base gains a second syllable once an ending is attached, so it
    // gets an explicit accent here; longer bases are used exactly as given.
    const fBase = splitUpSyllables(f).length === 1
        ? accentFSylsOnNFromEnd(f, 0)
        : f;
    return {
        fem: [
            [{p, f}],
            [{p: `${p}ې`, f: `${fBase}e`}],
            [{p: `${p}و`, f: `${fBase}o`}],
        ],
    };
}
@ -520,7 +536,6 @@ function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections
? "fem noun"
: "other";
if (pashtoPlural) return {
// TODO: add the pashto plural to words like پلویان but not to words like ترورزامن ?
plural: pashtoPlural,
arabicPlural,
};

View File

@ -822,19 +822,19 @@ const toTest = [
mini: { p: "پوخ ک", f: "pokh k" },
},
mascPlur: {
long: { p: "پاخه کړل", f: "paakhu kRul" },
short: { p: "پاخه کړ", f: "paakhu kR" },
mini: { p: "پاخه ک", f: "paakhu k" },
long: { p: "پاخه کړل", f: "paakhú kRul" },
short: { p: "پاخه کړ", f: "paakhú kR" },
mini: { p: "پاخه ک", f: "paakhú k" },
},
femSing: {
long: { p: "پخه کړل", f: "pakha kRul" },
short: { p: "پخه کړ", f: "pakha kR" },
mini: { p: "پخه ک", f: "pakha k" },
long: { p: "پخه کړل", f: "pakhá kRul" },
short: { p: "پخه کړ", f: "pakhá kR" },
mini: { p: "پخه ک", f: "pakhá k" },
},
femPlur: {
long: { p: "پخې کړل", f: "pakhe kRul" },
short: { p: "پخې کړ", f: "pakhe kR" },
mini: { p: "پخې ک", f: "pakhe k" },
long: { p: "پخې کړل", f: "pakhé kRul" },
short: { p: "پخې کړ", f: "pakhé kR" },
mini: { p: "پخې ک", f: "pakhé k" },
},
},
perfectiveSplit: {
@ -844,19 +844,19 @@ const toTest = [
mini: [{ p: "پوخ ", f: "pokh " }, { p: "ک", f: "k" }],
},
mascPlur: {
long: [{ p: "پاخه ", f: "paakhu " }, { p: "کړل", f: "kRul" }],
short: [{ p: "پاخه ", f: "paakhu " }, { p: "کړ", f: "kR" }],
mini: [{ p: "پاخه ", f: "paakhu " }, { p: "ک", f: "k" }],
long: [{ p: "پاخه ", f: "paakhú " }, { p: "کړل", f: "kRul" }],
short: [{ p: "پاخه ", f: "paakhú " }, { p: "کړ", f: "kR" }],
mini: [{ p: "پاخه ", f: "paakhú " }, { p: "ک", f: "k" }],
},
femSing: {
long: [{ p: "پخه ", f: "pakha " }, { p: "کړل", f: "kRul" }],
short: [{ p: "پخه ", f: "pakha " }, { p: "کړ", f: "kR" }],
mini: [{ p: "پخه ", f: "pakha " }, { p: "ک", f: "k" }],
long: [{ p: "پخه ", f: "pakhá " }, { p: "کړل", f: "kRul" }],
short: [{ p: "پخه ", f: "pakhá " }, { p: "کړ", f: "kR" }],
mini: [{ p: "پخه ", f: "pakhá " }, { p: "ک", f: "k" }],
},
femPlur: {
long: [{ p: "پخې ", f: "pakhe " }, { p: "کړل", f: "kRul" }],
short: [{ p: "پخې ", f: "pakhe " }, { p: "کړ", f: "kR" }],
mini: [{ p: "پخې ", f: "pakhe " }, { p: "ک", f: "k" }],
long: [{ p: "پخې ", f: "pakhé " }, { p: "کړل", f: "kRul" }],
short: [{ p: "پخې ", f: "pakhé " }, { p: "کړ", f: "kR" }],
mini: [{ p: "پخې ", f: "pakhé " }, { p: "ک", f: "k" }],
},
},
},
@ -868,16 +868,16 @@ const toTest = [
short: { p: "پوخ ک", f: "pokh k" },
},
mascPlur: {
long: { p: "پاخه کړ", f: "paakhu kR" },
short: { p: "پاخه ک", f: "paakhu k" },
long: { p: "پاخه کړ", f: "paakhú kR" },
short: { p: "پاخه ک", f: "paakhú k" },
},
femSing: {
long: { p: "پخه کړ", f: "pakha kR" },
short: { p: "پخه ک", f: "pakha k" },
long: { p: "پخه کړ", f: "pakhá kR" },
short: { p: "پخه ک", f: "pakhá k" },
},
femPlur: {
long: { p: "پخې کړ", f: "pakhe kR" },
short: { p: "پخې ک", f: "pakhe k" },
long: { p: "پخې کړ", f: "pakhé kR" },
short: { p: "پخې ک", f: "pakhé k" },
},
},
perfectiveSplit: {
@ -886,16 +886,16 @@ const toTest = [
short: [{ p: "پوخ ", f: "pokh " }, { p: "ک", f: "k" }],
},
mascPlur: {
long: [{ p: "پاخه ", f: "paakhu " }, { p: "کړ", f: "kR" }],
short: [{ p: "پاخه ", f: "paakhu " }, { p: "ک", f: "k" }],
long: [{ p: "پاخه ", f: "paakhú " }, { p: "کړ", f: "kR" }],
short: [{ p: "پاخه ", f: "paakhú " }, { p: "ک", f: "k" }],
},
femSing: {
long: [{ p: "پخه ", f: "pakha " }, { p: "کړ", f: "kR" }],
short: [{ p: "پخه ", f: "pakha " }, { p: "ک", f: "k" }],
long: [{ p: "پخه ", f: "pakhá " }, { p: "کړ", f: "kR" }],
short: [{ p: "پخه ", f: "pakhá " }, { p: "ک", f: "k" }],
},
femPlur: {
long: [{ p: "پخې ", f: "pakhe " }, { p: "کړ", f: "kR" }],
short: [{ p: "پخې ", f: "pakhe " }, { p: "ک", f: "k" }],
long: [{ p: "پخې ", f: "pakhé " }, { p: "کړ", f: "kR" }],
short: [{ p: "پخې ", f: "pakhé " }, { p: "ک", f: "k" }],
},
},
},
@ -903,21 +903,21 @@ const toTest = [
present: { p: "پخوونکی", f: "pakhawóonkey" },
past: {
mascSing: { p: "پوخ کړی", f: "pokh kúRey" },
mascPlur: { p: "پاخه کړي", f: "paakhu kúRee" },
femSing: { p: "پخه کړې", f: "pakha kúRe" },
femPlur: { p: "پخې کړې", f: "pakhe kúRe" },
mascPlur: { p: "پاخه کړي", f: "paakhú kúRee" },
femSing: { p: "پخه کړې", f: "pakhá kúRe" },
femPlur: { p: "پخې کړې", f: "pakhé kúRe" },
},
},
complement: {
masc: [
[{ p: "پوخ", f: "pokh" }],
[{ p: "پاخه", f: "paakhu" }],
[{ p: "پخو", f: "pakho" }],
[{ p: "پاخه", f: "paakhú" }],
[{ p: "پخو", f: "pakhó" }],
],
fem: [
[{ p: "پخه", f: "pakha" }],
[{ p: "پخې", f: "pakhe" }],
[{ p: "پخو", f: "pakho" }],
[{ p: "پخه", f: "pakhá" }],
[{ p: "پخې", f: "pakhé" }],
[{ p: "پخو", f: "pakhó" }],
],
},
},
@ -1510,29 +1510,29 @@ const toTest = [
short: { p: "لاړې تېرو", f: "laaRe teraw" },
},
perfective: {
long: { p: "لاړې تېرې کړل", f: "laaRe tere kRul" },
short: { p: "لاړې تېرې کړ", f: "laaRe tere kR" },
mini: { p: "لاړې تېرې ک", f: "laaRe tere k" },
long: { p: "لاړې تېرې کړل", f: "laaRe tére kRul" },
short: { p: "لاړې تېرې کړ", f: "laaRe tére kR" },
mini: { p: "لاړې تېرې ک", f: "laaRe tére k" },
},
perfectiveSplit: {
long: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "کړل", f: "kRul" }],
short: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "کړ", f: "kR" }],
mini: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "ک", f: "k" }],
long: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "کړل", f: "kRul" }],
short: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "کړ", f: "kR" }],
mini: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "ک", f: "k" }],
},
},
stem: {
imperfective: { p: "لاړې تېرو", f: "laaRe teraw" },
perfective: {
long: { p: "لاړې تېرې کړ", f: "laaRe tere kR" },
short: { p: "لاړې تېرې ک", f: "laaRe tere k" },
long: { p: "لاړې تېرې کړ", f: "laaRe tére kR" },
short: { p: "لاړې تېرې ک", f: "laaRe tére k" },
},
perfectiveSplit: {
long: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "کړ", f: "kR" }],
short: [{ p: "لاړې تېرې ", f: "laaRe tere " }, { p: "ک", f: "k" }],
long: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "کړ", f: "kR" }],
short: [{ p: "لاړې تېرې ", f: "laaRe tére " }, { p: "ک", f: "k" }],
},
},
participle: {
past: { p: "لاړې تېرې کړې", f: "laaRe tere kúRe" },
past: { p: "لاړې تېرې کړې", f: "laaRe tére kúRe" },
present: { p: "لاړې تېروونکی", f: "laaRe terawóonkey" },
},
objComplement: {