inflector with plural noun inflection!!

This commit is contained in:
lingdocs 2021-09-14 19:04:45 +04:00
parent 916bc24487
commit c042f66fc8
8 changed files with 80 additions and 62 deletions

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/pashto-inflector",
"version": "0.9.6",
"version": "1.0.0",
"author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com",

View File

@ -11,11 +11,14 @@ import Pashto from "./Pashto";
import { Modal } from "react-bootstrap";
import TableCell from "./TableCell";
import * as T from "../types";
import { isPluralInflections } from "../lib/p-text-helpers";
const explanation = (inf: T.Inflections, textOptions: T.TextOptions) => {
// @ts-ignore
const w = inf["masc" in inf ? "masc" : "fem"][0][0];
return <>
const explanation = (inf: T.Inflections | T.PluralInflections, textOptions: T.TextOptions) => {
const isPluralInfs = isPluralInflections(inf);
const w = "masc" in inf
? inf.masc[0][0]
: inf.fem[0][0];
return !isPluralInfs ? <>
<p>In Pashto, <strong>nouns, pronouns, and adjectives</strong> get inflected when they are either:</p>
<ul>
<li>Plural</li>
@ -30,21 +33,27 @@ const explanation = (inf: T.Inflections, textOptions: T.TextOptions) => {
<p><small>Not all nouns, pronouns, and adjectives can inflect. But if you're seeing this table here, it means that <Pashto opts={textOptions}>{w}</Pashto> does inflect.</small></p>
<p><small>Irregular nouns like پښتون or مېلمه often only take the 1st inflection when they're plural, and not for the other two reasons, depending on dialect. When there are two reasons to inflect, these will always take the double inflection.</small></p>
<p><small>For prepositional/postpositional sandwiches of location like په ... کې and په ... باندې the first inflection of nouns (not of adjectives/pronouns) often doesn't happen. The second one always will though.</small></p>
</>
</> : <>
<p>Many Arabic loan-words can be used with their original Arabic plural form.</p>
<p>When they need to be inflected a second time because they are: a. sandwiched with a preposition/postposition (oblique) or b. the subject of a transitive past tense verb, you add an و to the as you do with other Pashto verbs.</p>
</>;
}
/**
 * Table of a word's inflections, with a help button that opens an explanatory modal.
 * Regular inflections are shown as "Plain" / "1st" / "2nd" rows; plural inflections
 * (as detected by isPluralInflections) are shown as "Plural" / "2nd" rows.
 *
 * @param inf - the inflections or plural inflections to display
 * @param textOptions - text options handed on to each TableCell and to the explanation
 */
const InflectionTable = ({ inf, textOptions }: {
inf: T.Inflections,
inf: T.Inflections | T.PluralInflections,
textOptions: T.TextOptions,
}) => {
const [showingExplanation, setShowingExplanation] = useState(false);
/* istanbul ignore next */ // Insanely can't see the modal to close it
const handleCloseExplanation = () => setShowingExplanation(false);
const handleShowExplanation = () => setShowingExplanation(true);
const isPluralInfs = isPluralInflections(inf);
return (
<div className="mt-4">
<div style={{ display: "flex", justifyContent: "space-between" }}>
<h5>Inflections:</h5>
<h5>
{!isPluralInfs ? "Inflections" : "Arabic Plural and 2nd Inflection"}:
</h5>
<div className="clickable mr-2" onClick={handleShowExplanation} data-testid="help-button">
<i className={`fa fa-question-circle`}></i>
</div>
@ -58,7 +67,7 @@ const InflectionTable = ({ inf, textOptions }: {
</tr>
</thead>
<tbody>
{["Plain", "1st", "2nd"].map((title, i) => (
{/* the ternary is parenthesized so that .map runs on whichever title list is chosen, not only on the plural one */}
{(!isPluralInfs ? ["Plain", "1st", "2nd"] : ["Plural", "2nd"]).map((title, i) => (
<tr key={title}>
<th scope="row">{title}</th>
{"masc" in inf && <TableCell item={inf.masc[i]} textOptions={textOptions} />}
@ -69,7 +78,7 @@ const InflectionTable = ({ inf, textOptions }: {
</table>
<Modal show={showingExplanation} onHide={handleCloseExplanation}>
<Modal.Header closeButton>
<Modal.Title>About Inflections</Modal.Title>
{/* same polarity as the table heading: regular sets are "Inflections", plural sets are "Arabic Plural" */}
<Modal.Title>About {!isPluralInfs ? "Inflections" : "Arabic Plural"}</Modal.Title>
</Modal.Header>
<Modal.Body>{explanation(inf, textOptions)}</Modal.Body>
<Modal.Footer>

View File

@ -29,7 +29,7 @@ import {
PhonemeStatus,
} from "./diacritics-helpers";
import { firstPhonetics } from "./p-text-helpers";
import { removeFVarients } from "./p-text-helpers";
import { pipe } from "rambda";
/**
@ -37,7 +37,7 @@ import { pipe } from "rambda";
* Errors if the phonetics and script don't line up.
*/
export function addDiacritics({ p, f }: T.PsString, ignoreCommas?: true): T.PsString {
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? firstPhonetics(f) : f);
const phonemes: Phoneme[] = splitFIntoPhonemes(!ignoreCommas ? removeFVarients(f) : f);
const { pIn, pOut } = phonemes.reduce(processPhoneme, { pOut: "", pIn: p.trim() });
if (pIn !== "") {
throw new Error("phonetics error - phonetics shorter than pashto script");

View File

@ -796,9 +796,9 @@ export function endsInAaOrOo(w: T.PsString): boolean {
const fEnd = simplifyPhonetics(w.f).slice(-2);
const pEnd = w.p.slice(-1) === "ع" ? w.p.slice(-2, -1) : w.p.slice(-1);
return (
pEnd === "و" && fEnd.endsWith("o")
(pEnd === "و" && fEnd.endsWith("o"))
||
pEnd === "ا" && fEnd === "aa"
(pEnd === "ا" && fEnd === "aa")
);
}
@ -912,7 +912,6 @@ export function splitPsByVarients(w: T.PsString): T.ArrayOneOrMore<T.PsString> {
}) as T.ArrayOneOrMore<T.PsString>;
}
export function removeEndTick(w: T.PsString): T.PsString;
export function removeEndTick(w: string): string;
export function removeEndTick(w: T.PsString | string): T.PsString | string {
@ -922,4 +921,15 @@ export function removeEndTick(w: T.PsString | string): T.PsString | string {
return (w.slice(-1) === "'")
? w.slice(0, -1)
: w;
}
/**
 * Type guard for gendered sets that carry both genders.
 *
 * @param inf - the gendered set to inspect
 * @returns true only when `inf` has both a "masc" key and a "fem" key
 */
export function isUnisexSet<X>(inf: T.GenderedSet<X>): inf is T.UnisexSet<X> {
    const hasMasc = "masc" in inf;
    const hasFem = "fem" in inf;
    return hasMasc && hasFem;
}
/**
 * Type guard that tells plural inflections apart from regular inflections
 * by counting rows: a set with exactly two rows is treated as plural inflections.
 *
 * @param inf - either a plural-inflections or a regular-inflections object
 * @returns true when the masculine set (or, when there is no "masc" key, the
 *          feminine set) has a length of exactly 2
 */
export function isPluralInflections(inf: T.PluralInflections | T.Inflections): inf is T.PluralInflections {
    // the masculine side decides whenever it is present; otherwise fall back to the feminine side
    const rows = "masc" in inf ? inf.masc : inf.fem;
    return rows.length === 2;
}

View File

@ -427,7 +427,7 @@ function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections
const pashtoPlural = makePashtoPlural(w);
if (pashtoPlural) return { plural: pashtoPlural, arabicPlural };
function addMascPluralSuffix(animate?: boolean, shortSquish?: boolean): T.PluralInflectionSet {
if (shortSquish && (w.infap == undefined || w.infaf === undefined)) {
if (shortSquish && (w.infap === undefined || w.infaf === undefined)) {
throw new Error(`no irregular inflection info for ${w.p} - ${w.ts}`);
}
const b = removeAccents(shortSquish
@ -475,7 +475,11 @@ function makePlural(w: T.DictionaryEntryNoFVars): { plural: T.PluralInflections
return { arabicPlural, plural: { masc: addMascPluralSuffix(anim, shortSquish) }};
}
}
if (type === "masc noun" && (shortSquish || (endsInConsonant(w) || endsInShwa(w) && (!w.infap))) && (w.p.slice(-3) !== "توب")) {
if (
type === "masc noun" &&
(shortSquish || ((endsInConsonant(w) || endsInShwa(w)) && (!w.infap))) &&
(w.p.slice(-3) !== "توب")
) {
return {
arabicPlural,
plural: {

View File

@ -25,9 +25,9 @@ import {
concatInflections,
unisexInfToObjectMatrix,
inflectYey,
psStringFromEntry,
allOnePersonInflection,
psStringEquals,
makePsString,
} from "./p-text-helpers";
import {
accentOnNFromEnd,
@ -116,7 +116,7 @@ function conjugateDynamicCompound(info: T.DynamicCompoundVerbInfo): T.VerbConjug
);
const complement = info.objComplement.plural
? info.objComplement.plural
: psStringFromEntry(info.objComplement.entry);
: makePsString(info.objComplement.entry.p, info.objComplement.entry.f);
const makeAspectContent = (aspect: T.Aspect): T.AspectContent => {
const makeDynamicModalContent = (): T.ModalContent => {
const nonImperative = addToForm([complement, " "], auxConj[aspect].modal.nonImperative);

View File

@ -8,7 +8,6 @@
import {
concatPsString,
firstPhonetics,
makePsString,
psStringEquals,
removeEndingL,
@ -22,6 +21,8 @@ import {
removeStartingTick,
ensureShortWurShwaShift,
choosePersInf,
removeFVarients,
isUnisexSet,
} from "./p-text-helpers";
import {
accentOnFront,
@ -60,9 +61,11 @@ const eyEndingUnaccented: T.PsString = { p: "ی", f: "ey" };
* @param complement - the dictionary entry for the complement of the verb if compound
*/
export function getVerbInfo(
entry: T.DictionaryEntry,
complement?: T.DictionaryEntry,
ent: T.DictionaryEntry,
complmnt?: T.DictionaryEntry,
): T.VerbInfo {
const entry = removeFVarients(ent);
const complement = complmnt ? removeFVarients(complmnt) : undefined;
const type = getType(entry);
if (type === "transitive or grammatically transitive simple") {
return {
@ -118,7 +121,7 @@ export function getVerbInfo(
}
}
if (type === "generative stative compound") {
return getGenerativeStativeCompoundVerbInfo(entry, complement as T.DictionaryEntry);
return getGenerativeStativeCompoundVerbInfo(entry, complement as T.DictionaryEntryNoFVars);
}
}
const comp = complement ? ensureUnisexInf(complement) : undefined;
@ -170,17 +173,14 @@ type Bases = {
}
function getGenerativeStativeCompoundVerbInfo(
entry: T.DictionaryEntry, comp: T.DictionaryEntry, forceSingular?: true,
entry: T.DictionaryEntryNoFVars, comp: T.DictionaryEntryNoFVars, forceSingular?: true,
): T.GenerativeStativeCompoundVerbInfo {
const transitivity = getTransitivity(entry);
const transitivityNoGrammTrans = transitivity === "grammatically transitive" ? "transitive" : transitivity;
const yulEnding = null;
const objComplement = getObjComplementInfo(entry, comp, forceSingular);
const auxVerb = stativeAux[transitivityNoGrammTrans];
const compUsed = objComplement.plural ? objComplement.plural : makePsString(
objComplement.entry.p,
firstPhonetics(objComplement.entry.f),
);
const compUsed = objComplement.plural ? objComplement.plural : removeFVarients(objComplement.entry);
const bases: Bases = {
stem: {
imperfective: auxVerb.info.stem.imperfective,
@ -235,16 +235,13 @@ function getGenerativeStativeCompoundVerbInfo(
};
}
function getDynamicCompoundInfo(entry: T.DictionaryEntry, comp: T.DictionaryEntry, forceSingular?: true): T.DynamicCompoundVerbInfo {
function getDynamicCompoundInfo(entry: T.DictionaryEntryNoFVars, comp: T.DictionaryEntryNoFVars, forceSingular?: true): T.DynamicCompoundVerbInfo {
const transitivity = getTransitivity(entry);
const yulEnding = null;
const objComplement = getObjComplementInfo(entry, comp, forceSingular);
const auxVerb = getDynamicAuxVerb(entry);
const auxVerbInfo = getVerbInfo(auxVerb.entry, auxVerb.complement) as T.NonComboVerbInfo;
const compUsed = objComplement.plural ? objComplement.plural : makePsString(
objComplement.entry.p,
firstPhonetics(objComplement.entry.f),
);
const compUsed = objComplement.plural ? objComplement.plural : objComplement.entry;
const bases: Bases = (auxVerbInfo.type === "stative compound")
? getObjectMatchingBases(auxVerbInfo, objComplement.person)
: {
@ -295,7 +292,7 @@ function getDynamicCompoundInfo(entry: T.DictionaryEntry, comp: T.DictionaryEntr
present: concatPsString(compUsed, " ", auxVerbInfo.participle.present),
past: concatPsString(compUsed, " ", bases.participle.past),
};
const makeIntransitiveFormOfEntry = (e: T.DictionaryEntry): T.DictionaryEntry => ({
const makeIntransitiveFormOfEntry = (e: T.DictionaryEntryNoFVars): T.DictionaryEntryNoFVars => ({
...e,
p: e.p.replace(
"کول",
@ -357,21 +354,20 @@ function getObjectMatchingBases(auxInfo: T.NonComboVerbInfo, person: T.Person):
}
function getObjComplementInfo(
entry: T.DictionaryEntry,
complement: T.DictionaryEntry,
entry: T.DictionaryEntryNoFVars,
complement: T.DictionaryEntryNoFVars,
forceSingular?: true
): T.ObjComplement {
const complementInEntry = makePsString(
entry.p.split(" ")[0],
entry.f.split(" ")[0],
);
const complementEntry: T.DictionaryEntry = { ...complement, f: firstPhonetics(complement.f) };
const usesSeperatePluralForm = !forceSingular && !psStringEquals(
makePsString(complementInEntry.p, removeAccents(complementInEntry.f)),
makePsString(complementEntry.p, removeAccents(complementEntry.f)),
makePsString(complement.p, removeAccents(complement.f)),
);
return {
entry: complementEntry,
entry: complement,
...usesSeperatePluralForm ? {
plural: complementInEntry,
} : {},
@ -379,7 +375,7 @@ function getObjComplementInfo(
};
}
function getTransitivity(entry: T.DictionaryEntry): T.Transitivity {
function getTransitivity(entry: T.DictionaryEntryNoFVars): T.Transitivity {
if (!entry.c) {
throw new Error("No part of speech info");
}
@ -423,7 +419,7 @@ function getType(entry: T.DictionaryEntry):
return "simple";
}
function getIdiosyncraticThirdMascSing(entry: T.DictionaryEntry): T.ShortThirdPersFormSet | false {
function getIdiosyncraticThirdMascSing(entry: T.DictionaryEntryNoFVars): T.ShortThirdPersFormSet | false {
if (entry.tppp && entry.tppf) {
const tpp = makePsString(entry.tppp, entry.tppf);
const ooRes = addOoPrefix(tpp, entry)
@ -454,7 +450,7 @@ function getIdiosyncraticThirdMascSing(entry: T.DictionaryEntry): T.ShortThirdPe
*
* @param entry - the dictionary entry for the verb
*/
function getVerbRoots(entry: T.DictionaryEntry, transitivity: T.Transitivity, complement?: T.UnisexInflections): T.VerbRootSet {
function getVerbRoots(entry: T.DictionaryEntryNoFVars, transitivity: T.Transitivity, complement?: T.UnisexInflections): T.VerbRootSet {
// each of the roots comes with a short and long version
// with or without the ending ل - ul
const isKawulAux = entry.p === "کول";
@ -469,19 +465,19 @@ function getVerbRoots(entry: T.DictionaryEntry, transitivity: T.Transitivity, co
} : {},
};
};
const infinitive = makePsString(entry.p, firstPhonetics(entry.f));
const infinitive = makePsString(entry.p, entry.f);
// the imperfective root is the infinitive
// TODO: CHECK THIS!! FOR PERSON INFLECTIONS??
const imperfective = ((): T.OptionalPersonInflections<T.LengthOptions<T.PsString>> => {
// if stative compound
if (complement && spaceInForm(infinitive)) {
if (complement && spaceInForm(entry)) {
const comp = complementInflects(complement) ? unisexInfToObjectMatrix(complement) : complement.masc[0][0];
const t = getAuxTransitivity(transitivity);
const aux = stativeAux[t].info.root.imperfective
return concatPsString(comp, " ", aux) as T.OptionalPersonInflections<T.LengthOptions<T.PsString>>;
}
return shortAndLong(infinitive);
return shortAndLong(entry);
})();
const { perfective, pSplit, fSplit } = ((): {
@ -533,7 +529,7 @@ function getVerbRoots(entry: T.DictionaryEntry, transitivity: T.Transitivity, co
*
* @param entry - the dictionary entry for the verb
*/
function getVerbStems(entry: T.DictionaryEntry, root: T.VerbRootSet, transitivity: T.Transitivity, complement?: T.UnisexInflections): T.VerbStemSet {
function getVerbStems(entry: T.DictionaryEntryNoFVars, root: T.VerbRootSet, transitivity: T.Transitivity, complement?: T.UnisexInflections): T.VerbStemSet {
function isRegEdulTransitive(): boolean {
/* istanbul ignore next */
if ("mascSing" in root.imperfective) {
@ -689,7 +685,7 @@ function splitPerfective(perfective: T.FullForm<T.PsString>, pSplit: number, fSp
return [beforeAccented, after] as T.SplitInfo;
}
function getParticiple(entry: T.DictionaryEntry, stem: T.VerbStemSet, infinitive: T.PsString, transitivity: T.Transitivity, complement?: T.UnisexInflections): T.ParticipleSet {
function getParticiple(entry: T.DictionaryEntryNoFVars, stem: T.VerbStemSet, infinitive: T.PsString, transitivity: T.Transitivity, complement?: T.UnisexInflections): T.ParticipleSet {
const shortParticipleRoot = ((): T.PsString | null => {
const shortenableEndings = ["ښتل", "ستل", "وتل"];
// special thing for اېښودل - پرېښودل
@ -786,7 +782,7 @@ function getParticiple(entry: T.DictionaryEntry, stem: T.VerbStemSet, infinitive
*/
function addOoPrefix(
s: T.SingleOrLengthOpts<T.PsString>,
entry: T.DictionaryEntry,
entry: T.DictionaryEntryNoFVars,
): { ps: T.SingleOrLengthOpts<T.PsString>, pSplit: number, fSplit: number } {
let pSplit = 0;
let fSplit = 0;
@ -877,13 +873,12 @@ function addOoPrefix(
};
}
function ensureUnisexInf(complement: T.DictionaryEntry): T.UnisexInflections {
const inflected = inflectWord(complement);
const isUnisex = inflected && (("masc" in inflected) && ("fem" in inflected));
if (isUnisex) {
return inflected as T.UnisexInflections;
function ensureUnisexInf(complement: T.DictionaryEntryNoFVars): T.UnisexInflections {
const inf = inflectWord(complement);
if (inf !== false && !!inf.inflections && isUnisexSet(inf.inflections)) {
return inf.inflections as T.UnisexInflections;
}
const word = makePsString(complement.p, firstPhonetics(complement.f));
const word = makePsString(complement.p, complement.f);
return {
masc: [
[word],
@ -898,9 +893,9 @@ function ensureUnisexInf(complement: T.DictionaryEntry): T.UnisexInflections {
};
}
function getDynamicAuxVerb(entry: T.DictionaryEntry): {
entry: T.DictionaryEntry,
complement?: T.DictionaryEntry,
function getDynamicAuxVerb(entry: T.DictionaryEntryNoFVars): {
entry: T.DictionaryEntryNoFVars,
complement?: T.DictionaryEntryNoFVars,
} {
const auxWord = entry.p.trim().split(" ").slice(-1)[0];
const auxWordResult = dynamicAuxVerbs.find((a) => a.entry.p === auxWord);
@ -909,15 +904,15 @@ function getDynamicAuxVerb(entry: T.DictionaryEntry): {
throw new Error("unknown auxilary verb for dynamic compound");
}
return {
entry: auxWordResult.entry,
entry: removeFVarients(auxWordResult.entry),
...("complement" in auxWordResult) ? {
complement: auxWordResult.complement,
complement: auxWordResult.complement ? removeFVarients(auxWordResult.complement) : undefined,
} : {},
};
}
function getComplementPerson(
complement: T.DictionaryEntry,
complement: T.DictionaryEntryNoFVars,
usesSeperatePluralForm?: boolean,
): T.Person {
const number = (

View File

@ -29,7 +29,7 @@ import {
addToForm,
concatPsString,
makePsString,
removeFVariants,
removeFVarients,
isVerbBlock,
isImperativeBlock,
isInflectionSet,
@ -89,7 +89,7 @@ export {
addToForm,
concatPsString,
makePsString,
removeFVariants,
removeFVarients,
standardizePashto,
convertSpelling,
revertSpelling,