both Pakistani spelling variants

This commit is contained in:
lingdocs 2021-07-24 18:43:53 +03:00
parent beb24bc820
commit a85670a99f
7 changed files with 89 additions and 36 deletions

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/pashto-inflector", "name": "@lingdocs/pashto-inflector",
"version": "0.8.5", "version": "0.9.0",
"author": "lingdocs.com", "author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com", "homepage": "https://verbs.lingdocs.com",

View File

@ -24,7 +24,7 @@ import {
} from "react-bootstrap"; } from "react-bootstrap";
import * as T from "./types"; import * as T from "./types";
import defualtTextOptions from "./lib/default-text-options"; import defualtTextOptions from "./lib/default-text-options";
const textOptionsLocalStorageName = "textOptions2";
type VerbType = "simple" | "stative compound" | "dynamic compound"; type VerbType = "simple" | "stative compound" | "dynamic compound";
const verbTypes: VerbType[] = [ const verbTypes: VerbType[] = [
"simple", "simple",
@ -66,7 +66,7 @@ function App() {
const regularIrregular = localStorage.getItem("regularIrregular") as "regular" | "irregular"; const regularIrregular = localStorage.getItem("regularIrregular") as "regular" | "irregular";
const transitivitiyShowing = localStorage.getItem("transitivityShowing") as undefined | T.Transitivity; const transitivitiyShowing = localStorage.getItem("transitivityShowing") as undefined | T.Transitivity;
const theme = localStorage.getItem("theme"); const theme = localStorage.getItem("theme");
const textOptionst = localStorage.getItem("textOptions"); const textOptionst = localStorage.getItem(textOptionsLocalStorageName);
if (regularIrregular) { if (regularIrregular) {
setRegularIrregular(regularIrregular); setRegularIrregular(regularIrregular);
} }
@ -95,7 +95,7 @@ function App() {
localStorage.setItem("regularIrregular", regularIrregular); localStorage.setItem("regularIrregular", regularIrregular);
localStorage.setItem("verbTypeShowing", verbTypeShowing); localStorage.setItem("verbTypeShowing", verbTypeShowing);
localStorage.setItem("transitivityShowing", transitivityShowing); localStorage.setItem("transitivityShowing", transitivityShowing);
localStorage.setItem("textOptions", JSON.stringify(textOptions)); localStorage.setItem(textOptionsLocalStorageName, JSON.stringify(textOptions));
localStorage.setItem("theme", theme); localStorage.setItem("theme", theme);
}); });
@ -305,14 +305,15 @@ function App() {
<h6>Pashto Spelling</h6> <h6>Pashto Spelling</h6>
<ButtonSelect <ButtonSelect
options={[ options={[
{ label: "🇦🇫 Afghan", value: "Afghan" }, { label: "Afghan", value: "Afghan" },
{ label: "🇵🇰 Pakistani", value: "Pakistani" }, { label: "Pakistani ي", value: "Pakistani ي" },
{ label: "Pakistani ی", value: "Pakistani ی" },
]} ]}
value={textOptions.spelling} value={textOptions.spelling}
handleChange={(p) => { handleChange={(p) => {
setTextOptions({ setTextOptions({
...textOptions, ...textOptions,
spelling: p as "Afghan" | "Pakistani", spelling: p as T.Spelling,
}); });
}} }}
/> />

View File

@ -7,7 +7,7 @@
*/ */
import { import {
convertAfToPkSpelling, convertSpelling,
} from "../lib/convert-spelling"; } from "../lib/convert-spelling";
import { import {
phoneticsToDiacritics phoneticsToDiacritics
@ -23,9 +23,7 @@ const Pashto = ({ opts, children: text }: {
const p = opts.diacritics const p = opts.diacritics
? (phoneticsToDiacritics(ps.p, ps.f) || ps.p) ? (phoneticsToDiacritics(ps.p, ps.f) || ps.p)
: ps.p; : ps.p;
return opts.spelling === "Afghan" return convertSpelling(p, opts.spelling);
? p
: convertAfToPkSpelling(p);
} }
const style = opts.pTextSize === "normal" const style = opts.pTextSize === "normal"
? undefined ? undefined

View File

@ -7,11 +7,11 @@
*/ */
import { import {
convertAfToPkSpelling, convertSpelling,
convertPkToAfSpelling, revertSpelling,
} from "./convert-spelling"; } from "./convert-spelling";
const pairs = [ const pairsWPakistaniUndotted = [
["سړی", "سړے"], ["سړی", "سړے"],
["موسیٰ", "موسیٰ"], ["موسیٰ", "موسیٰ"],
["فرمايي", "فرمائی"], ["فرمايي", "فرمائی"],
@ -27,14 +27,44 @@ const pairs = [
["ضمائر", "ضمائر"], ["ضمائر", "ضمائر"],
]; ];
pairs.forEach((pair) => { const pairsWPakistaniDotted = [
test(`${pair[0]} should be converted to ${pair[1]} in Pakistani spelling`, () => { ["سړی", "سړے"],
const converted = convertAfToPkSpelling(pair[0]); ["موسیٰ", "موسیٰ"],
["فرمايي", "فرمائي"],
["چای", "چائ"],
["زوی", "زوئ"],
["ښويېدل", "ښوئېدل"],
["ويي", "وئي"],
["دوستي", "دوستي"],
["هييت", "هييت"],
["ښيي", "ښيي"],
["ستاينه", "ستائينه"],
["فرمايل", "فرمائيل"],
["ضمائر", "ضمائر"],
];
pairsWPakistaniDotted.forEach((pair) => {
test(`${pair[0]} should be converted to ${pair[1]} in Pakistani ي spelling`, () => {
const converted = convertSpelling(pair[0], "Pakistani ي");
expect(converted).toBe(pair[1]); expect(converted).toBe(pair[1]);
}); });
test(`${pair[1]} should be reverted to ${pair[0]} in Pakistani ي spelling`, () => {
const reverted = revertSpelling(pair[1], "Pakistani ي");
expect(reverted).toBe(pair[0]);
});
});
test(`${pair[1]} should be converted to ${pair[0]} in Afghan spelling`, () => { pairsWPakistaniUndotted.forEach((pair) => {
const converted = convertPkToAfSpelling(pair[1]); test(`${pair[0]} should be converted to ${pair[1]} in Pakistani ی spelling`, () => {
const converted = convertSpelling(pair[0], "Pakistani ی");
expect(converted).toBe(pair[1]);
});
test(`${pair[0]} should stay the same`, () => {
const converted = convertSpelling(pair[0], "Afghan");
expect(converted).toBe(pair[0]); expect(converted).toBe(pair[0]);
}); });
test(`${pair[1]} should be reverted to ${pair[0]} in Pakistani ی spelling`, () => {
const reverted = revertSpelling(pair[1], "Pakistani ی");
expect(reverted).toBe(pair[0]);
});
}); });

View File

@ -6,28 +6,50 @@
* *
*/ */
export function convertAfToPkSpelling(input: string): string { import * as T from "../types";
const converted = input
.replace(/ای(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ائ") /**
* takes a string of standard Afghan Pashto text and puts it into the same or a different spelling system
*
* @param input
* @param spelling
* @returns
*/
export function convertSpelling(input: string, spelling: T.Spelling): string {
if (spelling === "Afghan") {
return input;
}
return input.replace(/ای(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ائ")
.replace(/وی(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وئ") .replace(/وی(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وئ")
.replace(/ی(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ے") .replace(/ی(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ے")
.replace(/ي(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ی") .replace(/ي(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, (spelling === "Pakistani ی")
? "ی"
: "ي")
.replace(/(?:ای|اي)(?=ي|ی|ې)/g, "ائ") .replace(/(?:ای|اي)(?=ي|ی|ې)/g, "ائ")
.replace(/(?:وی|وي)(?=ي|ی|ې)/g, "وئ") .replace(/(?:وی|وي)(?=ي|ی|ې)/g, "وئ")
.replace(/(?:ای|اي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ائي") .replace(/(?:ای|اي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ائي")
.replace(/(?:وی|وي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وئي"); .replace(/(?:وی|وي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وئي");
return converted;
} }
export function convertPkToAfSpelling(input: string): string { /**
const converted = input * Takes a string of a given spelling system and puts it into standard Afghan Pashto text
.replace(/ی(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ي") *
* @param input
* @param spelling
*/
export function revertSpelling(input: string, spelling: T.Spelling): string {
if (spelling === "Afghan") {
return input;
}
return input
.replace(new RegExp(`${spelling === "Pakistani ی"
? "ی"
: "ي"}(?![\u0621-\u065f\u0670-\u06d3\u06d5])`, "g"), "ي")
.replace(/ے(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ی") .replace(/ے(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ی")
.replace(/ائ(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ای") .replace(/ائ(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ای")
.replace(/وئ(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وی") .replace(/وئ(?![\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وی")
.replace(/(?:ائی|ائي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "اي") .replace(/(?:ائی|ائي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "اي")
.replace(/(?:وئی|وئي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وي") .replace(/(?:وئی|وئي)(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "وي")
.replace(/ائ(?=ي|ی|ې)/g, "اي") .replace(/ائ(?=ي|ی|ې)/g, "اي")
.replace(/وئ(?=ي|ی|ې)/g, "وي"); .replace(/وئ(?=ي|ی|ې)/g, "وي");;
return converted;
} }

View File

@ -37,8 +37,8 @@ import {
import { standardizePashto } from "./lib/standardize-pashto"; import { standardizePashto } from "./lib/standardize-pashto";
import { phoneticsToDiacritics } from "./lib/phonetics-to-diacritics"; import { phoneticsToDiacritics } from "./lib/phonetics-to-diacritics";
import { import {
convertAfToPkSpelling, convertSpelling,
convertPkToAfSpelling, revertSpelling,
} from "./lib/convert-spelling"; } from "./lib/convert-spelling";
import { import {
dictionaryEntryBooleanFields, dictionaryEntryBooleanFields,
@ -87,8 +87,8 @@ export {
makePsString, makePsString,
removeFVariants, removeFVariants,
standardizePashto, standardizePashto,
convertAfToPkSpelling, convertSpelling,
convertPkToAfSpelling, revertSpelling,
validateEntry, validateEntry,
isNounAdjOrVerb, isNounAdjOrVerb,
simplifyPhonetics, simplifyPhonetics,

View File

@ -128,11 +128,13 @@ export type DictionaryEntryError = {
erroneousFields: DictionaryEntryField[], erroneousFields: DictionaryEntryField[],
} }
export type Spelling = "Afghan" | "Pakistani ی" | "Pakistani ي";
export type TextOptions = { export type TextOptions = {
pTextSize: "normal" | "larger" | "largest"; pTextSize: "normal" | "larger" | "largest";
phonetics: "lingdocs" | "ipa" | "alalc" | "none"; phonetics: "lingdocs" | "ipa" | "alalc" | "none";
dialect: "standard" | "peshawer" | "southern"; dialect: "standard" | "peshawer" | "southern";
spelling: "Afghan" | "Pakistani"; spelling: Spelling;
diacritics: boolean; diacritics: boolean;
} }