ability to standardize no-break space insanity

This commit is contained in:
lingdocs 2022-07-30 16:27:13 -05:00
parent 418281518d
commit 0cccfaa083
3 changed files with 5 additions and 2 deletions

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/pashto-inflector", "name": "@lingdocs/pashto-inflector",
"version": "3.6.6", "version": "3.6.7",
"author": "lingdocs.com", "author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations", "description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com", "homepage": "https://verbs.lingdocs.com",

View File

@@ -23,6 +23,7 @@ const testPairs = [
["راکوي؛", "راکوي؛"], ["راکوي؛", "راکوي؛"],
["راکوي!", "راکوي!"], ["راکوي!", "راکوي!"],
["راکوي.", "راکوي."], ["راکوي.", "راکوي."],
["ګڼه ګوڼه", "ګڼه ګوڼه"]
]; ];
testPairs.forEach((pair) => { testPairs.forEach((pair) => {

View File

@@ -18,7 +18,9 @@ export function standardizePashto(input: string): string {
// Replace ي in the middle of words with ی // Replace ي in the middle of words with ی
.replace(/ي(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ی") .replace(/ي(?=[\u0621-\u065f\u0670-\u06d3\u06d5])/g, "ی")
// Replace آ two character version with combined آ character // Replace آ two character version with combined آ character
.replace(/آ/g, "آ"); .replace(/آ/g, "آ")
// Replace narrow no-break space with space
.replace(/\u202F/g, " ");
} }
export function standardizePhonetics(input: string): string { export function standardizePhonetics(input: string): string {