From 966578569d1f7d216159ab0d9184a2e7c6f05aa3 Mon Sep 17 00:00:00 2001 From: lingdocs <71590811+lingdocs@users.noreply.github.com> Date: Fri, 19 Aug 2022 12:54:52 +0430 Subject: [PATCH] better fuzzy --- .../src/lib/fuzzify-pashto/fuzzify-pashto.test.ts | 4 ++++ website/src/lib/fuzzify-pashto/replacer.ts | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/website/src/lib/fuzzify-pashto/fuzzify-pashto.test.ts b/website/src/lib/fuzzify-pashto/fuzzify-pashto.test.ts index 740ac42..570b604 100644 --- a/website/src/lib/fuzzify-pashto/fuzzify-pashto.test.ts +++ b/website/src/lib/fuzzify-pashto/fuzzify-pashto.test.ts @@ -56,6 +56,10 @@ const defaultInfo: IDefaultInfoBlock = { ["وازف", "واظیف"], ["شوریٰ", "شورا"], ["ځنبېدل", "ځمبېدل"], + // consonant swap // TODO: more?? + ["مچلوغزه", "مچلوزغه"], + ["رکشه", "رشکه"], + ["پښه", "ښپه"], ], nonMatches: [ ["سرک", "ترک"], diff --git a/website/src/lib/fuzzify-pashto/replacer.ts b/website/src/lib/fuzzify-pashto/replacer.ts index e76a0f5..47eacc1 100644 --- a/website/src/lib/fuzzify-pashto/replacer.ts +++ b/website/src/lib/fuzzify-pashto/replacer.ts @@ -34,6 +34,11 @@ interface IPhoneticsReplacerInfoItem extends IReplacerInfoItem { replWhenBeginning?: string; } +const ghzCombo = ["غز", "زغ"]; +const pxCombo = ["پښ", "ښپ"]; +const kshCombo = ["کش", "شک", "کښ", "ښک"]; + + export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [ { char: "اً", range: "ان" }, { @@ -103,6 +108,13 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [ { char: "ډ", range: tdSounds }, { char: "ڈ", range: tdSounds }, + { char: "غز", plus: ghzCombo }, + { char: "زغ", plus: ghzCombo }, + { char: "پښ", plus: pxCombo }, + { char: "ښپ", plus: pxCombo }, + { char: "کش", plus: kshCombo }, + { char: "شک", plus: kshCombo }, + { char: "مب", plus: ["مب", "نب"] }, { char: "نب", plus: ["مب", "نب"] }, { char: "ن", range: "نڼ", plus: ["اً"] }, // allow for words using اٌ at the end to be seached for with ن @@ -121,7 +133,7 @@ 
export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [ ]; // tslint:disable-next-line -export const pashtoReplacerRegex = /اً|أ|ا|آ|ٱ|ٲ|ٳ|ئی|ئي|ئے|یٰ|ی|ي|ې|ۍ|ئ|ے|س|ص|ث|څ|ج|چ|هٔ|ه|ۀ|ہ|ع|و|ؤ|ښ|غ|خ|ح|ش|ز|ض|ذ|ځ|ظ|ژ|ر|ړ|ڑ|ت|ټ|ٹ|ط|د|ډ|ڈ|مب|م|نب|ن|ڼ|ک|ګ|گ|ل|ق|ږ|ب|پ|ف/g; +export const pashtoReplacerRegex = /اً|أ|ا|آ|ٱ|ٲ|ٳ|ئی|ئي|ئے|یٰ|ی|ي|ې|ۍ|ئ|ے|س|ص|ث|څ|ج|چ|هٔ|ه|ۀ|غز|زغ|کش|شک|ښک|ښک|پښ|ښپ|ہ|ع|و|ؤ|ښ|غ|خ|ح|ش|ز|ض|ذ|ځ|ظ|ژ|ر|ړ|ڑ|ت|ټ|ٹ|ط|د|ډ|ڈ|مب|م|نب|ن|ڼ|ک|ګ|گ|ل|ق|ږ|ب|پ|ف/g; // TODO: I removed the h? 's at the beginning and ends. was that a good idea? const aaySoundLatin = "(?:[aá]a?i|[eé]y|[aá]a?y|[aá]h?i)";