diff --git a/functions/package-lock.json b/functions/package-lock.json index 95239a1..da9b455 100644 --- a/functions/package-lock.json +++ b/functions/package-lock.json @@ -7,7 +7,7 @@ "name": "functions", "dependencies": { "@google-cloud/storage": "^5.8.1", - "@lingdocs/pashto-inflector": "3.6.0", + "@lingdocs/pashto-inflector": "3.6.2", "@types/cors": "^2.8.10", "@types/google-spreadsheet": "^3.0.2", "cors": "^2.8.5", @@ -30,7 +30,7 @@ "node": "16" }, "peerDependencies": { - "@lingdocs/pashto-inflector": "3.6.0" + "@lingdocs/pashto-inflector": "3.6.2" } }, "node_modules/@babel/code-frame": { @@ -505,9 +505,9 @@ } }, "node_modules/@lingdocs/pashto-inflector": { - "version": "3.6.0", - "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz", - "integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==", + "version": "3.6.2", + "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz", + "integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==", "license": "MIT", "dependencies": { "@formkit/auto-animate": "^1.0.0-beta.1", @@ -3682,9 +3682,9 @@ } }, "@lingdocs/pashto-inflector": { - "version": "3.6.0", - "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz", - "integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==", + "version": "3.6.2", + "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz", + "integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==", "requires": { "@formkit/auto-animate": "^1.0.0-beta.1", "classnames": "^2.2.6", diff --git a/functions/package.json b/functions/package.json index 444b513..beb5ce4 100644 --- a/functions/package.json +++ b/functions/package.json @@ -14,7 +14,7 @@ 
"main": "lib/functions/src/index.js", "dependencies": { "@google-cloud/storage": "^5.8.1", - "@lingdocs/pashto-inflector": "3.6.0", + "@lingdocs/pashto-inflector": "3.6.2", "@types/cors": "^2.8.10", "@types/google-spreadsheet": "^3.0.2", "cors": "^2.8.5", @@ -35,6 +35,6 @@ }, "private": true, "peerDependencies": { - "@lingdocs/pashto-inflector": "3.6.0" + "@lingdocs/pashto-inflector": "3.6.2" } } diff --git a/package-lock.json b/package-lock.json index 3db87aa..280f6fd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.3.1", "license": "MIT", "dependencies": { - "@lingdocs/pashto-inflector": "3.6.0", + "@lingdocs/pashto-inflector": "3.6.2", "lokijs": "^1.5.12", "nano": "^9.0.5", "passport-github2": "^0.1.12", @@ -29,7 +29,7 @@ "typescript": "^4.4.3" }, "peerDependencies": { - "@lingdocs/pashto-inflector": "3.6.0" + "@lingdocs/pashto-inflector": "3.6.2" } }, "node_modules/@babel/code-frame": { @@ -312,9 +312,9 @@ "dev": true }, "node_modules/@lingdocs/pashto-inflector": { - "version": "3.6.0", - "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz", - "integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==", + "version": "3.6.2", + "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz", + "integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==", "license": "MIT", "dependencies": { "@formkit/auto-animate": "^1.0.0-beta.1", @@ -4213,9 +4213,9 @@ "dev": true }, "@lingdocs/pashto-inflector": { - "version": "3.6.0", - "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz", - "integrity": "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==", + "version": "3.6.2", + "resolved": "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz", + 
"integrity": "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==", "requires": { "@formkit/auto-animate": "^1.0.0-beta.1", "classnames": "^2.2.6", diff --git a/package.json b/package.json index 7ca8535..2afbd55 100644 --- a/package.json +++ b/package.json @@ -17,12 +17,12 @@ "url": "git@github.com-lingdocs:lingdocs/lingdocs-main.git" }, "peerDependencies": { - "@lingdocs/pashto-inflector": "3.6.0" + "@lingdocs/pashto-inflector": "3.6.2" }, "author": "lingdocs.com", "license": "MIT", "dependencies": { - "@lingdocs/pashto-inflector": "3.6.0", + "@lingdocs/pashto-inflector": "3.6.2", "lokijs": "^1.5.12", "nano": "^9.0.5", "passport-github2": "^0.1.12", diff --git a/website/package.json b/website/package.json index bcd2290..c7c550c 100644 --- a/website/package.json +++ b/website/package.json @@ -7,7 +7,7 @@ "private": true, "dependencies": { "@fortawesome/fontawesome-free": "^5.15.2", - "@lingdocs/pashto-inflector": "3.6.0", + "@lingdocs/pashto-inflector": "3.6.2", "@testing-library/jest-dom": "^5.11.4", "@testing-library/react": "^11.1.0", "@testing-library/user-event": "^12.1.10", @@ -110,6 +110,6 @@ "user-event": "^4.0.0" }, "peerDependencies": { - "@lingdocs/pashto-inflector": "3.6.0" + "@lingdocs/pashto-inflector": "3.6.2" } } diff --git a/website/src/lib/dictionary.ts b/website/src/lib/dictionary.ts index a2330fb..35f3c2f 100644 --- a/website/src/lib/dictionary.ts +++ b/website/src/lib/dictionary.ts @@ -271,7 +271,18 @@ function pashtoFuzzyLookup({ searchString, page, tp .simplesort("i") .data(); resultsGiven = exactResults.map((mpd: any) => mpd.$loki); - + // Get slightly fuzzy matches + const slightlyFuzzy = new RegExp(makeAWeeBitFuzzy(search, infIndex), "i"); + const slightlyFuzzyQuery = { + [index]: { $regex: slightlyFuzzy }, + $loki: { $nin: resultsGiven }, + }; + const slightlyFuzzyResultsLimit = (pageSize * page) - resultsGiven.length; + const slightlyFuzzyResults = dictDb.collection.chain() + 
.find(slightlyFuzzyQuery) + .limit(slightlyFuzzyResultsLimit) + .data(); + resultsGiven.push(...slightlyFuzzyResults.map((mpd: any) => mpd.$loki)); // Get fuzzy matches const pashtoRegExLogic = fuzzifyPashto(search, { script: index === "p" ? "Pashto" : "Latin", @@ -301,17 +312,18 @@ function pashtoFuzzyLookup({ searchString, page, tp .limit(fuzzyResultsLimit) .data(); const results = tpFilter - ? [...exactResults, ...fuzzyResults].filter(tpFilter) - : [...exactResults, ...fuzzyResults]; - const chunksToSort = chunkOutArray(results, pageSize); + ? [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults].filter(tpFilter) + : [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults]; // sort out each chunk (based on limit used multiple times by infinite scroll) - // so that when infinite scrolling, it doesn't resort the previous chunks given - // TODO: If on the first page, only sort the fuzzyResults + // so that when infinite scrolling, it doesn't re-sort the previous chunks given + const closeResultsLength = exactResults.length + slightlyFuzzyResults.length; + const chunksToSort = chunkOutArray(results, pageSize); return chunksToSort .reduce((acc, cur, i) => ((i === 0) ? [ - ...sortByRelevancy(cur.slice(0, exactResults.length), search, index), - ...sortByRelevancy(cur.slice(exactResults.length), search, index), + // don't sort the close results in the first chunk + ...cur.slice(0, closeResultsLength), + ...sortByRelevancy(cur.slice(closeResultsLength), search, index), ] : [ ...acc, @@ -386,14 +398,12 @@ function makeVerbLookupPortal(): T.EntryLookupPortal { page: 1, tpFilter: tp.isVerbDictionaryEntry, }); - const r = vEntries.map((entry): T.VerbEntry => ({ + return vEntries.map((entry): T.VerbEntry => ({ entry, complement: (entry.c?.includes("comp.") && entry.l) ?
dictionary.findOneByTs(entry.l) : undefined, })); - console.log(r); - return r; }, getByTs: (ts: number): T.VerbEntry | undefined => { const entry = dictDb.findOneByTs(ts); diff --git a/website/src/lib/wee-bit-fuzzy.test.ts b/website/src/lib/wee-bit-fuzzy.test.ts new file mode 100644 index 0000000..7d29fe9 --- /dev/null +++ b/website/src/lib/wee-bit-fuzzy.test.ts @@ -0,0 +1,28 @@ +import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy"; + +const pMatches = [ + ["پیټی", "پېټی"], + ["دوستی", "دوستي"], + ["پته", "پټه"], +]; + +const fMatches = [ + ["sareyy", "saRey"], + ["peyTey", "peTey"], +]; + +pMatches.forEach((pair) => { + test(`${pair[0]} should match ${pair[1]}`, () => { + const re = makeAWeeBitFuzzy(pair[0], "p"); + const result = pair[1].match(new RegExp(re, "i")); + expect(result).toBeTruthy(); + }); +}); + +fMatches.forEach((pair) => { + test(`${pair[0]} should match ${pair[1]}`, () => { + const re = makeAWeeBitFuzzy(pair[0], "f"); + const result = pair[1].match(new RegExp(re, "i")); + expect(result).toBeTruthy(); + }); +}); \ No newline at end of file diff --git a/website/src/lib/wee-bit-fuzzy.ts b/website/src/lib/wee-bit-fuzzy.ts index 269ea93..617907d 100644 --- a/website/src/lib/wee-bit-fuzzy.ts +++ b/website/src/lib/wee-bit-fuzzy.ts @@ -6,36 +6,104 @@ * */ -const matcher = { - q: "[q|k]", - k: "[q|k]", - // TODO: this might not be the best way to handle - // double aa's passing as a's - because it can totally ignore the a's - a: "[a|á|ă]?a?", - á: "[a|á|ă]?a?", - ă: "[a|á|ă]?a?", - u: "[u|ú]", - ú: "[u|ú]", - e: "[e|é]", - é: "[e|é]", - i: "[i|í]", - í: "[i|í]", - o: "[o|ó]", - ó: "[o|ó]", - g: "[g|G]", - G: "[g|G]", - r: "[r|R]", - R: "[r|R]", +// const matcher = { +// q: "[q|k]", +// k: "[q|k]", +// // TODO: this might not be the best way to handle +// // double aa's passing as a's - because it can totally ignore the a's +// a: "[a|á|ă]?a?", +// á: "[a|á|ă]?a?", +// ă: "[a|á|ă]?a?", +// u: "[u|ú]", +// ú: "[u|ú]", +// e: "[e|é]", +// é: "[e|é]", +// i: 
"[i|í]", +// í: "[i|í]", +// o: "[o|ó]", +// ó: "[o|ó]", +// g: "[g|G]", +// G: "[g|G]", +// r: "[r|R]", +// R: "[r|R]", +// }; + +const fiveYeys = "[ئ|ۍ|ي|ې|ی]"; +const sSounds = "[س|ص|ث|څ]"; +const zSounds = "[ز|ژ|ض|ظ|ذ|ځ]"; +const tSounds = "[ت|ط|ټ]"; +const dSounds = "[د|ډ]"; +const rSounds = "[ر|ړ|ڼ]"; +const nSounds = "[ن|ڼ]"; + +const pReplacer = { + "ی": fiveYeys, + "ي": fiveYeys, + "ۍ": fiveYeys, + "ئ": fiveYeys, + "ې": fiveYeys, + + "س": sSounds, + "ص": sSounds, + "ث": sSounds, + "څ": sSounds, + + "ز": zSounds, + "ظ": zSounds, + "ذ": zSounds, + "ض": zSounds, + "ژ": zSounds, + "ځ": zSounds, + + "ت": tSounds, + "ط": tSounds, + "ټ": tSounds, + + "د": dSounds, + "ډ": dSounds, + + "ر": rSounds, + "ړ": rSounds, + + "ن": nSounds, + "ڼ": nSounds, }; -const fRepRegex = /r|R|q|k|a|á|ă|e|é|i|í|o|ó|g|G|u|ú/g; +const fiveYeysF = "(?:eyy|ey|ee|e|uy)"; +const zSoundsF = "(?:z|dz)"; + +const fReplacer = { + "eyy": fiveYeysF, + "ey": fiveYeysF, + "uy": fiveYeysF, + "ee": fiveYeysF, + "e": fiveYeysF, + + "z": zSoundsF, + "dz": zSoundsF, +}; + +const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g"); + +const fRepRegex = /eyy|ey|uy|ee|e|z|dz/g; + +function makePAWeeBitFuzzy(s: string): string { + // + s.replace(/ /g, "").split("").join(" *"); + return "^" + s.replace(pRepRegex, mtch => { + // @ts-ignore + return pReplacer[mtch]; + }); +} + +function makeFAWeeBitFuzzy(s: string): string { + return "^" + s.replace(fRepRegex, mtch => { + // @ts-ignore + return fReplacer[mtch]; + }); +} export function makeAWeeBitFuzzy(s: string, i: "f" | "p"): string { - const logic = i === "f" - ? "^" + s.replace(/ /g, "").split("").join("['|`]? *").replace(fRepRegex, (mtch) => { - // @ts-ignore - return matcher[mtch]; - }) - : "^" + s.replace(/ /g, "").split("").join(" *"); - return logic; + return i === "p" + ? 
makePAWeeBitFuzzy(s) + : makeFAWeeBitFuzzy(s); } \ No newline at end of file diff --git a/website/yarn.lock b/website/yarn.lock index b22dbe2..7631e3e 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -1590,10 +1590,10 @@ "@types/yargs" "^16.0.0" chalk "^4.0.0" -"@lingdocs/pashto-inflector@3.6.0": - version "3.6.0" - resolved "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz#89143246341bbca70c340f9cbcd72650f8348231" - integrity sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg== +"@lingdocs/pashto-inflector@3.6.2": + version "3.6.2" + resolved "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz#122eaeaac59253ea0ee708d772e860502aa1d6b7" + integrity sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ== dependencies: "@formkit/auto-animate" "^1.0.0-beta.1" classnames "^2.2.6" diff --git a/yarn.lock b/yarn.lock index d898ef2..30112cb 100644 --- a/yarn.lock +++ b/yarn.lock @@ -178,10 +178,10 @@ "resolved" "https://registry.npmjs.org/@hapi/hoek/-/hoek-9.2.1.tgz" "version" "9.2.1" -"@lingdocs/pashto-inflector@3.6.0": - "integrity" "sha512-OCkGiTTY8s2QgiTmP5MJ110GScul7ILtzoaz6b5zWq3Qerdhs10jAA0t/9OckfLE4yBkQYWkIG+5Qj4ZJGkODg==" - "resolved" "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.0.tgz" - "version" "3.6.0" +"@lingdocs/pashto-inflector@3.6.2": + "integrity" "sha512-hHvgJPrNAp/ZBvZRsm++X2vFnZyuVrpE54YWPubwk+1Xn+28otoJ34r/OsN4N7eXrXBcxawYsWhf/ot9D987GQ==" + "resolved" "https://npm.lingdocs.com/@lingdocs%2fpashto-inflector/-/pashto-inflector-3.6.2.tgz" + "version" "3.6.2" dependencies: "@formkit/auto-animate" "^1.0.0-beta.1" "classnames" "^2.2.6"