release new phonetics!

This commit is contained in:
adueck 2023-07-27 18:18:01 +04:00
parent 8dd63ad9c4
commit 54fb2050c1
21 changed files with 3376 additions and 2910 deletions

View File

@ -67,6 +67,10 @@ npm install
#### Development
```sh
firebase login
# get envars locally
firebase functions:config:get > .runtimeconfig.json
# start functions emulator
npm run serve
```

View File

@ -9,7 +9,7 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"base64url": "^3.0.1",
"bcryptjs": "^2.4.3",
"connect-redis": "^6.0.0",
@ -124,9 +124,9 @@
}
},
"node_modules/@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"license": "MIT",
"dependencies": {
"fp-ts": "^2.16.0",
@ -2747,9 +2747,9 @@
}
},
"@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"requires": {
"fp-ts": "^2.16.0",
"pbf": "^3.2.1",

View File

@ -11,7 +11,7 @@
"author": "",
"license": "ISC",
"dependencies": {
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"base64url": "^3.0.1",
"bcryptjs": "^2.4.3",
"connect-redis": "^6.0.0",
@ -22,6 +22,7 @@
"express-session": "^1.17.2",
"lokijs": "^1.5.12",
"nano": "^9.0.3",
"next": "^13.4.12",
"node-fetch": "^2.6.7",
"nodemailer": "^6.6.3",
"passport": "^0.4.1",
@ -42,6 +43,7 @@
"@types/cron": "^2.0.0",
"@types/express": "^4.17.13",
"@types/express-session": "^1.17.4",
"@types/lokijs": "^1.5.8",
"@types/node": "^16.6.0",
"@types/node-fetch": "^2.5.12",
"@types/nodemailer": "^6.4.4",

View File

@ -4,22 +4,22 @@ import { CronJob } from "cron";
const collectionName = "ps-dictionary";
const allWordsCollectionName = "all-words";
import {
readDictionary,
readDictionaryInfo,
Types as T,
typePredicates as tp,
entryOfFull,
standardizePashto,
} from "@lingdocs/inflect"
readDictionary,
readDictionaryInfo,
Types as T,
typePredicates as tp,
entryOfFull,
standardizePashto,
} from "@lingdocs/inflect";
export let collection: Collection<T.DictionaryEntry> | undefined = undefined;
export let allWordsCollection: Collection<T.PsString> | undefined = undefined;
const adapter = new LokiMemoryAdapter();
const lokidb = new loki("", {
adapter,
autoload: false,
autosave: false,
env: "NODEJS",
adapter,
autoload: false,
autosave: false,
env: "NODEJS",
});
const updateJob = new CronJob("* * * * *", updateDictionary, null, false);
@ -27,117 +27,126 @@ const updateJob = new CronJob("* * * * *", updateDictionary, null, false);
let version: number = 0;
async function fetchDictionary(): Promise<T.Dictionary> {
const res = await fetch(process.env.LINGDOCS_DICTIONARY_URL || "");
const buffer = await res.arrayBuffer();
return readDictionary(buffer as Uint8Array);
const res = await fetch(process.env.LINGDOCS_DICTIONARY_URL || "");
const buffer = await res.arrayBuffer();
return readDictionary(buffer as Uint8Array);
}
async function fetchAllWords(): Promise<T.AllWordsWithInflections> {
// TODO: this is really ugly
const res = await fetch(process.env.LINGDOCS_DICTIONARY_URL?.slice(0, -4) + "all-words.json");
return await res.json();
// TODO: this is really ugly
const res = await fetch(
process.env.LINGDOCS_DICTIONARY_URL?.slice(0, -10) +
"all-words-dictionary.json"
);
return await res.json();
}
async function fetchDictionaryInfo(): Promise<T.DictionaryInfo> {
const res = await fetch(process.env.LINGDOCS_DICTIONARY_URL + "-info" || "");
const buffer = await res.arrayBuffer();
return readDictionaryInfo(buffer as Uint8Array);
const res = await fetch(process.env.LINGDOCS_DICTIONARY_URL + "-info" || "");
const buffer = await res.arrayBuffer();
return readDictionaryInfo(buffer as Uint8Array);
}
export async function updateDictionary(): Promise<"no update" | "updated"> {
const info = await fetchDictionaryInfo();
if (info.release === version) {
return "no update";
}
const dictionary = await fetchDictionary();
version = dictionary.info.release;
collection?.clear();
lokidb.removeCollection(collectionName);
collection?.insert(dictionary.entries);
const allWords = await fetchAllWords();
allWordsCollection?.clear();
lokidb.removeCollection(allWordsCollectionName);
allWordsCollection?.insert(allWords.words);
return "updated";
const info = await fetchDictionaryInfo();
if (info.release === version) {
return "no update";
}
const dictionary = await fetchDictionary();
version = dictionary.info.release;
collection?.clear();
lokidb.removeCollection(collectionName);
collection?.insert(dictionary.entries);
const allWords = await fetchAllWords();
allWordsCollection?.clear();
lokidb.removeCollection(allWordsCollectionName);
allWordsCollection?.insert(allWords.words);
return "updated";
}
function getOneByTs(ts: number): T.DictionaryEntry {
if (!collection) {
throw new Error("dictionary not initialized");
}
const r = collection.by("ts", ts);
// @ts-ignore
const { $loki, meta, ...entry } = r;
return entry;
if (!collection) {
throw new Error("dictionary not initialized");
}
const r = collection.by("ts", ts);
// @ts-ignore
const { $loki, meta, ...entry } = r;
return entry;
}
export function findInAllWords(p: string | RegExp): T.PsWord[] | undefined {
if (!allWordsCollection) {
throw new Error("allWords not initialized");
}
return allWordsCollection.find({
p: typeof p === "string"
? p
: { $regex: p },
});
if (!allWordsCollection) {
throw new Error("allWords not initialized");
}
return allWordsCollection.find({
p: typeof p === "string" ? p : { $regex: p },
});
}
export async function getEntries(ids: (number | string)[]): Promise<{
results: (T.DictionaryEntry | T.VerbEntry)[],
notFound: (number | string)[],
results: (T.DictionaryEntry | T.VerbEntry)[];
notFound: (number | string)[];
}> {
if (!collection) {
throw new Error("dictionary not initialized");
}
const idsP = ids.map(x => typeof x === "number" ? x : standardizePashto(x))
const results: (T.DictionaryEntry | T.VerbEntry)[] = collection.find({
"$or": [
{ "ts": { "$in": idsP }},
{ "p": { "$in": idsP }},
],
}).map(x => {
const { $loki, meta, ...entry } = x;
return entry;
}).map((entry): T.DictionaryEntry | T.VerbEntry => {
if (tp.isVerbDictionaryEntry(entry)) {
if (entry.c?.includes("comp.") && entry.l) {
const complement = getOneByTs(entry.l);
if (!complement) throw new Error("Error getting complement "+entry.l);
return {
entry,
complement,
};
}
return { entry };
} else {
return entry;
if (!collection) {
throw new Error("dictionary not initialized");
}
const idsP = ids.map((x) =>
typeof x === "number" ? x : standardizePashto(x)
);
const results: (T.DictionaryEntry | T.VerbEntry)[] = collection
.find({
$or: [{ ts: { $in: idsP } }, { p: { $in: idsP } }],
})
.map((x) => {
const { $loki, meta, ...entry } = x;
return entry;
})
.map((entry): T.DictionaryEntry | T.VerbEntry => {
if (tp.isVerbDictionaryEntry(entry)) {
if (entry.c?.includes("comp.") && entry.l) {
const complement = getOneByTs(entry.l);
if (!complement)
throw new Error("Error getting complement " + entry.l);
return {
entry,
complement,
};
}
return { entry };
} else {
return entry;
}
});
return {
results,
notFound: ids.filter(id => !results.find(x => {
const entry = entryOfFull(x);
return entry.p === id || entry.ts === id;
})),
};
return {
results,
notFound: ids.filter(
(id) =>
!results.find((x) => {
const entry = entryOfFull(x);
return entry.p === id || entry.ts === id;
})
),
};
}
lokidb.loadDatabase({}, (err: Error) => {
lokidb.removeCollection(collectionName);
lokidb.removeCollection(allWordsCollectionName);
fetchDictionary().then((dictionary) => {
collection = lokidb.addCollection(collectionName, {
indices: ["i", "p"],
unique: ["ts"],
});
version = dictionary.info.release;
collection?.insert(dictionary.entries);
updateJob.start();
}).catch(console.error);
fetchAllWords().then((allWords) => {
allWordsCollection = lokidb.addCollection(allWordsCollectionName, {
indices: ["p"],
});
allWordsCollection?.insert(allWords.words);
lokidb.removeCollection(collectionName);
lokidb.removeCollection(allWordsCollectionName);
fetchDictionary()
.then((dictionary) => {
collection = lokidb.addCollection(collectionName, {
indices: ["i", "p"],
unique: ["ts"],
});
version = dictionary.info.release;
collection?.insert(dictionary.entries);
updateJob.start();
})
.catch(console.error);
fetchAllWords().then((allWords) => {
allWordsCollection = lokidb.addCollection(allWordsCollectionName, {
indices: ["p"],
});
allWordsCollection?.insert(allWords.words);
});
});

View File

@ -1,55 +1,54 @@
import express from "express";
import {
allWordsCollection,
collection,
findInAllWords,
getEntries,
updateDictionary,
allWordsCollection,
collection,
getEntries,
updateDictionary,
} from "../lib/dictionary";
import { scriptToPhonetics } from "../lib/scriptToPhonetics";
const dictionaryRouter = express.Router();
dictionaryRouter.post("/update", async (req, res, next) => {
const result = await updateDictionary();
res.send({ ok: true, result });
const result = await updateDictionary();
res.send({ ok: true, result });
});
dictionaryRouter.post("/script-to-phonetics", async (req, res, next) => {
if (!allWordsCollection) {
return res.send({ ok: false, message: "allWords not ready" });
}
const text = req.body.text as unknown;
const accents = req.body.accents as unknown;
if (!text || typeof text !== "string" || typeof accents !== "boolean") {
return res.status(400).send({ ok: false, error: "invalid query" });
}
const results = await scriptToPhonetics(text, accents);
res.send({ ok: true, results });
})
if (!allWordsCollection) {
return res.send({ ok: false, message: "allWords not ready" });
}
const text = req.body.text as unknown;
const accents = req.body.accents as unknown;
if (!text || typeof text !== "string" || typeof accents !== "boolean") {
return res.status(400).send({ ok: false, error: "invalid query" });
}
const results = await scriptToPhonetics(text, accents);
res.send({ ok: true, results });
});
dictionaryRouter.post("/entries", async (req, res, next) => {
if (!collection) {
return res.send({ ok: false, message: "dictionary not ready" });
}
const ids = req.body.ids as (number | string)[];
if (!Array.isArray(ids)) {
return res.status(400).send({ ok: false, error: "invalid query" });
}
const results = await getEntries(ids);
return res.send(results);
if (!collection) {
return res.send({ ok: false, message: "dictionary not ready" });
}
const ids = req.body.ids as (number | string)[];
if (!Array.isArray(ids)) {
return res.status(400).send({ ok: false, error: "invalid query" });
}
const results = await getEntries(ids);
return res.send(results);
});
dictionaryRouter.get("/entries/:id", async (req, res, next) => {
if (!collection) {
return res.send({ ok: false, message: "dictionary not ready" });
}
const ids = req.params.id.split(",").map(x => {
const n = parseInt(x);
return Number.isNaN(n) ? x : n;
});
const results = await getEntries(ids);
return res.send(results);
if (!collection) {
return res.send({ ok: false, message: "dictionary not ready" });
}
const ids = req.params.id.split(",").map((x) => {
const n = parseInt(x);
return Number.isNaN(n) ? x : n;
});
const results = await getEntries(ids);
return res.send(results);
});
export default dictionaryRouter;
export default dictionaryRouter;

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@
"name": "functions",
"dependencies": {
"@google-cloud/storage": "^5.8.1",
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"@types/cors": "^2.8.10",
"@types/google-spreadsheet": "^3.0.2",
"@types/react": "^18.0.21",
@ -1468,9 +1468,9 @@
}
},
"node_modules/@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"license": "MIT",
"dependencies": {
"fp-ts": "^2.16.0",
@ -8056,9 +8056,9 @@
}
},
"@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"requires": {
"fp-ts": "^2.16.0",
"pbf": "^3.2.1",

View File

@ -15,7 +15,7 @@
"main": "lib/functions/src/index.js",
"dependencies": {
"@google-cloud/storage": "^5.8.1",
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"@types/cors": "^2.8.10",
"@types/google-spreadsheet": "^3.0.2",
"@types/react": "^18.0.21",

View File

@ -4,47 +4,59 @@ import { receiveSubmissions } from "./submissions";
import lingdocsAuth from "./middleware/lingdocs-auth";
import publish from "./publish";
export const publishDictionary = functions.runWith({
export const publishDictionary = functions
.runWith({
timeoutSeconds: 525,
memory: "2GB"
}).https.onRequest(
memory: "2GB",
})
.https.onRequest(
lingdocsAuth(
async (req, res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>) => {
if (req.user.level !== "editor") {
res.status(403).send({ ok: false, error: "403 forbidden" });
return;
}
try {
const response = await publish();
res.send(response);
} catch (e) {
// @ts-ignore
res.status(500).send({ ok: false, error: e.message });
}
async (
req,
res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>
) => {
if (req.user.level !== "editor") {
res.status(403).send({ ok: false, error: "403 forbidden" });
return;
}
try {
const response = await publish();
res.send(response);
} catch (e) {
// @ts-ignore
res.status(500).send({ ok: false, error: e.message });
}
}
)
);
export const submissions = functions.runWith({
);
export const submissions = functions
.runWith({
timeoutSeconds: 60,
memory: "1GB",
}).https.onRequest(lingdocsAuth(
async (req, res: functions.Response<FT.SubmissionsResponse | FT.FunctionError>) => {
})
.https.onRequest(
lingdocsAuth(
async (
req,
res: functions.Response<FT.SubmissionsResponse | FT.FunctionError>
) => {
if (!Array.isArray(req.body)) {
res.status(400).send({
ok: false,
error: "invalid submission",
});
return;
res.status(400).send({
ok: false,
error: "invalid submission",
});
return;
}
const suggestions = req.body as FT.SubmissionsRequest;
try {
const response = await receiveSubmissions(suggestions, true);// req.user.level === "editor");
// TODO: WARN IF ANY OF THE EDITS DIDN'T HAPPEN
res.send(response);
const response = await receiveSubmissions(suggestions, true); // req.user.level === "editor");
// TODO: WARN IF ANY OF THE EDITS DIDN'T HAPPEN
res.send(response);
} catch (e) {
// @ts-ignore
res.status(500).send({ ok: false, error: e.message });
};
})
);
// @ts-ignore
res.status(500).send({ ok: false, error: e.message });
}
}
)
);

View File

@ -1,36 +1,33 @@
import { GoogleSpreadsheet } from "google-spreadsheet";
import * as functions from "firebase-functions";
import {
Types as T,
dictionaryEntryBooleanFields,
dictionaryEntryNumberFields,
dictionaryEntryTextFields,
validateEntry,
writeDictionary,
writeDictionaryInfo,
simplifyPhonetics,
standardizeEntry,
Types as T,
dictionaryEntryBooleanFields,
dictionaryEntryNumberFields,
dictionaryEntryTextFields,
validateEntry,
writeDictionary,
writeDictionaryInfo,
simplifyPhonetics,
standardizeEntry,
} from "@lingdocs/inflect";
import {
getWordList,
} from "./word-list-maker";
import {
PublishDictionaryResponse,
} from "../../website/src/types/functions-types";
import { getWordList } from "./word-list-maker";
import { PublishDictionaryResponse } from "../../website/src/types/functions-types";
import { Storage } from "@google-cloud/storage";
const storage = new Storage({
projectId: "lingdocs",
projectId: "lingdocs",
});
const title = "LingDocs Pashto Dictionary"
const license = "Copyright © 2021 lingdocs.com All Rights Reserved - Licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License - https://creativecommons.org/licenses/by-nc-sa/4.0/";
const title = "LingDocs Pashto Dictionary";
const license =
"Copyright © 2021 lingdocs.com All Rights Reserved - Licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License - https://creativecommons.org/licenses/by-nc-sa/4.0/";
const bucketName = "lingdocs";
const baseUrl = `https://storage.googleapis.com/${bucketName}/`;
const dictionaryFilename = "dict";
const dictionaryInfoFilename = "dict-info";
const dictionaryFilename = "dictionary";
const dictionaryInfoFilename = "dictionary-info";
// const hunspellAffFileFilename = "ps_AFF.aff";
// const hunspellDicFileFilename = "ps_AFF.dic";
const allWordsJsonFilename = "all-words.json";
const allWordsJsonFilename = "all-words-dictionary.json";
const url = `${baseUrl}${dictionaryFilename}`;
const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
@ -38,159 +35,173 @@ const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
// to keep the publish function time down
export default async function publish(): Promise<PublishDictionaryResponse> {
const entries = await getRawEntries();
const errors = checkForErrors(entries);
if (errors.length) {
return({ ok: false, errors });
}
// const duplicates = findDuplicates(entries);
// duplicates.forEach((duplicate) => {
// const index = entries.findIndex(e => e.ts === duplicate.ts);
// if (index > -1) entries.splice(index, 1);
// })
const dictionary: T.Dictionary = {
info: {
title,
license,
url,
infoUrl,
release: new Date().getTime(),
numberOfEntries: entries.length,
},
entries,
}
uploadDictionaryToStorage(dictionary).catch(console.error);
// TODO: make this async and run after publish response
doHunspellEtc(dictionary.info, entries).catch(console.error);
return {
ok: true,
info: dictionary.info
};
const entries = await getRawEntries();
const errors = checkForErrors(entries);
if (errors.length) {
return { ok: false, errors };
}
// const duplicates = findDuplicates(entries);
// duplicates.forEach((duplicate) => {
// const index = entries.findIndex(e => e.ts === duplicate.ts);
// if (index > -1) entries.splice(index, 1);
// })
const dictionary: T.Dictionary = {
info: {
title,
license,
url,
infoUrl,
release: new Date().getTime(),
numberOfEntries: entries.length,
},
entries,
};
uploadDictionaryToStorage(dictionary).catch(console.error);
// TODO: make this async and run after publish response
doHunspellEtc(dictionary.info, entries).catch(console.error);
return {
ok: true,
info: dictionary.info,
};
}
async function doHunspellEtc(info: T.DictionaryInfo, entries: T.DictionaryEntry[]) {
const wordlistResponse = getWordList(entries);
if (!wordlistResponse.ok) {
throw new Error(JSON.stringify(wordlistResponse.errors));
}
// const hunspell = makeHunspell(wordlistResponse.wordlist);
// await uploadHunspellToStorage(hunspell);
await uploadAllWordsToStoarage(info, wordlistResponse.wordlist)
async function doHunspellEtc(
info: T.DictionaryInfo,
entries: T.DictionaryEntry[]
) {
const wordlistResponse = getWordList(entries);
if (!wordlistResponse.ok) {
throw new Error(JSON.stringify(wordlistResponse.errors));
}
// const hunspell = makeHunspell(wordlistResponse.wordlist);
// await uploadHunspellToStorage(hunspell);
await uploadAllWordsToStoarage(info, wordlistResponse.wordlist);
}
/**
* Gets the entries from the spreadsheet, and also deletes duplicate
* entries that are sometimes annoyingly created by the GoogleSheets API
* when adding entries programmatically
*
* @returns
*
*
* @returns
*
*/
async function getRows() {
const doc = new GoogleSpreadsheet(
functions.config().sheet.id,
);
await doc.useServiceAccountAuth({
client_email: functions.config().serviceacct.email,
private_key: functions.config().serviceacct.key,
});
await doc.loadInfo();
const sheet = doc.sheetsByIndex[0];
const rows = await sheet.getRows();
rows.sort((a, b) => a.ts > b.ts ? -1 : a.ts < b.ts ? 1 : 0);
return rows;
const doc = new GoogleSpreadsheet(functions.config().sheet.id);
await doc.useServiceAccountAuth({
client_email: functions.config().serviceacct.email,
private_key: functions.config().serviceacct.key,
});
await doc.loadInfo();
const sheet = doc.sheetsByIndex[0];
const rows = await sheet.getRows();
rows.sort((a, b) => (a.ts > b.ts ? -1 : a.ts < b.ts ? 1 : 0));
return rows;
}
async function getRawEntries(): Promise<T.DictionaryEntry[]> {
const rows = await getRows();
async function deleteRow(i: number) {
console.log("WILL DELETE ROW", rows[i].p, rows[i].ts, rows[i].f);
await rows[i].delete();
const rows = await getRows();
async function deleteRow(i: number) {
console.log("WILL DELETE ROW", rows[i].p, rows[i].ts, rows[i].f);
await rows[i].delete();
}
const entries: T.DictionaryEntry[] = [];
let sheetIndex = 0;
// get the rows in order of ts for easy detection of duplicate entries
for (let i = 0; i < rows.length; i++) {
function sameEntry(a: any, b: any): boolean {
return a.p === b.p && a.f === b.f && a.e === b.e;
}
const entries: T.DictionaryEntry[] = [];
let sheetIndex = 0;
// get the rows in order of ts for easy detection of duplicate entries
for (let i = 0; i < rows.length; i++) {
function sameEntry(a: any, b: any): boolean {
return a.p === b.p && a.f === b.f && a.e === b.e;
}
sheetIndex++;
const row = rows[i];
const nextRow = rows[i+1] || undefined;
if (row.ts === nextRow?.ts) {
if (sameEntry(row, nextRow)) {
// this looks like a duplicate entry made by the sheets api
// delete it and keep going
await deleteRow(sheetIndex);
sheetIndex--;
continue;
} else {
throw new Error(`ts ${row.ts} is a duplicate ts of a different entry`);
}
}
const e: T.DictionaryEntry = {
i: 1,
ts: parseInt(row.ts),
p: row.p,
f: row.f,
g: simplifyPhonetics(row.f),
e: row.e,
};
dictionaryEntryNumberFields.forEach((field: T.DictionaryEntryNumberField) => {
if (row[field]) e[field] = parseInt(row[field]);
});
dictionaryEntryTextFields.forEach((field: T.DictionaryEntryTextField) => {
if (row[field]) e[field] = row[field].trim();
});
dictionaryEntryBooleanFields.forEach((field: T.DictionaryEntryBooleanField) => {
if (row[field]) e[field] = true;
});
entries.push(standardizeEntry(e));
sheetIndex++;
const row = rows[i];
const nextRow = rows[i + 1] || undefined;
if (row.ts === nextRow?.ts) {
if (sameEntry(row, nextRow)) {
// this looks like a duplicate entry made by the sheets api
// delete it and keep going
await deleteRow(sheetIndex);
sheetIndex--;
continue;
} else {
throw new Error(`ts ${row.ts} is a duplicate ts of a different entry`);
}
}
// add alphabetical index
entries.sort((a, b) => a.p.localeCompare(b.p, "ps"));
const entriesLength = entries.length;
// add index
for (let i = 0; i < entriesLength; i++) {
entries[i].i = i;
}
return entries;
const e: T.DictionaryEntry = {
i: 1,
ts: parseInt(row.ts),
p: row.p,
f: row.f,
g: simplifyPhonetics(row.f),
e: row.e,
};
dictionaryEntryNumberFields.forEach(
(field: T.DictionaryEntryNumberField) => {
if (row[field]) e[field] = parseInt(row[field]);
}
);
dictionaryEntryTextFields.forEach((field: T.DictionaryEntryTextField) => {
if (row[field]) e[field] = row[field].trim();
});
dictionaryEntryBooleanFields.forEach(
(field: T.DictionaryEntryBooleanField) => {
if (row[field]) e[field] = true;
}
);
entries.push(standardizeEntry(e));
}
// add alphabetical index
entries.sort((a, b) => a.p.localeCompare(b.p, "ps"));
const entriesLength = entries.length;
// add index
for (let i = 0; i < entriesLength; i++) {
entries[i].i = i;
}
return entries;
}
function checkForErrors(entries: T.DictionaryEntry[]): T.DictionaryEntryError[] {
return entries.reduce((errors: T.DictionaryEntryError[], entry: T.DictionaryEntry) => {
const response = validateEntry(entry);
if ("errors" in response && response.errors.length) {
return [...errors, response];
function checkForErrors(
entries: T.DictionaryEntry[]
): T.DictionaryEntryError[] {
return entries.reduce(
(errors: T.DictionaryEntryError[], entry: T.DictionaryEntry) => {
const response = validateEntry(entry);
if ("errors" in response && response.errors.length) {
return [...errors, response];
}
if ("checkComplement" in response) {
const complement = entries.find((e) => e.ts === entry.l);
if (!complement) {
const error: T.DictionaryEntryError = {
errors: ["complement link not found in dictonary"],
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: ["l"],
};
return [...errors, error];
}
if ("checkComplement" in response) {
const complement = entries.find((e) => e.ts === entry.l);
if (!complement) {
const error: T.DictionaryEntryError = {
errors: ["complement link not found in dictonary"],
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: ["l"],
};
return [...errors, error];
}
if (!complement.c?.includes("n.") && !complement.c?.includes("adj.") && !complement.c?.includes("adv.")) {
const error: T.DictionaryEntryError = {
errors: ["complement link to invalid complement"],
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: ["l"],
};
return [...errors, error];
}
if (
!complement.c?.includes("n.") &&
!complement.c?.includes("adj.") &&
!complement.c?.includes("adv.")
) {
const error: T.DictionaryEntryError = {
errors: ["complement link to invalid complement"],
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: ["l"],
};
return [...errors, error];
}
return errors;
}, []);
}
return errors;
},
[]
);
}
// function findDuplicates(entries: T.DictionaryEntry[]): T.DictionaryEntry[] {
@ -208,20 +219,20 @@ function checkForErrors(entries: T.DictionaryEntry[]): T.DictionaryEntryError[]
// }
async function upload(content: Buffer | string, filename: string) {
const isBuffer = typeof content !== "string";
const file = storage.bucket(bucketName).file(filename);
await file.save(content, {
gzip: isBuffer ? false : true,
predefinedAcl: "publicRead",
metadata: {
contentType: isBuffer
? "application/octet-stream"
: filename.slice(-5) === ".json"
? "application/json"
: "text/plain; charset=UTF-8",
cacheControl: "no-cache",
},
});
const isBuffer = typeof content !== "string";
const file = storage.bucket(bucketName).file(filename);
await file.save(content, {
gzip: isBuffer ? false : true,
predefinedAcl: "publicRead",
metadata: {
contentType: isBuffer
? "application/octet-stream"
: filename.slice(-5) === ".json"
? "application/json"
: "text/plain; charset=UTF-8",
cacheControl: "no-cache",
},
});
}
// async function uploadHunspellToStorage(wordlist: {
@ -234,19 +245,25 @@ async function upload(content: Buffer | string, filename: string) {
// ]);
// }
async function uploadAllWordsToStoarage(info: T.DictionaryInfo, words: T.PsString[]) {
await upload(JSON.stringify({ info, words } as T.AllWordsWithInflections), allWordsJsonFilename);
async function uploadAllWordsToStoarage(
info: T.DictionaryInfo,
words: T.PsString[]
) {
await upload(
JSON.stringify({ info, words } as T.AllWordsWithInflections),
allWordsJsonFilename
);
}
async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
const dictionaryBuffer = writeDictionary(dictionary);
const dictionaryInfoBuffer = writeDictionaryInfo(dictionary.info);
await Promise.all([
upload(JSON.stringify(dictionary), `${dictionaryFilename}.json`),
upload(JSON.stringify(dictionary.info), `${dictionaryInfoFilename}.json`),
upload(dictionaryBuffer as Buffer, dictionaryFilename),
upload(dictionaryInfoBuffer as Buffer, dictionaryInfoFilename),
]);
const dictionaryBuffer = writeDictionary(dictionary);
const dictionaryInfoBuffer = writeDictionaryInfo(dictionary.info);
await Promise.all([
upload(JSON.stringify(dictionary), `${dictionaryFilename}.json`),
upload(JSON.stringify(dictionary.info), `${dictionaryInfoFilename}.json`),
upload(dictionaryBuffer as Buffer, dictionaryFilename),
upload(dictionaryInfoBuffer as Buffer, dictionaryInfoFilename),
]);
}
// function makeHunspell(wordlist: string[]) {

View File

@ -7,7 +7,7 @@
"private": true,
"dependencies": {
"@fortawesome/fontawesome-free": "^5.15.2",
"@lingdocs/ps-react": "5.10.1",
"@lingdocs/ps-react": "6.0.0",
"@testing-library/jest-dom": "^5.11.4",
"@testing-library/react": "^11.1.0",
"@testing-library/user-event": "^12.1.10",

View File

@ -9,7 +9,7 @@
* {
font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Oxygen,
Ubuntu, Cantarell, Fira Sans, Droid Sans, Helvetica Neue, sans-serif;
Ubuntu, Cantarell, Fira Sans, Droid Sans, Helvetica Neue, sans-serif;
}
:root {
@ -36,7 +36,7 @@
--farther: #bbb;
--farthest: #999;
--high-contrast: #cfcfcf;
--input-bg: #ccc;
}
@ -99,13 +99,23 @@ hr {
background-color: var(--closer) !important;
color: var(--high-contrast);
}
.bg-white {
background-color: var(--theme-shade) !important;
}
/* TODO: better handling of modals across light and dark modes */
.modal-body, .modal-title {
color:#1d1f25;
.modal-body,
.modal-title {
color: var(--high-contrast);
}
.modal-content {
background-color: var(--theme-shade);
}
.modal-content .table {
color: var(--high-contrast);
}
.table {
@ -310,6 +320,7 @@ input {
.entry-suggestion-button {
right: 15px;
}
.conjugation-search-button {
right: 15px;
}
@ -339,6 +350,7 @@ input {
text-decoration: none;
color: var(--farther);
}
.clickable:hover {
color: var(--farther);
}
@ -356,13 +368,13 @@ input {
.btn.bg-white:active,
.btn.bg-white:hover {
color: #555 !important;
color: #555 !important;
}
.btn-group.full-width {
display: flex;
}
.full-width .btn {
flex: 1;
}
@ -376,43 +388,49 @@ input {
/* Loding animation from https://projects.lukehaas.me/css-loaders/ */
.loader,
.loader:after {
border-radius: 50%;
width: 10em;
height: 10em;
border-radius: 50%;
width: 10em;
height: 10em;
}
.loader {
margin: 60px auto;
font-size: 10px;
position: relative;
text-indent: -9999em;
border-top: 1.1em solid var(--closer);
border-right: 1.1em solid var(--closer);
border-bottom: 1.1em solid var(--closer);
border-left: 1.1em solid var(--farthest);
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear;
margin: 60px auto;
font-size: 10px;
position: relative;
text-indent: -9999em;
border-top: 1.1em solid var(--closer);
border-right: 1.1em solid var(--closer);
border-bottom: 1.1em solid var(--closer);
border-left: 1.1em solid var(--farthest);
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear;
}
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
/* End of loading animation from https://projects.lukehaas.me/css-loaders/ */
/* End of loading animation from https://projects.lukehaas.me/css-loaders/ */

View File

@ -72,6 +72,7 @@ import PhraseBuilder from "./screens/PhraseBuilder";
import { searchAllInflections } from "./lib/search-all-inflections";
import { addToWordlist } from "./lib/wordlist-database";
import ScriptToPhonetics from "./screens/ScriptToPhonetics";
import { Modal, Button } from "react-bootstrap";
// to allow Moustrap key combos even when input fields are in focus
Mousetrap.prototype.stopCallback = function () {
@ -107,6 +108,7 @@ class App extends Component<RouteComponentProps, State> {
this.state = {
dictionaryStatus: "loading",
dictionaryInfo: undefined,
showModal: false,
// TODO: Choose between the saved options and the options in the saved user
options: savedOptions
? savedOptions
@ -146,6 +148,8 @@ class App extends Component<RouteComponentProps, State> {
this.handleRefreshReviewTasks = this.handleRefreshReviewTasks.bind(this);
this.handleDictionaryUpdate = this.handleDictionaryUpdate.bind(this);
this.handleInflectionSearch = this.handleInflectionSearch.bind(this);
this.handleShowModal = this.handleShowModal.bind(this);
this.handleCloseModal = this.handleCloseModal.bind(this);
}
public componentDidMount() {
@ -583,6 +587,14 @@ class App extends Component<RouteComponentProps, State> {
});
}
private handleCloseModal() {
this.setState({ showModal: false });
}
private handleShowModal() {
this.setState({ showModal: true });
}
render() {
return (
<div
@ -641,7 +653,7 @@ class App extends Component<RouteComponentProps, State> {
>
<div className="my-4">New words this month</div>
</Link>
<div className="mt-4 pt-3">
<div className="my-4 pt-3">
<Link
to="/phrase-builder"
className="plain-link h5 font-weight-light"
@ -656,6 +668,12 @@ class App extends Component<RouteComponentProps, State> {
Grammar
</a>
</div>
<button
onClick={this.handleShowModal}
className="mt-2 btn btn-lg btn-secondary"
>
New Phonetics for ی's!! 👀
</button>
</div>
</Route>
<Route path="/about">
@ -816,6 +834,87 @@ class App extends Component<RouteComponentProps, State> {
/>
)}
</footer>
<Modal
show={this.state.showModal}
onHide={this.handleCloseModal}
centered
>
<Modal.Header closeButton>
<Modal.Title>Phonetics Update! 📰</Modal.Title>
</Modal.Header>
<Modal.Body>
<p>
The phonetics for{" "}
<span style={{ backgroundColor: "rgba(255,255,0,0.4)" }}>
two of the five ی's have been updated
</span>{" "}
to something much more logical and helpful for pronunciation.
</p>
<h5>Pure Vowels (mouth stays still)</h5>
<table className="table">
<thead>
<tr>
<th scope="col">Letter</th>
<th scope="col">Phonetics</th>
<th scope="col">Sound</th>
</tr>
</thead>
<tbody>
<tr>
<td>ي</td>
<td>ee</td>
<td>long "ee" like "bee"</td>
</tr>
<tr>
<td>ې</td>
<td>e</td>
<td>
<div>
like "ee" but <em>with a slightly more open mouth</em>
</div>
<div className="small">
This is a special vowel <em>not found in English</em>
</div>
</td>
</tr>
</tbody>
</table>
<h5>Dipthongs (pure vowel + y)</h5>
<table className="table">
<thead>
<tr>
<th scope="col">Letter</th>
<th scope="col">Phonetics</th>
<th scope="col">Sound</th>
</tr>
</thead>
<tbody>
<tr style={{ backgroundColor: "rgba(255,255,0,0.4)" }}>
<td>ی</td>
<td>ay</td>
<td>short 'a' + y</td>
</tr>
<tr>
<td>ۍ</td>
<td>uy</td>
<td>'u' shwa (ə) + y</td>
</tr>
<tr style={{ backgroundColor: "rgba(255,255,0,0.4)" }}>
<td>ئ</td>
<td>ey</td>
<td>
<div>'e' (ې) + y</div>
</td>
</tr>
</tbody>
</table>
</Modal.Body>
<Modal.Footer>
<Button variant="secondary" onClick={this.handleCloseModal}>
Close
</Button>
</Modal.Footer>
</Modal>
</div>
);
}

View File

@ -10,10 +10,10 @@ import { DictionaryDb } from "./dictionary-core";
import sanitizePashto from "./sanitize-pashto";
import fillerWords from "./filler-words";
import {
Types as T,
simplifyPhonetics,
typePredicates as tp,
revertSpelling,
Types as T,
simplifyPhonetics,
typePredicates as tp,
revertSpelling,
} from "@lingdocs/ps-react";
import { isPashtoScript } from "./is-pashto";
import { fuzzifyPashto } from "./fuzzify-pashto/fuzzify-pashto";
@ -21,14 +21,11 @@ import { fuzzifyPashto } from "./fuzzify-pashto/fuzzify-pashto";
import relevancy from "relevancy";
import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy";
import { getTextOptions } from "./get-text-options";
import {
DictionaryAPI,
State,
} from "../types/dictionary-types";
import { DictionaryAPI, State } from "../types/dictionary-types";
// const dictionaryBaseUrl = "https://storage.googleapis.com/lingdocs/";
const dictionaryUrl = `https://storage.googleapis.com/lingdocs/dict`;
const dictionaryInfoUrl = `https://storage.googleapis.com/lingdocs/dict-info`;
const dictionaryUrl = `https://storage.googleapis.com/lingdocs/dictionary`;
const dictionaryInfoUrl = `https://storage.googleapis.com/lingdocs/dictionary-info`;
const dictionaryInfoLocalStorageKey = "dictionaryInfo5";
const dictionaryCollectionName = "dictionary3";
@ -37,17 +34,19 @@ export const pageSize = 35;
const relevancySorter = new relevancy.Sorter();
const db = indexedDB.open('inPrivate');
const db = indexedDB.open("inPrivate");
db.onerror = (e) => {
console.error(e);
alert("Your browser does not have IndexedDB enabled. This might be because you are using private mode. Please use regular mode or enable IndexedDB to use this dictionary");
}
console.error(e);
alert(
"Your browser does not have IndexedDB enabled. This might be because you are using private mode. Please use regular mode or enable IndexedDB to use this dictionary"
);
};
const dictDb = new DictionaryDb({
url: dictionaryUrl,
infoUrl: dictionaryInfoUrl,
collectionName: dictionaryCollectionName,
infoLocalStorageKey: dictionaryInfoLocalStorageKey,
url: dictionaryUrl,
infoUrl: dictionaryInfoUrl,
collectionName: dictionaryCollectionName,
infoLocalStorageKey: dictionaryInfoLocalStorageKey,
});
function makeSearchStringSafe(searchString: string): string {
@ -57,9 +56,10 @@ function makeSearchStringSafe(searchString: string): string {
function fuzzifyEnglish(input: string): string {
const safeInput = input.trim().replace(/[#-.]|[[-^]|[?|{}]/g, "");
// TODO: Could do: cover british/american things like offense / offence
return safeInput.replace("to ", "")
.replace(/our/g, "ou?r")
.replace(/or/g, "ou?r");
return safeInput
.replace("to ", "")
.replace(/our/g, "ou?r")
.replace(/or/g, "ou?r");
}
function chunkOutArray<T>(arr: T[], chunkSize: number): T[][] {
@ -73,415 +73,465 @@ function chunkOutArray<T>(arr: T[], chunkSize: number): T[][] {
function getExpForInflections(input: string, index: "p" | "f"): RegExp {
let base = input;
if (index === "f") {
if (["e", "é", "a", "á", "ó", "o"].includes(input.slice(-1))) {
base = input.slice(0, -1);
}
return new RegExp(`\\b${base}`);
if (["e", "é", "a", "á", "ó", "o"].includes(input.slice(-1))) {
base = input.slice(0, -1);
}
return new RegExp(`\\b${base}`);
}
if (["ه", "ې", "و"].includes(input.slice(-1))) {
base = input.slice(0, -1);
base = input.slice(0, -1);
}
return new RegExp(`^${base}[و|ې|ه]?`);
}
function tsOneMonthBack(): number {
// https://stackoverflow.com/a/24049314/8620945
const d = new Date();
const m = d.getMonth();
d.setMonth(d.getMonth() - 1);
// If still in same month, set date to last day of
// previous month
if (d.getMonth() === m) d.setDate(0);
d.setHours(0, 0, 0);
d.setMilliseconds(0);
// Get the time value in milliseconds and convert to seconds
return d.getTime();
// https://stackoverflow.com/a/24049314/8620945
const d = new Date();
const m = d.getMonth();
d.setMonth(d.getMonth() - 1);
// If still in same month, set date to last day of
// previous month
if (d.getMonth() === m) d.setDate(0);
d.setHours(0, 0, 0);
d.setMilliseconds(0);
// Get the time value in milliseconds and convert to seconds
return d.getTime();
}
function alphabeticalLookup({ searchString, page }: {
searchString: string,
page: number,
function alphabeticalLookup({
searchString,
page,
}: {
searchString: string;
page: number;
}): T.DictionaryEntry[] {
const r = new RegExp("^" + sanitizePashto(makeSearchStringSafe(searchString)));
const regexResults: T.DictionaryEntry[] = dictDb.collection.find({
$or: [
{p: { $regex: r }},
{g: { $regex: r }},
],
});
const indexNumbers = regexResults.map((mpd: any) => mpd.i);
// Find the first matching word occuring first in the Pashto Index
let firstIndexNumber = null;
if (indexNumbers.length) {
firstIndexNumber = Math.min(...indexNumbers);
}
// $gt query from that first occurance
if (firstIndexNumber !== null) {
return dictDb.collection.chain()
.find({ i: { $gt: firstIndexNumber - 1 }})
.simplesort("i")
.limit(page * pageSize)
.data();
}
return [];
const r = new RegExp(
"^" + sanitizePashto(makeSearchStringSafe(searchString))
);
const regexResults: T.DictionaryEntry[] = dictDb.collection.find({
$or: [{ p: { $regex: r } }, { g: { $regex: r } }],
});
const indexNumbers = regexResults.map((mpd: any) => mpd.i);
// Find the first matching word occuring first in the Pashto Index
let firstIndexNumber = null;
if (indexNumbers.length) {
firstIndexNumber = Math.min(...indexNumbers);
}
// $gt query from that first occurance
if (firstIndexNumber !== null) {
return dictDb.collection
.chain()
.find({ i: { $gt: firstIndexNumber - 1 } })
.simplesort("i")
.limit(page * pageSize)
.data();
}
return [];
}
function fuzzyLookup<S extends T.DictionaryEntry>({ searchString, language, page, tpFilter }: {
searchString: string,
language: "Pashto" | "English" | "Both",
page: number,
tpFilter?: (e: T.DictionaryEntry) => e is S,
function fuzzyLookup<S extends T.DictionaryEntry>({
searchString,
language,
page,
tpFilter,
}: {
searchString: string;
language: "Pashto" | "English" | "Both";
page: number;
tpFilter?: (e: T.DictionaryEntry) => e is S;
}): S[] {
// TODO: Implement working with both
if (Number(searchString)) {
const entry = dictionary.findOneByTs(Number(searchString));
// @ts-ignore;
return entry ? [entry] : [] as S[];
}
return language === "Pashto"
? pashtoFuzzyLookup({ searchString, page, tpFilter })
: englishLookup({ searchString, page, tpFilter })
// TODO: Implement working with both
if (Number(searchString)) {
const entry = dictionary.findOneByTs(Number(searchString));
// @ts-ignore;
return entry ? [entry] : ([] as S[]);
}
return language === "Pashto"
? pashtoFuzzyLookup({ searchString, page, tpFilter })
: englishLookup({ searchString, page, tpFilter });
}
function englishLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilter }: {
searchString: string,
page: number,
tpFilter?: (e: T.DictionaryEntry) => e is S,
function englishLookup<S extends T.DictionaryEntry>({
searchString,
page,
tpFilter,
}: {
searchString: string;
page: number;
tpFilter?: (e: T.DictionaryEntry) => e is S;
}): S[] {
function sortByR(a: T.DictionaryEntry, b: T.DictionaryEntry) {
return (b.r || 3) - (a.r || 3);
};
let resultsGiven: number[] = [];
// get exact results
const exactQuery = {
e: {
$regex: new RegExp(`^${fuzzifyEnglish(searchString)}$`, "i"),
},
};
const exactResultsLimit = pageSize < 10 ? Math.floor(pageSize / 2) : 10;
const exactResults = dictDb.collection.chain()
.find(exactQuery)
.limit(exactResultsLimit)
.simplesort("i")
.data();
exactResults.sort(sortByR);
resultsGiven = exactResults.map((mpd: any) => mpd.$loki);
// get results with full word match at beginning of string
const startingQuery = {
e: {
$regex: new RegExp(`^${fuzzifyEnglish(searchString)}\\b`, "i"),
},
$loki: { $nin: resultsGiven },
};
const startingResultsLimit = (pageSize * page) - resultsGiven.length;
const startingResults = dictDb.collection.chain()
.find(startingQuery)
.limit(startingResultsLimit)
.simplesort("i")
.data();
startingResults.sort(sortByR);
resultsGiven = [...resultsGiven, ...startingResults.map((mpd: any) => mpd.$loki)];
// get results with full word match anywhere
const fullWordQuery = {
e: {
$regex: new RegExp(`\\b${fuzzifyEnglish(searchString)}\\b`, "i"),
},
$loki: { $nin: resultsGiven },
};
const fullWordResultsLimit = (pageSize * page) - resultsGiven.length;
const fullWordResults = dictDb.collection.chain()
.find(fullWordQuery)
.limit(fullWordResultsLimit)
.simplesort("i")
.data();
fullWordResults.sort(sortByR);
resultsGiven = [...resultsGiven, ...fullWordResults.map((mpd: any) => mpd.$loki)]
// get results with partial match anywhere
const partialMatchQuery = {
e: {
$regex: new RegExp(`${fuzzifyEnglish(searchString)}`, "i"),
},
$loki: { $nin: resultsGiven },
};
const partialMatchLimit = (pageSize * page) - resultsGiven.length;
const partialMatchResults = dictDb.collection.chain()
.where(tpFilter ? tpFilter : () => true)
.find(partialMatchQuery)
.limit(partialMatchLimit)
.simplesort("i")
.data();
partialMatchResults.sort(sortByR);
const results = [
...exactResults,
...startingResults,
...fullWordResults,
...partialMatchResults,
];
if (tpFilter) {
return results.filter(tpFilter);
}
return results;
function sortByR(a: T.DictionaryEntry, b: T.DictionaryEntry) {
return (b.r || 3) - (a.r || 3);
}
let resultsGiven: number[] = [];
// get exact results
const exactQuery = {
e: {
$regex: new RegExp(`^${fuzzifyEnglish(searchString)}$`, "i"),
},
};
const exactResultsLimit = pageSize < 10 ? Math.floor(pageSize / 2) : 10;
const exactResults = dictDb.collection
.chain()
.find(exactQuery)
.limit(exactResultsLimit)
.simplesort("i")
.data();
exactResults.sort(sortByR);
resultsGiven = exactResults.map((mpd: any) => mpd.$loki);
// get results with full word match at beginning of string
const startingQuery = {
e: {
$regex: new RegExp(`^${fuzzifyEnglish(searchString)}\\b`, "i"),
},
$loki: { $nin: resultsGiven },
};
const startingResultsLimit = pageSize * page - resultsGiven.length;
const startingResults = dictDb.collection
.chain()
.find(startingQuery)
.limit(startingResultsLimit)
.simplesort("i")
.data();
startingResults.sort(sortByR);
resultsGiven = [
...resultsGiven,
...startingResults.map((mpd: any) => mpd.$loki),
];
// get results with full word match anywhere
const fullWordQuery = {
e: {
$regex: new RegExp(`\\b${fuzzifyEnglish(searchString)}\\b`, "i"),
},
$loki: { $nin: resultsGiven },
};
const fullWordResultsLimit = pageSize * page - resultsGiven.length;
const fullWordResults = dictDb.collection
.chain()
.find(fullWordQuery)
.limit(fullWordResultsLimit)
.simplesort("i")
.data();
fullWordResults.sort(sortByR);
resultsGiven = [
...resultsGiven,
...fullWordResults.map((mpd: any) => mpd.$loki),
];
// get results with partial match anywhere
const partialMatchQuery = {
e: {
$regex: new RegExp(`${fuzzifyEnglish(searchString)}`, "i"),
},
$loki: { $nin: resultsGiven },
};
const partialMatchLimit = pageSize * page - resultsGiven.length;
const partialMatchResults = dictDb.collection
.chain()
.where(tpFilter ? tpFilter : () => true)
.find(partialMatchQuery)
.limit(partialMatchLimit)
.simplesort("i")
.data();
partialMatchResults.sort(sortByR);
const results = [
...exactResults,
...startingResults,
...fullWordResults,
...partialMatchResults,
];
if (tpFilter) {
return results.filter(tpFilter);
}
return results;
}
function pashtoExactLookup(searchString: string): T.DictionaryEntry[] {
const index = isPashtoScript(searchString) ? "p" : "g";
const search = index === "g" ? simplifyPhonetics(searchString) : searchString;
return dictDb.collection.find({
[index]: search,
});
const index = isPashtoScript(searchString) ? "p" : "g";
const search = index === "g" ? simplifyPhonetics(searchString) : searchString;
return dictDb.collection.find({
[index]: search,
});
}
function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilter }: {
searchString: string,
page: number,
tpFilter?: (e: T.DictionaryEntry) => e is S,
function pashtoFuzzyLookup<S extends T.DictionaryEntry>({
searchString,
page,
tpFilter,
}: {
searchString: string;
page: number;
tpFilter?: (e: T.DictionaryEntry) => e is S;
}): S[] {
let resultsGiven: number[] = [];
// Check if it's in Pashto or Latin script
const searchStringToUse = sanitizePashto(makeSearchStringSafe(searchString));
const index = isPashtoScript(searchStringToUse) ? "p" : "g";
const search = index === "g" ? simplifyPhonetics(searchStringToUse) : searchStringToUse;
const infIndex = index === "p" ? "p" : "f";
// Get exact matches
const exactExpression = new RegExp("^" + search);
const weeBitFuzzy = new RegExp("^" + makeAWeeBitFuzzy(search, infIndex));
// prepare exact expression for special matching
// TODO: This is all a bit messy and could be done without regex
const expressionForInflections = getExpForInflections(search, infIndex);
const arabicPluralIndex = `ap${infIndex}`;
const pashtoPluralIndex = `pp${infIndex}`;
const presentStemIndex = `ps${infIndex}`;
const firstInfIndex = `infa${infIndex}`;
const secondInfIndex = `infb${infIndex}`;
const pashtoExactResultFields = [
{
[index]: { $regex: exactExpression },
}, {
[arabicPluralIndex]: { $regex: weeBitFuzzy },
}, {
[pashtoPluralIndex]: { $regex: weeBitFuzzy },
}, {
[presentStemIndex]: { $regex: weeBitFuzzy },
},
{
[firstInfIndex]: { $regex: expressionForInflections },
},
{
[secondInfIndex]: { $regex: expressionForInflections },
},
];
const exactQuery = { $or: [...pashtoExactResultFields] };
// just special incase using really small limits
// multiple times scrolling / chunking / sorting might get a bit messed up if using a limit of less than 10
const exactResultsLimit = pageSize < 10 ? Math.floor(pageSize / 2) : 10;
const exactResults = dictDb.collection.chain()
.find(exactQuery)
.limit(exactResultsLimit)
.simplesort("i")
.data();
resultsGiven = exactResults.map((mpd: any) => mpd.$loki);
// Get slightly fuzzy matches
const slightlyFuzzy = new RegExp(makeAWeeBitFuzzy(search, infIndex), "i");
const slightlyFuzzyQuery = {
[index]: { $regex: slightlyFuzzy },
$loki: { $nin: resultsGiven },
};
const slightlyFuzzyResultsLimit = (pageSize * page) - resultsGiven.length;
const slightlyFuzzyResults = dictDb.collection.chain()
.find(slightlyFuzzyQuery)
.limit(slightlyFuzzyResultsLimit)
.data();
resultsGiven.push(...slightlyFuzzyResults.map((mpd: any) => mpd.$loki));
// Get fuzzy matches
const pashtoRegExLogic = fuzzifyPashto(search, {
script: index === "p" ? "Pashto" : "Latin",
simplifiedLatin: index === "g",
allowSpacesInWords: true,
matchStart: "word",
});
const fuzzyPashtoExperssion = new RegExp(pashtoRegExLogic);
const pashtoFuzzyQuery = [
{
[index]: { $regex: fuzzyPashtoExperssion },
}, { // TODO: Issue, this fuzzy doesn't line up well because it's not the simplified phonetics - still has 's etc
[arabicPluralIndex]: { $regex: fuzzyPashtoExperssion },
}, {
[presentStemIndex]: { $regex: fuzzyPashtoExperssion },
}
];
// fuzzy results should be allowed to take up the rest of the limit (not used up by exact results)
const fuzzyResultsLimit = (pageSize * page) - resultsGiven.length;
// don't get these fuzzy results if searching in only English
const fuzzyQuery = {
$or: pashtoFuzzyQuery,
$loki: { $nin: resultsGiven },
};
const fuzzyResults = dictDb.collection.chain()
.find(fuzzyQuery)
.limit(fuzzyResultsLimit)
.data();
const results = tpFilter
? [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults].filter(tpFilter)
: [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults];
// sort out each chunk (based on limit used multiple times by infinite scroll)
// so that when infinite scrolling, it doesn't re-sort the previous chunks given
const closeResultsLength = exactResults.length + slightlyFuzzyResults.length;
const chunksToSort = chunkOutArray(results, pageSize);
return chunksToSort
.reduce((acc, cur, i) => ((i === 0)
? [
...sortByRelevancy(cur.slice(0, closeResultsLength), search, index),
...sortByRelevancy(cur.slice(closeResultsLength), search, index),
]
: [
...acc,
...sortByRelevancy(cur, search, index),
]), []);
let resultsGiven: number[] = [];
// Check if it's in Pashto or Latin script
const searchStringToUse = sanitizePashto(makeSearchStringSafe(searchString));
const index = isPashtoScript(searchStringToUse) ? "p" : "g";
const search =
index === "g" ? simplifyPhonetics(searchStringToUse) : searchStringToUse;
const infIndex = index === "p" ? "p" : "f";
// Get exact matches
const exactExpression = new RegExp("^" + search);
const weeBitFuzzy = new RegExp("^" + makeAWeeBitFuzzy(search, infIndex));
// prepare exact expression for special matching
// TODO: This is all a bit messy and could be done without regex
const expressionForInflections = getExpForInflections(search, infIndex);
const arabicPluralIndex = `ap${infIndex}`;
const pashtoPluralIndex = `pp${infIndex}`;
const presentStemIndex = `ps${infIndex}`;
const firstInfIndex = `infa${infIndex}`;
const secondInfIndex = `infb${infIndex}`;
const pashtoExactResultFields = [
{
[index]: { $regex: exactExpression },
},
{
[arabicPluralIndex]: { $regex: weeBitFuzzy },
},
{
[pashtoPluralIndex]: { $regex: weeBitFuzzy },
},
{
[presentStemIndex]: { $regex: weeBitFuzzy },
},
{
[firstInfIndex]: { $regex: expressionForInflections },
},
{
[secondInfIndex]: { $regex: expressionForInflections },
},
];
const exactQuery = { $or: [...pashtoExactResultFields] };
// just special incase using really small limits
// multiple times scrolling / chunking / sorting might get a bit messed up if using a limit of less than 10
const exactResultsLimit = pageSize < 10 ? Math.floor(pageSize / 2) : 10;
const exactResults = dictDb.collection
.chain()
.find(exactQuery)
.limit(exactResultsLimit)
.simplesort("i")
.data();
resultsGiven = exactResults.map((mpd: any) => mpd.$loki);
// Get slightly fuzzy matches
const slightlyFuzzy = new RegExp(makeAWeeBitFuzzy(search, infIndex), "i");
const slightlyFuzzyQuery = {
[index]: { $regex: slightlyFuzzy },
$loki: { $nin: resultsGiven },
};
const slightlyFuzzyResultsLimit = pageSize * page - resultsGiven.length;
const slightlyFuzzyResults = dictDb.collection
.chain()
.find(slightlyFuzzyQuery)
.limit(slightlyFuzzyResultsLimit)
.data();
resultsGiven.push(...slightlyFuzzyResults.map((mpd: any) => mpd.$loki));
// Get fuzzy matches
const pashtoRegExLogic = fuzzifyPashto(search, {
script: index === "p" ? "Pashto" : "Latin",
simplifiedLatin: index === "g",
allowSpacesInWords: true,
matchStart: "word",
});
const fuzzyPashtoExperssion = new RegExp(pashtoRegExLogic);
const pashtoFuzzyQuery = [
{
[index]: { $regex: fuzzyPashtoExperssion },
},
{
// TODO: Issue, this fuzzy doesn't line up well because it's not the simplified phonetics - still has 's etc
[arabicPluralIndex]: { $regex: fuzzyPashtoExperssion },
},
{
[presentStemIndex]: { $regex: fuzzyPashtoExperssion },
},
];
// fuzzy results should be allowed to take up the rest of the limit (not used up by exact results)
const fuzzyResultsLimit = pageSize * page - resultsGiven.length;
// don't get these fuzzy results if searching in only English
const fuzzyQuery = {
$or: pashtoFuzzyQuery,
$loki: { $nin: resultsGiven },
};
const fuzzyResults = dictDb.collection
.chain()
.find(fuzzyQuery)
.limit(fuzzyResultsLimit)
.data();
const results = tpFilter
? [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults].filter(
tpFilter
)
: [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults];
// sort out each chunk (based on limit used multiple times by infinite scroll)
// so that when infinite scrolling, it doesn't re-sort the previous chunks given
const closeResultsLength = exactResults.length + slightlyFuzzyResults.length;
const chunksToSort = chunkOutArray(results, pageSize);
return chunksToSort.reduce(
(acc, cur, i) =>
i === 0
? [
...sortByRelevancy(cur.slice(0, closeResultsLength), search, index),
...sortByRelevancy(cur.slice(closeResultsLength), search, index),
]
: [...acc, ...sortByRelevancy(cur, search, index)],
[]
);
}
function sortByRelevancy<T>(arr: T[], searchI: string, index: string): T[] {
return relevancySorter.sort(arr, searchI, (obj: any, calc: any) => calc(obj[index]));
return relevancySorter.sort(arr, searchI, (obj: any, calc: any) =>
calc(obj[index])
);
}
function relatedWordsLookup(word: T.DictionaryEntry): T.DictionaryEntry[] {
const wordArray = word.e.trim()
.replace(/\?/g, "")
.replace(/( |,|\.|!|;|\(|\))/g, " ")
.split(/ +/)
.filter((w: string) => !fillerWords.includes(w));
let results: T.DictionaryEntry[] = [];
wordArray.forEach((w: string) => {
let r: RegExp;
try {
r = new RegExp(`\\b${w}\\b`, "i");
const relatedToWord = dictDb.collection.chain()
.find({
// don't include the original word
ts: { $ne: word.ts },
e: { $regex: r },
})
.limit(5)
.data();
results = [...results, ...relatedToWord];
// In case there's some weird regex fail
} catch (error) {
/* istanbul ignore next */
console.error(error);
}
});
// Remove duplicate items - https://stackoverflow.com/questions/40811451/remove-duplicates-from-a-array-of-objects
results = results.filter(function(a) {
// @ts-ignore
return !this[a.$loki] && (this[a.$loki] = true);
}, Object.create(null));
return(results);
const wordArray = word.e
.trim()
.replace(/\?/g, "")
.replace(/( |,|\.|!|;|\(|\))/g, " ")
.split(/ +/)
.filter((w: string) => !fillerWords.includes(w));
let results: T.DictionaryEntry[] = [];
wordArray.forEach((w: string) => {
let r: RegExp;
try {
r = new RegExp(`\\b${w}\\b`, "i");
const relatedToWord = dictDb.collection
.chain()
.find({
// don't include the original word
ts: { $ne: word.ts },
e: { $regex: r },
})
.limit(5)
.data();
results = [...results, ...relatedToWord];
// In case there's some weird regex fail
} catch (error) {
/* istanbul ignore next */
console.error(error);
}
});
// Remove duplicate items - https://stackoverflow.com/questions/40811451/remove-duplicates-from-a-array-of-objects
results = results.filter(function (a) {
// @ts-ignore
return !this[a.$loki] && (this[a.$loki] = true);
}, Object.create(null));
return results;
}
export function allEntries() {
return dictDb.collection.find();
return dictDb.collection.find();
}
function makeLookupPortal<X extends T.DictionaryEntry>(tpFilter: (x: T.DictionaryEntry) => x is X): T.EntryLookupPortal<X> {
return {
search: (s: string) => fuzzyLookup({
searchString: s,
language: "Pashto",
page: 1,
tpFilter,
}),
getByTs: (ts: number) => {
const res = dictDb.findOneByTs(ts);
if (!res) return undefined;
return tpFilter(res) ? res : undefined;
},
}
function makeLookupPortal<X extends T.DictionaryEntry>(
tpFilter: (x: T.DictionaryEntry) => x is X
): T.EntryLookupPortal<X> {
return {
search: (s: string) =>
fuzzyLookup({
searchString: s,
language: "Pashto",
page: 1,
tpFilter,
}),
getByTs: (ts: number) => {
const res = dictDb.findOneByTs(ts);
if (!res) return undefined;
return tpFilter(res) ? res : undefined;
},
};
}
function makeVerbLookupPortal(): T.EntryLookupPortal<T.VerbEntry> {
return {
search: (s: string) => {
const vEntries = fuzzyLookup({
searchString: s,
language: "Pashto",
page: 1,
tpFilter: tp.isVerbDictionaryEntry,
});
return vEntries.map((entry): T.VerbEntry => ({
entry,
complement: (entry.c?.includes("comp.") && entry.l)
? dictionary.findOneByTs(entry.l)
: undefined,
}));
},
getByTs: (ts: number): T.VerbEntry | undefined => {
const entry = dictDb.findOneByTs(ts);
if (!entry) return undefined;
if (!tp.isVerbDictionaryEntry(entry)) {
console.error("not valid verb entry");
return undefined;
}
const complement = (() => {
if (entry.c?.includes("comp") && entry.l) {
const comp = dictDb.findOneByTs(entry.l);
if (!comp) {
console.error("complement not found for", entry);
}
return comp;
} else {
return undefined;
}
})();
return { entry, complement };
},
}
return {
search: (s: string) => {
const vEntries = fuzzyLookup({
searchString: s,
language: "Pashto",
page: 1,
tpFilter: tp.isVerbDictionaryEntry,
});
return vEntries.map(
(entry): T.VerbEntry => ({
entry,
complement:
entry.c?.includes("comp.") && entry.l
? dictionary.findOneByTs(entry.l)
: undefined,
})
);
},
getByTs: (ts: number): T.VerbEntry | undefined => {
const entry = dictDb.findOneByTs(ts);
if (!entry) return undefined;
if (!tp.isVerbDictionaryEntry(entry)) {
console.error("not valid verb entry");
return undefined;
}
const complement = (() => {
if (entry.c?.includes("comp") && entry.l) {
const comp = dictDb.findOneByTs(entry.l);
if (!comp) {
console.error("complement not found for", entry);
}
return comp;
} else {
return undefined;
}
})();
return { entry, complement };
},
};
}
export const entryFeeder: T.EntryFeeder = {
nouns: makeLookupPortal(tp.isNounEntry),
verbs: makeVerbLookupPortal(),
adjectives: makeLookupPortal(tp.isAdjectiveEntry),
locativeAdverbs: makeLookupPortal(tp.isLocativeAdverbEntry),
adverbs: makeLookupPortal(tp.isAdverbEntry),
}
nouns: makeLookupPortal(tp.isNounEntry),
verbs: makeVerbLookupPortal(),
adjectives: makeLookupPortal(tp.isAdjectiveEntry),
locativeAdverbs: makeLookupPortal(tp.isLocativeAdverbEntry),
adverbs: makeLookupPortal(tp.isAdverbEntry),
};
export const dictionary: DictionaryAPI = {
// NOTE: For some reason that I do not understand you have to pass the functions from the
// dictionary core class in like this... ie. initialize: dictDb.initialize will mess up the this usage
// in the dictionary core class
initialize: async () => await dictDb.initialize(),
update: async (notifyUpdateComing: () => void) => await dictDb.updateDictionary(notifyUpdateComing),
search: function(state: State): T.DictionaryEntry[] {
const searchString = revertSpelling(
state.searchValue,
getTextOptions(state).spelling,
);
if (state.searchValue === "") {
return [];
}
return (state.options.searchType === "alphabetical" && state.options.language === "Pashto")
? alphabeticalLookup({
searchString,
page: state.page
})
: fuzzyLookup({
searchString,
language: state.options.language,
page: state.page,
});
},
exactPashtoSearch: pashtoExactLookup,
getNewWordsThisMonth: function(): T.DictionaryEntry[] {
return dictDb.collection.chain()
.find({ ts: { $gt: tsOneMonthBack() }})
.simplesort("ts")
.data()
.reverse();
},
findOneByTs: (ts: number) => dictDb.findOneByTs(ts),
findRelatedEntries: function(entry: T.DictionaryEntry): T.DictionaryEntry[] {
return relatedWordsLookup(entry);
},
}
// NOTE: For some reason that I do not understand you have to pass the functions from the
// dictionary core class in like this... ie. initialize: dictDb.initialize will mess up the this usage
// in the dictionary core class
initialize: async () => await dictDb.initialize(),
update: async (notifyUpdateComing: () => void) =>
await dictDb.updateDictionary(notifyUpdateComing),
search: function (state: State): T.DictionaryEntry[] {
const searchString = revertSpelling(
state.searchValue,
getTextOptions(state).spelling
);
if (state.searchValue === "") {
return [];
}
return state.options.searchType === "alphabetical" &&
state.options.language === "Pashto"
? alphabeticalLookup({
searchString,
page: state.page,
})
: fuzzyLookup({
searchString,
language: state.options.language,
page: state.page,
});
},
exactPashtoSearch: pashtoExactLookup,
getNewWordsThisMonth: function (): T.DictionaryEntry[] {
return dictDb.collection
.chain()
.find({ ts: { $gt: tsOneMonthBack() } })
.simplesort("ts")
.data()
.reverse();
},
findOneByTs: (ts: number) => dictDb.findOneByTs(ts),
findRelatedEntries: function (entry: T.DictionaryEntry): T.DictionaryEntry[] {
return relatedWordsLookup(entry);
},
};

View File

@ -10,186 +10,186 @@ import { fuzzifyPashto } from "./fuzzify-pashto";
type match = [string, string];
interface IDefaultInfoBlock {
matches: match[];
nonMatches: match[];
matches: match[];
nonMatches: match[];
}
// Default test pairs for Pashto-script fuzzy matching.
// NOTE(review): the diff-merge concatenated the old and new bodies, producing
// duplicate `matches`/`nonMatches` keys; the two halves were content-identical,
// so a single copy is kept here.
const defaultInfo: IDefaultInfoBlock = {
  matches: [
    ["اوسېدل", "وسېدل"],
    ["انبیه", "امبیه"],
    ["سرک", "صړق"],
    ["انطذاړ", "انتظار"],
    ["مالوم", "معلوم"],
    ["معلوم", "مالوم"],
    ["قېصا", "کيسه"],
    ["کور", "قوړ"],
    ["گرزيدل", "ګرځېدل"],
    ["سنگہ", "څنګه"],
    ["کار", "قهر"],
    ["زبا", "ژبه"],
    ["سڑے", "سړی"],
    ["استمال", "استعمال"],
    ["اعمل", "عمل"],
    ["جنگل", "ځنګل"],
    ["ځال", "جال"],
    ["زنگل", "ځنګل"],
    ["جرل", "ژړل"],
    ["فرمائيل", "فرمايل"],
    ["مرمنه", "مېرمنه"],
    // using هٔ as two characters
    ["وارېدهٔ", "وارېده"],
    // using as one character
    ["واريدۀ", "وارېده"],
    ["زوی", "زوئے"],
    ["ئے", "يې"],
    // optional ا s in middle
    ["توقف", "تواقف"],
    // option ي s in middle
    ["مناظره", "مناظيره"],
    ["بلکل", "بالکل"],
    ["مهرب", "محراب"],
    ["مسول", "مسوول"],
    ["ډارونکي", "ډاروونکي"],
    ["ډانګره", "ډانګوره"],
    ["هنداره", "هینداره"],
    ["متأصفانه", "متاسفانه"],
    ["وازف", "واظیف"],
    ["شوریٰ", "شورا"],
    ["ځنبېدل", "ځمبېدل"],
    // consonant swap // TODO: more??
    ["مچلوغزه", "مچلوزغه"],
    ["رکشه", "رشکه"],
    ["پښه", "ښپه"],
  ],
  nonMatches: [
    ["سرک", "ترک"],
    ["کار", "بېکاري"],
    // ا should not be optional in the beginning or end
    ["اړتیا", "اړتی"],
    ["ړتیا", "اړتیا"],
    // و should not be optional in the beginning or end
    ["ورور", "رور"],
  ],
};
// Default test pairs for Latin-script (phonetics) fuzzy matching.
// NOTE(review): the diff-merge concatenated the pre- and post-"new phonetics"
// bodies (duplicate keys); this keeps only the new half, which uses the
// ay/saRay spellings introduced by the phonetics update.
const defaultLatinInfo: IDefaultInfoBlock = {
  matches: [
    // TODO:
    ["anbiya", "ambiya"],
    ["lootfun", "lUtfan"],
    ["saray", "saRay"],
    ["senga", "tsanga"],
    ["daktur", "DakTar"],
    ["iteebar", "itibaar"],
    ["dzaal", "jaal"],
    ["bekaar", "bekáar"],
    ["bekár", "bekaar"],
    ["chaai", "cháai"],
    ["day", "daai"],
    ["dai", "day"],
    ["daktar", "Daktár"],
    ["sarái", "saRay"],
    ["beter", "bahtár"],
    ["doosti", "dostee"],
    ["dắraghlum", "deraghlum"], // using the ă along with a combining ́
    ["dar", "dăr"],
    ["der", "dăr"],
    ["dur", "dăr"],
    ["chee", "che"],
    ["dzooy", "zooy"],
    ["delta", "dalta"],
    ["koorbaani", "qUrbaanee"],
    ["jamaat", "jamaa'at"],
    ["taaroof", "ta'aarÚf"],
    ["xudza", "xúdza"],
    ["ishaak", "is`haaq"],
    ["lUtfun", "lootfan"],
    ["miraab", "mihraab"],
    ["taamul", "tahamul"],
    ["otsedul", "osedul"],
    ["ghaara", "ghaaRa"],
    ["maafiat", "maafiyat"],
    ["tasalUt", "tassalUt"],
  ],
  nonMatches: [
    ["kor", "por"],
    ["intizaar", "intizaam"],
    ["ishaat", "shaat"], // i should not be optional at the beginning
  ],
};
// Pairs that should match when Arabic-script diacritics are ignored.
// (De-duplicated: the diff-merge had each pair listed twice.)
const withDiacritics: match[] = [
  ["تتتت", "تِتّتّت"],
  ["بببب", "بّبّبَب"],
];
// Pairs exercising the Arabic tanween ending (ـاً) matching plain ن / ا endings.
// (De-duplicated: the diff-merge had each pair listed twice.)
const matchesWithAn: match[] = [
  ["حتمن", "حتماً"],
  ["لتفن", "لطفاً"],
  ["کاملا", "کاملاً"],
];
// Pashto-script pairs that should match only when allowSpacesInWords is on.
// (De-duplicated: the diff-merge had each pair listed twice.)
const matchesWithSpaces: match[] = [
  ["دپاره", "د پاره"],
  ["بېکار", "بې کار"],
  ["د پاره", "دپاره"],
  ["بې کار", "بېکار"],
  ["کار مند", "کارمند"],
  ["همنشین", "هم نشین"],
  ["بغل کشي", "بغلکشي"],
];
// Latin-script pairs that should match only when allowSpacesInWords is on.
// (De-duplicated: the diff-merge had each pair listed twice.)
const matchesWithSpacesLatin: match[] = [
  ["dupaara", "du paara"],
  ["bekaara", "be kaara"],
  ["du paara", "dupaara"],
  ["be kaara", "bekaara"],
  ["oreckbgqjxmroe", "or ec kb gq jxmr oe"],
  ["cc cc c", "ccccc"],
];
// Test pairs for the simplified (diacritic-free) Latin phonetics matching.
// NOTE(review): the diff-merge concatenated the pre- and post-"new phonetics"
// bodies (duplicate keys); this keeps only the new half, which uses the
// ay/saRay spellings introduced by the phonetics update.
const defaultSimpleLatinInfo: IDefaultInfoBlock = {
  matches: [
    // TODO:
    ["anbiya", "ambiya"],
    ["lootfun", "lUtfan"],
    ["saray", "saRay"],
    ["senga", "tsanga"],
    ["daktur", "DakTar"],
    ["iteebar", "itibaar"],
    ["dzaal", "jaal"],
    ["bekaar", "bekaar"],
    ["bekar", "bekaar"],
    ["chaai", "chaai"],
    ["day", "daai"],
    ["dai", "day"],
    ["daktar", "Daktar"],
    ["sarai", "saRay"],
    ["beter", "bahtar"],
    ["doosti", "dostee"],
    ["daraghlum", "deraghlum"], // using the ă along with a combining ́
    ["dar", "dar"],
    ["der", "dar"],
    ["dur", "dar"],
    ["chee", "che"],
    ["dzooy", "zooy"],
    ["delta", "dalta"],
    ["koorbaani", "qUrbaanee"],
    ["taaroof", "taaarUf"],
    ["xudza", "xudza"],
    ["ishaak", "ishaaq"],
    ["lUtfun", "lootfan"],
    ["miraab", "mihraab"],
    ["taamul", "tahamul"],
    ["otsedul", "osedul"],
    ["ghaara", "ghaaRa"],
  ],
  nonMatches: [
    ["kor", "por"],
    ["intizaar", "intizaam"],
    ["ishaat", "shaat"], // i should not be optional at the beginning
  ],
};
interface ITestOptions {
@ -200,267 +200,301 @@ interface ITestOptions {
}
// Every option configuration to exercise, each with its own match /
// non-match expectations. `viceVersaMatches: true` means each pair is also
// tested in the reverse direction.
// NOTE(review): the diff-merge concatenated the old and new array bodies;
// this keeps one copy. The `matchStart: "string"` entry appeared twice in
// BOTH halves of the merge, so that duplicate is original and is preserved.
const optionsPossibilities: ITestOptions[] = [
  {
    options: {}, // default
    ...defaultInfo,
    viceVersaMatches: true,
  },
  {
    options: { script: "Latin" },
    ...defaultLatinInfo,
    viceVersaMatches: true,
  },
  {
    options: { matchStart: "word" }, // same as default
    ...defaultInfo,
    viceVersaMatches: true,
  },
  {
    options: { script: "Latin", simplifiedLatin: true },
    ...defaultSimpleLatinInfo,
    viceVersaMatches: true,
  },
  {
    matches: [...matchesWithSpaces],
    nonMatches: [],
    options: { allowSpacesInWords: true },
    viceVersaMatches: true,
  },
  {
    matches: [...matchesWithSpacesLatin],
    nonMatches: [],
    options: { allowSpacesInWords: true, script: "Latin" },
    viceVersaMatches: true,
  },
  {
    matches: [],
    nonMatches: matchesWithSpaces,
    options: { allowSpacesInWords: false },
  },
  {
    matches: [],
    nonMatches: matchesWithSpacesLatin,
    options: { allowSpacesInWords: false, script: "Latin" },
  },
  {
    matches: [["کار", "بېکاري"]],
    nonMatches: [["سرک", "بېترک"]],
    options: { matchStart: "anywhere" },
  },
  {
    matches: [
      ["کور", "کور"],
      ["سری", "سړی"],
    ],
    nonMatches: [
      ["سړي", "سړيتوب"],
      ["کور", "کورونه"],
    ],
    options: { matchWholeWordOnly: true },
    viceVersaMatches: true,
  },
  {
    matches: [
      ["کور", "کور ته ځم"],
      ["سری", "سړی دی"],
    ],
    nonMatches: [
      ["سړي", " سړيتوب"],
      ["کور", "خټين کورونه"],
    ],
    options: { matchStart: "string" },
  },
  {
    matches: [
      ["کور", "کور ته ځم"],
      ["سری", "سړی دی"],
    ],
    nonMatches: [
      ["سړي", " سړيتوب"],
      ["کور", "خټين کورونه"],
    ],
    options: { matchStart: "string" },
  },
];
// Punctuation, Arabic-script digits, and whitespace that must never be
// treated as part of a Pashto word by returnWholeWord matching.
// (The diff-merge had both the old single-line and new multi-line copies
// of this list concatenated; one copy is kept.)
const punctuationToExclude = [
  "،",
  "؟",
  "؛",
  "۔",
  "۲",
  "۹",
  "۰",
  "»",
  "«",
  "٫",
  "!",
  ".",
  "؋",
  "٪",
  "٬",
  "×",
  ")",
  "(",
  " ",
  "\t",
];
optionsPossibilities.forEach((o) => {
o.matches.forEach((m: any) => {
test(`${m[0]} should match ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], o.options);
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeTruthy();
});
});
if (o.viceVersaMatches === true) {
o.matches.forEach((m: any) => {
test(`${m[0]} should match ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], o.options);
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`${m[1]} should match ${m[0]}`, () => {
const re = fuzzifyPashto(m[1], o.options);
// eslint-disable-next-line
const result = m[0].match(new RegExp(re));
expect(result).toBeTruthy();
});
});
if (o.viceVersaMatches === true) {
o.matches.forEach((m: any) => {
test(`${m[1]} should match ${m[0]}`, () => {
const re = fuzzifyPashto(m[1], o.options);
// eslint-disable-next-line
const result = m[0].match(new RegExp(re));
expect(result).toBeTruthy();
});
});
}
o.nonMatches.forEach((m: any) => {
test(`${m[0]} should not match ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], o.options);
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeNull();
});
}
o.nonMatches.forEach((m: any) => {
test(`${m[0]} should not match ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], o.options);
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeNull();
});
});
});
matchesWithAn.forEach((m: any) => {
test(`matching ${m[0]} should work with ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], { matchWholeWordOnly: true });
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`matching ${m[1]} should work with ${m[0]}`, () => {
const re = fuzzifyPashto(m[1], { matchWholeWordOnly: true });
// eslint-disable-next-line
const result = m[0].match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`matching ${m[0]} should work with ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], { matchWholeWordOnly: true });
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`matching ${m[1]} should work with ${m[0]}`, () => {
const re = fuzzifyPashto(m[1], { matchWholeWordOnly: true });
// eslint-disable-next-line
const result = m[0].match(new RegExp(re));
expect(result).toBeTruthy();
});
});
withDiacritics.forEach((m: any) => {
test(`matich ${m[0]} should ignore the diactritics in ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], { ignoreDiacritics: true });
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`the diacritics should in ${m[0]} should be ignored when matching with ${m[1]}`, () => {
const re = fuzzifyPashto(m[1], { ignoreDiacritics: true });
// eslint-disable-next-line
const result = m[0].match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`matich ${m[0]} should ignore the diactritics in ${m[1]}`, () => {
const re = fuzzifyPashto(m[0], { ignoreDiacritics: true });
// eslint-disable-next-line
const result = m[1].match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`the diacritics should in ${m[0]} should be ignored when matching with ${m[1]}`, () => {
const re = fuzzifyPashto(m[1], { ignoreDiacritics: true });
// eslint-disable-next-line
const result = m[0].match(new RegExp(re));
expect(result).toBeTruthy();
});
});
test(`وs should be optional if entered in search string`, () => {
const re = fuzzifyPashto("لوتفن");
// eslint-disable-next-line
const result = "لطفاً".match(new RegExp(re));
expect(result).toBeTruthy();
const re = fuzzifyPashto("لوتفن");
// eslint-disable-next-line
const result = "لطفاً".match(new RegExp(re));
expect(result).toBeTruthy();
});
test(`matchWholeWordOnly should override matchStart = "anywhere"`, () => {
const re = fuzzifyPashto("کار", { matchWholeWordOnly: true, matchStart: "anywhere" });
// eslint-disable-next-line
const result = "کار کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toEqual(expect.not.arrayContaining(["بېکاره"]));
const re = fuzzifyPashto("کار", {
matchWholeWordOnly: true,
matchStart: "anywhere",
});
// eslint-disable-next-line
const result = "کار کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toEqual(expect.not.arrayContaining(["بېکاره"]));
});
test(`returnWholeWord should return the whole word`, () => {
// With Pashto Script
const re = fuzzifyPashto("کار", { returnWholeWord: true });
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه شه".match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toContain("کارونه");
// With Latin Script
const reLatin = fuzzifyPashto("kaar", {
returnWholeWord: true,
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa, bekaara ma gurdza.".match(new RegExp(reLatin));
expect(resultLatin).toHaveLength(1);
expect(resultLatin).toContain("kaaroona");
// With Pashto Script
const re = fuzzifyPashto("کار", { returnWholeWord: true });
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه شه".match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toContain("کارونه");
// With Latin Script
const reLatin = fuzzifyPashto("kaar", {
returnWholeWord: true,
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa, bekaara ma gurdza.".match(
new RegExp(reLatin)
);
expect(resultLatin).toHaveLength(1);
expect(resultLatin).toContain("kaaroona");
});
test(`returnWholeWord should return the whole word even when starting the matching in the middle`, () => {
// With Pashto Script
const re = fuzzifyPashto("کار", { returnWholeWord: true, matchStart: "anywhere" });
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه شه".match(new RegExp(re, "g"));
expect(result).toHaveLength(2);
expect(result).toContain(" بېکاره");
// With Pashto Script
const re = fuzzifyPashto("کار", {
returnWholeWord: true,
matchStart: "anywhere",
});
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه شه".match(new RegExp(re, "g"));
expect(result).toHaveLength(2);
expect(result).toContain(" بېکاره");
// With Latin Script
const reLatin = fuzzifyPashto("kaar", {
matchStart: "anywhere",
returnWholeWord: true,
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(new RegExp(reLatin, "g"));
expect(resultLatin).toHaveLength(2);
expect(resultLatin).toContain("bekaara");
// With Latin Script
const reLatin = fuzzifyPashto("kaar", {
matchStart: "anywhere",
returnWholeWord: true,
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(
new RegExp(reLatin, "g")
);
expect(resultLatin).toHaveLength(2);
expect(resultLatin).toContain("bekaara");
});
test(`returnWholeWord should should not return partial matches if matchWholeWordOnly is true`, () => {
// With Pashto Script
const re = fuzzifyPashto("کار", { returnWholeWord: true, matchStart: "anywhere", matchWholeWordOnly: true });
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toBeNull();
// With Pashto Script
const re = fuzzifyPashto("کار", {
returnWholeWord: true,
matchStart: "anywhere",
matchWholeWordOnly: true,
});
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toBeNull();
// With Latin Script
const reLatin = fuzzifyPashto("kaar", {
matchStart: "anywhere",
matchWholeWordOnly: true,
// With Latin Script
const reLatin = fuzzifyPashto("kaar", {
matchStart: "anywhere",
matchWholeWordOnly: true,
returnWholeWord: true,
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(
new RegExp(reLatin)
);
expect(resultLatin).toBeNull();
});
punctuationToExclude.forEach((m) => {
test(`${m} should not be considered part of a Pashto word`, () => {
const re = fuzzifyPashto("کور", {
returnWholeWord: true,
matchStart: "word",
});
// ISSUE: This should also work when the word is PRECEDED by the punctuation
// Need to work with a lookbehind equivalent
// eslint-disable-next-line
const result = `زمونږ کورونه${m} دي`.match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toContain(" کورونه");
// Matches will unfortunately have a space on the front of the word, issue with missing es2018 lookbehinds
});
});
punctuationToExclude.forEach((m) => {
// tslint:disable-next-line
test(`${m} should not be considered part of a Pashto word (front or back with es2018) - or should fail if using a non es2018 environment`, () => {
let result: any;
let failed = false;
// if environment is not es2018 with lookbehind support (like node 6, 8) this will fail
try {
const re = fuzzifyPashto("کور", {
returnWholeWord: true,
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(new RegExp(reLatin));
expect(resultLatin).toBeNull();
});
punctuationToExclude.forEach((m) => {
test(`${m} should not be considered part of a Pashto word`, () => {
const re = fuzzifyPashto("کور", { returnWholeWord: true, matchStart: "word" });
// ISSUE: This should also work when the word is PRECEDED by the punctuation
// Need to work with a lookbehind equivalent
// eslint-disable-next-line
const result = `زمونږ کورونه${m} دي`.match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toContain(" کورونه");
// Matches will unfortunately have a space on the front of the word, issue with missing es2018 lookbehinds
});
});
punctuationToExclude.forEach((m) => {
// tslint:disable-next-line
test(`${m} should not be considered part of a Pashto word (front or back with es2018) - or should fail if using a non es2018 environment`, () => {
let result: any;
let failed = false;
// if environment is not es2018 with lookbehind support (like node 6, 8) this will fail
try {
const re = fuzzifyPashto("کور", { returnWholeWord: true, matchStart: "word", es2018: true });
// eslint-disable-next-line
result = `زمونږ ${m}کورونه${m} دي`.match(new RegExp(re));
} catch (error) {
failed = true;
}
const worked = failed || (result.length === 1 && result.includes("کورونه"));
expect(worked).toBe(true);
});
matchStart: "word",
es2018: true,
});
// eslint-disable-next-line
result = `زمونږ ${m}کورونه${m} دي`.match(new RegExp(re));
} catch (error) {
failed = true;
}
const worked = failed || (result.length === 1 && result.includes("کورونه"));
expect(worked).toBe(true);
});
});
test(`Arabic punctuation or numbers should not be considered part of a Pashto word`, () => {
const re = fuzzifyPashto("کار", { returnWholeWord: true });
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toContain("کارونه");
const re = fuzzifyPashto("کار", { returnWholeWord: true });
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toHaveLength(1);
expect(result).toContain("کارونه");
});

View File

@ -14,7 +14,7 @@ const velarPlosives = "ګغږکقگك";
const rLikeSounds = "رړڑڼ";
const labialPlosivesAndFricatives = "فپب";
// Includes Arabic ى \u0649
const theFiveYeys = "ېۍیيئےى";
const theFiveYays = "ېۍیيئےى";
const guttural = "ښخشخهحغګ";
interface IReplacerInfoItem {
@ -38,7 +38,6 @@ const ghzCombo = ["غز", "زغ"];
const pxCombo = ["پښ", "ښپ"];
const kshCombo = ["کش", "شک", "کښ", "کش"];
export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "اً", range: "ان" },
{
@ -54,15 +53,25 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "ٳ", range: "اآهأ" },
{ char: "یٰ", range: "ای", plus: ["یٰ"] },
{ char: "ی", range: theFiveYeys, plus: ["ئی", "ئي", "یٰ"], ignorableIfInMiddle: true },
{ char: "ي", range: theFiveYeys, plus: ["ئی", "ئي", "یٰ"], ignorableIfInMiddle: true },
{ char: "ې", range: theFiveYeys, ignorableIfInMiddle: true },
{ char: "ۍ", range: theFiveYeys },
{ char: "ئي", range: theFiveYeys, plus: ["ئی", "ئي"] },
{ char: "ئی", range: theFiveYeys, plus: ["ئی", "ئي"] },
{ char: "ئے", range: theFiveYeys, plus: ["ئی", "ئي", "يې"]},
{ char: "ئ", range: theFiveYeys, ignorableIfInMiddle: true },
{ char: "ے", range: theFiveYeys },
{
char: "ی",
range: theFiveYays,
plus: ["ئی", "ئي", "یٰ"],
ignorableIfInMiddle: true,
},
{
char: "ي",
range: theFiveYays,
plus: ["ئی", "ئي", "یٰ"],
ignorableIfInMiddle: true,
},
{ char: "ې", range: theFiveYays, ignorableIfInMiddle: true },
{ char: "ۍ", range: theFiveYays },
{ char: "ئي", range: theFiveYays, plus: ["ئی", "ئي"] },
{ char: "ئی", range: theFiveYays, plus: ["ئی", "ئي"] },
{ char: "ئے", range: theFiveYays, plus: ["ئی", "ئي", "يې"] },
{ char: "ئ", range: theFiveYays, ignorableIfInMiddle: true },
{ char: "ے", range: theFiveYays },
{ char: "س", range: sSounds },
{ char: "ص", range: sSounds },
@ -79,7 +88,7 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "ع", range: "اوع", ignorable: true },
{ char: "و", range: "وع", plus: ["وو"], ignorableIfInMiddle: true },
{ char: "ؤ", range: "وع"},
{ char: "ؤ", range: "وع" },
{ char: "ښ", range: guttural },
{ char: "غ", range: guttural },
@ -91,7 +100,7 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "ز", range: zSounds },
{ char: "ض", range: zSounds },
{ char: "ذ", range: zSounds },
{ char: "ځ", range: zSounds + "جڅ"},
{ char: "ځ", range: zSounds + "جڅ" },
{ char: "ظ", range: zSounds },
{ char: "ژ", range: "زضظژذځږج" },
@ -133,11 +142,12 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
];
// tslint:disable-next-line
export const pashtoReplacerRegex = /اً|أ|ا|آ|ٱ|ٲ|ٳ|ئی|ئي|ئے|یٰ|ی|ي|ې|ۍ|ئ|ے|س|ص|ث|څ|ج|چ|هٔ|ه|ۀ|غز|زغ|کش|شک|ښک|ښک|پښ|ښپ|ہ|ع|و|ؤ|ښ|غ|خ|ح|ش|ز|ض|ذ|ځ|ظ|ژ|ر|ړ|ڑ|ت|ټ|ٹ|ط|د|ډ|ڈ|مب|م|نب|ن|ڼ|ک|ګ|گ|ل|ق|ږ|ب|پ|ف/g;
export const pashtoReplacerRegex =
/اً|أ|ا|آ|ٱ|ٲ|ٳ|ئی|ئي|ئے|یٰ|ی|ي|ې|ۍ|ئ|ے|س|ص|ث|څ|ج|چ|هٔ|ه|ۀ|غز|زغ|کش|شک|ښک|ښک|پښ|ښپ|ہ|ع|و|ؤ|ښ|غ|خ|ح|ش|ز|ض|ذ|ځ|ظ|ژ|ر|ړ|ڑ|ت|ټ|ٹ|ط|د|ډ|ڈ|مب|م|نب|ن|ڼ|ک|ګ|گ|ل|ق|ږ|ب|پ|ف/g;
// TODO: I removed the h? 's at the beginning and ends. was that a good idea?
const aaySoundLatin = "(?:[aá]a?i|[eé]y|[aá]a?y|[aá]h?i)";
const aaySoundSimpleLatin = "(?:aa?i|ey|aa?y|ah?i)";
const aaySoundSimpleLatin = "(?:aa?i|ay|aa?y|ah?i)";
const longASoundLatin = "(?:[aá]{1,2}'?h?a{0,2}?)h?";
const longASoundSimpleLatin = "(?:a{1,2}'?h?a{0,2}?)h?";
const shortASoundLatin = "(?:[aáă][a|́]?|au|áu|[uú]|[UÚ]|[ií]|[eé])?h?";
@ -146,8 +156,8 @@ const shwaSoundLatin = "(?:[uú]|[oó]o?|w[uú]|[aáă]|[ií]|[UÚ])?";
const shwaSoundSimpleLatin = "(?:u|oo?|wu|a|i|U)?";
const ooSoundLatin = "(?:[oó]o?|[áa]u|w[uú]|[aá]w|[uú]|[UÚ])(?:h|w)?";
const ooSoundSimpleLatin = "(?:oo?|au|wu|aw|u|U)(?:h|w)?";
const eySoundLatin = "(?:[eé]y|[eé]e?|[uú]y|[aá]y|[ií])";
const eySoundSimpleLatin = "(?:ey|ee?|uy|ay|i)";
const aySoundLatin = "(?:[eé]y|[eé]e?|[uú]y|[aá]y|[ií])";
const aySoundSimpleLatin = "(?:ay|ee?|uy|ay|i)";
const middleESoundLatin = "(?:[eé]e?|[ií]|[aáă]|[eé])[h|y|́]?";
const middleESoundSimpleLatin = "(?:ee?|i|a|e)[h|y]?";
const iSoundLatin = "-?(?:[uú]|[aáă]|[ií]|[eé]e?)?h?-?";
@ -180,67 +190,67 @@ export const latinReplacerInfo: IPhoneticsReplacerInfoItem[] = [
{ char: "óo", repl: ooSoundLatin },
{ char: "i", repl: iSoundLatin, replWhenBeginning: iSoundLatinBeginning },
{ char: "í", repl: iSoundLatin, replWhenBeginning: iSoundLatinBeginning },
{ char: "ey", repl: eySoundLatin },
{ char: "éy", repl: eySoundLatin },
{ char: "ee", repl: eySoundLatin },
{ char: "ée", repl: eySoundLatin },
{ char: "uy", repl: eySoundLatin },
{ char: "úy", repl: eySoundLatin },
{ char: "ay", repl: aySoundLatin },
{ char: "áy", repl: aySoundLatin },
{ char: "ee", repl: aySoundLatin },
{ char: "ée", repl: aySoundLatin },
{ char: "uy", repl: aySoundLatin },
{ char: "úy", repl: aySoundLatin },
{ char: "e", repl: middleESoundLatin },
{ char: "é", repl: middleESoundLatin },
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)"},
{ char: "y", repl: "[ií]?y?"},
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)" },
{ char: "y", repl: "[ií]?y?" },
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "ss", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "dz", repl: "(?:dz|z{1,2}|j)"},
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)"},
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "r", repl: "(?:R|r{1,2}|N)"},
{ char: "rr", repl: "(?:R|r{1,2}|N)"},
{ char: "R", repl: "(?:R|r{1,2}|N)"},
{ char: "nb", repl: "(?:nb|mb)"},
{ char: "mb", repl: "(?:nb|mb)"},
{ char: "n", repl: "(?:n{1,2}|N)"},
{ char: "N", repl: "(?:R|r{1,2}|N)"},
{ char: "f", repl: "(?:f{1,2}|p{1,2})"},
{ char: "ff", repl: "(?:f{1,2}|p{1,2})"},
{ char: "b", repl: "(?:b{1,2}|p{1,2})"},
{ char: "bb", repl: "(?:b{1,2}|p{1,2})"},
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "ss", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "dz", repl: "(?:dz|z{1,2}|j)" },
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)" },
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "r", repl: "(?:R|r{1,2}|N)" },
{ char: "rr", repl: "(?:R|r{1,2}|N)" },
{ char: "R", repl: "(?:R|r{1,2}|N)" },
{ char: "nb", repl: "(?:nb|mb)" },
{ char: "mb", repl: "(?:nb|mb)" },
{ char: "n", repl: "(?:n{1,2}|N)" },
{ char: "N", repl: "(?:R|r{1,2}|N)" },
{ char: "f", repl: "(?:f{1,2}|p{1,2})" },
{ char: "ff", repl: "(?:f{1,2}|p{1,2})" },
{ char: "b", repl: "(?:b{1,2}|p{1,2})" },
{ char: "bb", repl: "(?:b{1,2}|p{1,2})" },
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "sh", repl: "(?:x|sh|s`h)"},
{ char: "x", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "sh", repl: "(?:x|sh|s`h)" },
{ char: "x", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "k", repl: "(?:k{1,2}|q{1,2})"},
{ char: "q", repl: "(?:k{1,2}|q{1,2})"},
{ char: "k", repl: "(?:k{1,2}|q{1,2})" },
{ char: "q", repl: "(?:k{1,2}|q{1,2})" },
{ char: "jz", repl: "(?:G|jz)"},
{ char: "G", repl: "(?:jz|G|g)"},
{ char: "jz", repl: "(?:G|jz)" },
{ char: "G", repl: "(?:jz|G|g)" },
{ char: "g", repl: "(?:gh?|k{1,2}|G)"},
{ char: "gh", repl: "(?:g|gh|kh|G)"},
{ char: "g", repl: "(?:gh?|k{1,2}|G)" },
{ char: "gh", repl: "(?:g|gh|kh|G)" },
{ char: "j", repl: "(?:j{1,2}|ch|dz)"},
{ char: "ch", repl: "(?:j{1,2}|ch)"},
{ char: "j", repl: "(?:j{1,2}|ch|dz)" },
{ char: "ch", repl: "(?:j{1,2}|ch)" },
{ char: "l", repl: "l{1,2}"},
{ char: "ll", repl: "l{1,2}"},
{ char: "m", repl: "m{1,2}"},
{ char: "mm", repl: "m{1,2}"},
{ char: "h", repl: "k?h?"},
{ char: "'", repl: "['||`]?"},
{ char: "", repl: "['||`]?"},
{ char: "`", repl: "['||`]?"},
{ char: "l", repl: "l{1,2}" },
{ char: "ll", repl: "l{1,2}" },
{ char: "m", repl: "m{1,2}" },
{ char: "mm", repl: "m{1,2}" },
{ char: "h", repl: "k?h?" },
{ char: "'", repl: "['||`]?" },
{ char: "", repl: "['||`]?" },
{ char: "`", repl: "['||`]?" },
];
export const simpleLatinReplacerInfo: IPhoneticsReplacerInfoItem[] = [
@ -254,65 +264,71 @@ export const simpleLatinReplacerInfo: IPhoneticsReplacerInfoItem[] = [
{ char: "U", repl: ooSoundSimpleLatin },
{ char: "o", repl: ooSoundSimpleLatin },
{ char: "oo", repl: ooSoundSimpleLatin },
{ char: "i", repl: iSoundSimpleLatin, replWhenBeginning: iSoundSimpleLatinBeginning },
{ char: "ey", repl: eySoundSimpleLatin },
{ char: "ee", repl: eySoundSimpleLatin },
{ char: "uy", repl: eySoundSimpleLatin },
{
char: "i",
repl: iSoundSimpleLatin,
replWhenBeginning: iSoundSimpleLatinBeginning,
},
{ char: "ay", repl: aySoundSimpleLatin },
{ char: "ee", repl: aySoundSimpleLatin },
{ char: "uy", repl: aySoundSimpleLatin },
{ char: "e", repl: middleESoundSimpleLatin },
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)"},
{ char: "y", repl: "[ií]?y?"},
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)" },
{ char: "y", repl: "[ií]?y?" },
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "dz", repl: "(?:dz|z{1,2}|j)"},
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)"},
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "r", repl: "(?:R|r{1,2}|N)"},
{ char: "rr", repl: "(?:R|r{1,2}|N)"},
{ char: "R", repl: "(?:R|r{1,2}|N)"},
{ char: "nb", repl: "(?:nb|mb|nw)"},
{ char: "mb", repl: "(?:nb|mb)"},
{ char: "n", repl: "(?:n{1,2}|N)"},
{ char: "N", repl: "(?:R|r{1,2}|N)"},
{ char: "f", repl: "(?:f{1,2}|p{1,2})"},
{ char: "ff", repl: "(?:f{1,2}|p{1,2})"},
{ char: "b", repl: "(?:b{1,2}|p{1,2}|w)"},
{ char: "bb", repl: "(?:b{1,2}|p{1,2})"},
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "dz", repl: "(?:dz|z{1,2}|j)" },
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)" },
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "r", repl: "(?:R|r{1,2}|N)" },
{ char: "rr", repl: "(?:R|r{1,2}|N)" },
{ char: "R", repl: "(?:R|r{1,2}|N)" },
{ char: "nb", repl: "(?:nb|mb|nw)" },
{ char: "mb", repl: "(?:nb|mb)" },
{ char: "n", repl: "(?:n{1,2}|N)" },
{ char: "N", repl: "(?:R|r{1,2}|N)" },
{ char: "f", repl: "(?:f{1,2}|p{1,2})" },
{ char: "ff", repl: "(?:f{1,2}|p{1,2})" },
{ char: "b", repl: "(?:b{1,2}|p{1,2}|w)" },
{ char: "bb", repl: "(?:b{1,2}|p{1,2})" },
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "sh", repl: "(?:x|sh|s`h)"},
{ char: "x", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "sh", repl: "(?:x|sh|s`h)" },
{ char: "x", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "k", repl: "(?:k{1,2}|q{1,2})"},
{ char: "kk", repl: "(?:k{1,2}|q{1,2})"},
{ char: "q", repl: "(?:k{1,2}|q{1,2})"},
{ char: "qq", repl: "(?:k{1,2}|q{1,2})"},
{ char: "k", repl: "(?:k{1,2}|q{1,2})" },
{ char: "kk", repl: "(?:k{1,2}|q{1,2})" },
{ char: "q", repl: "(?:k{1,2}|q{1,2})" },
{ char: "qq", repl: "(?:k{1,2}|q{1,2})" },
{ char: "jz", repl: "(?:G|jz)"},
{ char: "G", repl: "(?:jz|G|g)"},
{ char: "jz", repl: "(?:G|jz)" },
{ char: "G", repl: "(?:jz|G|g)" },
{ char: "g", repl: "(?:gh?|k{1,2}|G)"},
{ char: "gh", repl: "(?:g|gh|kh|G)"},
{ char: "g", repl: "(?:gh?|k{1,2}|G)" },
{ char: "gh", repl: "(?:g|gh|kh|G)" },
{ char: "j", repl: "(?:j{1,2}|ch|dz)"},
{ char: "ch", repl: "(?:j{1,2}|ch)"},
{ char: "j", repl: "(?:j{1,2}|ch|dz)" },
{ char: "ch", repl: "(?:j{1,2}|ch)" },
{ char: "l", repl: "l{1,2}"},
{ char: "ll", repl: "l{1,2}"},
{ char: "m", repl: "m{1,2}"},
{ char: "mm", repl: "m{1,2}"},
{ char: "h", repl: "k?h?"},
{ char: "l", repl: "l{1,2}" },
{ char: "ll", repl: "l{1,2}" },
{ char: "m", repl: "m{1,2}" },
{ char: "mm", repl: "m{1,2}" },
{ char: "h", repl: "k?h?" },
];
// tslint:disable-next-line
export const latinReplacerRegex = /yee|a{1,2}[i|y]|á{1,2}[i|y]|aa|áa|a|ắ|ă|á|U|Ú|u|ú|oo|óo|o|ó|e{1,2}|ée|é|ey|éy|uy|úy|i|í|w|y|q|q|ts|sh|ss|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|kk|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h||'|`/g;
export const latinReplacerRegex =
/yee|a{1,2}[i|y]|á{1,2}[i|y]|aa|áa|a|ắ|ă|á|U|Ú|u|ú|oo|óo|o|ó|e{1,2}|ée|é|ay|áy|uy|úy|i|í|w|y|q|q|ts|sh|ss|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|kk|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h||'|`/g;
export const simpleLatinReplacerRegex = /yee|a{1,2}[i|y]|aa|a|U|u|oo|o|e{1,2}|ey|uy|i|w|y|q|ts|sh|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h/g;
export const simpleLatinReplacerRegex =
/yee|a{1,2}[i|y]|aa|a|U|u|oo|o|e{1,2}|ay|uy|i|w|y|q|ts|sh|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h/g;

View File

@ -14,7 +14,7 @@ export const userLocalStorageName = "user1";
export function saveOptions(options: Options): void {
localStorage.setItem(optionsLocalStorageName, JSON.stringify(options));
};
}
export const readOptions = (): undefined | Options => {
const optionsRaw = localStorage.getItem(optionsLocalStorageName);
@ -23,10 +23,6 @@ export const readOptions = (): undefined | Options => {
}
try {
const options = JSON.parse(optionsRaw) as Options;
if (!("searchBarStickyFocus" in options)) {
// compatibility with legacy options
options.searchBarStickyFocus = false;
}
return options;
} catch (e) {
console.error("error parsing saved state JSON", e);
@ -40,18 +36,18 @@ export function saveUser(user: AT.LingdocsUser | undefined): void {
} else {
localStorage.removeItem(userLocalStorageName);
}
};
}
export const readUser = (): AT.LingdocsUser | undefined => {
const userRaw = localStorage.getItem(userLocalStorageName);
if (!userRaw) {
return undefined;
}
try {
const user = JSON.parse(userRaw) as AT.LingdocsUser;
return user;
} catch (e) {
console.error("error parsing saved user JSON", e);
return undefined;
}
};
const userRaw = localStorage.getItem(userLocalStorageName);
if (!userRaw) {
return undefined;
}
try {
const user = JSON.parse(userRaw) as AT.LingdocsUser;
return user;
} catch (e) {
console.error("error parsing saved user JSON", e);
return undefined;
}
};

View File

@ -1,40 +1,40 @@
import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy";
const pMatches = [
["پیټی", "پېټی"],
["دوستی", "دوستي"],
["پته", "پټه"],
["تخلیه", "تحلیه"],
["پیټی", "پېټی"],
["دوستی", "دوستي"],
["پته", "پټه"],
["تخلیه", "تحلیه"],
];
const fMatches = [
["tahliya", "takhliya"],
["sareyy", "saRey"],
["peyTey", "peTey"],
["tahliya", "takhliya"],
["sarey", "saRay"],
["peyTey", "peTey"],
];
pMatches.forEach((pair) => {
test(`${pair[0]} should match ${pair[1]}`, () => {
const re = makeAWeeBitFuzzy(pair[0], "p");
const result = pair[1].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
test(`${pair[1]} should match ${pair[0]}`, () => {
const re = makeAWeeBitFuzzy(pair[1], "p");
const result = pair[0].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
test(`${pair[0]} should match ${pair[1]}`, () => {
const re = makeAWeeBitFuzzy(pair[0], "p");
const result = pair[1].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
test(`${pair[1]} should match ${pair[0]}`, () => {
const re = makeAWeeBitFuzzy(pair[1], "p");
const result = pair[0].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
});
fMatches.forEach((pair) => {
test(`${pair[0]} should match ${pair[1]} both ways`, () => {
const re = makeAWeeBitFuzzy(pair[0], "f");
const result = pair[1].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
test(`${pair[1]} should match ${pair[0]} both ways`, () => {
const re = makeAWeeBitFuzzy(pair[1], "f");
const result = pair[0].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
});
test(`${pair[0]} should match ${pair[1]} both ways`, () => {
const re = makeAWeeBitFuzzy(pair[0], "f");
const result = pair[1].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
test(`${pair[1]} should match ${pair[0]} both ways`, () => {
const re = makeAWeeBitFuzzy(pair[1], "f");
const result = pair[0].match(new RegExp(re, "i"));
expect(result).toBeTruthy();
});
});

View File

@ -28,7 +28,7 @@
// R: "[r|R]",
// };
const fiveYeys = "[ئ|ۍ|ي|ې|ی]";
const fiveYays = "[ئ|ۍ|ي|ې|ی]";
const sSounds = "[س|ص|ث|څ]";
const zSounds = "[ز|ژ|ض|ظ|ذ|ځ]";
const tSounds = "[ت|ط|ټ]";
@ -39,106 +39,115 @@ const hKhSounds = "[خ|ح|ښ|ه]";
const alef = "[آ|ا]";
const pReplacer = {
"ی": fiveYeys,
"ي": fiveYeys,
"ۍ": fiveYeys,
"ئ": fiveYeys,
"ې": fiveYeys,
ی: fiveYays,
ي: fiveYays,
ۍ: fiveYays,
ئ: fiveYays,
ې: fiveYays,
"س": sSounds,
"ص": sSounds,
"ث": sSounds,
"څ": sSounds,
س: sSounds,
ص: sSounds,
ث: sSounds,
څ: sSounds,
"ز": zSounds,
"ظ": zSounds,
"ذ": zSounds,
"ض": zSounds,
"ژ": zSounds,
"ځ": zSounds,
ز: zSounds,
ظ: zSounds,
ذ: zSounds,
ض: zSounds,
ژ: zSounds,
ځ: zSounds,
"ت": tSounds,
"ط": tSounds,
"ټ": tSounds,
ت: tSounds,
ط: tSounds,
ټ: tSounds,
"د": dSounds,
"ډ": dSounds,
د: dSounds,
ډ: dSounds,
"ر": rSounds,
"ړ": rSounds,
ر: rSounds,
ړ: rSounds,
"ن": nSounds,
"ڼ": nSounds,
ن: nSounds,
ڼ: nSounds,
"خ": hKhSounds,
"ح": hKhSounds,
"ښ": hKhSounds,
"ه": hKhSounds,
خ: hKhSounds,
ح: hKhSounds,
ښ: hKhSounds,
ه: hKhSounds,
"ا": alef,
"آ": alef,
ا: alef,
آ: alef,
};
const fiveYeysF = "(?:eyy|ey|ee|é|e|uy)";
const fiveYaysF = "(?:ey|ay|ee|é|e|uy)";
const hKhF = "(?:kh|h|x)";
const zSoundsF = "(?:z|dz)";
const sSoundsF = "(?:ts|s)";
const fReplacer = {
"eyy": fiveYeysF,
"ey": fiveYeysF,
"uy": fiveYeysF,
"ee": fiveYeysF,
"e": fiveYeysF,
ey: fiveYaysF,
ay: fiveYaysF,
uy: fiveYaysF,
ee: fiveYaysF,
e: fiveYaysF,
"z": zSoundsF,
"dz": zSoundsF,
"x": hKhF,
"h": hKhF,
"kh": hKhF,
"ts": sSoundsF,
"s": sSoundsF,
// only used if ignoring accents
"a": "[a|á]",
"á": "[a|á|u|ú]",
"u": "[u|ú|a|á]",
"ú": "[u|ú]",
"o": "[o|ó]",
"ó": "[o|ó]",
"i": "[i|í]",
"í": "[i|í]",
"U": "[U|Ú]",
"Ú": "[U|Ú]",
"éy": fiveYeysF,
"éyy": fiveYeysF,
"úy": fiveYeysF,
"ée": fiveYeysF,
"é": fiveYeysF,
z: zSoundsF,
dz: zSoundsF,
x: hKhF,
h: hKhF,
kh: hKhF,
ts: sSoundsF,
s: sSoundsF,
// only used if ignoring accents
a: "[a|á]",
á: "[a|á|u|ú]",
u: "[u|ú|a|á]",
ú: "[u|ú]",
o: "[o|ó]",
ó: "[o|ó]",
i: "[i|í]",
í: "[i|í]",
U: "[U|Ú]",
Ú: "[U|Ú]",
áy: fiveYaysF,
éy: fiveYaysF,
úy: fiveYaysF,
ée: fiveYaysF,
é: fiveYaysF,
};
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");
const fRepRegex = /eyy|ey|uy|ee|e|z|dz|x|kh|h|ts|s/g;
const fRepRegex = /ey|ay|uy|ee|e|z|dz|x|kh|h|ts|s/g;
const fRepRegexWAccents = /eyy|éyy|ey|éy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
const fRepRegexWAccents =
/ey|éy|ay|áy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
function makePAWeeBitFuzzy(s: string): string {
// + s.replace(/ /g, "").split("").join(" *");
return "^" + s.replace(pRepRegex, mtch => {
// @ts-ignore
return `${pReplacer[mtch]}`;
});
// + s.replace(/ /g, "").split("").join(" *");
return (
"^" +
s.replace(pRepRegex, (mtch) => {
// @ts-ignore
return `${pReplacer[mtch]}`;
})
);
}
function makeFAWeeBitFuzzy(s: string, ignoreAccent?: boolean): string {
return "^" + s.replace((ignoreAccent ? fRepRegexWAccents : fRepRegex), mtch => {
// @ts-ignore
return fReplacer[mtch];
});
return (
"^" +
s.replace(ignoreAccent ? fRepRegexWAccents : fRepRegex, (mtch) => {
// @ts-ignore
return fReplacer[mtch];
})
);
}
export function makeAWeeBitFuzzy(s: string, i: "f" | "p", ignoreAccent?: boolean): string {
return i === "p"
? makePAWeeBitFuzzy(s)
: makeFAWeeBitFuzzy(s, ignoreAccent);
}
export function makeAWeeBitFuzzy(
s: string,
i: "f" | "p",
ignoreAccent?: boolean
): string {
return i === "p" ? makePAWeeBitFuzzy(s) : makeFAWeeBitFuzzy(s, ignoreAccent);
}

View File

@ -1,71 +1,88 @@
export type DictionaryStatus = "loading" | "ready" | "updating" | "error loading";
export type DictionaryStatus =
| "loading"
| "ready"
| "updating"
| "error loading";
export type State = {
dictionaryStatus: DictionaryStatus,
searchValue: string,
options: Options,
page: number,
isolatedEntry: import("@lingdocs/ps-react").Types.DictionaryEntry | undefined,
results: import("@lingdocs/ps-react").Types.DictionaryEntry[],
wordlist: WordlistWord[],
reviewTasks: import("./functions-types").ReviewTask[],
dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo | undefined,
user: undefined | import("./account-types").LingdocsUser,
inflectionSearchResults: undefined | "searching" | {
exact: InflectionSearchResult[],
fuzzy: InflectionSearchResult[],
},
}
dictionaryStatus: DictionaryStatus;
showModal: boolean;
searchValue: string;
options: Options;
page: number;
isolatedEntry: import("@lingdocs/ps-react").Types.DictionaryEntry | undefined;
results: import("@lingdocs/ps-react").Types.DictionaryEntry[];
wordlist: WordlistWord[];
reviewTasks: import("./functions-types").ReviewTask[];
dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo | undefined;
user: undefined | import("./account-types").LingdocsUser;
inflectionSearchResults:
| undefined
| "searching"
| {
exact: InflectionSearchResult[];
fuzzy: InflectionSearchResult[];
};
};
export type DictionaryAPI = {
initialize: () => Promise<{
response: "loaded first time" | "loaded from saved",
dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo,
}>,
update: (updateComing: () => void) => Promise<{
response: "no need for update" | "updated" | "unable to check",
dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo,
}>,
search: (state: State) => import("@lingdocs/ps-react").Types.DictionaryEntry[],
exactPashtoSearch: (search: string) => import("@lingdocs/ps-react").Types.DictionaryEntry[],
getNewWordsThisMonth: () => import("@lingdocs/ps-react").Types.DictionaryEntry[],
findOneByTs: (ts: number) => import("@lingdocs/ps-react").Types.DictionaryEntry | undefined,
findRelatedEntries: (entry: import("@lingdocs/ps-react").Types.DictionaryEntry) => import("@lingdocs/ps-react").Types.DictionaryEntry[],
}
initialize: () => Promise<{
response: "loaded first time" | "loaded from saved";
dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo;
}>;
update: (updateComing: () => void) => Promise<{
response: "no need for update" | "updated" | "unable to check";
dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo;
}>;
search: (
state: State
) => import("@lingdocs/ps-react").Types.DictionaryEntry[];
exactPashtoSearch: (
search: string
) => import("@lingdocs/ps-react").Types.DictionaryEntry[];
getNewWordsThisMonth: () => import("@lingdocs/ps-react").Types.DictionaryEntry[];
findOneByTs: (
ts: number
) => import("@lingdocs/ps-react").Types.DictionaryEntry | undefined;
findRelatedEntries: (
entry: import("@lingdocs/ps-react").Types.DictionaryEntry
) => import("@lingdocs/ps-react").Types.DictionaryEntry[];
};
export type WordlistWordBase = {
_id: string,
/* a backup copy of the full dictionary entry in case it gets deleted from the dictionary */
entry: import("@lingdocs/ps-react").Types.DictionaryEntry,
/* the notes/context provided by the user for the word in their wordlist */
notes: string,
supermemo: import("supermemo").SuperMemoItem,
/* rep/stage of warmup stage before moving into supermemo mode */
warmup: number | "done",
/* date due for review - ISO string */
dueDate: number,
}
_id: string;
/* a backup copy of the full dictionary entry in case it gets deleted from the dictionary */
entry: import("@lingdocs/ps-react").Types.DictionaryEntry;
/* the notes/context provided by the user for the word in their wordlist */
notes: string;
supermemo: import("supermemo").SuperMemoItem;
/* rep/stage of warmup stage before moving into supermemo mode */
warmup: number | "done";
/* date due for review - ISO string */
dueDate: number;
};
export type WordlistAttachmentInfo = {
imgSize?: { height: number, width: number },
_attachments: Attachments,
}
imgSize?: { height: number; width: number };
_attachments: Attachments;
};
export type WordlistWordWAttachments = WordlistWordBase & WordlistAttachmentInfo;
export type WordlistWordWAttachments = WordlistWordBase &
WordlistAttachmentInfo;
export type WordlistWord = WordlistWordBase | WordlistWordWAttachments;
export type Options = {
language: Language,
searchType: SearchType,
theme: Theme,
textOptionsRecord: TextOptionsRecord,
wordlistMode: WordlistMode,
wordlistReviewLanguage: Language,
wordlistReviewBadge: boolean,
searchBarPosition: SearchBarPosition,
searchBarStickyFocus: boolean,
}
language: Language;
searchType: SearchType;
theme: Theme;
textOptionsRecord: TextOptionsRecord;
wordlistMode: WordlistMode;
wordlistReviewLanguage: Language;
wordlistReviewBadge: boolean;
searchBarPosition: SearchBarPosition;
searchBarStickyFocus: boolean;
};
export type Language = "Pashto" | "English";
export type SearchType = "alphabetical" | "fuzzy";
@ -78,84 +95,102 @@ export type SearchBarPosition = "top" | "bottom";
export type WordlistMode = "browse" | "review";
export type TextOptionsRecord = {
lastModified: import("./account-types").TimeStamp,
textOptions: import("@lingdocs/ps-react").Types.TextOptions,
lastModified: import("./account-types").TimeStamp;
textOptions: import("@lingdocs/ps-react").Types.TextOptions;
};
export type UserLevel = "basic" | "student" | "editor";
export type OptionsAction = {
type: "toggleSearchType",
} | {
type: "toggleLanguage",
} | {
type: "changeTheme",
payload: Theme,
} | {
type: "changeSearchBarPosition",
payload: SearchBarPosition,
} | {
type: "changeWordlistMode",
payload: WordlistMode,
} | {
type: "changeWordlistReviewLanguage",
payload: Language,
} | {
type: "changeWordlistReviewBadge",
payload: boolean,
} | {
type: "updateTextOptionsRecord",
payload: TextOptionsRecord,
} | {
type: "changeSearchBarStickyFocus",
payload: boolean,
} | {
type: "setShowPlayStoreButton",
payload: boolean,
};
export type OptionsAction =
| {
type: "toggleSearchType";
}
| {
type: "toggleLanguage";
}
| {
type: "changeTheme";
payload: Theme;
}
| {
type: "changeSearchBarPosition";
payload: SearchBarPosition;
}
| {
type: "changeWordlistMode";
payload: WordlistMode;
}
| {
type: "changeWordlistReviewLanguage";
payload: Language;
}
| {
type: "changeWordlistReviewBadge";
payload: boolean;
}
| {
type: "updateTextOptionsRecord";
payload: TextOptionsRecord;
}
| {
type: "changeSearchBarStickyFocus";
payload: boolean;
}
| {
type: "setShowPlayStoreButton";
payload: boolean;
};
export type TextOptionsAction = {
type: "changePTextSize",
payload: PTextSize,
} | {
type: "changeSpelling",
payload: import("@lingdocs/ps-react").Types.Spelling,
} | {
type: "changePhonetics",
payload: "lingdocs" | "ipa" | "alalc" | "none",
} | {
type: "changeDialect",
payload: "standard" | "peshawer" | "southern",
} | {
type: "changeDiacritics",
payload: boolean,
};
export type TextOptionsAction =
| {
type: "changePTextSize";
payload: PTextSize;
}
| {
type: "changeSpelling";
payload: import("@lingdocs/ps-react").Types.Spelling;
}
| {
type: "changePhonetics";
payload: "lingdocs" | "ipa" | "alalc" | "none";
}
| {
type: "changeDialect";
payload: "standard" | "peshawer" | "southern";
}
| {
type: "changeDiacritics";
payload: boolean;
};
export type AttachmentToPut = {
content_type: string,
data: string | Blob,
}
export type AttachmentWithData = {
content_type: string,
digest: string,
data: string | Blob,
}
export type AttachmentWOutData = {
content_type: string,
digest: string,
stub: true;
}
export type Attachment = AttachmentToPut | AttachmentWithData | AttachmentWOutData
export type AttachmentType = "image" | "audio";
export type Attachments = {
/* only allows one image and one audio attachment - max 2 values */
[filename: string]: Attachment,
content_type: string;
data: string | Blob;
};
export type WordlistWordDoc = WordlistWord & { _rev: string, _id: string };
export type AttachmentWithData = {
content_type: string;
digest: string;
data: string | Blob;
};
export type AttachmentWOutData = {
content_type: string;
digest: string;
stub: true;
};
export type Attachment =
| AttachmentToPut
| AttachmentWithData
| AttachmentWOutData;
export type AttachmentType = "image" | "audio";
export type Attachments = {
/* only allows one image and one audio attachment - max 2 values */
[filename: string]: Attachment;
};
export type WordlistWordDoc = WordlistWord & { _rev: string; _id: string };
export type InflectionName = "plain" | "1st" | "2nd";
@ -167,15 +202,14 @@ export type PluralInflectionName = "plural" | "2nd";
// the possible matches, and their person/inflection number
export type InflectionSearchResult = {
entry: import("@lingdocs/ps-react").Types.DictionaryEntry,
forms: InflectionFormMatch[],
}
export type InflectionFormMatch = {
path: string[],
matches: {
ps: import("@lingdocs/ps-react").Types.PsString,
pos: InflectionName[] | import("@lingdocs/ps-react").Types.Person[] | null,
}[],
entry: import("@lingdocs/ps-react").Types.DictionaryEntry;
forms: InflectionFormMatch[];
};
export type InflectionFormMatch = {
path: string[];
matches: {
ps: import("@lingdocs/ps-react").Types.PsString;
pos: InflectionName[] | import("@lingdocs/ps-react").Types.Person[] | null;
}[];
};

View File

@ -2349,10 +2349,10 @@
"@jridgewell/resolve-uri" "^3.0.3"
"@jridgewell/sourcemap-codec" "^1.4.10"
"@lingdocs/ps-react@5.10.1":
version "5.10.1"
resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-5.10.1.tgz#949850aaa3c9de54d4beed1daa9b546bb0a84df9"
integrity sha512-Ro/6Fq9mEdF4/2wJf8USkIlYe+9vWmez/RhoUF0mTjOhmyTGV6cpajK0Qpo1WyCaL5d/6BTI3qVuk5h8pWRQjA==
"@lingdocs/ps-react@6.0.0":
version "6.0.0"
resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-6.0.0.tgz#dbdfd1a5afd19253679169eacbf1da5562db5dc3"
integrity sha512-+j6F65FtmPbeEjjHtE3JqKHtCcUM+cMAN2RMTd8yyacJ4sTJW/oWC+6rAQGQqc1da3lP7tuxt6p+esmFYI9fgQ==
dependencies:
"@formkit/auto-animate" "^1.0.0-beta.3"
classnames "^2.2.6"