release new phonetics!

This commit is contained in:
adueck 2023-07-27 18:18:01 +04:00
parent 8dd63ad9c4
commit 54fb2050c1
21 changed files with 3376 additions and 2910 deletions

View File

@ -67,6 +67,10 @@ npm install
#### Development
```sh
firebase login
# get envars locally
firebase functions:config:get > .runtimeconfig.json
# start functions emulator
npm run serve
```

View File

@ -9,7 +9,7 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"base64url": "^3.0.1",
"bcryptjs": "^2.4.3",
"connect-redis": "^6.0.0",
@ -124,9 +124,9 @@
}
},
"node_modules/@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"license": "MIT",
"dependencies": {
"fp-ts": "^2.16.0",
@ -2747,9 +2747,9 @@
}
},
"@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"requires": {
"fp-ts": "^2.16.0",
"pbf": "^3.2.1",

View File

@ -11,7 +11,7 @@
"author": "",
"license": "ISC",
"dependencies": {
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"base64url": "^3.0.1",
"bcryptjs": "^2.4.3",
"connect-redis": "^6.0.0",
@ -22,6 +22,7 @@
"express-session": "^1.17.2",
"lokijs": "^1.5.12",
"nano": "^9.0.3",
"next": "^13.4.12",
"node-fetch": "^2.6.7",
"nodemailer": "^6.6.3",
"passport": "^0.4.1",
@ -42,6 +43,7 @@
"@types/cron": "^2.0.0",
"@types/express": "^4.17.13",
"@types/express-session": "^1.17.4",
"@types/lokijs": "^1.5.8",
"@types/node": "^16.6.0",
"@types/node-fetch": "^2.5.12",
"@types/nodemailer": "^6.4.4",

View File

@ -10,7 +10,7 @@ import {
typePredicates as tp,
entryOfFull,
standardizePashto,
} from "@lingdocs/inflect"
} from "@lingdocs/inflect";
export let collection: Collection<T.DictionaryEntry> | undefined = undefined;
export let allWordsCollection: Collection<T.PsString> | undefined = undefined;
@ -34,7 +34,10 @@ async function fetchDictionary(): Promise<T.Dictionary> {
/**
 * Downloads the all-words list that is published alongside the dictionary.
 *
 * The URL is derived from the LINGDOCS_DICTIONARY_URL environment variable by
 * dropping its last 10 characters and appending "all-words-dictionary.json".
 * NOTE(review): the 10-character slice presumably strips a trailing
 * "dictionary" file name — confirm against the configured URL.
 *
 * @returns the parsed JSON body of the response
 * @throws if LINGDOCS_DICTIONARY_URL is not set
 */
async function fetchAllWords(): Promise<T.AllWordsWithInflections> {
  const dictionaryUrl = process.env.LINGDOCS_DICTIONARY_URL;
  if (!dictionaryUrl) {
    // without this guard the request would go to "undefinedall-words-dictionary.json"
    throw new Error("LINGDOCS_DICTIONARY_URL is not set");
  }
  // TODO: this is really ugly
  const res = await fetch(
    dictionaryUrl.slice(0, -10) + "all-words-dictionary.json"
  );
  return await res.json();
}
@ -76,33 +79,34 @@ export function findInAllWords(p: string | RegExp): T.PsWord[] | undefined {
throw new Error("allWords not initialized");
}
return allWordsCollection.find({
p: typeof p === "string"
? p
: { $regex: p },
p: typeof p === "string" ? p : { $regex: p },
});
}
export async function getEntries(ids: (number | string)[]): Promise<{
results: (T.DictionaryEntry | T.VerbEntry)[],
notFound: (number | string)[],
results: (T.DictionaryEntry | T.VerbEntry)[];
notFound: (number | string)[];
}> {
if (!collection) {
throw new Error("dictionary not initialized");
}
const idsP = ids.map(x => typeof x === "number" ? x : standardizePashto(x))
const results: (T.DictionaryEntry | T.VerbEntry)[] = collection.find({
"$or": [
{ "ts": { "$in": idsP }},
{ "p": { "$in": idsP }},
],
}).map(x => {
const idsP = ids.map((x) =>
typeof x === "number" ? x : standardizePashto(x)
);
const results: (T.DictionaryEntry | T.VerbEntry)[] = collection
.find({
$or: [{ ts: { $in: idsP } }, { p: { $in: idsP } }],
})
.map((x) => {
const { $loki, meta, ...entry } = x;
return entry;
}).map((entry): T.DictionaryEntry | T.VerbEntry => {
})
.map((entry): T.DictionaryEntry | T.VerbEntry => {
if (tp.isVerbDictionaryEntry(entry)) {
if (entry.c?.includes("comp.") && entry.l) {
const complement = getOneByTs(entry.l);
if (!complement) throw new Error("Error getting complement "+entry.l);
if (!complement)
throw new Error("Error getting complement " + entry.l);
return {
entry,
complement,
@ -115,17 +119,21 @@ export async function getEntries(ids: (number | string)[]): Promise<{
});
return {
results,
notFound: ids.filter(id => !results.find(x => {
notFound: ids.filter(
(id) =>
!results.find((x) => {
const entry = entryOfFull(x);
return entry.p === id || entry.ts === id;
})),
})
),
};
}
lokidb.loadDatabase({}, (err: Error) => {
lokidb.removeCollection(collectionName);
lokidb.removeCollection(allWordsCollectionName);
fetchDictionary().then((dictionary) => {
fetchDictionary()
.then((dictionary) => {
collection = lokidb.addCollection(collectionName, {
indices: ["i", "p"],
unique: ["ts"],
@ -133,7 +141,8 @@ lokidb.loadDatabase({}, (err: Error) => {
version = dictionary.info.release;
collection?.insert(dictionary.entries);
updateJob.start();
}).catch(console.error);
})
.catch(console.error);
fetchAllWords().then((allWords) => {
allWordsCollection = lokidb.addCollection(allWordsCollectionName, {
indices: ["p"],

View File

@ -2,7 +2,6 @@ import express from "express";
import {
allWordsCollection,
collection,
findInAllWords,
getEntries,
updateDictionary,
} from "../lib/dictionary";
@ -26,7 +25,7 @@ dictionaryRouter.post("/script-to-phonetics", async (req, res, next) => {
}
const results = await scriptToPhonetics(text, accents);
res.send({ ok: true, results });
})
});
dictionaryRouter.post("/entries", async (req, res, next) => {
if (!collection) {
@ -44,7 +43,7 @@ dictionaryRouter.get("/entries/:id", async (req, res, next) => {
if (!collection) {
return res.send({ ok: false, message: "dictionary not ready" });
}
const ids = req.params.id.split(",").map(x => {
const ids = req.params.id.split(",").map((x) => {
const n = parseInt(x);
return Number.isNaN(n) ? x : n;
});

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@
"name": "functions",
"dependencies": {
"@google-cloud/storage": "^5.8.1",
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"@types/cors": "^2.8.10",
"@types/google-spreadsheet": "^3.0.2",
"@types/react": "^18.0.21",
@ -1468,9 +1468,9 @@
}
},
"node_modules/@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"license": "MIT",
"dependencies": {
"fp-ts": "^2.16.0",
@ -8056,9 +8056,9 @@
}
},
"@lingdocs/inflect": {
"version": "5.10.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.10.1.tgz",
"integrity": "sha512-8MPsfQzeerlyT02dz7D7L+AYFrjGOrQB7nMBUXutnLw3/RKhvW99dLImFZKSnCr8DZsEONEp0IVeqxeIUczxog==",
"version": "6.0.0",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-6.0.0.tgz",
"integrity": "sha512-aPvjqOkeKhu60Inbk7uuLooR/9hvUS4rDHyqR5JJPziZMLJ05U5fBTUvehit7stHSRGivskR00uU3liWbXce6g==",
"requires": {
"fp-ts": "^2.16.0",
"pbf": "^3.2.1",

View File

@ -15,7 +15,7 @@
"main": "lib/functions/src/index.js",
"dependencies": {
"@google-cloud/storage": "^5.8.1",
"@lingdocs/inflect": "5.10.1",
"@lingdocs/inflect": "6.0.0",
"@types/cors": "^2.8.10",
"@types/google-spreadsheet": "^3.0.2",
"@types/react": "^18.0.21",

View File

@ -4,12 +4,17 @@ import { receiveSubmissions } from "./submissions";
import lingdocsAuth from "./middleware/lingdocs-auth";
import publish from "./publish";
export const publishDictionary = functions.runWith({
export const publishDictionary = functions
.runWith({
timeoutSeconds: 525,
memory: "2GB"
}).https.onRequest(
memory: "2GB",
})
.https.onRequest(
lingdocsAuth(
async (req, res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>) => {
async (
req,
res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>
) => {
if (req.user.level !== "editor") {
res.status(403).send({ ok: false, error: "403 forbidden" });
return;
@ -23,13 +28,19 @@ export const publishDictionary = functions.runWith({
}
}
)
);
);
export const submissions = functions.runWith({
export const submissions = functions
.runWith({
timeoutSeconds: 60,
memory: "1GB",
}).https.onRequest(lingdocsAuth(
async (req, res: functions.Response<FT.SubmissionsResponse | FT.FunctionError>) => {
})
.https.onRequest(
lingdocsAuth(
async (
req,
res: functions.Response<FT.SubmissionsResponse | FT.FunctionError>
) => {
if (!Array.isArray(req.body)) {
res.status(400).send({
ok: false,
@ -39,12 +50,13 @@ export const submissions = functions.runWith({
}
const suggestions = req.body as FT.SubmissionsRequest;
try {
const response = await receiveSubmissions(suggestions, true);// req.user.level === "editor");
const response = await receiveSubmissions(suggestions, true); // req.user.level === "editor");
// TODO: WARN IF ANY OF THE EDITS DIDN'T HAPPEN
res.send(response);
} catch (e) {
// @ts-ignore
res.status(500).send({ ok: false, error: e.message });
};
})
);
}
}
)
);

View File

@ -11,26 +11,23 @@ import {
simplifyPhonetics,
standardizeEntry,
} from "@lingdocs/inflect";
import {
getWordList,
} from "./word-list-maker";
import {
PublishDictionaryResponse,
} from "../../website/src/types/functions-types";
import { getWordList } from "./word-list-maker";
import { PublishDictionaryResponse } from "../../website/src/types/functions-types";
import { Storage } from "@google-cloud/storage";
// Google Cloud Storage client for the "lingdocs" project
const storage = new Storage({
  projectId: "lingdocs",
});

const title = "LingDocs Pashto Dictionary";
const license =
  "Copyright © 2021 lingdocs.com All Rights Reserved - Licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License - https://creativecommons.org/licenses/by-nc-sa/4.0/";
const bucketName = "lingdocs";
const baseUrl = `https://storage.googleapis.com/${bucketName}/`;
// file names used under baseUrl; each constant is declared exactly once
const dictionaryFilename = "dictionary";
const dictionaryInfoFilename = "dictionary-info";
// const hunspellAffFileFilename = "ps_AFF.aff";
// const hunspellDicFileFilename = "ps_AFF.dic";
const allWordsJsonFilename = "all-words-dictionary.json";
const url = `${baseUrl}${dictionaryFilename}`;
const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
@ -41,7 +38,7 @@ export default async function publish(): Promise<PublishDictionaryResponse> {
const entries = await getRawEntries();
const errors = checkForErrors(entries);
if (errors.length) {
return({ ok: false, errors });
return { ok: false, errors };
}
// const duplicates = findDuplicates(entries);
// duplicates.forEach((duplicate) => {
@ -58,24 +55,27 @@ export default async function publish(): Promise<PublishDictionaryResponse> {
numberOfEntries: entries.length,
},
entries,
}
};
uploadDictionaryToStorage(dictionary).catch(console.error);
// TODO: make this async and run after publish response
doHunspellEtc(dictionary.info, entries).catch(console.error);
return {
ok: true,
info: dictionary.info
info: dictionary.info,
};
}
/**
 * Builds the word list from the dictionary entries and uploads it, together
 * with the dictionary info, via uploadAllWordsToStoarage.
 *
 * @param info dictionary info to store next to the words
 * @param entries dictionary entries passed to getWordList
 * @throws if getWordList reports errors (they are serialized as JSON into the
 *   error message)
 */
async function doHunspellEtc(
  info: T.DictionaryInfo,
  entries: T.DictionaryEntry[]
): Promise<void> {
  const wordlistResponse = getWordList(entries);
  if (!wordlistResponse.ok) {
    throw new Error(JSON.stringify(wordlistResponse.errors));
  }
  // const hunspell = makeHunspell(wordlistResponse.wordlist);
  // await uploadHunspellToStorage(hunspell);
  // upload exactly once
  await uploadAllWordsToStoarage(info, wordlistResponse.wordlist);
}
/**
@ -88,9 +88,7 @@ async function doHunspellEtc(info: T.DictionaryInfo, entries: T.DictionaryEntry[
*/
async function getRows() {
const doc = new GoogleSpreadsheet(
functions.config().sheet.id,
);
const doc = new GoogleSpreadsheet(functions.config().sheet.id);
await doc.useServiceAccountAuth({
client_email: functions.config().serviceacct.email,
private_key: functions.config().serviceacct.key,
@ -98,7 +96,7 @@ async function getRows() {
await doc.loadInfo();
const sheet = doc.sheetsByIndex[0];
const rows = await sheet.getRows();
rows.sort((a, b) => a.ts > b.ts ? -1 : a.ts < b.ts ? 1 : 0);
rows.sort((a, b) => (a.ts > b.ts ? -1 : a.ts < b.ts ? 1 : 0));
return rows;
}
@ -117,7 +115,7 @@ async function getRawEntries(): Promise<T.DictionaryEntry[]> {
}
sheetIndex++;
const row = rows[i];
const nextRow = rows[i+1] || undefined;
const nextRow = rows[i + 1] || undefined;
if (row.ts === nextRow?.ts) {
if (sameEntry(row, nextRow)) {
// this looks like a duplicate entry made by the sheets api
@ -137,15 +135,19 @@ async function getRawEntries(): Promise<T.DictionaryEntry[]> {
g: simplifyPhonetics(row.f),
e: row.e,
};
dictionaryEntryNumberFields.forEach((field: T.DictionaryEntryNumberField) => {
dictionaryEntryNumberFields.forEach(
(field: T.DictionaryEntryNumberField) => {
if (row[field]) e[field] = parseInt(row[field]);
});
}
);
dictionaryEntryTextFields.forEach((field: T.DictionaryEntryTextField) => {
if (row[field]) e[field] = row[field].trim();
});
dictionaryEntryBooleanFields.forEach((field: T.DictionaryEntryBooleanField) => {
dictionaryEntryBooleanFields.forEach(
(field: T.DictionaryEntryBooleanField) => {
if (row[field]) e[field] = true;
});
}
);
entries.push(standardizeEntry(e));
}
// add alphabetical index
@ -158,8 +160,11 @@ async function getRawEntries(): Promise<T.DictionaryEntry[]> {
return entries;
}
function checkForErrors(entries: T.DictionaryEntry[]): T.DictionaryEntryError[] {
return entries.reduce((errors: T.DictionaryEntryError[], entry: T.DictionaryEntry) => {
function checkForErrors(
entries: T.DictionaryEntry[]
): T.DictionaryEntryError[] {
return entries.reduce(
(errors: T.DictionaryEntryError[], entry: T.DictionaryEntry) => {
const response = validateEntry(entry);
if ("errors" in response && response.errors.length) {
return [...errors, response];
@ -177,7 +182,11 @@ function checkForErrors(entries: T.DictionaryEntry[]): T.DictionaryEntryError[]
};
return [...errors, error];
}
if (!complement.c?.includes("n.") && !complement.c?.includes("adj.") && !complement.c?.includes("adv.")) {
if (
!complement.c?.includes("n.") &&
!complement.c?.includes("adj.") &&
!complement.c?.includes("adv.")
) {
const error: T.DictionaryEntryError = {
errors: ["complement link to invalid complement"],
ts: entry.ts,
@ -190,7 +199,9 @@ function checkForErrors(entries: T.DictionaryEntry[]): T.DictionaryEntryError[]
}
}
return errors;
}, []);
},
[]
);
}
// function findDuplicates(entries: T.DictionaryEntry[]): T.DictionaryEntry[] {
@ -234,8 +245,14 @@ async function upload(content: Buffer | string, filename: string) {
// ]);
// }
/**
 * Serializes the dictionary info and word list to JSON and uploads the result
 * under `allWordsJsonFilename`.
 *
 * @param info dictionary info stored alongside the words
 * @param words the words to publish
 */
async function uploadAllWordsToStoarage(
  info: T.DictionaryInfo,
  words: T.PsString[]
) {
  // typed declaration (instead of an `as` assertion) so the payload shape is checked
  const allWords: T.AllWordsWithInflections = { info, words };
  await upload(JSON.stringify(allWords), allWordsJsonFilename);
}
async function uploadDictionaryToStorage(dictionary: T.Dictionary) {

View File

@ -7,7 +7,7 @@
"private": true,
"dependencies": {
"@fortawesome/fontawesome-free": "^5.15.2",
"@lingdocs/ps-react": "5.10.1",
"@lingdocs/ps-react": "6.0.0",
"@testing-library/jest-dom": "^5.11.4",
"@testing-library/react": "^11.1.0",
"@testing-library/user-event": "^12.1.10",

View File

@ -99,13 +99,23 @@ hr {
background-color: var(--closer) !important;
color: var(--high-contrast);
}
.bg-white {
background-color: var(--theme-shade) !important;
}
/* TODO: better handling of modals across light and dark modes */
.modal-body, .modal-title {
color:#1d1f25;
.modal-body,
.modal-title {
color: var(--high-contrast);
}
.modal-content {
background-color: var(--theme-shade);
}
.modal-content .table {
color: var(--high-contrast);
}
.table {
@ -310,6 +320,7 @@ input {
.entry-suggestion-button {
right: 15px;
}
.conjugation-search-button {
right: 15px;
}
@ -339,6 +350,7 @@ input {
text-decoration: none;
color: var(--farther);
}
.clickable:hover {
color: var(--farther);
}
@ -380,6 +392,7 @@ input {
width: 10em;
height: 10em;
}
.loader {
margin: 60px auto;
font-size: 10px;
@ -395,24 +408,29 @@ input {
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear;
}
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
/* End of loading animation from https://projects.lukehaas.me/css-loaders/ */

View File

@ -72,6 +72,7 @@ import PhraseBuilder from "./screens/PhraseBuilder";
import { searchAllInflections } from "./lib/search-all-inflections";
import { addToWordlist } from "./lib/wordlist-database";
import ScriptToPhonetics from "./screens/ScriptToPhonetics";
import { Modal, Button } from "react-bootstrap";
// to allow Mousetrap key combos even when input fields are in focus
Mousetrap.prototype.stopCallback = function () {
@ -107,6 +108,7 @@ class App extends Component<RouteComponentProps, State> {
this.state = {
dictionaryStatus: "loading",
dictionaryInfo: undefined,
showModal: false,
// TODO: Choose between the saved options and the options in the saved user
options: savedOptions
? savedOptions
@ -146,6 +148,8 @@ class App extends Component<RouteComponentProps, State> {
this.handleRefreshReviewTasks = this.handleRefreshReviewTasks.bind(this);
this.handleDictionaryUpdate = this.handleDictionaryUpdate.bind(this);
this.handleInflectionSearch = this.handleInflectionSearch.bind(this);
this.handleShowModal = this.handleShowModal.bind(this);
this.handleCloseModal = this.handleCloseModal.bind(this);
}
public componentDidMount() {
@ -583,6 +587,14 @@ class App extends Component<RouteComponentProps, State> {
});
}
/** Sets `showModal` to false in component state. */
private handleCloseModal() {
  const showModal = false;
  this.setState({ showModal });
}
/** Sets `showModal` to true in component state. */
private handleShowModal() {
  const showModal = true;
  this.setState({ showModal });
}
render() {
return (
<div
@ -641,7 +653,7 @@ class App extends Component<RouteComponentProps, State> {
>
<div className="my-4">New words this month</div>
</Link>
<div className="mt-4 pt-3">
<div className="my-4 pt-3">
<Link
to="/phrase-builder"
className="plain-link h5 font-weight-light"
@ -656,6 +668,12 @@ class App extends Component<RouteComponentProps, State> {
Grammar
</a>
</div>
<button
onClick={this.handleShowModal}
className="mt-2 btn btn-lg btn-secondary"
>
New Phonetics for ی's!! 👀
</button>
</div>
</Route>
<Route path="/about">
@ -816,6 +834,87 @@ class App extends Component<RouteComponentProps, State> {
/>
)}
</footer>
<Modal
show={this.state.showModal}
onHide={this.handleCloseModal}
centered
>
<Modal.Header closeButton>
<Modal.Title>Phonetics Update! 📰</Modal.Title>
</Modal.Header>
<Modal.Body>
<p>
The phonetics for{" "}
<span style={{ backgroundColor: "rgba(255,255,0,0.4)" }}>
two of the five ی's have been updated
</span>{" "}
to something much more logical and helpful for pronunciation.
</p>
<h5>Pure Vowels (mouth stays still)</h5>
<table className="table">
<thead>
<tr>
<th scope="col">Letter</th>
<th scope="col">Phonetics</th>
<th scope="col">Sound</th>
</tr>
</thead>
<tbody>
<tr>
<td>ي</td>
<td>ee</td>
<td>long "ee" like "bee"</td>
</tr>
<tr>
<td>ې</td>
<td>e</td>
<td>
<div>
like "ee" but <em>with a slightly more open mouth</em>
</div>
<div className="small">
This is a special vowel <em>not found in English</em>
</div>
</td>
</tr>
</tbody>
</table>
<h5>Dipthongs (pure vowel + y)</h5>
<table className="table">
<thead>
<tr>
<th scope="col">Letter</th>
<th scope="col">Phonetics</th>
<th scope="col">Sound</th>
</tr>
</thead>
<tbody>
<tr style={{ backgroundColor: "rgba(255,255,0,0.4)" }}>
<td>ی</td>
<td>ay</td>
<td>short 'a' + y</td>
</tr>
<tr>
<td>ۍ</td>
<td>uy</td>
<td>'u' shwa (ə) + y</td>
</tr>
<tr style={{ backgroundColor: "rgba(255,255,0,0.4)" }}>
<td>ئ</td>
<td>ey</td>
<td>
<div>'e' (ې) + y</div>
</td>
</tr>
</tbody>
</table>
</Modal.Body>
<Modal.Footer>
<Button variant="secondary" onClick={this.handleCloseModal}>
Close
</Button>
</Modal.Footer>
</Modal>
</div>
);
}

View File

@ -21,14 +21,11 @@ import { fuzzifyPashto } from "./fuzzify-pashto/fuzzify-pashto";
import relevancy from "relevancy";
import { makeAWeeBitFuzzy } from "./wee-bit-fuzzy";
import { getTextOptions } from "./get-text-options";
import {
DictionaryAPI,
State,
} from "../types/dictionary-types";
import { DictionaryAPI, State } from "../types/dictionary-types";
// const dictionaryBaseUrl = "https://storage.googleapis.com/lingdocs/";
// locations of the published dictionary and its info file (each declared once)
const dictionaryUrl = `https://storage.googleapis.com/lingdocs/dictionary`;
const dictionaryInfoUrl = `https://storage.googleapis.com/lingdocs/dictionary-info`;
const dictionaryInfoLocalStorageKey = "dictionaryInfo5";
const dictionaryCollectionName = "dictionary3";
@ -37,11 +34,13 @@ export const pageSize = 35;
const relevancySorter = new relevancy.Sorter();
// Try opening an IndexedDB database at module load; if that fails, log the
// error and tell the user how to get the dictionary working.
const db = indexedDB.open("inPrivate");
db.onerror = (e) => {
  console.error(e);
  alert(
    "Your browser does not have IndexedDB enabled. This might be because you are using private mode. Please use regular mode or enable IndexedDB to use this dictionary"
  );
};
const dictDb = new DictionaryDb({
url: dictionaryUrl,
@ -57,7 +56,8 @@ function makeSearchStringSafe(searchString: string): string {
/**
 * Turns an English search string into pattern text that tolerates the
 * "-our" / "-or" spelling difference.
 *
 * Steps: trim, strip regex-special punctuation, remove the first "to "
 * (so "to run" becomes "run"), then make the "u" optional in every "our"
 * and "or".
 *
 * @param input raw search text
 * @returns the cleaned, fuzzified pattern text
 */
function fuzzifyEnglish(input: string): string {
  const safeInput = input.trim().replace(/[#-.]|[[-^]|[?|{}]/g, "");
  // TODO: Could do: cover british/american things like offense / offence
  // NOTE(review): "to " is removed wherever it first occurs, not only at the
  // start ("photo album" -> "phoalbum") — confirm this is intended.
  // Single return: all three replacements must run.
  return safeInput
    .replace("to ", "")
    .replace(/our/g, "ou?r")
    .replace(/or/g, "ou?r");
}
@ -100,16 +100,18 @@ function tsOneMonthBack(): number {
return d.getTime();
}
function alphabeticalLookup({ searchString, page }: {
searchString: string,
page: number,
function alphabeticalLookup({
searchString,
page,
}: {
searchString: string;
page: number;
}): T.DictionaryEntry[] {
const r = new RegExp("^" + sanitizePashto(makeSearchStringSafe(searchString)));
const r = new RegExp(
"^" + sanitizePashto(makeSearchStringSafe(searchString))
);
const regexResults: T.DictionaryEntry[] = dictDb.collection.find({
$or: [
{p: { $regex: r }},
{g: { $regex: r }},
],
$or: [{ p: { $regex: r } }, { g: { $regex: r } }],
});
const indexNumbers = regexResults.map((mpd: any) => mpd.i);
// Find the matching word that occurs first in the Pashto index
@ -119,8 +121,9 @@ function alphabeticalLookup({ searchString, page }: {
}
// $gt query from that first occurrence
if (firstIndexNumber !== null) {
return dictDb.collection.chain()
.find({ i: { $gt: firstIndexNumber - 1 }})
return dictDb.collection
.chain()
.find({ i: { $gt: firstIndexNumber - 1 } })
.simplesort("i")
.limit(page * pageSize)
.data();
@ -128,32 +131,40 @@ function alphabeticalLookup({ searchString, page }: {
return [];
}
/**
 * Entry point for fuzzy dictionary searches.
 *
 * If `searchString` converts to a non-zero number it is treated as an entry
 * timestamp and looked up directly with `dictionary.findOneByTs`. Otherwise
 * the search is delegated to `pashtoFuzzyLookup` when `language` is
 * "Pashto", and to `englishLookup` for any other value.
 *
 * @param searchString text typed by the user, or an entry timestamp
 * @param language which lookup to delegate to
 * @param page passed through to the delegated lookup
 * @param tpFilter optional type predicate restricting results to `S`
 * @returns the matching entries
 */
function fuzzyLookup<S extends T.DictionaryEntry>({
  searchString,
  language,
  page,
  tpFilter,
}: {
  searchString: string;
  language: "Pashto" | "English" | "Both";
  page: number;
  tpFilter?: (e: T.DictionaryEntry) => e is S;
}): S[] {
  // TODO: Implement working with both
  const ts = Number(searchString);
  if (ts) {
    const entry = dictionary.findOneByTs(ts);
    if (!entry) return [];
    // with no filter there is nothing to narrow by, so S cannot be verified here
    if (!tpFilter) return [entry as S];
    // honour the filter so an entry of the wrong type is never returned as S
    return tpFilter(entry) ? [entry] : [];
  }
  return language === "Pashto"
    ? pashtoFuzzyLookup({ searchString, page, tpFilter })
    : englishLookup({ searchString, page, tpFilter });
}
function englishLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilter }: {
searchString: string,
page: number,
tpFilter?: (e: T.DictionaryEntry) => e is S,
function englishLookup<S extends T.DictionaryEntry>({
searchString,
page,
tpFilter,
}: {
searchString: string;
page: number;
tpFilter?: (e: T.DictionaryEntry) => e is S;
}): S[] {
function sortByR(a: T.DictionaryEntry, b: T.DictionaryEntry) {
return (b.r || 3) - (a.r || 3);
};
}
let resultsGiven: number[] = [];
// get exact results
const exactQuery = {
@ -162,7 +173,8 @@ function englishLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilt
},
};
const exactResultsLimit = pageSize < 10 ? Math.floor(pageSize / 2) : 10;
const exactResults = dictDb.collection.chain()
const exactResults = dictDb.collection
.chain()
.find(exactQuery)
.limit(exactResultsLimit)
.simplesort("i")
@ -176,14 +188,18 @@ function englishLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilt
},
$loki: { $nin: resultsGiven },
};
const startingResultsLimit = (pageSize * page) - resultsGiven.length;
const startingResults = dictDb.collection.chain()
const startingResultsLimit = pageSize * page - resultsGiven.length;
const startingResults = dictDb.collection
.chain()
.find(startingQuery)
.limit(startingResultsLimit)
.simplesort("i")
.data();
startingResults.sort(sortByR);
resultsGiven = [...resultsGiven, ...startingResults.map((mpd: any) => mpd.$loki)];
resultsGiven = [
...resultsGiven,
...startingResults.map((mpd: any) => mpd.$loki),
];
// get results with full word match anywhere
const fullWordQuery = {
e: {
@ -191,14 +207,18 @@ function englishLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilt
},
$loki: { $nin: resultsGiven },
};
const fullWordResultsLimit = (pageSize * page) - resultsGiven.length;
const fullWordResults = dictDb.collection.chain()
const fullWordResultsLimit = pageSize * page - resultsGiven.length;
const fullWordResults = dictDb.collection
.chain()
.find(fullWordQuery)
.limit(fullWordResultsLimit)
.simplesort("i")
.data();
fullWordResults.sort(sortByR);
resultsGiven = [...resultsGiven, ...fullWordResults.map((mpd: any) => mpd.$loki)]
resultsGiven = [
...resultsGiven,
...fullWordResults.map((mpd: any) => mpd.$loki),
];
// get results with partial match anywhere
const partialMatchQuery = {
e: {
@ -206,8 +226,9 @@ function englishLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilt
},
$loki: { $nin: resultsGiven },
};
const partialMatchLimit = (pageSize * page) - resultsGiven.length;
const partialMatchResults = dictDb.collection.chain()
const partialMatchLimit = pageSize * page - resultsGiven.length;
const partialMatchResults = dictDb.collection
.chain()
.where(tpFilter ? tpFilter : () => true)
.find(partialMatchQuery)
.limit(partialMatchLimit)
@ -234,16 +255,21 @@ function pashtoExactLookup(searchString: string): T.DictionaryEntry[] {
});
}
function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tpFilter }: {
searchString: string,
page: number,
tpFilter?: (e: T.DictionaryEntry) => e is S,
function pashtoFuzzyLookup<S extends T.DictionaryEntry>({
searchString,
page,
tpFilter,
}: {
searchString: string;
page: number;
tpFilter?: (e: T.DictionaryEntry) => e is S;
}): S[] {
let resultsGiven: number[] = [];
// Check if it's in Pashto or Latin script
const searchStringToUse = sanitizePashto(makeSearchStringSafe(searchString));
const index = isPashtoScript(searchStringToUse) ? "p" : "g";
const search = index === "g" ? simplifyPhonetics(searchStringToUse) : searchStringToUse;
const search =
index === "g" ? simplifyPhonetics(searchStringToUse) : searchStringToUse;
const infIndex = index === "p" ? "p" : "f";
// Get exact matches
const exactExpression = new RegExp("^" + search);
@ -259,11 +285,14 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tp
const pashtoExactResultFields = [
{
[index]: { $regex: exactExpression },
}, {
},
{
[arabicPluralIndex]: { $regex: weeBitFuzzy },
}, {
},
{
[pashtoPluralIndex]: { $regex: weeBitFuzzy },
}, {
},
{
[presentStemIndex]: { $regex: weeBitFuzzy },
},
{
@ -277,7 +306,8 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tp
// just a special case in case really small limits are used
// multiple times scrolling / chunking / sorting might get a bit messed up if using a limit of less than 10
const exactResultsLimit = pageSize < 10 ? Math.floor(pageSize / 2) : 10;
const exactResults = dictDb.collection.chain()
const exactResults = dictDb.collection
.chain()
.find(exactQuery)
.limit(exactResultsLimit)
.simplesort("i")
@ -289,8 +319,9 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tp
[index]: { $regex: slightlyFuzzy },
$loki: { $nin: resultsGiven },
};
const slightlyFuzzyResultsLimit = (pageSize * page) - resultsGiven.length;
const slightlyFuzzyResults = dictDb.collection.chain()
const slightlyFuzzyResultsLimit = pageSize * page - resultsGiven.length;
const slightlyFuzzyResults = dictDb.collection
.chain()
.find(slightlyFuzzyQuery)
.limit(slightlyFuzzyResultsLimit)
.data();
@ -306,48 +337,57 @@ function pashtoFuzzyLookup<S extends T.DictionaryEntry>({ searchString, page, tp
const pashtoFuzzyQuery = [
{
[index]: { $regex: fuzzyPashtoExperssion },
}, { // TODO: Issue, this fuzzy doesn't line up well because it's not the simplified phonetics - still has 's etc
},
{
// TODO: Issue, this fuzzy doesn't line up well because it's not the simplified phonetics - still has 's etc
[arabicPluralIndex]: { $regex: fuzzyPashtoExperssion },
}, {
},
{
[presentStemIndex]: { $regex: fuzzyPashtoExperssion },
}
},
];
// fuzzy results should be allowed to take up the rest of the limit (not used up by exact results)
const fuzzyResultsLimit = (pageSize * page) - resultsGiven.length;
const fuzzyResultsLimit = pageSize * page - resultsGiven.length;
// don't get these fuzzy results if searching in only English
const fuzzyQuery = {
$or: pashtoFuzzyQuery,
$loki: { $nin: resultsGiven },
};
const fuzzyResults = dictDb.collection.chain()
const fuzzyResults = dictDb.collection
.chain()
.find(fuzzyQuery)
.limit(fuzzyResultsLimit)
.data();
const results = tpFilter
? [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults].filter(tpFilter)
? [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults].filter(
tpFilter
)
: [...exactResults, ...slightlyFuzzyResults, ...fuzzyResults];
// sort out each chunk (based on limit used multiple times by infinite scroll)
// so that when infinite scrolling, it doesn't re-sort the previous chunks given
const closeResultsLength = exactResults.length + slightlyFuzzyResults.length;
const chunksToSort = chunkOutArray(results, pageSize);
return chunksToSort
.reduce((acc, cur, i) => ((i === 0)
return chunksToSort.reduce(
(acc, cur, i) =>
i === 0
? [
...sortByRelevancy(cur.slice(0, closeResultsLength), search, index),
...sortByRelevancy(cur.slice(closeResultsLength), search, index),
]
: [
...acc,
...sortByRelevancy(cur, search, index),
]), []);
: [...acc, ...sortByRelevancy(cur, search, index)],
[]
);
}
/**
 * Sorts a chunk of lookup results by relevancy to the search string.
 *
 * @param arr - entries to sort
 * @param searchI - the search string passed to the relevancy sorter
 * @param index - name of the entry field whose value is scored
 * @returns the result of `relevancySorter.sort` for `arr`
 */
function sortByRelevancy<T>(arr: T[], searchI: string, index: string): T[] {
  // `calc` is handed in by relevancySorter and is applied to the chosen field of each entry
  return relevancySorter.sort(arr, searchI, (obj: any, calc: any) =>
    calc(obj[index])
  );
}
function relatedWordsLookup(word: T.DictionaryEntry): T.DictionaryEntry[] {
const wordArray = word.e.trim()
const wordArray = word.e
.trim()
.replace(/\?/g, "")
.replace(/( |,|\.|!|;|\(|\))/g, " ")
.split(/ +/)
@ -357,7 +397,8 @@ function relatedWordsLookup(word: T.DictionaryEntry): T.DictionaryEntry[] {
let r: RegExp;
try {
r = new RegExp(`\\b${w}\\b`, "i");
const relatedToWord = dictDb.collection.chain()
const relatedToWord = dictDb.collection
.chain()
.find({
// don't include the original word
ts: { $ne: word.ts },
@ -373,20 +414,23 @@ function relatedWordsLookup(word: T.DictionaryEntry): T.DictionaryEntry[] {
}
});
// Remove duplicate items - https://stackoverflow.com/questions/40811451/remove-duplicates-from-a-array-of-objects
results = results.filter(function(a) {
results = results.filter(function (a) {
// @ts-ignore
return !this[a.$loki] && (this[a.$loki] = true);
}, Object.create(null));
return(results);
return results;
}
/**
 * Returns the result of an unfiltered `find()` on the dictionary collection
 * (no query object is passed).
 */
export function allEntries() {
  const everything = dictDb.collection.find();
  return everything;
}
function makeLookupPortal<X extends T.DictionaryEntry>(tpFilter: (x: T.DictionaryEntry) => x is X): T.EntryLookupPortal<X> {
function makeLookupPortal<X extends T.DictionaryEntry>(
tpFilter: (x: T.DictionaryEntry) => x is X
): T.EntryLookupPortal<X> {
return {
search: (s: string) => fuzzyLookup({
search: (s: string) =>
fuzzyLookup({
searchString: s,
language: "Pashto",
page: 1,
@ -397,7 +441,7 @@ function makeLookupPortal<X extends T.DictionaryEntry>(tpFilter: (x: T.Dictionar
if (!res) return undefined;
return tpFilter(res) ? res : undefined;
},
}
};
}
function makeVerbLookupPortal(): T.EntryLookupPortal<T.VerbEntry> {
@ -409,12 +453,15 @@ function makeVerbLookupPortal(): T.EntryLookupPortal<T.VerbEntry> {
page: 1,
tpFilter: tp.isVerbDictionaryEntry,
});
return vEntries.map((entry): T.VerbEntry => ({
return vEntries.map(
(entry): T.VerbEntry => ({
entry,
complement: (entry.c?.includes("comp.") && entry.l)
complement:
entry.c?.includes("comp.") && entry.l
? dictionary.findOneByTs(entry.l)
: undefined,
}));
})
);
},
getByTs: (ts: number): T.VerbEntry | undefined => {
const entry = dictDb.findOneByTs(ts);
@ -436,7 +483,7 @@ function makeVerbLookupPortal(): T.EntryLookupPortal<T.VerbEntry> {
})();
return { entry, complement };
},
}
};
}
export const entryFeeder: T.EntryFeeder = {
@ -445,26 +492,28 @@ export const entryFeeder: T.EntryFeeder = {
adjectives: makeLookupPortal(tp.isAdjectiveEntry),
locativeAdverbs: makeLookupPortal(tp.isLocativeAdverbEntry),
adverbs: makeLookupPortal(tp.isAdverbEntry),
}
};
export const dictionary: DictionaryAPI = {
// NOTE: For some reason that I do not understand you have to pass the functions from the
// dictionary core class in like this... ie. initialize: dictDb.initialize will mess up the this usage
// in the dictionary core class
initialize: async () => await dictDb.initialize(),
update: async (notifyUpdateComing: () => void) => await dictDb.updateDictionary(notifyUpdateComing),
search: function(state: State): T.DictionaryEntry[] {
update: async (notifyUpdateComing: () => void) =>
await dictDb.updateDictionary(notifyUpdateComing),
search: function (state: State): T.DictionaryEntry[] {
const searchString = revertSpelling(
state.searchValue,
getTextOptions(state).spelling,
getTextOptions(state).spelling
);
if (state.searchValue === "") {
return [];
}
return (state.options.searchType === "alphabetical" && state.options.language === "Pashto")
return state.options.searchType === "alphabetical" &&
state.options.language === "Pashto"
? alphabeticalLookup({
searchString,
page: state.page
page: state.page,
})
: fuzzyLookup({
searchString,
@ -473,15 +522,16 @@ export const dictionary: DictionaryAPI = {
});
},
exactPashtoSearch: pashtoExactLookup,
getNewWordsThisMonth: function(): T.DictionaryEntry[] {
return dictDb.collection.chain()
.find({ ts: { $gt: tsOneMonthBack() }})
getNewWordsThisMonth: function (): T.DictionaryEntry[] {
return dictDb.collection
.chain()
.find({ ts: { $gt: tsOneMonthBack() } })
.simplesort("ts")
.data()
.reverse();
},
findOneByTs: (ts: number) => dictDb.findOneByTs(ts),
findRelatedEntries: function(entry: T.DictionaryEntry): T.DictionaryEntry[] {
findRelatedEntries: function (entry: T.DictionaryEntry): T.DictionaryEntry[] {
return relatedWordsLookup(entry);
},
}
};

View File

@ -78,7 +78,7 @@ const defaultLatinInfo: IDefaultInfoBlock = {
// TODO:
["anbiya", "ambiya"],
["lootfun", "lUtfan"],
["sarey", "saRey"],
["saray", "saRay"],
["senga", "tsanga"],
["daktur", "DakTar"],
["iteebar", "itibaar"],
@ -87,9 +87,9 @@ const defaultLatinInfo: IDefaultInfoBlock = {
["bekár", "bekaar"],
["chaai", "cháai"],
["day", "daai"],
["dai", "dey"],
["dai", "day"],
["daktar", "Daktár"],
["sarái", "saRey"],
["sarái", "saRay"],
["beter", "bahtár"],
["doosti", "dostee"],
["dắraghlum", "deraghlum"], // using the ă along with a combining ́
@ -154,7 +154,7 @@ const defaultSimpleLatinInfo: IDefaultInfoBlock = {
// TODO:
["anbiya", "ambiya"],
["lootfun", "lUtfan"],
["sarey", "saRey"],
["saray", "saRay"],
["senga", "tsanga"],
["daktur", "DakTar"],
["iteebar", "itibaar"],
@ -163,9 +163,9 @@ const defaultSimpleLatinInfo: IDefaultInfoBlock = {
["bekar", "bekaar"],
["chaai", "chaai"],
["day", "daai"],
["dai", "dey"],
["dai", "day"],
["daktar", "Daktar"],
["sarai", "saRey"],
["sarai", "saRay"],
["beter", "bahtar"],
["doosti", "dostee"],
["daraghlum", "deraghlum"], // using the ă along with a combining ́
@ -211,7 +211,7 @@ const optionsPossibilities: ITestOptions[] = [
viceVersaMatches: true,
},
{
options: {matchStart: "word"}, // same as default
options: { matchStart: "word" }, // same as default
...defaultInfo,
viceVersaMatches: true,
},
@ -221,39 +221,31 @@ const optionsPossibilities: ITestOptions[] = [
viceVersaMatches: true,
},
{
matches: [
...matchesWithSpaces,
],
matches: [...matchesWithSpaces],
nonMatches: [],
options: {allowSpacesInWords: true},
options: { allowSpacesInWords: true },
viceVersaMatches: true,
},
{
matches: [
...matchesWithSpacesLatin,
],
matches: [...matchesWithSpacesLatin],
nonMatches: [],
options: {allowSpacesInWords: true, script: "Latin"},
options: { allowSpacesInWords: true, script: "Latin" },
viceVersaMatches: true,
},
{
matches: [],
nonMatches: matchesWithSpaces,
options: {allowSpacesInWords: false},
options: { allowSpacesInWords: false },
},
{
matches: [],
nonMatches: matchesWithSpacesLatin,
options: {allowSpacesInWords: false, script: "Latin"},
options: { allowSpacesInWords: false, script: "Latin" },
},
{
matches: [
["کار", "بېکاري"],
],
nonMatches: [
["سرک", "بېترک"],
],
options: {matchStart: "anywhere"},
matches: [["کار", "بېکاري"]],
nonMatches: [["سرک", "بېترک"]],
options: { matchStart: "anywhere" },
},
{
matches: [
@ -264,7 +256,7 @@ const optionsPossibilities: ITestOptions[] = [
["سړي", "سړيتوب"],
["کور", "کورونه"],
],
options: {matchWholeWordOnly: true},
options: { matchWholeWordOnly: true },
viceVersaMatches: true,
},
{
@ -276,7 +268,7 @@ const optionsPossibilities: ITestOptions[] = [
["سړي", " سړيتوب"],
["کور", "خټين کورونه"],
],
options: {matchStart: "string"},
options: { matchStart: "string" },
},
{
matches: [
@ -287,12 +279,31 @@ const optionsPossibilities: ITestOptions[] = [
["سړي", " سړيتوب"],
["کور", "خټين کورونه"],
],
options: {matchStart: "string"},
options: { matchStart: "string" },
},
];
// Characters that must not be treated as part of a Pashto word.
// One test case is generated per entry (see the forEach over this list),
// so each character is listed exactly once.
const punctuationToExclude = [
  "،",
  "؟",
  "؛",
  "۔",
  "۲",
  "۹",
  "۰",
  "»",
  "«",
  "٫",
  "!",
  ".",
  "؋",
  "٪",
  "٬",
  "×",
  ")",
  "(",
  " ",
  "\t",
];
optionsPossibilities.forEach((o) => {
@ -362,7 +373,10 @@ test(`وs should be optional if entered in search string`, () => {
});
test(`matchWholeWordOnly should override matchStart = "anywhere"`, () => {
const re = fuzzifyPashto("کار", { matchWholeWordOnly: true, matchStart: "anywhere" });
const re = fuzzifyPashto("کار", {
matchWholeWordOnly: true,
matchStart: "anywhere",
});
// eslint-disable-next-line
const result = "کار کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toHaveLength(1);
@ -382,14 +396,19 @@ test(`returnWholeWord should return the whole word`, () => {
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa, bekaara ma gurdza.".match(new RegExp(reLatin));
const resultLatin = "kaaroona kawa, bekaara ma gurdza.".match(
new RegExp(reLatin)
);
expect(resultLatin).toHaveLength(1);
expect(resultLatin).toContain("kaaroona");
});
test(`returnWholeWord should return the whole word even when starting the matching in the middle`, () => {
// With Pashto Script
const re = fuzzifyPashto("کار", { returnWholeWord: true, matchStart: "anywhere" });
const re = fuzzifyPashto("کار", {
returnWholeWord: true,
matchStart: "anywhere",
});
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه شه".match(new RegExp(re, "g"));
expect(result).toHaveLength(2);
@ -402,14 +421,20 @@ test(`returnWholeWord should return the whole word even when starting the matchi
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(new RegExp(reLatin, "g"));
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(
new RegExp(reLatin, "g")
);
expect(resultLatin).toHaveLength(2);
expect(resultLatin).toContain("bekaara");
});
test(`returnWholeWord should should not return partial matches if matchWholeWordOnly is true`, () => {
// With Pashto Script
const re = fuzzifyPashto("کار", { returnWholeWord: true, matchStart: "anywhere", matchWholeWordOnly: true });
const re = fuzzifyPashto("کار", {
returnWholeWord: true,
matchStart: "anywhere",
matchWholeWordOnly: true,
});
// eslint-disable-next-line
const result = "کارونه کوه، بېکاره مه ګرځه".match(new RegExp(re));
expect(result).toBeNull();
@ -422,13 +447,18 @@ test(`returnWholeWord should should not return partial matches if matchWholeWord
script: "Latin",
});
// eslint-disable-next-line
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(new RegExp(reLatin));
const resultLatin = "kaaroona kawa bekaara ma gurdza".match(
new RegExp(reLatin)
);
expect(resultLatin).toBeNull();
});
punctuationToExclude.forEach((m) => {
test(`${m} should not be considered part of a Pashto word`, () => {
const re = fuzzifyPashto("کور", { returnWholeWord: true, matchStart: "word" });
const re = fuzzifyPashto("کور", {
returnWholeWord: true,
matchStart: "word",
});
// ISSUE: This should also work when the word is PRECEDED by the punctuation
// Need to work with a lookbehind equivalent
// eslint-disable-next-line
@ -446,7 +476,11 @@ punctuationToExclude.forEach((m) => {
let failed = false;
// if environment is not es2018 with lookbehind support (like node 6, 8) this will fail
try {
const re = fuzzifyPashto("کور", { returnWholeWord: true, matchStart: "word", es2018: true });
const re = fuzzifyPashto("کور", {
returnWholeWord: true,
matchStart: "word",
es2018: true,
});
// eslint-disable-next-line
result = `زمونږ ${m}کورونه${m} دي`.match(new RegExp(re));
} catch (error) {

View File

@ -14,7 +14,7 @@ const velarPlosives = "ګغږکقگك";
const rLikeSounds = "رړڑڼ";
const labialPlosivesAndFricatives = "فپب";
// Includes Arabic ى \u0649
const theFiveYeys = "ېۍیيئےى";
const theFiveYays = "ېۍیيئےى";
const guttural = "ښخشخهحغګ";
interface IReplacerInfoItem {
@ -38,7 +38,6 @@ const ghzCombo = ["غز", "زغ"];
const pxCombo = ["پښ", "ښپ"];
// Each cluster is listed together with its reversed spelling (same convention as
// ghzCombo and pxCombo). The last entry used to repeat the first one, which left
// the reversal of the third cluster missing.
const kshCombo = ["کش", "شک", "کښ", "ښک"];
export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "اً", range: "ان" },
{
@ -54,15 +53,25 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "ٳ", range: "اآهأ" },
{ char: "یٰ", range: "ای", plus: ["یٰ"] },
{ char: "ی", range: theFiveYeys, plus: ["ئی", "ئي", "یٰ"], ignorableIfInMiddle: true },
{ char: "ي", range: theFiveYeys, plus: ["ئی", "ئي", "یٰ"], ignorableIfInMiddle: true },
{ char: "ې", range: theFiveYeys, ignorableIfInMiddle: true },
{ char: "ۍ", range: theFiveYeys },
{ char: "ئي", range: theFiveYeys, plus: ["ئی", "ئي"] },
{ char: "ئی", range: theFiveYeys, plus: ["ئی", "ئي"] },
{ char: "ئے", range: theFiveYeys, plus: ["ئی", "ئي", "يې"]},
{ char: "ئ", range: theFiveYeys, ignorableIfInMiddle: true },
{ char: "ے", range: theFiveYeys },
{
char: "ی",
range: theFiveYays,
plus: ["ئی", "ئي", "یٰ"],
ignorableIfInMiddle: true,
},
{
char: "ي",
range: theFiveYays,
plus: ["ئی", "ئي", "یٰ"],
ignorableIfInMiddle: true,
},
{ char: "ې", range: theFiveYays, ignorableIfInMiddle: true },
{ char: "ۍ", range: theFiveYays },
{ char: "ئي", range: theFiveYays, plus: ["ئی", "ئي"] },
{ char: "ئی", range: theFiveYays, plus: ["ئی", "ئي"] },
{ char: "ئے", range: theFiveYays, plus: ["ئی", "ئي", "يې"] },
{ char: "ئ", range: theFiveYays, ignorableIfInMiddle: true },
{ char: "ے", range: theFiveYays },
{ char: "س", range: sSounds },
{ char: "ص", range: sSounds },
@ -79,7 +88,7 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "ع", range: "اوع", ignorable: true },
{ char: "و", range: "وع", plus: ["وو"], ignorableIfInMiddle: true },
{ char: "ؤ", range: "وع"},
{ char: "ؤ", range: "وع" },
{ char: "ښ", range: guttural },
{ char: "غ", range: guttural },
@ -91,7 +100,7 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
{ char: "ز", range: zSounds },
{ char: "ض", range: zSounds },
{ char: "ذ", range: zSounds },
{ char: "ځ", range: zSounds + "جڅ"},
{ char: "ځ", range: zSounds + "جڅ" },
{ char: "ظ", range: zSounds },
{ char: "ژ", range: "زضظژذځږج" },
@ -133,11 +142,12 @@ export const pashtoReplacerInfo: IPashtoReplacerInfoItem[] = [
];
// tslint:disable-next-line
export const pashtoReplacerRegex = /اً|أ|ا|آ|ٱ|ٲ|ٳ|ئی|ئي|ئے|یٰ|ی|ي|ې|ۍ|ئ|ے|س|ص|ث|څ|ج|چ|هٔ|ه|ۀ|غز|زغ|کش|شک|ښک|ښک|پښ|ښپ|ہ|ع|و|ؤ|ښ|غ|خ|ح|ش|ز|ض|ذ|ځ|ظ|ژ|ر|ړ|ڑ|ت|ټ|ٹ|ط|د|ډ|ڈ|مب|م|نب|ن|ڼ|ک|ګ|گ|ل|ق|ږ|ب|پ|ف/g;
export const pashtoReplacerRegex =
/اً|أ|ا|آ|ٱ|ٲ|ٳ|ئی|ئي|ئے|یٰ|ی|ي|ې|ۍ|ئ|ے|س|ص|ث|څ|ج|چ|هٔ|ه|ۀ|غز|زغ|کش|شک|ښک|ښک|پښ|ښپ|ہ|ع|و|ؤ|ښ|غ|خ|ح|ش|ز|ض|ذ|ځ|ظ|ژ|ر|ړ|ڑ|ت|ټ|ٹ|ط|د|ډ|ڈ|مب|م|نب|ن|ڼ|ک|ګ|گ|ل|ق|ږ|ب|پ|ف/g;
// TODO: I removed the h? 's at the beginning and ends. was that a good idea?
const aaySoundLatin = "(?:[aá]a?i|[eé]y|[aá]a?y|[aá]h?i)";
const aaySoundSimpleLatin = "(?:aa?i|ey|aa?y|ah?i)";
const aaySoundSimpleLatin = "(?:aa?i|ay|aa?y|ah?i)";
const longASoundLatin = "(?:[aá]{1,2}'?h?a{0,2}?)h?";
const longASoundSimpleLatin = "(?:a{1,2}'?h?a{0,2}?)h?";
const shortASoundLatin = "(?:[aáă][a|́]?|au|áu|[uú]|[UÚ]|[ií]|[eé])?h?";
@ -146,8 +156,8 @@ const shwaSoundLatin = "(?:[uú]|[oó]o?|w[uú]|[aáă]|[ií]|[UÚ])?";
const shwaSoundSimpleLatin = "(?:u|oo?|wu|a|i|U)?";
const ooSoundLatin = "(?:[oó]o?|[áa]u|w[uú]|[aá]w|[uú]|[UÚ])(?:h|w)?";
const ooSoundSimpleLatin = "(?:oo?|au|wu|aw|u|U)(?:h|w)?";
const eySoundLatin = "(?:[eé]y|[eé]e?|[uú]y|[aá]y|[ií])";
const eySoundSimpleLatin = "(?:ey|ee?|uy|ay|i)";
// Alternatives accepted for the "ay" sound (spelled "ey" before the phonetics rename),
// with optional accents on the vowel.
const aySoundLatin = "(?:[eé]y|[eé]e?|[uú]y|[aá]y|[ií])";
// Accent-free counterpart of aySoundLatin. "ey" is kept as an alternative so this
// pattern mirrors the `[eé]y` branch above instead of listing "ay" twice.
const aySoundSimpleLatin = "(?:ey|ee?|uy|ay|i)";
const middleESoundLatin = "(?:[eé]e?|[ií]|[aáă]|[eé])[h|y|́]?";
const middleESoundSimpleLatin = "(?:ee?|i|a|e)[h|y]?";
const iSoundLatin = "-?(?:[uú]|[aáă]|[ií]|[eé]e?)?h?-?";
@ -180,67 +190,67 @@ export const latinReplacerInfo: IPhoneticsReplacerInfoItem[] = [
{ char: "óo", repl: ooSoundLatin },
{ char: "i", repl: iSoundLatin, replWhenBeginning: iSoundLatinBeginning },
{ char: "í", repl: iSoundLatin, replWhenBeginning: iSoundLatinBeginning },
{ char: "ey", repl: eySoundLatin },
{ char: "éy", repl: eySoundLatin },
{ char: "ee", repl: eySoundLatin },
{ char: "ée", repl: eySoundLatin },
{ char: "uy", repl: eySoundLatin },
{ char: "úy", repl: eySoundLatin },
{ char: "ay", repl: aySoundLatin },
{ char: "áy", repl: aySoundLatin },
{ char: "ee", repl: aySoundLatin },
{ char: "ée", repl: aySoundLatin },
{ char: "uy", repl: aySoundLatin },
{ char: "úy", repl: aySoundLatin },
{ char: "e", repl: middleESoundLatin },
{ char: "é", repl: middleESoundLatin },
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)"},
{ char: "y", repl: "[ií]?y?"},
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)" },
{ char: "y", repl: "[ií]?y?" },
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "ss", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "dz", repl: "(?:dz|z{1,2}|j)"},
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)"},
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "r", repl: "(?:R|r{1,2}|N)"},
{ char: "rr", repl: "(?:R|r{1,2}|N)"},
{ char: "R", repl: "(?:R|r{1,2}|N)"},
{ char: "nb", repl: "(?:nb|mb)"},
{ char: "mb", repl: "(?:nb|mb)"},
{ char: "n", repl: "(?:n{1,2}|N)"},
{ char: "N", repl: "(?:R|r{1,2}|N)"},
{ char: "f", repl: "(?:f{1,2}|p{1,2})"},
{ char: "ff", repl: "(?:f{1,2}|p{1,2})"},
{ char: "b", repl: "(?:b{1,2}|p{1,2})"},
{ char: "bb", repl: "(?:b{1,2}|p{1,2})"},
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "ss", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "dz", repl: "(?:dz|z{1,2}|j)" },
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)" },
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "r", repl: "(?:R|r{1,2}|N)" },
{ char: "rr", repl: "(?:R|r{1,2}|N)" },
{ char: "R", repl: "(?:R|r{1,2}|N)" },
{ char: "nb", repl: "(?:nb|mb)" },
{ char: "mb", repl: "(?:nb|mb)" },
{ char: "n", repl: "(?:n{1,2}|N)" },
{ char: "N", repl: "(?:R|r{1,2}|N)" },
{ char: "f", repl: "(?:f{1,2}|p{1,2})" },
{ char: "ff", repl: "(?:f{1,2}|p{1,2})" },
{ char: "b", repl: "(?:b{1,2}|p{1,2})" },
{ char: "bb", repl: "(?:b{1,2}|p{1,2})" },
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "sh", repl: "(?:x|sh|s`h)"},
{ char: "x", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "sh", repl: "(?:x|sh|s`h)" },
{ char: "x", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "k", repl: "(?:k{1,2}|q{1,2})"},
{ char: "q", repl: "(?:k{1,2}|q{1,2})"},
{ char: "k", repl: "(?:k{1,2}|q{1,2})" },
{ char: "q", repl: "(?:k{1,2}|q{1,2})" },
{ char: "jz", repl: "(?:G|jz)"},
{ char: "G", repl: "(?:jz|G|g)"},
{ char: "jz", repl: "(?:G|jz)" },
{ char: "G", repl: "(?:jz|G|g)" },
{ char: "g", repl: "(?:gh?|k{1,2}|G)"},
{ char: "gh", repl: "(?:g|gh|kh|G)"},
{ char: "g", repl: "(?:gh?|k{1,2}|G)" },
{ char: "gh", repl: "(?:g|gh|kh|G)" },
{ char: "j", repl: "(?:j{1,2}|ch|dz)"},
{ char: "ch", repl: "(?:j{1,2}|ch)"},
{ char: "j", repl: "(?:j{1,2}|ch|dz)" },
{ char: "ch", repl: "(?:j{1,2}|ch)" },
{ char: "l", repl: "l{1,2}"},
{ char: "ll", repl: "l{1,2}"},
{ char: "m", repl: "m{1,2}"},
{ char: "mm", repl: "m{1,2}"},
{ char: "h", repl: "k?h?"},
{ char: "'", repl: "['||`]?"},
{ char: "", repl: "['||`]?"},
{ char: "`", repl: "['||`]?"},
{ char: "l", repl: "l{1,2}" },
{ char: "ll", repl: "l{1,2}" },
{ char: "m", repl: "m{1,2}" },
{ char: "mm", repl: "m{1,2}" },
{ char: "h", repl: "k?h?" },
{ char: "'", repl: "['||`]?" },
{ char: "", repl: "['||`]?" },
{ char: "`", repl: "['||`]?" },
];
export const simpleLatinReplacerInfo: IPhoneticsReplacerInfoItem[] = [
@ -254,65 +264,71 @@ export const simpleLatinReplacerInfo: IPhoneticsReplacerInfoItem[] = [
{ char: "U", repl: ooSoundSimpleLatin },
{ char: "o", repl: ooSoundSimpleLatin },
{ char: "oo", repl: ooSoundSimpleLatin },
{ char: "i", repl: iSoundSimpleLatin, replWhenBeginning: iSoundSimpleLatinBeginning },
{ char: "ey", repl: eySoundSimpleLatin },
{ char: "ee", repl: eySoundSimpleLatin },
{ char: "uy", repl: eySoundSimpleLatin },
{
char: "i",
repl: iSoundSimpleLatin,
replWhenBeginning: iSoundSimpleLatinBeginning,
},
{ char: "ay", repl: aySoundSimpleLatin },
{ char: "ee", repl: aySoundSimpleLatin },
{ char: "uy", repl: aySoundSimpleLatin },
{ char: "e", repl: middleESoundSimpleLatin },
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)"},
{ char: "y", repl: "[ií]?y?"},
{ char: "w", repl: "(?:w{1,2}?[UÚ]?|b)" },
{ char: "y", repl: "[ií]?y?" },
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)"},
{ char: "dz", repl: "(?:dz|z{1,2}|j)"},
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)"},
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)"},
{ char: "r", repl: "(?:R|r{1,2}|N)"},
{ char: "rr", repl: "(?:R|r{1,2}|N)"},
{ char: "R", repl: "(?:R|r{1,2}|N)"},
{ char: "nb", repl: "(?:nb|mb|nw)"},
{ char: "mb", repl: "(?:nb|mb)"},
{ char: "n", repl: "(?:n{1,2}|N)"},
{ char: "N", repl: "(?:R|r{1,2}|N)"},
{ char: "f", repl: "(?:f{1,2}|p{1,2})"},
{ char: "ff", repl: "(?:f{1,2}|p{1,2})"},
{ char: "b", repl: "(?:b{1,2}|p{1,2}|w)"},
{ char: "bb", repl: "(?:b{1,2}|p{1,2})"},
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})"},
{ char: "ts", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "s", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "c", repl: "(?:s{1,2}|z{1,2|ts|c)" },
{ char: "dz", repl: "(?:dz|z{1,2}|j)" },
{ char: "z", repl: "(?:s{1,2}|dz|z{1,2}|ts)" },
{ char: "t", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "tt", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "T", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "d", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "dd", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "D", repl: "(?:t{1,2}|T|d{1,2}|D)" },
{ char: "r", repl: "(?:R|r{1,2}|N)" },
{ char: "rr", repl: "(?:R|r{1,2}|N)" },
{ char: "R", repl: "(?:R|r{1,2}|N)" },
{ char: "nb", repl: "(?:nb|mb|nw)" },
{ char: "mb", repl: "(?:nb|mb)" },
{ char: "n", repl: "(?:n{1,2}|N)" },
{ char: "N", repl: "(?:R|r{1,2}|N)" },
{ char: "f", repl: "(?:f{1,2}|p{1,2})" },
{ char: "ff", repl: "(?:f{1,2}|p{1,2})" },
{ char: "b", repl: "(?:b{1,2}|p{1,2}|w)" },
{ char: "bb", repl: "(?:b{1,2}|p{1,2})" },
{ char: "p", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "pp", repl: "(?:b{1,2}|p{1,2}|f{1,2})" },
{ char: "sh", repl: "(?:x|sh|s`h)"},
{ char: "x", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}"},
{ char: "sh", repl: "(?:x|sh|s`h)" },
{ char: "x", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "kh", repl: "(?:kh|gh|x|h){1,2}" },
{ char: "k", repl: "(?:k{1,2}|q{1,2})"},
{ char: "kk", repl: "(?:k{1,2}|q{1,2})"},
{ char: "q", repl: "(?:k{1,2}|q{1,2})"},
{ char: "qq", repl: "(?:k{1,2}|q{1,2})"},
{ char: "k", repl: "(?:k{1,2}|q{1,2})" },
{ char: "kk", repl: "(?:k{1,2}|q{1,2})" },
{ char: "q", repl: "(?:k{1,2}|q{1,2})" },
{ char: "qq", repl: "(?:k{1,2}|q{1,2})" },
{ char: "jz", repl: "(?:G|jz)"},
{ char: "G", repl: "(?:jz|G|g)"},
{ char: "jz", repl: "(?:G|jz)" },
{ char: "G", repl: "(?:jz|G|g)" },
{ char: "g", repl: "(?:gh?|k{1,2}|G)"},
{ char: "gh", repl: "(?:g|gh|kh|G)"},
{ char: "g", repl: "(?:gh?|k{1,2}|G)" },
{ char: "gh", repl: "(?:g|gh|kh|G)" },
{ char: "j", repl: "(?:j{1,2}|ch|dz)"},
{ char: "ch", repl: "(?:j{1,2}|ch)"},
{ char: "j", repl: "(?:j{1,2}|ch|dz)" },
{ char: "ch", repl: "(?:j{1,2}|ch)" },
{ char: "l", repl: "l{1,2}"},
{ char: "ll", repl: "l{1,2}"},
{ char: "m", repl: "m{1,2}"},
{ char: "mm", repl: "m{1,2}"},
{ char: "h", repl: "k?h?"},
{ char: "l", repl: "l{1,2}" },
{ char: "ll", repl: "l{1,2}" },
{ char: "m", repl: "m{1,2}" },
{ char: "mm", repl: "m{1,2}" },
{ char: "h", repl: "k?h?" },
];
// tslint:disable-next-line
export const latinReplacerRegex = /yee|a{1,2}[i|y]|á{1,2}[i|y]|aa|áa|a|ắ|ă|á|U|Ú|u|ú|oo|óo|o|ó|e{1,2}|ée|é|ey|éy|uy|úy|i|í|w|y|q|q|ts|sh|ss|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|kk|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h||'|`/g;
export const latinReplacerRegex =
/yee|a{1,2}[i|y]|á{1,2}[i|y]|aa|áa|a|ắ|ă|á|U|Ú|u|ú|oo|óo|o|ó|e{1,2}|ée|é|ay|áy|uy|úy|i|í|w|y|q|q|ts|sh|ss|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|kk|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h||'|`/g;
export const simpleLatinReplacerRegex = /yee|a{1,2}[i|y]|aa|a|U|u|oo|o|e{1,2}|ey|uy|i|w|y|q|ts|sh|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h/g;
export const simpleLatinReplacerRegex =
/yee|a{1,2}[i|y]|aa|a|U|u|oo|o|e{1,2}|ay|uy|i|w|y|q|ts|sh|s|dz|z|tt|t|T|dd|d|D|r{1,2}|R|nb|mb|n{1,2}|N|f{1,2}|b{1,2}|p{1,2}|x|kh|q|k|gh|g|G|j|ch|c|ll|l|m{1,2}|h/g;

View File

@ -14,7 +14,7 @@ export const userLocalStorageName = "user1";
/**
 * Serializes the given options to JSON and stores them in localStorage
 * under the `optionsLocalStorageName` key.
 */
export function saveOptions(options: Options): void {
  localStorage.setItem(optionsLocalStorageName, JSON.stringify(options));
}
export const readOptions = (): undefined | Options => {
const optionsRaw = localStorage.getItem(optionsLocalStorageName);
@ -23,10 +23,6 @@ export const readOptions = (): undefined | Options => {
}
try {
const options = JSON.parse(optionsRaw) as Options;
if (!("searchBarStickyFocus" in options)) {
// compatibility with legacy options
options.searchBarStickyFocus = false;
}
return options;
} catch (e) {
console.error("error parsing saved state JSON", e);
@ -40,7 +36,7 @@ export function saveUser(user: AT.LingdocsUser | undefined): void {
} else {
localStorage.removeItem(userLocalStorageName);
}
};
}
export const readUser = (): AT.LingdocsUser | undefined => {
const userRaw = localStorage.getItem(userLocalStorageName);

View File

@ -9,7 +9,7 @@ const pMatches = [
const fMatches = [
["tahliya", "takhliya"],
["sareyy", "saRey"],
["sarey", "saRay"],
["peyTey", "peTey"],
];

View File

@ -28,7 +28,7 @@
// R: "[r|R]",
// };
const fiveYeys = "[ئ|ۍ|ي|ې|ی]";
const fiveYays = "[ئ|ۍ|ي|ې|ی]";
const sSounds = "[س|ص|ث|څ]";
const zSounds = "[ز|ژ|ض|ظ|ذ|ځ]";
const tSounds = "[ت|ط|ټ]";
@ -39,106 +39,115 @@ const hKhSounds = "[خ|ح|ښ|ه]";
const alef = "[آ|ا]";
// Maps a single Pashto letter to the character class of letters accepted in its place.
// Every letter of a sound group points at that group's shared class.
const pReplacer: Record<string, string> = {
  ی: fiveYays,
  ي: fiveYays,
  ۍ: fiveYays,
  ئ: fiveYays,
  ې: fiveYays,

  س: sSounds,
  ص: sSounds,
  ث: sSounds,
  څ: sSounds,

  ز: zSounds,
  ظ: zSounds,
  ذ: zSounds,
  ض: zSounds,
  ژ: zSounds,
  ځ: zSounds,

  ت: tSounds,
  ط: tSounds,
  ټ: tSounds,

  د: dSounds,
  ډ: dSounds,

  ر: rSounds,
  ړ: rSounds,

  ن: nSounds,
  ڼ: nSounds,

  خ: hKhSounds,
  ح: hKhSounds,
  ښ: hKhSounds,
  ه: hKhSounds,

  ا: alef,
  آ: alef,
};
// Alternation groups of Latin phonetic spellings that are treated as interchangeable.
const fiveYaysF = "(?:ey|ay|ee|é|e|uy)";
const hKhF = "(?:kh|h|x)";
const zSoundsF = "(?:z|dz)";
const sSoundsF = "(?:ts|s)";

// Maps a Latin phonetic token to the regex fragment accepted in its place.
// Character classes list their members without "|" separators: inside [...] a
// pipe is a literal character, so "[a|á]" would also have matched "|".
const fReplacer: Record<string, string> = {
  ey: fiveYaysF,
  ay: fiveYaysF,
  uy: fiveYaysF,
  ee: fiveYaysF,
  e: fiveYaysF,
  z: zSoundsF,
  dz: zSoundsF,
  x: hKhF,
  h: hKhF,
  kh: hKhF,
  ts: sSoundsF,
  s: sSoundsF,
  // only used if ignoring accents
  a: "[aá]",
  á: "[aáuú]",
  u: "[uúaá]",
  ú: "[uú]",
  o: "[oó]",
  ó: "[oó]",
  i: "[ií]",
  í: "[ií]",
  U: "[UÚ]",
  Ú: "[UÚ]",
  áy: fiveYaysF,
  éy: fiveYaysF,
  úy: fiveYaysF,
  ée: fiveYaysF,
  é: fiveYaysF,
};
// Matches any single Pashto letter that has an entry in pReplacer.
const pRepRegex = new RegExp(Object.keys(pReplacer).join("|"), "g");

// Latin tokens rewritten through fReplacer when accents are significant.
const fRepRegex = /ey|ay|uy|ee|e|z|dz|x|kh|h|ts|s/g;

// Same tokens plus their accented forms and the plain/accented vowels,
// used when accents are ignored.
const fRepRegexWAccents =
  /ey|éy|ay|áy|uy|úy|ee|ée|e|é|z|dz|x|ts|s|kh|h|a|á|i|í|o|ó|u|ú|U|Ú/g;
/**
 * Builds a start-anchored regex source from a Pashto-script string, replacing each
 * letter matched by `pRepRegex` with its entry in `pReplacer`.
 *
 * @param s - Pashto-script search text
 * @returns regex source beginning with "^"
 */
function makePAWeeBitFuzzy(s: string): string {
  // disabled alternative kept from the original:
  // + s.replace(/ /g, "").split("").join(" *");
  const lookup: Record<string, string> = pReplacer;
  // pRepRegex is built from pReplacer's keys, so the fallback only guards against
  // the text "undefined" ending up in the pattern if the two ever drift apart
  const loosened = s.replace(pRepRegex, (mtch) => lookup[mtch] ?? mtch);
  return "^" + loosened;
}
/**
 * Builds a start-anchored regex source from a Latin phonetic string, replacing each
 * recognised token with its entry in `fReplacer`.
 *
 * @param s - Latin phonetic search text
 * @param ignoreAccent - when truthy, tokenizes with `fRepRegexWAccents` instead of `fRepRegex`
 * @returns regex source beginning with "^"
 */
function makeFAWeeBitFuzzy(s: string, ignoreAccent?: boolean): string {
  const tokens = ignoreAccent ? fRepRegexWAccents : fRepRegex;
  const lookup: Record<string, string> = fReplacer;
  // a token without a table entry is left as typed rather than becoming "undefined"
  const loosened = s.replace(tokens, (mtch) => lookup[mtch] ?? mtch);
  return "^" + loosened;
}
/**
 * Produces a slightly fuzzy, start-anchored regex source for a search string.
 *
 * @param s - the search text
 * @param i - "p" to treat `s` as Pashto script, "f" to treat it as Latin phonetics
 * @param ignoreAccent - forwarded to the phonetic variant only
 * @returns regex source beginning with "^"
 */
export function makeAWeeBitFuzzy(
  s: string,
  i: "f" | "p",
  ignoreAccent?: boolean
): string {
  return i === "p" ? makePAWeeBitFuzzy(s) : makeFAWeeBitFuzzy(s, ignoreAccent);
}

View File

@ -1,71 +1,88 @@
/** The possible values of the `dictionaryStatus` field in the app state. */
export type DictionaryStatus =
  | "loading"
  | "ready"
  | "updating"
  | "error loading";
/** Shape of the top-level application state. */
export type State = {
  dictionaryStatus: DictionaryStatus;
  showModal: boolean;
  /** current text of the search input; the dictionary search returns nothing when it is "" */
  searchValue: string;
  options: Options;
  /** results page passed on to the dictionary lookups */
  page: number;
  isolatedEntry: import("@lingdocs/ps-react").Types.DictionaryEntry | undefined;
  results: import("@lingdocs/ps-react").Types.DictionaryEntry[];
  wordlist: WordlistWord[];
  reviewTasks: import("./functions-types").ReviewTask[];
  dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo | undefined;
  user: undefined | import("./account-types").LingdocsUser;
  inflectionSearchResults:
    | undefined
    | "searching"
    | {
        exact: InflectionSearchResult[];
        fuzzy: InflectionSearchResult[];
      };
};
/** The functions exposed by the `dictionary` object to the rest of the app. */
export type DictionaryAPI = {
  /** loads the dictionary and reports whether it was freshly loaded or restored from a saved copy */
  initialize: () => Promise<{
    response: "loaded first time" | "loaded from saved";
    dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo;
  }>;
  /** runs a dictionary update; `updateComing` is a callback handed through to the updater */
  update: (updateComing: () => void) => Promise<{
    response: "no need for update" | "updated" | "unable to check";
    dictionaryInfo: import("@lingdocs/ps-react").Types.DictionaryInfo;
  }>;
  /** searches using the search value, options and page held in `state` */
  search: (
    state: State
  ) => import("@lingdocs/ps-react").Types.DictionaryEntry[];
  exactPashtoSearch: (
    search: string
  ) => import("@lingdocs/ps-react").Types.DictionaryEntry[];
  getNewWordsThisMonth: () => import("@lingdocs/ps-react").Types.DictionaryEntry[];
  /** looks up a single entry by its `ts` value */
  findOneByTs: (
    ts: number
  ) => import("@lingdocs/ps-react").Types.DictionaryEntry | undefined;
  findRelatedEntries: (
    entry: import("@lingdocs/ps-react").Types.DictionaryEntry
  ) => import("@lingdocs/ps-react").Types.DictionaryEntry[];
};
/*
 * Core fields of a word saved in a user's wordlist.
 * `WordlistWordWAttachments` intersects this type with `WordlistAttachmentInfo`.
 */
export type WordlistWordBase = {
_id: string,
_id: string;
/* a backup copy of the full dictionary entry in case it gets deleted from the dictionary */
entry: import("@lingdocs/ps-react").Types.DictionaryEntry,
entry: import("@lingdocs/ps-react").Types.DictionaryEntry;
/* the notes/context provided by the user for the word in their wordlist */
notes: string,
supermemo: import("supermemo").SuperMemoItem,
notes: string;
supermemo: import("supermemo").SuperMemoItem;
/* rep/stage reached in the warmup phase before moving into supermemo mode; "done" presumably marks warmup as finished - confirm */
warmup: number | "done",
warmup: number | "done";
/* date due for review - declared as a number. NOTE(review): this comment used to say "ISO string", which does not match the type; presumably a numeric timestamp - confirm */
dueDate: number,
}
dueDate: number;
};
/*
 * Attachment-related fields that are intersected onto `WordlistWordBase`
 * to form `WordlistWordWAttachments`: a required `_attachments` map and an
 * optional `imgSize` (presumably the dimensions of the image attachment - confirm).
 */
export type WordlistAttachmentInfo = {
imgSize?: { height: number, width: number },
_attachments: Attachments,
}
imgSize?: { height: number; width: number };
_attachments: Attachments;
};
/* a wordlist word that also carries the attachment fields from `WordlistAttachmentInfo` */
export type WordlistWordWAttachments = WordlistWordBase & WordlistAttachmentInfo;
export type WordlistWordWAttachments = WordlistWordBase &
WordlistAttachmentInfo;
/* a wordlist word either without or with attachment fields */
export type WordlistWord = WordlistWordBase | WordlistWordWAttachments;
/*
 * The full set of option values: language, search type, theme, the text
 * options record, wordlist mode / review language / review badge, and the
 * search bar position and sticky-focus flag.
 */
export type Options = {
language: Language,
searchType: SearchType,
theme: Theme,
textOptionsRecord: TextOptionsRecord,
wordlistMode: WordlistMode,
wordlistReviewLanguage: Language,
wordlistReviewBadge: boolean,
searchBarPosition: SearchBarPosition,
searchBarStickyFocus: boolean,
}
language: Language;
searchType: SearchType;
theme: Theme;
textOptionsRecord: TextOptionsRecord;
wordlistMode: WordlistMode;
wordlistReviewLanguage: Language;
wordlistReviewBadge: boolean;
searchBarPosition: SearchBarPosition;
searchBarStickyFocus: boolean;
};
/* language choice; used for `Options.language` and `Options.wordlistReviewLanguage` */
export type Language = "Pashto" | "English";
/* search mode held in `Options.searchType` */
export type SearchType = "alphabetical" | "fuzzy";
@ -78,84 +95,102 @@ export type SearchBarPosition = "top" | "bottom";
/* wordlist mode held in `Options.wordlistMode` and carried by the "changeWordlistMode" action */
export type WordlistMode = "browse" | "review";
/*
 * Text options paired with a `lastModified` timestamp.
 * NOTE(review): the timestamp is presumably used to decide which copy of the
 * options is newer when syncing - confirm against the code that reads it.
 */
export type TextOptionsRecord = {
lastModified: import("./account-types").TimeStamp,
textOptions: import("@lingdocs/ps-react").Types.TextOptions,
lastModified: import("./account-types").TimeStamp;
textOptions: import("@lingdocs/ps-react").Types.TextOptions;
};
/* the three user level names */
export type UserLevel = "basic" | "student" | "editor";
/*
 * Union of actions for changing options, discriminated by the `type` string.
 * The two toggle actions carry no payload; every other action carries a
 * `payload` of the type shown.
 * NOTE(review): `Options` as declared in this file has no field matching
 * "setShowPlayStoreButton" - confirm where that payload is stored.
 */
export type OptionsAction = {
type: "toggleSearchType",
} | {
type: "toggleLanguage",
} | {
type: "changeTheme",
payload: Theme,
} | {
type: "changeSearchBarPosition",
payload: SearchBarPosition,
} | {
type: "changeWordlistMode",
payload: WordlistMode,
} | {
type: "changeWordlistReviewLanguage",
payload: Language,
} | {
type: "changeWordlistReviewBadge",
payload: boolean,
} | {
type: "updateTextOptionsRecord",
payload: TextOptionsRecord,
} | {
type: "changeSearchBarStickyFocus",
payload: boolean,
} | {
type: "setShowPlayStoreButton",
payload: boolean,
};
export type OptionsAction =
| {
type: "toggleSearchType";
}
| {
type: "toggleLanguage";
}
| {
type: "changeTheme";
payload: Theme;
}
| {
type: "changeSearchBarPosition";
payload: SearchBarPosition;
}
| {
type: "changeWordlistMode";
payload: WordlistMode;
}
| {
type: "changeWordlistReviewLanguage";
payload: Language;
}
| {
type: "changeWordlistReviewBadge";
payload: boolean;
}
| {
type: "updateTextOptionsRecord";
payload: TextOptionsRecord;
}
| {
type: "changeSearchBarStickyFocus";
payload: boolean;
}
| {
type: "setShowPlayStoreButton";
payload: boolean;
};
/*
 * Union of actions for changing text options, discriminated by the `type`
 * string: Pashto text size, spelling, phonetics system, dialect, and the
 * boolean diacritics flag.
 * NOTE(review): the payload strings (e.g. "peshawer") are literal types that
 * other code must match exactly - do not respell them in isolation.
 */
export type TextOptionsAction = {
type: "changePTextSize",
payload: PTextSize,
} | {
type: "changeSpelling",
payload: import("@lingdocs/ps-react").Types.Spelling,
} | {
type: "changePhonetics",
payload: "lingdocs" | "ipa" | "alalc" | "none",
} | {
type: "changeDialect",
payload: "standard" | "peshawer" | "southern",
} | {
type: "changeDiacritics",
payload: boolean,
};
export type TextOptionsAction =
| {
type: "changePTextSize";
payload: PTextSize;
}
| {
type: "changeSpelling";
payload: import("@lingdocs/ps-react").Types.Spelling;
}
| {
type: "changePhonetics";
payload: "lingdocs" | "ipa" | "alalc" | "none";
}
| {
type: "changeDialect";
payload: "standard" | "peshawer" | "southern";
}
| {
type: "changeDiacritics";
payload: boolean;
};
/* an attachment with its content type and data but no `digest` - presumably the shape used when first saving an attachment (confirm) */
export type AttachmentToPut = {
content_type: string,
data: string | Blob,
}
content_type: string;
data: string | Blob;
};
/* an attachment that has a `digest` and also carries its `data` (string or Blob) */
export type AttachmentWithData = {
content_type: string,
digest: string,
data: string | Blob,
}
content_type: string;
digest: string;
data: string | Blob;
};
/*
 * An attachment described only by `content_type` and `digest`, with no `data`.
 * `stub` is always the literal `true`, which distinguishes this member of the
 * `Attachment` union from the ones that carry data.
 */
export type AttachmentWOutData = {
content_type: string,
digest: string,
content_type: string;
digest: string;
stub: true;
}
};
/* any of the three attachment shapes: to-be-saved, saved with data, or saved without data (stub) */
export type Attachment = AttachmentToPut | AttachmentWithData | AttachmentWOutData
export type Attachment =
| AttachmentToPut
| AttachmentWithData
| AttachmentWOutData;
/* the two kinds of attachment */
export type AttachmentType = "image" | "audio";
/* map from filename to attachment */
export type Attachments = {
/* intended to hold at most one image and one audio attachment (max 2 values); this limit is a convention - the index signature below does not enforce it */
[filename: string]: Attachment,
[filename: string]: Attachment;
};
/* a wordlist word with a required `_rev` alongside `_id` - presumably the form returned from the document store (confirm) */
export type WordlistWordDoc = WordlistWord & { _rev: string, _id: string };
export type WordlistWordDoc = WordlistWord & { _rev: string; _id: string };
/* names of inflection forms; used in `InflectionFormMatch.matches[].pos` */
export type InflectionName = "plain" | "1st" | "2nd";
@ -167,15 +202,14 @@ export type PluralInflectionName = "plural" | "2nd";
// InflectionSearchResult: a dictionary entry together with the forms of it that matched.
// InflectionFormMatch: for one `path`, the possible matches, each with its `ps` text and
// its person/inflection number (`pos`), which may be null.
// NOTE(review): `path` presumably locates the form within the entry's inflection/conjugation data - confirm.
export type InflectionSearchResult = {
entry: import("@lingdocs/ps-react").Types.DictionaryEntry,
forms: InflectionFormMatch[],
}
export type InflectionFormMatch = {
path: string[],
matches: {
ps: import("@lingdocs/ps-react").Types.PsString,
pos: InflectionName[] | import("@lingdocs/ps-react").Types.Person[] | null,
}[],
entry: import("@lingdocs/ps-react").Types.DictionaryEntry;
forms: InflectionFormMatch[];
};
export type InflectionFormMatch = {
path: string[];
matches: {
ps: import("@lingdocs/ps-react").Types.PsString;
pos: InflectionName[] | import("@lingdocs/ps-react").Types.Person[] | null;
}[];
};

View File

@ -2349,10 +2349,10 @@
"@jridgewell/resolve-uri" "^3.0.3"
"@jridgewell/sourcemap-codec" "^1.4.10"
"@lingdocs/ps-react@5.10.1":
version "5.10.1"
resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-5.10.1.tgz#949850aaa3c9de54d4beed1daa9b546bb0a84df9"
integrity sha512-Ro/6Fq9mEdF4/2wJf8USkIlYe+9vWmez/RhoUF0mTjOhmyTGV6cpajK0Qpo1WyCaL5d/6BTI3qVuk5h8pWRQjA==
"@lingdocs/ps-react@6.0.0":
version "6.0.0"
resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-6.0.0.tgz#dbdfd1a5afd19253679169eacbf1da5562db5dc3"
integrity sha512-+j6F65FtmPbeEjjHtE3JqKHtCcUM+cMAN2RMTd8yyacJ4sTJW/oWC+6rAQGQqc1da3lP7tuxt6p+esmFYI9fgQ==
dependencies:
"@formkit/auto-animate" "^1.0.0-beta.3"
classnames "^2.2.6"