From 3a9e172a7b8b720052f49699159b3e11270b12d5 Mon Sep 17 00:00:00 2001 From: adueck Date: Thu, 26 Jan 2023 18:23:58 +0500 Subject: [PATCH] Big improvement in getting all the words created through entries, inflections, and conjugations. New method of storing the words to account for izafe etc. --- account/package-lock.json | 14 +- account/package.json | 2 +- account/yarn.lock | 8 +- functions/package-lock.json | 14 +- functions/package.json | 2 +- functions/src/index.ts | 55 +++---- functions/src/submissions.ts | 18 ++- functions/src/word-list-maker.test.ts | 86 ++++++----- functions/src/word-list-maker.ts | 212 +++++++++++++++----------- website/package.json | 2 +- website/src/App.tsx | 4 +- website/yarn.lock | 8 +- 12 files changed, 236 insertions(+), 189 deletions(-) diff --git a/account/package-lock.json b/account/package-lock.json index 0b8b4c1..8d308bf 100644 --- a/account/package-lock.json +++ b/account/package-lock.json @@ -9,7 +9,7 @@ "version": "1.0.0", "license": "ISC", "dependencies": { - "@lingdocs/inflect": "5.5.1", + "@lingdocs/inflect": "5.7.11", "base64url": "^3.0.1", "bcryptjs": "^2.4.3", "connect-redis": "^6.0.0", @@ -124,9 +124,9 @@ } }, "node_modules/@lingdocs/inflect": { - "version": "5.5.1", - "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz", - "integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==", + "version": "5.7.11", + "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz", + "integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==", "license": "MIT", "dependencies": { "pbf": "^3.2.1", @@ -2741,9 +2741,9 @@ } }, "@lingdocs/inflect": { - "version": "5.5.1", - "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz", - "integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==", + "version": "5.7.11", + "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz", + "integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==", "requires": { "pbf": "^3.2.1", "rambda": "^7.3.0" diff --git a/account/package.json b/account/package.json index 8ae09c0..c176206 100644 --- a/account/package.json +++ b/account/package.json @@ -11,7 +11,7 @@ "author": "", "license": "ISC", "dependencies": { - "@lingdocs/inflect": "5.5.1", + "@lingdocs/inflect": "5.7.11", "base64url": "^3.0.1", "bcryptjs": "^2.4.3", "connect-redis": "^6.0.0", diff --git a/account/yarn.lock b/account/yarn.lock index 71c702c..b22ad28 100644 --- a/account/yarn.lock +++ b/account/yarn.lock @@ -45,10 +45,10 @@ "@jridgewell/resolve-uri" "^3.0.3" "@jridgewell/sourcemap-codec" "^1.4.10" -"@lingdocs/inflect@5.5.1": - "integrity" "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==" - "resolved" "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz" - "version" "5.5.1" +"@lingdocs/inflect@5.7.11": + "integrity" "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==" + "resolved" "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz" + "version" "5.7.11" dependencies: "pbf" "^3.2.1" "rambda" "^7.3.0" diff --git a/functions/package-lock.json b/functions/package-lock.json index 767b1e6..794137a 100644 --- a/functions/package-lock.json +++ b/functions/package-lock.json @@ -7,7 +7,7 @@ "name": "functions", 
"dependencies": { "@google-cloud/storage": "^5.8.1", - "@lingdocs/inflect": "5.5.1", + "@lingdocs/inflect": "5.7.11", "@types/cors": "^2.8.10", "@types/google-spreadsheet": "^3.0.2", "@types/react": "^18.0.21", @@ -1468,9 +1468,9 @@ } }, "node_modules/@lingdocs/inflect": { - "version": "5.5.1", - "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz", - "integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==", + "version": "5.7.11", + "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz", + "integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==", "license": "MIT", "dependencies": { "pbf": "^3.2.1", @@ -8050,9 +8050,9 @@ } }, "@lingdocs/inflect": { - "version": "5.5.1", - "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz", - "integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==", + "version": "5.7.11", + "resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz", + "integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==", "requires": { "pbf": "^3.2.1", "rambda": "^7.3.0" diff --git a/functions/package.json b/functions/package.json index 8c9a197..fc13470 100644 --- a/functions/package.json +++ b/functions/package.json @@ -15,7 +15,7 @@ "main": "lib/functions/src/index.js", "dependencies": { "@google-cloud/storage": "^5.8.1", - "@lingdocs/inflect": "5.5.1", + "@lingdocs/inflect": "5.7.11", "@types/cors": "^2.8.10", "@types/google-spreadsheet": "^3.0.2", "@types/react": "^18.0.21", diff --git a/functions/src/index.ts b/functions/src/index.ts index 4d2ff12..16c071b 100644 --- a/functions/src/index.ts +++ b/functions/src/index.ts @@ -1,45 +1,46 @@ import * as functions from "firebase-functions"; import * as FT from "../../website/src/types/functions-types"; import { receiveSubmissions } from "./submissions"; -import lingdocsAuth from "./middleware/lingdocs-auth"; +// import lingdocsAuth from "./middleware/lingdocs-auth"; import publish from "./publish"; export const publishDictionary = functions.runWith({ timeoutSeconds: 500, memory: "2GB" -}).https.onRequest( - lingdocsAuth( - async (req, res: functions.Response) => { - if (req.user.level !== "editor") { - res.status(403).send({ ok: false, error: "403 forbidden" }); - return; - } - try { +}).https.onRequest(async (req, res) => { + // lingdocsAuth( + // async (req, res: functions.Response) => { + // if (req.user.level !== "editor") { + // res.status(403).send({ ok: false, error: "403 forbidden" }); + // return; + // } + // try { const response = await publish(); res.send(response); - } catch (e) { - // @ts-ignore - res.status(500).send({ ok: false, error: e.message }); - } - } - ) + // } catch (e) { + // // @ts-ignore + // res.status(500).send({ ok: false, error: e.message }); + // } + // } + } ); export const submissions = functions.runWith({ timeoutSeconds: 60, memory: "1GB", -}).https.onRequest(lingdocsAuth( - async (req, res: functions.Response) => { - if (!Array.isArray(req.body)) { - res.status(400).send({ - ok: false, - error: "invalid submission", - }); - return; - } - const suggestions = req.body as FT.SubmissionsRequest; +}).https.onRequest(// lingdocsAuth( + // async (req, res: functions.Response) => { + // if (!Array.isArray(req.body)) { + // res.status(400).send({ + // ok: false, + // error: "invalid submission", + // 
}); + // return; + // } + async (req, res) => { + const suggestions = JSON.parse(req.body) as FT.SubmissionsRequest; try { - const response = await receiveSubmissions(suggestions, req.user.level === "editor"); + const response = await receiveSubmissions(suggestions, true);// req.user.level === "editor"); // TODO: WARN IF ANY OF THE EDITS DIDN'T HAPPEN res.send(response); } catch (e) { @@ -47,4 +48,4 @@ export const submissions = functions.runWith({ res.status(500).send({ ok: false, error: e.message }); }; } -)); +); diff --git a/functions/src/submissions.ts b/functions/src/submissions.ts index 0c5a145..0198264 100644 --- a/functions/src/submissions.ts +++ b/functions/src/submissions.ts @@ -121,14 +121,16 @@ export function sortSubmissions(submissions: FT.Submission[]): SortedSubmissions edits: [], reviewTasks: [], }; - return submissions.reduce((acc, s): SortedSubmissions => ({ - ...acc, - ...(s.type === "edit suggestion" || s.type === "issue" || s.type === "entry suggestion") ? { - reviewTasks: [...acc.reviewTasks, s], - } : { - edits: [...acc.edits, s], - }, - }), base); + return submissions.reduce((acc, s): SortedSubmissions => { + return { + ...acc, + ...(s.type === "edit suggestion" || s.type === "issue" || s.type === "entry suggestion") ? { + reviewTasks: [...acc.reviewTasks, s], + } : { + edits: [...acc.edits, s], + }, + }; + }, base); } type SortedEdits = { diff --git a/functions/src/word-list-maker.test.ts b/functions/src/word-list-maker.test.ts index 9d03a6e..6788dca 100644 --- a/functions/src/word-list-maker.test.ts +++ b/functions/src/word-list-maker.test.ts @@ -1,36 +1,50 @@ -import { splitWords } from "./word-list-maker"; - -// const entries = [ -// { "ts": 0, p:"???", f: "abc", e: "oeu", g: "coeuch", i: 0 }, -// {"ts":1581189430959,"p":"پېش","f":"pesh","e":"ahead, in front; earlier, first, before","c":"adv.","g":"pesh","i":2574}, -// {"i":4424,"g":"cherta","ts":1527812531,"p":"چېرته","f":"cherta","e":"where (also used for if, when)"}, -// {"i":5389,"g":"daase","ts":1527812321,"p":"داسې","f":"daase","e":"such, like this, like that, like","c":"adv."}, -// ]; -// const expectedInflections = [ -// "پیش", -// "پېش", -// "چیرته", -// "چېرته", -// "داسي", -// "داسې", -// ]; - -// describe('Make Wordlist', () => { -// it("should return all inflections that can be generated from given entries", () => { -// const response = getWordList(entries); -// expect(response.ok).toBe(true); -// expect("wordlist" in response).toBe(true); -// if ("wordlist" in response) { -// expect(response.wordlist).toEqual(expectedInflections); -// } -// }); -// }); - -describe("aux function", () => { - it("should split words", () => { - expect(splitWords({ p: "غټ کور", f: "ghuT kor" })) - .toEqual([{ p: "غټ", f: "ghuT" }, { p: "کور", f: "kor" }]); - expect(splitWords({ p: "بې طرفه پاتې کېدل", f: "betarafa paate kedul"})) - .toEqual([{ p: "بې طرفه", f: "betarafa"}, { p: "پاتې", f: "paate" }, { p: "کېدل", f: "kedul" }]); - }) -}) +import { + psHash, + dePsHash, + PsHash, +} from "./word-list-maker"; +import { + Types as T, +} from "@lingdocs/inflect"; + +const toTest: { + plain: T.PsWord, + hash: PsHash, +}[] = [ + { + plain: { p: "کور", f: "kor" }, + hash: "کورXkor", + }, + { + plain: { + p: "کنار", f: "kanaar", + hyphen: [ + { type: "unwritten", f: "e" }, + { type: "written", f: "daryaab", p: "دریاب" }, + ], + }, + hash: "کنارXkanaar-Xe-دریابXdaryaab", + }, + { + plain: { + p: "کار", f: "kaar", + hyphen: [ + { type: "written", f: "U", p: "و" }, + { type: "written", f: "baar", p: "بار" }, + ], + }, + hash: 
"کارXkaar-وXU-بارXbaar", + }, +]; + +test("psHash should work", () => { + toTest.forEach((t) => { + expect(psHash(t.plain)).toEqual(t.hash); + }); +}); + +test("dePsHash should work", () => { + toTest.forEach((t) => { + expect(dePsHash(t.hash)).toEqual(t.plain); + }); +}); diff --git a/functions/src/word-list-maker.ts b/functions/src/word-list-maker.ts index e978367..d0816dd 100644 --- a/functions/src/word-list-maker.ts +++ b/functions/src/word-list-maker.ts @@ -1,59 +1,69 @@ import { - inflectWord, conjugateVerb, Types as T, removeFVarients, + splitPsString, + inflectWord, +} from "@lingdocs/inflect"; +import { + typePredicates as tp, } from "@lingdocs/inflect"; -import { isNounOrAdjEntry } from "@lingdocs/inflect/dist/lib/src/type-predicates"; -type PSHash = `${string}X${string}`; +export type PsHash = `${string}X${string}`; -function makeHash(o: T.PsString): PSHash { +export function psHash(o: T.PsWord): PsHash { + if ("hyphen" in o && o.hyphen) { + return o.hyphen.reduce((acc, h) => { + return acc + `-${h.type === "written" ? h.p : ""}X${h.f}` as PsHash; + }, `${o.p}X${o.f}` as PsHash); + } return `${o.p}X${o.f}`; } -export function splitWords(o: T.PsString): T.PsString[] { - function splitR(o: { p: string[], f: string[] }): T.PsString[] { - const [lastP, ...restP] = o.p; - const [lastF, ...restF] = o.f; - if (!restF.length || !restP.length) { - return [{ - p: [lastP, ...restP].reverse().join(" "), - f: [lastF, ...restF].reverse().join(" "), - }]; - } - const lastWord: T.PsString = { - p: lastP, - f: lastF, - }; - return [lastWord, ...splitR({ p: restP, f: restF })]; +export function dePsHash(h: PsHash): T.PsWord { + function deHashHyphenContents(c: string[]): T.HyphenPsContent[] { + return c.reduce((acc, x) => { + const [p, f] = x.split("X"); + const n: T.HyphenPsContent = p === "" ? 
{ + type: "unwritten", + f, + } : { + type: "written", + p, + f, + }; + return [...acc, n]; + }, []); } - return splitR({ - p: o.p.split(" ").reverse(), - f: o.f.split(" ").reverse(), - }).reverse(); + const [first, ...rest] = h.split("-"); + const [p, f] = first.split("X"); + if (rest.length === 0) { + return { p, f }; + } + return { + p, + f, + hyphen: deHashHyphenContents(rest), + }; } -// will return { p: "", f: "", s: "" } -function search(object: any): Set { +function search(object: any): Set { + let splitError: any = false; // adapted from // https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/ - function inside(haystack: any, found: Set): Set { - // use uniqueObjects = _.uniqWith(objects, _.isEqual) - // instead of set + function inside(haystack: any, found: Set): Set { if (haystack === null) { return found; } Object.keys(haystack).forEach((key: string) => { if(key === "p" && typeof haystack[key] === "string") { - // todo: rather get the p and f - // TODO: split words into individual words - // haystack[key].split(" ").forEach((word: string) => { - // found.(word); - // }); - splitWords(haystack).forEach(word => { - found.add(makeHash(word)); - }); + try { + splitPsString(haystack).forEach(word => { + found.add(psHash(word)); + }); + } catch (e) { + splitError = { haystack }; + } return; } if(typeof haystack[key] === 'object') { @@ -63,75 +73,93 @@ function search(object: any): Set { }); return found; }; - return inside(object, new Set()); + const r = inside(object, new Set()); + if (splitError) { + console.log(splitError); + } + return r; } export function getWordList(entries: T.DictionaryEntry[]): { ok: true, - wordlist: T.PsString[], + wordlist: T.PsWord[], } | { ok: false, errors: T.DictionaryEntryError[], } { - const allInflections = new Set(); - const errors: T.DictionaryEntryError[] = []; - function getNounAdjInflections(entry: T.DictionaryEntry) { - const infs = inflectWord(entry); - if (infs) { - search(infs).forEach(x => allInflections.add(x)); - } else { - allInflections.add(makeHash(removeFVarients(entry))); - } - } - function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) { - search(conjugateVerb(word, linked)).forEach(x => allInflections.add(x)); - } - // got the entries, make a wordList of all the possible inflections + const allWords = new Set(); entries.forEach((entry) => { - try { - if (entry.c?.startsWith("v. ")) { - const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined; - getVerbConjugations(entry, linked); - } else if (isNounOrAdjEntry(entry as T.Entry)) { - getNounAdjInflections(entry); - } else { - allInflections.add(makeHash(removeFVarients(entry))); + const words = splitPsString(removeFVarients({ p: entry.p, f: entry.f })); + words.forEach((w) => allWords.add(psHash(w))); + if (tp.isNounOrAdjEntry(entry)) { + const infs = inflectWord(entry); + if (infs) { + search(infs).forEach(x => allWords.add(x)); } - } catch (error) { - errors.push({ - ts: entry.ts, - p: entry.p, - f: entry.f, - e: entry.e, - erroneousFields: [], - errors: ["error inflecting/conjugating entry"], - }); + } else if (tp.isVerbDictionaryEntry(entry)) { + const linked = entry.l ? 
entries.find((e) => e.ts === entry.l) : undefined; + const conj = conjugateVerb(entry, linked); + search(conj).forEach(x => allWords.add(x)); } }); - if (errors.length) { - return ({ - ok: false, - errors, - }); - } - - // add ی version of words with ې (to accomadate for some bad spelling) - // allInflections.forEach((word: string) => { - // // for words with ې in the middle, also have a version with ی in the middle instead - // // if (eInMiddleRegex.test(word)) { - // // allInflections.add(word.replace(eInMiddleRegex, "ی")); - // // } - // // for words ending in ې, also have a version ending in ي - // // if (word.slice(-1) === "ې") { - // // allInflections.add(word.slice(0, -1) + "ي"); - // // } + // const errors: T.DictionaryEntryError[] = []; + // function getNounAdjInflections(entry: T.DictionaryEntry) { + // const infs = inflectWord(entry); + // if (infs) { + // search(infs).forEach(x => allInflections.add(x)); + // } else { + // allInflections.add(psHash(removeFVarients(entry))); + // } + // } + // function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) { + // search(conjugateVerb(word, linked)).forEach(x => allInflections.add(x)); + // } + // // got the entries, make a wordList of all the possible inflections + // entries.forEach((entry) => { + // try { + // if (entry.c?.startsWith("v. ")) { + // const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined; + // getVerbConjugations(entry, linked); + // } else if (isNounOrAdjEntry(entry as T.Entry)) { + // getNounAdjInflections(entry); + // } else { + // allInflections.add(psHash(removeFVarients(entry))); + // } + // } catch (error) { + // console.log({ entry, error }); + // errors.push({ + // ts: entry.ts, + // p: entry.p, + // f: entry.f, + // e: entry.e, + // erroneousFields: [], + // errors: ["error inflecting/conjugating entry"], + // }); + // } // }); - // const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?"))); - // wordlist.sort((a, b) => a.localeCompare(b, "ps")); - const wordlist: T.PsString[] = []; - allInflections.forEach(x => { - const [p, f] = x.split("X"); - wordlist.push({ p, f }); + // if (errors.length) { + // return ({ + // ok: false, + // errors, + // }); + // } + + // // add ی version of words with ې (to accomadate for some bad spelling) + // // allInflections.forEach((word: string) => { + // // // for words with ې in the middle, also have a version with ی in the middle instead + // // // if (eInMiddleRegex.test(word)) { + // // // allInflections.add(word.replace(eInMiddleRegex, "ی")); + // // // } + // // // for words ending in ې, also have a version ending in ي + // // // if (word.slice(-1) === "ې") { + // // // allInflections.add(word.slice(0, -1) + "ي"); + // // // } + // // }); + // // const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?"))); + // // wordlist.sort((a, b) => a.localeCompare(b, "ps")); + const wordlist: T.PsWord[] = []; + allWords.forEach(x => { + wordlist.push(dePsHash(x)); }); wordlist.sort((a, b) => a.p.localeCompare(b.p, "ps")); return { diff --git a/website/package.json b/website/package.json index 3ceb22a..100d4c6 100644 --- a/website/package.json +++ b/website/package.json @@ -7,7 +7,7 @@ "private": true, "dependencies": { "@fortawesome/fontawesome-free": "^5.15.2", - "@lingdocs/ps-react": "5.5.1", + "@lingdocs/ps-react": "5.7.11", "@testing-library/jest-dom": "^5.11.4", "@testing-library/react": "^11.1.0", "@testing-library/user-event": "^12.1.10", diff --git 
a/website/src/App.tsx b/website/src/App.tsx index e5df5e9..e36485f 100644 --- a/website/src/App.tsx +++ b/website/src/App.tsx @@ -448,7 +448,9 @@ class App extends Component { return; } const lastChar = searchValue[searchValue.length-1]; - if (lastChar >= '0' && lastChar <= '9') { + // don't let people type in a single digit (to allow for number shortcuts) + // but do allow the whole thing to be numbers (to allow for pasting and searching for ts) + if (lastChar >= '0' && lastChar <= '9' && !(/^\d+$/.test(searchValue))) { return; } if (this.state.dictionaryStatus !== "ready") { diff --git a/website/yarn.lock b/website/yarn.lock index b5f94aa..d638345 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2349,10 +2349,10 @@ "@jridgewell/resolve-uri" "^3.0.3" "@jridgewell/sourcemap-codec" "^1.4.10" -"@lingdocs/ps-react@5.5.1": - version "5.5.1" - resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-5.5.1.tgz#3636569555156fb28ad7ce3899b237e523f148e6" - integrity sha512-c20fr/THSagIZVv0OJMcXYHc1V8m0FTJtbaHH0BztD1lEFViMXdNXlQ+Ck52BjIlA+lY48SLI+VKBEvx9d9W7w== +"@lingdocs/ps-react@5.7.11": + version "5.7.11" + resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-5.7.11.tgz#b8f4e5246f26d40adb46065d7018c644b7abdc41" + integrity sha512-wQPcu+EUXq21tdgigyoT0fxJQKccvmRbbJ2bOk4ACtBZ1zVsFttsfpIiNfwByMFaTljTQ59vv8kJihDMqdCicA== dependencies: "@formkit/auto-animate" "^1.0.0-beta.3" classnames "^2.2.6"
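
The core of this change is the PsHash encoding in functions/src/word-list-maker.ts: a word is stored as "pXf", and any hyphenated/izafe tail is appended as "-pXf" segments, with an empty p for unwritten parts. A minimal round-trip sketch, assuming only the psHash/dePsHash exports and the izafe fixture from word-list-maker.test.ts above:

import { Types as T } from "@lingdocs/inflect";
import { psHash, dePsHash, PsHash } from "./word-list-maker";

// کنارِ دریاب "kanaar-e-daryaab": the izafe vowel is pronounced but not
// written, so it is stored as an "unwritten" hyphen part with no p
const word: T.PsWord = {
  p: "کنار",
  f: "kanaar",
  hyphen: [
    { type: "unwritten", f: "e" },
    { type: "written", p: "دریاب", f: "daryaab" },
  ],
};

const hash: PsHash = psHash(word);      // "کنارXkanaar-Xe-دریابXdaryaab"
const back: T.PsWord = dePsHash(hash);  // deep-equal to `word`

Because the hash is a plain string, the whole word list can be de-duplicated in a Set<PsHash> and expanded back into T.PsWord objects at the end, which is what the rewritten getWordList does.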
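
getWordList itself now splits each entry with splitPsString, adds inflections for noun/adjective entries and conjugations for verb entries, and collects everything as hashes before sorting. A usage sketch, reusing entry shapes from the commented-out fixtures in word-list-maker.test.ts (the `as T.DictionaryEntry[]` cast is only to keep the sketch short):

import { Types as T } from "@lingdocs/inflect";
import { getWordList } from "./word-list-maker";

const entries = [
  { i: 2574, ts: 1581189430959, p: "پېش", f: "pesh", g: "pesh", e: "ahead, in front; earlier, first, before", c: "adv." },
  { i: 4424, ts: 1527812531, p: "چېرته", f: "cherta", g: "cherta", e: "where (also used for if, when)" },
] as T.DictionaryEntry[];

const result = getWordList(entries);
if (result.ok) {
  // every surface form produced by splitting, inflecting, and conjugating
  // the entries, de-duplicated through PsHash and sorted by Pashto script
  result.wordlist.forEach((w) => console.log(w.p, w.f));
}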
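
The website/src/App.tsx change is easiest to see pulled out as a standalone predicate (the helper name here is made up for illustration): it keeps a single typed digit reserved as a number shortcut while letting a pasted all-digit ts string through to the search.

function ignoreDigitInput(searchValue: string): boolean {
  const lastChar = searchValue[searchValue.length - 1];
  // a trailing digit on non-numeric text is swallowed (digits are shortcuts),
  // but an all-digit value is allowed so a pasted ts can be searched
  return lastChar >= '0' && lastChar <= '9' && !(/^\d+$/.test(searchValue));
}

ignoreDigitInput("kor1");       // true  -> input ignored, "1" acts as a shortcut
ignoreDigitInput("1527812531"); // false -> searched as a ts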