From c62db2168c02facc9c51027973ab4d6b215c0896 Mon Sep 17 00:00:00 2001
From: adueck
Date: Thu, 19 Jan 2023 23:35:35 +0500
Subject: [PATCH] publish stuff

---
Review notes (everything between the `---` line and the first `diff --git`
line is commentary and is ignored by `git am`):

- word-list-maker.ts, addPs(): the duplicate check was inverted. The
  previous predicate `!(x.p === ps.p && x.f === ps.f)` matched any stored
  entry that DIFFERS from `ps`, so after the first form was stored every
  distinct form was dropped and only repeats of the first form were
  appended. The check now uses `some()` with the positive match, so a form
  is appended only when no stored entry has the same `p` and `f`.
- addPs() still scans the whole accumulated list for every candidate form,
  which is quadratic in the number of forms. A Set keyed on p/f would avoid
  that; it is left as-is here so the hunk sizes stay unchanged.
- publish.ts: all-words.json changes shape from `{ words: string[] }` to a
  bare array of T.PsString objects, and the list is no longer filtered or
  sorted (those lines are commented out below). Consumers of that file need
  to be updated accordingly.
- This patch text was recovered from a copy whose line breaks had been
  collapsed. Indentation and the position of blank context lines were
  reconstructed to agree with the hunk line counts; verify them against the
  target tree before applying. The `index` hashes are those of the original
  commit.

 functions/package-lock.json      |  1 +
 functions/package.json           |  1 +
 functions/src/publish.ts         | 42 +++++++++++++-------------
 functions/src/word-list-maker.ts | 52 ++++++++++++++++++++++----------
 4 files changed, 59 insertions(+), 37 deletions(-)

diff --git a/functions/package-lock.json b/functions/package-lock.json
index 2db8a11..3d59701 100644
--- a/functions/package-lock.json
+++ b/functions/package-lock.json
@@ -15,6 +15,7 @@
         "firebase-admin": "^9.2.0",
         "firebase-functions": "^3.24.1",
         "google-spreadsheet": "^3.1.15",
+        "lodash": "^4.17.21",
         "nano": "^9.0.3",
         "node-fetch": "^2.6.1",
         "react": "^17.0.1",
diff --git a/functions/package.json b/functions/package.json
index 2db8c22..30a5684 100644
--- a/functions/package.json
+++ b/functions/package.json
@@ -22,6 +22,7 @@
     "firebase-admin": "^9.2.0",
     "firebase-functions": "^3.24.1",
     "google-spreadsheet": "^3.1.15",
+    "lodash": "^4.17.21",
     "nano": "^9.0.3",
     "node-fetch": "^2.6.1",
     "react": "^17.0.1",
diff --git a/functions/src/publish.ts b/functions/src/publish.ts
index 3432d59..089fc3f 100644
--- a/functions/src/publish.ts
+++ b/functions/src/publish.ts
@@ -28,8 +28,8 @@ const bucketName = "lingdocs";
 const baseUrl = `https://storage.googleapis.com/${bucketName}/`;
 const dictionaryFilename = "dict";
 const dictionaryInfoFilename = "dict-info";
-const hunspellAffFileFilename = "ps_AFF.aff";
-const hunspellDicFileFilename = "ps_AFF.dic";
+// const hunspellAffFileFilename = "ps_AFF.aff";
+// const hunspellDicFileFilename = "ps_AFF.dic";
 const allWordsJsonFilename = "all-words.json";
 const url = `${baseUrl}${dictionaryFilename}`;
 const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
@@ -73,8 +73,8 @@ async function doHunspellEtc(entries: T.DictionaryEntry[]) {
     if (!wordlistResponse.ok) {
         throw new Error(JSON.stringify(wordlistResponse.errors));
     }
-    const hunspell = makeHunspell(wordlistResponse.wordlist);
-    await uploadHunspellToStorage(hunspell);
+    // const hunspell = makeHunspell(wordlistResponse.wordlist);
+    // await uploadHunspellToStorage(hunspell);
     await uploadAllWordsToStoarage(wordlistResponse.wordlist)
 }
 
@@ -224,18 +224,18 @@ async function upload(content: Buffer | string, filename: string) {
     });
 }
 
-async function uploadHunspellToStorage(wordlist: {
-    affContent: string,
-    dicContent: string,
-}) {
-    await Promise.all([
-        upload(wordlist.affContent, hunspellAffFileFilename),
-        upload(wordlist.dicContent, hunspellDicFileFilename),
-    ]);
-}
+// async function uploadHunspellToStorage(wordlist: {
+// affContent: string,
+// dicContent: string,
+// }) {
+// await Promise.all([
+// upload(wordlist.affContent, hunspellAffFileFilename),
+// upload(wordlist.dicContent, hunspellDicFileFilename),
+// ]);
+// }
 
-async function uploadAllWordsToStoarage(words: string[]) {
-    await upload(JSON.stringify({words}), allWordsJsonFilename)
+async function uploadAllWordsToStoarage(words: T.PsString[]) {
+    await upload(JSON.stringify(words), allWordsJsonFilename)
 }
 
 async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
@@ -249,9 +249,9 @@ async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
     ]);
 }
 
-function makeHunspell(wordlist: string[]) {
-    return {
-        dicContent: wordlist.reduce((acc, word) => acc + word + "\n", wordlist.length + "\n"),
-        affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n",
-    };
-}
+// function makeHunspell(wordlist: string[]) {
+// return {
+// dicContent: wordlist.reduce((acc, word) => acc + word + "\n", wordlist.length + "\n"),
+// affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n",
+// };
+// }
diff --git a/functions/src/word-list-maker.ts b/functions/src/word-list-maker.ts
index 1b2a96b..559dd16 100644
--- a/functions/src/word-list-maker.ts
+++ b/functions/src/word-list-maker.ts
@@ -2,50 +2,67 @@ import {
     inflectWord,
     conjugateVerb,
     Types as T,
+    removeFVarients,
 } from "@lingdocs/inflect";
+import { isNounOrAdjEntry } from "@lingdocs/inflect/dist/lib/src/type-predicates";
+import {
+    uniqWith,
+    isEqual,
+} from "lodash";
 
-function search(key: string, object: any): string[] {
+// will return { p: "", f: "", s: "" }
+function search(object: any): T.PsString[] {
     // adapted from
     // https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/
-    function inside(needle: string, haystack: any, found: Set = new Set()): Set {
+    function inside(haystack: any, found: T.PsString[]): T.PsString[] {
+        // use uniqueObjects = _.uniqWith(objects, _.isEqual)
+        // instead of set
         if (haystack === null) {
             return found;
         }
         Object.keys(haystack).forEach((key: string) => {
-            if(key === needle && typeof haystack[key] === "string") {
-                haystack[key].split(" ").forEach((word: string) => {
-                    found.add(word);
-                });
+            if(key === "p" && typeof haystack[key] === "string") {
+                // todo: rather get the p and f
+                // TODO: split words into individual words
+                // haystack[key].split(" ").forEach((word: string) => {
+                // found.(word);
+                // });
+                found.push(haystack as T.PsString)
                 return;
             }
             if(typeof haystack[key] === 'object') {
-                inside(needle, haystack[key], found);
+                inside(haystack[key], found);
             }
             return;
         });
         return found;
     };
-    return Array.from(inside(key, object));
+    return uniqWith(inside(object, []), isEqual);
 }
 
 export function getWordList(entries: T.DictionaryEntry[]): {
     ok: true,
-    wordlist: string[],
+    wordlist: T.PsString[],
 } | {
     ok: false,
     errors: T.DictionaryEntryError[],
 } {
-    const allInflections: Set = new Set();
+    let allInflections: T.PsString[] = [];
+    function addPs(ps: T.PsString) {
+        if (!allInflections.some(x => x.p === ps.p && x.f === ps.f)) {
+            allInflections.push(ps);
+        };
+    }
     const errors: T.DictionaryEntryError[] = [];
     function getNounAdjInflections(entry: T.DictionaryEntry) {
         const infs = inflectWord(entry);
         if (infs) {
-            search("p", infs).forEach(w => allInflections.add(w));
+            search(infs).forEach(addPs);
         }
     }
     function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) {
-        search("p", conjugateVerb(word, linked)).forEach(w => allInflections.add(w));
+        search(conjugateVerb(word, linked)).forEach(addPs);
     }
 
     // got the entries, make a wordList of all the possible inflections
     entries.forEach((entry) => {
@@ -53,8 +70,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
             if (entry.c?.startsWith("v. ")) {
                 const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined;
                 getVerbConjugations(entry, linked);
+            } else if (isNounOrAdjEntry(entry as T.Entry)) {
+                getNounAdjInflections(entry);
+            } else {
+                addPs(removeFVarients({ p: entry.p, f: entry.f }));
             }
-            getNounAdjInflections(entry);
         } catch (error) {
             errors.push({
                 ts: entry.ts,
@@ -84,11 +104,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
     // // allInflections.add(word.slice(0, -1) + "ي");
     // // }
     // });
-    const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
-    wordlist.sort((a, b) => a.localeCompare(b, "ps"));
+    // const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
+    // wordlist.sort((a, b) => a.localeCompare(b, "ps"));
 
     return {
         ok: true,
-        wordlist,
+        wordlist: allInflections,
     };
 }