publish stuff

This commit is contained in:
adueck 2023-01-19 23:35:35 +05:00
parent eeb35bf489
commit c62db2168c
4 changed files with 59 additions and 37 deletions

View File

@ -15,6 +15,7 @@
"firebase-admin": "^9.2.0", "firebase-admin": "^9.2.0",
"firebase-functions": "^3.24.1", "firebase-functions": "^3.24.1",
"google-spreadsheet": "^3.1.15", "google-spreadsheet": "^3.1.15",
"lodash": "^4.17.21",
"nano": "^9.0.3", "nano": "^9.0.3",
"node-fetch": "^2.6.1", "node-fetch": "^2.6.1",
"react": "^17.0.1", "react": "^17.0.1",

View File

@ -22,6 +22,7 @@
"firebase-admin": "^9.2.0", "firebase-admin": "^9.2.0",
"firebase-functions": "^3.24.1", "firebase-functions": "^3.24.1",
"google-spreadsheet": "^3.1.15", "google-spreadsheet": "^3.1.15",
"lodash": "^4.17.21",
"nano": "^9.0.3", "nano": "^9.0.3",
"node-fetch": "^2.6.1", "node-fetch": "^2.6.1",
"react": "^17.0.1", "react": "^17.0.1",

View File

@ -28,8 +28,8 @@ const bucketName = "lingdocs";
const baseUrl = `https://storage.googleapis.com/${bucketName}/`; const baseUrl = `https://storage.googleapis.com/${bucketName}/`;
const dictionaryFilename = "dict"; const dictionaryFilename = "dict";
const dictionaryInfoFilename = "dict-info"; const dictionaryInfoFilename = "dict-info";
const hunspellAffFileFilename = "ps_AFF.aff"; // const hunspellAffFileFilename = "ps_AFF.aff";
const hunspellDicFileFilename = "ps_AFF.dic"; // const hunspellDicFileFilename = "ps_AFF.dic";
const allWordsJsonFilename = "all-words.json"; const allWordsJsonFilename = "all-words.json";
const url = `${baseUrl}${dictionaryFilename}`; const url = `${baseUrl}${dictionaryFilename}`;
const infoUrl = `${baseUrl}${dictionaryInfoFilename}`; const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
@ -73,8 +73,8 @@ async function doHunspellEtc(entries: T.DictionaryEntry[]) {
if (!wordlistResponse.ok) { if (!wordlistResponse.ok) {
throw new Error(JSON.stringify(wordlistResponse.errors)); throw new Error(JSON.stringify(wordlistResponse.errors));
} }
const hunspell = makeHunspell(wordlistResponse.wordlist); // const hunspell = makeHunspell(wordlistResponse.wordlist);
await uploadHunspellToStorage(hunspell); // await uploadHunspellToStorage(hunspell);
await uploadAllWordsToStoarage(wordlistResponse.wordlist) await uploadAllWordsToStoarage(wordlistResponse.wordlist)
} }
@ -224,18 +224,18 @@ async function upload(content: Buffer | string, filename: string) {
}); });
} }
async function uploadHunspellToStorage(wordlist: { // async function uploadHunspellToStorage(wordlist: {
affContent: string, // affContent: string,
dicContent: string, // dicContent: string,
}) { // }) {
await Promise.all([ // await Promise.all([
upload(wordlist.affContent, hunspellAffFileFilename), // upload(wordlist.affContent, hunspellAffFileFilename),
upload(wordlist.dicContent, hunspellDicFileFilename), // upload(wordlist.dicContent, hunspellDicFileFilename),
]); // ]);
} // }
async function uploadAllWordsToStoarage(words: string[]) { async function uploadAllWordsToStoarage(words: T.PsString[]) {
await upload(JSON.stringify({words}), allWordsJsonFilename) await upload(JSON.stringify(words), allWordsJsonFilename)
} }
async function uploadDictionaryToStorage(dictionary: T.Dictionary) { async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
@ -249,9 +249,9 @@ async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
]); ]);
} }
function makeHunspell(wordlist: string[]) { // function makeHunspell(wordlist: string[]) {
return { // return {
dicContent: wordlist.reduce((acc, word) => acc + word + "\n", wordlist.length + "\n"), // dicContent: wordlist.reduce((acc, word) => acc + word + "\n", wordlist.length + "\n"),
affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n", // affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n",
}; // };
} // }

View File

@ -2,50 +2,67 @@ import {
inflectWord, inflectWord,
conjugateVerb, conjugateVerb,
Types as T, Types as T,
removeFVarients,
} from "@lingdocs/inflect"; } from "@lingdocs/inflect";
import { isNounOrAdjEntry } from "@lingdocs/inflect/dist/lib/src/type-predicates";
import {
uniqWith,
isEqual,
} from "lodash";
function search(key: string, object: any): string[] { // will return { p: "", f: "", s: "" }
function search(object: any): T.PsString[] {
// adapted from // adapted from
// https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/ // https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/
function inside(needle: string, haystack: any, found: Set<string> = new Set()): Set<string> { function inside(haystack: any, found: T.PsString[]): T.PsString[] {
// matches are collected in a plain array and de-duplicated afterwards with
// lodash's uniqWith(..., isEqual) instead of a Set, since they are objects, not strings
if (haystack === null) { if (haystack === null) {
return found; return found;
} }
Object.keys(haystack).forEach((key: string) => { Object.keys(haystack).forEach((key: string) => {
if(key === needle && typeof haystack[key] === "string") { if(key === "p" && typeof haystack[key] === "string") {
haystack[key].split(" ").forEach((word: string) => { // todo: rather get the p and f
found.add(word); // TODO: split words into individual words
}); // haystack[key].split(" ").forEach((word: string) => {
// found.(word);
// });
found.push(haystack as T.PsString)
return; return;
} }
if(typeof haystack[key] === 'object') { if(typeof haystack[key] === 'object') {
inside(needle, haystack[key], found); inside(haystack[key], found);
} }
return; return;
}); });
return found; return found;
}; };
return Array.from(inside(key, object)); return uniqWith(inside(object, []), isEqual);
} }
export function getWordList(entries: T.DictionaryEntry[]): { export function getWordList(entries: T.DictionaryEntry[]): {
ok: true, ok: true,
wordlist: string[], wordlist: T.PsString[],
} | { } | {
ok: false, ok: false,
errors: T.DictionaryEntryError[], errors: T.DictionaryEntryError[],
} { } {
const allInflections: Set<string> = new Set(); let allInflections: T.PsString[] = [];
/**
 * Appends `ps` to `allInflections` unless an entry with the same `p` and `f`
 * values is already present.
 *
 * @param ps - the word to record
 */
function addPs(ps: T.PsString) {
  // Positive match on both fields. The previous double negation
  // (`!find(x => !(same))`) searched for an entry that DIFFERS from `ps`, so
  // once the list held any other word every new word was dropped, while exact
  // duplicates of the first entry were still appended.
  const alreadyListed = allInflections.some((x) => x.p === ps.p && x.f === ps.f);
  if (!alreadyListed) {
    allInflections.push(ps);
  }
}
const errors: T.DictionaryEntryError[] = []; const errors: T.DictionaryEntryError[] = [];
function getNounAdjInflections(entry: T.DictionaryEntry) { function getNounAdjInflections(entry: T.DictionaryEntry) {
const infs = inflectWord(entry); const infs = inflectWord(entry);
if (infs) { if (infs) {
search("p", infs).forEach(w => allInflections.add(w)); search(infs).forEach(addPs);
} }
} }
function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) { function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) {
search("p", conjugateVerb(word, linked)).forEach(w => allInflections.add(w)); search(conjugateVerb(word, linked)).forEach(addPs);
} }
// got the entries, make a wordList of all the possible inflections // got the entries, make a wordList of all the possible inflections
entries.forEach((entry) => { entries.forEach((entry) => {
@ -53,8 +70,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
if (entry.c?.startsWith("v. ")) { if (entry.c?.startsWith("v. ")) {
const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined; const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined;
getVerbConjugations(entry, linked); getVerbConjugations(entry, linked);
} else if (isNounOrAdjEntry(entry as T.Entry)) {
getNounAdjInflections(entry);
} else {
addPs(removeFVarients({ p: entry.p, f: entry.f }));
} }
getNounAdjInflections(entry);
} catch (error) { } catch (error) {
errors.push({ errors.push({
ts: entry.ts, ts: entry.ts,
@ -84,11 +104,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
// // allInflections.add(word.slice(0, -1) + "ي"); // // allInflections.add(word.slice(0, -1) + "ي");
// // } // // }
// }); // });
const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?"))); // const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
wordlist.sort((a, b) => a.localeCompare(b, "ps")); // wordlist.sort((a, b) => a.localeCompare(b, "ps"));
return { return {
ok: true, ok: true,
wordlist, wordlist: allInflections,
}; };
} }