publish stuff

This commit is contained in:
adueck 2023-01-19 23:35:35 +05:00
parent eeb35bf489
commit c62db2168c
4 changed files with 59 additions and 37 deletions

View File

@ -15,6 +15,7 @@
"firebase-admin": "^9.2.0",
"firebase-functions": "^3.24.1",
"google-spreadsheet": "^3.1.15",
"lodash": "^4.17.21",
"nano": "^9.0.3",
"node-fetch": "^2.6.1",
"react": "^17.0.1",

View File

@ -22,6 +22,7 @@
"firebase-admin": "^9.2.0",
"firebase-functions": "^3.24.1",
"google-spreadsheet": "^3.1.15",
"lodash": "^4.17.21",
"nano": "^9.0.3",
"node-fetch": "^2.6.1",
"react": "^17.0.1",

View File

@ -28,8 +28,8 @@ const bucketName = "lingdocs";
const baseUrl = `https://storage.googleapis.com/${bucketName}/`;
const dictionaryFilename = "dict";
const dictionaryInfoFilename = "dict-info";
const hunspellAffFileFilename = "ps_AFF.aff";
const hunspellDicFileFilename = "ps_AFF.dic";
// const hunspellAffFileFilename = "ps_AFF.aff";
// const hunspellDicFileFilename = "ps_AFF.dic";
const allWordsJsonFilename = "all-words.json";
const url = `${baseUrl}${dictionaryFilename}`;
const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
@ -73,8 +73,8 @@ async function doHunspellEtc(entries: T.DictionaryEntry[]) {
if (!wordlistResponse.ok) {
throw new Error(JSON.stringify(wordlistResponse.errors));
}
const hunspell = makeHunspell(wordlistResponse.wordlist);
await uploadHunspellToStorage(hunspell);
// const hunspell = makeHunspell(wordlistResponse.wordlist);
// await uploadHunspellToStorage(hunspell);
await uploadAllWordsToStoarage(wordlistResponse.wordlist)
}
@ -224,18 +224,18 @@ async function upload(content: Buffer | string, filename: string) {
});
}
/**
 * Sends both Hunspell files to storage through the file's `upload` helper.
 *
 * The two uploads are started back to back and awaited together, so they
 * run concurrently and the first rejection rejects the returned promise.
 *
 * @param wordlist - the generated Hunspell contents: `affContent` is stored
 *   under `hunspellAffFileFilename`, `dicContent` under `hunspellDicFileFilename`
 */
async function uploadHunspellToStorage(wordlist: {
    affContent: string,
    dicContent: string,
}) {
    // Kick off both transfers before awaiting either one.
    const affUpload = upload(wordlist.affContent, hunspellAffFileFilename);
    const dicUpload = upload(wordlist.dicContent, hunspellDicFileFilename);
    await Promise.all([affUpload, dicUpload]);
}
// async function uploadHunspellToStorage(wordlist: {
// affContent: string,
// dicContent: string,
// }) {
// await Promise.all([
// upload(wordlist.affContent, hunspellAffFileFilename),
// upload(wordlist.dicContent, hunspellDicFileFilename),
// ]);
// }
async function uploadAllWordsToStoarage(words: string[]) {
await upload(JSON.stringify({words}), allWordsJsonFilename)
async function uploadAllWordsToStoarage(words: T.PsString[]) {
await upload(JSON.stringify(words), allWordsJsonFilename)
}
async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
@ -249,9 +249,9 @@ async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
]);
}
/**
 * Builds the text of a Hunspell `.dic` / `.aff` pair from a list of words.
 *
 * @param wordlist - the words to list in the dictionary file
 * @returns an object with
 *   - `dicContent`: the number of words on the first line, then every word
 *     on its own newline-terminated line, in the order given
 *   - `affContent`: a fixed header (`SET`, `COMPLEXPREFIXES` and `IGNORE`
 *     lines) that does not depend on the input
 */
function makeHunspell(wordlist: string[]) {
    // First line of the .dic content is the entry count.
    let dicContent = `${wordlist.length}\n`;
    wordlist.forEach((word) => {
        dicContent += `${word}\n`;
    });
    const affContent = "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n";
    return { dicContent, affContent };
}
// function makeHunspell(wordlist: string[]) {
// return {
// dicContent: wordlist.reduce((acc, word) => acc + word + "\n", wordlist.length + "\n"),
// affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n",
// };
// }

View File

@ -2,50 +2,67 @@ import {
inflectWord,
conjugateVerb,
Types as T,
removeFVarients,
} from "@lingdocs/inflect";
import { isNounOrAdjEntry } from "@lingdocs/inflect/dist/lib/src/type-predicates";
import {
uniqWith,
isEqual,
} from "lodash";
function search(key: string, object: any): string[] {
// will return { p: "", f: "", s: "" }
function search(object: any): T.PsString[] {
// adapted from
// https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/
function inside(needle: string, haystack: any, found: Set<string> = new Set()): Set<string> {
function inside(haystack: any, found: T.PsString[]): T.PsString[] {
// use uniqueObjects = _.uniqWith(objects, _.isEqual)
// instead of set
if (haystack === null) {
return found;
}
Object.keys(haystack).forEach((key: string) => {
if(key === needle && typeof haystack[key] === "string") {
haystack[key].split(" ").forEach((word: string) => {
found.add(word);
});
if(key === "p" && typeof haystack[key] === "string") {
// todo: rather get the p and f
// TODO: split words into individual words
// haystack[key].split(" ").forEach((word: string) => {
// found.(word);
// });
found.push(haystack as T.PsString)
return;
}
if(typeof haystack[key] === 'object') {
inside(needle, haystack[key], found);
inside(haystack[key], found);
}
return;
});
return found;
};
return Array.from(inside(key, object));
return uniqWith(inside(object, []), isEqual);
}
export function getWordList(entries: T.DictionaryEntry[]): {
ok: true,
wordlist: string[],
wordlist: T.PsString[],
} | {
ok: false,
errors: T.DictionaryEntryError[],
} {
const allInflections: Set<string> = new Set();
let allInflections: T.PsString[] = [];
/**
 * Appends `ps` to `allInflections` unless an entry with the same `p` and
 * `f` values is already present.
 *
 * The previous check was inverted: it looked for an entry that did NOT
 * match and only pushed when none existed, so once the list held one word
 * every different word was discarded and exact repeats were appended.
 *
 * @param ps - the word to record; compared by its `p` and `f` fields
 */
function addPs(ps: T.PsString) {
    const alreadyListed = allInflections.some((x) => x.p === ps.p && x.f === ps.f);
    if (!alreadyListed) {
        allInflections.push(ps);
    }
}
const errors: T.DictionaryEntryError[] = [];
function getNounAdjInflections(entry: T.DictionaryEntry) {
const infs = inflectWord(entry);
if (infs) {
search("p", infs).forEach(w => allInflections.add(w));
search(infs).forEach(addPs);
}
}
function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) {
search("p", conjugateVerb(word, linked)).forEach(w => allInflections.add(w));
search(conjugateVerb(word, linked)).forEach(addPs);
}
// got the entries, make a wordList of all the possible inflections
entries.forEach((entry) => {
@ -53,8 +70,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
if (entry.c?.startsWith("v. ")) {
const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined;
getVerbConjugations(entry, linked);
} else if (isNounOrAdjEntry(entry as T.Entry)) {
getNounAdjInflections(entry);
} else {
addPs(removeFVarients({ p: entry.p, f: entry.f }));
}
getNounAdjInflections(entry);
} catch (error) {
errors.push({
ts: entry.ts,
@ -84,11 +104,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
// // allInflections.add(word.slice(0, -1) + "ي");
// // }
// });
const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
wordlist.sort((a, b) => a.localeCompare(b, "ps"));
// const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
// wordlist.sort((a, b) => a.localeCompare(b, "ps"));
return {
ok: true,
wordlist,
wordlist: allInflections,
};
}