publish stuff
This commit is contained in:
parent
eeb35bf489
commit
c62db2168c
|
@ -15,6 +15,7 @@
|
|||
"firebase-admin": "^9.2.0",
|
||||
"firebase-functions": "^3.24.1",
|
||||
"google-spreadsheet": "^3.1.15",
|
||||
"lodash": "^4.17.21",
|
||||
"nano": "^9.0.3",
|
||||
"node-fetch": "^2.6.1",
|
||||
"react": "^17.0.1",
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
"firebase-admin": "^9.2.0",
|
||||
"firebase-functions": "^3.24.1",
|
||||
"google-spreadsheet": "^3.1.15",
|
||||
"lodash": "^4.17.21",
|
||||
"nano": "^9.0.3",
|
||||
"node-fetch": "^2.6.1",
|
||||
"react": "^17.0.1",
|
||||
|
|
|
@ -28,8 +28,8 @@ const bucketName = "lingdocs";
|
|||
const baseUrl = `https://storage.googleapis.com/${bucketName}/`;
|
||||
const dictionaryFilename = "dict";
|
||||
const dictionaryInfoFilename = "dict-info";
|
||||
const hunspellAffFileFilename = "ps_AFF.aff";
|
||||
const hunspellDicFileFilename = "ps_AFF.dic";
|
||||
// const hunspellAffFileFilename = "ps_AFF.aff";
|
||||
// const hunspellDicFileFilename = "ps_AFF.dic";
|
||||
const allWordsJsonFilename = "all-words.json";
|
||||
const url = `${baseUrl}${dictionaryFilename}`;
|
||||
const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
|
||||
|
@ -73,8 +73,8 @@ async function doHunspellEtc(entries: T.DictionaryEntry[]) {
|
|||
if (!wordlistResponse.ok) {
|
||||
throw new Error(JSON.stringify(wordlistResponse.errors));
|
||||
}
|
||||
const hunspell = makeHunspell(wordlistResponse.wordlist);
|
||||
await uploadHunspellToStorage(hunspell);
|
||||
// const hunspell = makeHunspell(wordlistResponse.wordlist);
|
||||
// await uploadHunspellToStorage(hunspell);
|
||||
await uploadAllWordsToStoarage(wordlistResponse.wordlist)
|
||||
}
|
||||
|
||||
|
@ -224,18 +224,18 @@ async function upload(content: Buffer | string, filename: string) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Uploads the hunspell affix and dictionary contents.
 *
 * The affix content is uploaded under `hunspellAffFileFilename` and the
 * dictionary content under `hunspellDicFileFilename`. Both uploads are
 * started before either is awaited, and the function resolves once both
 * have completed.
 *
 * @param wordlist - object carrying the `.aff` and `.dic` file contents
 */
async function uploadHunspellToStorage(wordlist: {
  affContent: string,
  dicContent: string,
}) {
  const { affContent, dicContent } = wordlist;
  const affUpload = upload(affContent, hunspellAffFileFilename);
  const dicUpload = upload(dicContent, hunspellDicFileFilename);
  await Promise.all([affUpload, dicUpload]);
}
|
||||
// async function uploadHunspellToStorage(wordlist: {
|
||||
// affContent: string,
|
||||
// dicContent: string,
|
||||
// }) {
|
||||
// await Promise.all([
|
||||
// upload(wordlist.affContent, hunspellAffFileFilename),
|
||||
// upload(wordlist.dicContent, hunspellDicFileFilename),
|
||||
// ]);
|
||||
// }
|
||||
|
||||
async function uploadAllWordsToStoarage(words: string[]) {
|
||||
await upload(JSON.stringify({words}), allWordsJsonFilename)
|
||||
async function uploadAllWordsToStoarage(words: T.PsString[]) {
|
||||
await upload(JSON.stringify(words), allWordsJsonFilename)
|
||||
}
|
||||
|
||||
async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
|
||||
|
@ -249,9 +249,9 @@ async function uploadDictionaryToStorage(dictionary: T.Dictionary) {
|
|||
]);
|
||||
}
|
||||
|
||||
/**
 * Builds the contents of the two hunspell files from a list of words.
 *
 * @param wordlist - the words to list in the dictionary content
 * @returns an object with
 *   - `dicContent`: the number of words on the first line, followed by each
 *     word on its own line; every line (including the last) ends with "\n"
 *   - `affContent`: a fixed header string (SET / COMPLEXPREFIXES / IGNORE lines)
 */
function makeHunspell(wordlist: string[]) {
  return {
    // Count line first, then one word per line; the trailing "\n" keeps the
    // final line newline-terminated (an empty list yields just "0\n").
    dicContent: [String(wordlist.length), ...wordlist].join("\n") + "\n",
    affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n",
  };
}
|
||||
// function makeHunspell(wordlist: string[]) {
|
||||
// return {
|
||||
// dicContent: wordlist.reduce((acc, word) => acc + word + "\n", wordlist.length + "\n"),
|
||||
// affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n",
|
||||
// };
|
||||
// }
|
||||
|
|
|
@ -2,50 +2,67 @@ import {
|
|||
inflectWord,
|
||||
conjugateVerb,
|
||||
Types as T,
|
||||
removeFVarients,
|
||||
} from "@lingdocs/inflect";
|
||||
import { isNounOrAdjEntry } from "@lingdocs/inflect/dist/lib/src/type-predicates";
|
||||
import {
|
||||
uniqWith,
|
||||
isEqual,
|
||||
} from "lodash";
|
||||
|
||||
|
||||
function search(key: string, object: any): string[] {
|
||||
// will return { p: "", f: "", s: "" }
|
||||
function search(object: any): T.PsString[] {
|
||||
// adapted from
|
||||
// https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/
|
||||
function inside(needle: string, haystack: any, found: Set<string> = new Set()): Set<string> {
|
||||
function inside(haystack: any, found: T.PsString[]): T.PsString[] {
|
||||
// use uniqueObjects = _.uniqWith(objects, _.isEqual)
|
||||
// instead of set
|
||||
if (haystack === null) {
|
||||
return found;
|
||||
}
|
||||
Object.keys(haystack).forEach((key: string) => {
|
||||
if(key === needle && typeof haystack[key] === "string") {
|
||||
haystack[key].split(" ").forEach((word: string) => {
|
||||
found.add(word);
|
||||
});
|
||||
if(key === "p" && typeof haystack[key] === "string") {
|
||||
// todo: rather get the p and f
|
||||
// TODO: split words into individual words
|
||||
// haystack[key].split(" ").forEach((word: string) => {
|
||||
// found.(word);
|
||||
// });
|
||||
found.push(haystack as T.PsString)
|
||||
return;
|
||||
}
|
||||
if(typeof haystack[key] === 'object') {
|
||||
inside(needle, haystack[key], found);
|
||||
inside(haystack[key], found);
|
||||
}
|
||||
return;
|
||||
});
|
||||
return found;
|
||||
};
|
||||
return Array.from(inside(key, object));
|
||||
return uniqWith(inside(object, []), isEqual);
|
||||
}
|
||||
|
||||
export function getWordList(entries: T.DictionaryEntry[]): {
|
||||
ok: true,
|
||||
wordlist: string[],
|
||||
wordlist: T.PsString[],
|
||||
} | {
|
||||
ok: false,
|
||||
errors: T.DictionaryEntryError[],
|
||||
} {
|
||||
const allInflections: Set<string> = new Set();
|
||||
let allInflections: T.PsString[] = [];
|
||||
/**
 * Appends `ps` to the enclosing `allInflections` list unless an entry with
 * the same `p` and `f` values is already in it.
 *
 * @param ps - the word to record
 */
function addPs(ps: T.PsString) {
  // Look for an entry that MATCHES on both p and f. Matching on "differs"
  // instead would reject every new word as soon as the list holds one entry.
  const alreadyListed = allInflections.some((x) => x.p === ps.p && x.f === ps.f);
  if (!alreadyListed) {
    allInflections.push(ps);
  }
}
|
||||
const errors: T.DictionaryEntryError[] = [];
|
||||
function getNounAdjInflections(entry: T.DictionaryEntry) {
|
||||
const infs = inflectWord(entry);
|
||||
if (infs) {
|
||||
search("p", infs).forEach(w => allInflections.add(w));
|
||||
search(infs).forEach(addPs);
|
||||
}
|
||||
}
|
||||
function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) {
|
||||
search("p", conjugateVerb(word, linked)).forEach(w => allInflections.add(w));
|
||||
search(conjugateVerb(word, linked)).forEach(addPs);
|
||||
}
|
||||
// got the entries, make a wordList of all the possible inflections
|
||||
entries.forEach((entry) => {
|
||||
|
@ -53,8 +70,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
|
|||
if (entry.c?.startsWith("v. ")) {
|
||||
const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined;
|
||||
getVerbConjugations(entry, linked);
|
||||
} else if (isNounOrAdjEntry(entry as T.Entry)) {
|
||||
getNounAdjInflections(entry);
|
||||
} else {
|
||||
addPs(removeFVarients({ p: entry.p, f: entry.f }));
|
||||
}
|
||||
getNounAdjInflections(entry);
|
||||
} catch (error) {
|
||||
errors.push({
|
||||
ts: entry.ts,
|
||||
|
@ -84,11 +104,11 @@ export function getWordList(entries: T.DictionaryEntry[]): {
|
|||
// // allInflections.add(word.slice(0, -1) + "ي");
|
||||
// // }
|
||||
// });
|
||||
const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
|
||||
wordlist.sort((a, b) => a.localeCompare(b, "ps"));
|
||||
// const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
|
||||
// wordlist.sort((a, b) => a.localeCompare(b, "ps"));
|
||||
return {
|
||||
ok: true,
|
||||
wordlist,
|
||||
wordlist: allInflections,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue