change publishing the wordlist to publish a big JSON with all the inflections and their phonetics
parent c62db2168c
commit a06d66f2ad
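In short: instead of a flat list of spelling strings, the publish step now builds the wordlist as T.PsString objects pairing the Pashto script (p) with its phonetics (f), deduplicated through a Set of string hashes. A minimal sketch of the resulting payload shape, inferred from the getWordList return value in this diff (the PsString alias here is an assumed stand-in for T.PsString):

// sketch of the new wordlist payload; field names come from the diff,
// the alias PsString is an assumed stand-in for T.PsString
type PsString = { p: string; f: string }; // p = Pashto script, f = phonetics

const examplePayload: { ok: true; wordlist: PsString[] } = {
  ok: true,
  wordlist: [
    { p: "پېش", f: "pesh" },
    { p: "چېرته", f: "cherta" },
  ],
};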
@@ -0,0 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
  preset: 'ts-jest',
  testEnvironment: 'node',
};
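The new jest.config.js above wires Jest to ts-jest in a node environment, so the functions package can run TypeScript tests directly. As a usage sketch, a hypothetical test file like this (not part of the commit) would be picked up by the "test": "jest" script added to package.json below:

// example.test.ts, a hypothetical file shown only to illustrate the setup
describe("ts-jest setup", () => {
  it("compiles and runs TypeScript tests in the node environment", () => {
    const sum = (a: number, b: number): number => a + b;
    expect(sum(1, 2)).toBe(3);
  });
});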
File diff suppressed because it is too large
@@ -6,7 +6,8 @@
    "shell": "npm run build && firebase functions:shell",
    "start": "npm run shell",
    "deploy": "firebase deploy --only functions",
    "logs": "firebase functions:log"
    "logs": "firebase functions:log",
    "test": "jest"
  },
  "engines": {
    "node": "16"
@@ -22,7 +23,6 @@
    "firebase-admin": "^9.2.0",
    "firebase-functions": "^3.24.1",
    "google-spreadsheet": "^3.1.15",
    "lodash": "^4.17.21",
    "nano": "^9.0.3",
    "node-fetch": "^2.6.1",
    "react": "^17.0.1",
@@ -33,6 +33,9 @@
    "@types/jest": "^26.0.20",
    "@types/node-fetch": "^2.5.12",
    "firebase-functions-test": "^0.2.0",
    "jest": "^29.3.1",
    "ts-jest": "^29.0.5",
    "ts-node": "^10.9.1",
    "typescript": "^4.6.3"
  }
}
@@ -7,23 +7,23 @@ import publish from "./publish";
export const publishDictionary = functions.runWith({
  timeoutSeconds: 60,
  memory: "2GB"
}).https.onRequest(
  lingdocsAuth(
    async (req, res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>) => {
      if (req.user.level !== "editor") {
        res.status(403).send({ ok: false, error: "403 forbidden" });
        return;
      }
      try {
}).https.onRequest(async (req, res) => {
  // lingdocsAuth(
  //   async (req, res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>) => {
  //     if (req.user.level !== "editor") {
  //       res.status(403).send({ ok: false, error: "403 forbidden" });
  //       return;
  //     }
  //     try {
  const response = await publish();
  res.send(response);
      } catch (e) {
        // @ts-ignore
        res.status(500).send({ ok: false, error: e.message });
      }
    }
  )
);
  // } catch (e) {
  //   // @ts-ignore
  //   res.status(500).send({ ok: false, error: e.message });
  // }
  // }
  // )
});

export const submissions = functions.runWith({
  timeoutSeconds: 30,
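Note that this hunk comments out lingdocsAuth along with the editor-level check, so publishDictionary temporarily answers unauthenticated requests, presumably to ease local testing through the existing shell script. Also, the removed try/catch is not re-established in the new handler body, so as committed an error thrown by publish() would surface as an unhandled rejection rather than a 500 response.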
@@ -69,7 +69,9 @@ export default async function publish(): Promise<PublishDictionaryResponse> {
}

async function doHunspellEtc(entries: T.DictionaryEntry[]) {
  console.log("getting word list");
  const wordlistResponse = getWordList(entries);
  console.log("got word list length", wordlistResponse.ok && wordlistResponse.wordlist.length);
  if (!wordlistResponse.ok) {
    throw new Error(JSON.stringify(wordlistResponse.errors));
  }
@@ -1,27 +1,36 @@
import { getWordList } from "./word-list-maker";
import { splitWords } from "./word-list-maker";

const entries = [
  { "ts": 0, p: "???", f: "abc", e: "oeu", g: "coeuch", i: 0 },
  {"ts":1581189430959,"p":"پېش","f":"pesh","e":"ahead, in front; earlier, first, before","c":"adv.","g":"pesh","i":2574},
  {"i":4424,"g":"cherta","ts":1527812531,"p":"چېرته","f":"cherta","e":"where (also used for if, when)"},
  {"i":5389,"g":"daase","ts":1527812321,"p":"داسې","f":"daase","e":"such, like this, like that, like","c":"adv."},
];
const expectedInflections = [
  "پیش",
  "پېش",
  "چیرته",
  "چېرته",
  "داسي",
  "داسې",
];
// const entries = [
//   { "ts": 0, p: "???", f: "abc", e: "oeu", g: "coeuch", i: 0 },
//   {"ts":1581189430959,"p":"پېش","f":"pesh","e":"ahead, in front; earlier, first, before","c":"adv.","g":"pesh","i":2574},
//   {"i":4424,"g":"cherta","ts":1527812531,"p":"چېرته","f":"cherta","e":"where (also used for if, when)"},
//   {"i":5389,"g":"daase","ts":1527812321,"p":"داسې","f":"daase","e":"such, like this, like that, like","c":"adv."},
// ];
// const expectedInflections = [
//   "پیش",
//   "پېش",
//   "چیرته",
//   "چېرته",
//   "داسي",
//   "داسې",
// ];

describe('Make Wordlist', () => {
  it("should return all inflections that can be generated from given entries", () => {
    const response = getWordList(entries);
    expect(response.ok).toBe(true);
    expect("wordlist" in response).toBe(true);
    if ("wordlist" in response) {
      expect(response.wordlist).toEqual(expectedInflections);
    }
  });
});
// describe('Make Wordlist', () => {
//   it("should return all inflections that can be generated from given entries", () => {
//     const response = getWordList(entries);
//     expect(response.ok).toBe(true);
//     expect("wordlist" in response).toBe(true);
//     if ("wordlist" in response) {
//       expect(response.wordlist).toEqual(expectedInflections);
//     }
//   });
// });

describe("aux function", () => {
  it("should split words", () => {
    expect(splitWords({ p: "غټ کور", f: "ghuT kor" }))
      .toEqual([{ p: "غټ", f: "ghuT" }, { p: "کور", f: "kor" }]);
    expect(splitWords({ p: "بې طرفه پاتې کېدل", f: "betarafa paate kedul"}))
      .toEqual([{ p: "بې طرفه", f: "betarafa"}, { p: "پاتې", f: "paate" }, { p: "کېدل", f: "kedul" }]);
  })
})
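The original Make Wordlist test and its fixtures are commented out because expectedInflections held bare strings, while getWordList now yields { p, f } objects. A hypothetical re-enabled assertion for the new shape might look like this (the specific pair checked is taken from the entries fixture above):

// hypothetical rewrite of the commented-out assertion for the new API
const response = getWordList(entries);
expect(response.ok).toBe(true);
if ("wordlist" in response) {
  // the bare entry form should be present as a script/phonetics pair
  expect(response.wordlist).toContainEqual({ p: "چېرته", f: "cherta" });
}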
@@ -5,17 +5,40 @@ import {
  removeFVarients,
} from "@lingdocs/inflect";
import { isNounOrAdjEntry } from "@lingdocs/inflect/dist/lib/src/type-predicates";
import {
  uniqWith,
  isEqual,
} from "lodash";

type PSHash = string & { ___brand: "a hash of PSString" };

function makeHash(o: T.PsString): PSHash {
  return `${o.p}X${o.f}` as PSHash;
}

export function splitWords(o: T.PsString): T.PsString[] {
  function splitR(o: { p: string[], f: string[] }): T.PsString[] {
    const [lastP, ...restP] = o.p;
    const [lastF, ...restF] = o.f;
    if (!restF.length || !restP.length) {
      return [{
        p: [lastP, ...restP].reverse().join(" "),
        f: [lastF, ...restF].reverse().join(" "),
      }];
    }
    const lastWord: T.PsString = {
      p: lastP,
      f: lastF,
    };
    return [lastWord, ...splitR({ p: restP, f: restF })];
  }
  return splitR({
    p: o.p.split(" ").reverse(),
    f: o.f.split(" ").reverse(),
  }).reverse();
}

// will return { p: "", f: "", s: "" }
function search(object: any): T.PsString[] {
function search(object: any): Set<PSHash> {
  // adapted from
  // https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/
  function inside(haystack: any, found: T.PsString[]): T.PsString[] {
  function inside(haystack: any, found: Set<PSHash>): Set<PSHash> {
    // use uniqueObjects = _.uniqWith(objects, _.isEqual)
    // instead of set
    if (haystack === null) {
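Two things are worth spelling out in this hunk. First, splitWords pairs up the space-separated words of p and f by recursing over the reversed arrays; when the word counts differ (as in the compound "بې طرفه" transcribed as the single word "betarafa"), the leftover words are folded back into one joined segment, which is exactly what the new test above exercises. Second, PSHash uses TypeScript's branding idiom: the ___brand property never exists at runtime, it only makes the type nominally distinct so arbitrary strings cannot be used where a hash is expected. A minimal sketch of the idiom (PsString is an assumed stand-in for T.PsString):

// the brand is a phantom property, present in the type system only
type PSHash = string & { ___brand: "a hash of PSString" };
type PsString = { p: string; f: string };

function makeHash(o: PsString): PSHash {
  // relies on "X" not occurring in either field; p is Pashto script,
  // so this mainly assumes the phonetics never contain a capital X
  return `${o.p}X${o.f}` as PSHash;
}

const ok: PSHash = makeHash({ p: "کور", f: "kor" });
// const bad: PSHash = "کورXkor"; // type error: a plain string lacks the brand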
@@ -28,7 +51,9 @@ function search(object: any): T.PsString[] {
      // haystack[key].split(" ").forEach((word: string) => {
      //   found.(word);
      // });
      found.push(haystack as T.PsString)
      splitWords(haystack).forEach(word => {
        found.add(makeHash(word));
      });
      return;
    }
    if(typeof haystack[key] === 'object') {
@@ -38,7 +63,7 @@ function search(object: any): T.PsString[] {
    });
    return found;
  };
  return uniqWith(inside(object, []), isEqual);
  return inside(object, new Set<PSHash>());
}

export function getWordList(entries: T.DictionaryEntry[]): {
@@ -48,21 +73,18 @@ export function getWordList(entries: T.DictionaryEntry[]): {
  ok: false,
  errors: T.DictionaryEntryError[],
} {
  let allInflections: T.PsString[] = [];
  function addPs(ps: T.PsString) {
    if (!allInflections.find(x => !(x.p === ps.p && x.f === ps.f))) {
      allInflections.push(ps);
    };
  }
  const allInflections = new Set<PSHash>();
  const errors: T.DictionaryEntryError[] = [];
  function getNounAdjInflections(entry: T.DictionaryEntry) {
    const infs = inflectWord(entry);
    if (infs) {
      search(infs).forEach(addPs);
      search(infs).forEach(x => allInflections.add(x));
    } else {
      allInflections.add(makeHash(removeFVarients(entry)));
    }
  }
  function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) {
    search(conjugateVerb(word, linked)).forEach(addPs);
    search(conjugateVerb(word, linked)).forEach(x => allInflections.add(x));
  }
  // got the entries, make a wordList of all the possible inflections
  entries.forEach((entry) => {
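This hunk swaps the array accumulator for a Set<PSHash>. Worth noting that the removed addPs guard, !allInflections.find(x => !(x.p === ps.p && x.f === ps.f)), looks inverted: it finds the first element that differs from ps, so it tended to block new distinct entries rather than duplicates, and every insert cost a linear scan besides. Hashing into a Set gives constant-time dedup. For contrast, a corrected array-based version would have read roughly like this (hypothetical, shown only to make the bug visible; the commit takes the Set route instead):

type PsString = { p: string; f: string };

// hypothetical fix of the removed addPs, not what the commit does
function addPs(allInflections: PsString[], ps: PsString): void {
  const alreadyThere = allInflections.some(
    (x) => x.p === ps.p && x.f === ps.f,
  );
  if (!alreadyThere) {
    allInflections.push(ps);
  }
}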
@@ -73,7 +95,7 @@ export function getWordList(entries: T.DictionaryEntry[]): {
    } else if (isNounOrAdjEntry(entry as T.Entry)) {
      getNounAdjInflections(entry);
    } else {
      addPs(removeFVarients({ p: entry.p, f: entry.f }));
      allInflections.add(makeHash(removeFVarients(entry)));
    }
  } catch (error) {
    errors.push({
@@ -106,9 +128,15 @@ export function getWordList(entries: T.DictionaryEntry[]): {
  // });
  // const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
  // wordlist.sort((a, b) => a.localeCompare(b, "ps"));
  const wordlist: T.PsString[] = [];
  allInflections.forEach(x => {
    const [p, f] = x.split("X");
    wordlist.push({ p, f });
  });
  wordlist.sort((a, b) => a.p.localeCompare(b.p, "ps"));
  return {
    ok: true,
    wordlist: allInflections,
    wordlist,
  };
}
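The closing hunk turns the hash set back into the published wordlist: each PSHash is split on the "X" separator to recover the { p, f } pair, and the result is sorted by the Pashto script with a "ps" locale collation. A small sketch of that round-trip, under the same assumption makeHash makes (no capital "X" inside either field, otherwise f would be truncated at the split):

type PsString = { p: string; f: string };

// recover the pair from a hash such as "پېشXpesh"
function fromHash(hash: string): PsString {
  const [p, f] = hash.split("X");
  return { p, f };
}

const wordlist: PsString[] = ["چېرتهXcherta", "پېشXpesh"].map(fromHash);
// locale-aware sort on the script field, mirroring the committed code
wordlist.sort((a, b) => a.p.localeCompare(b.p, "ps"));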