Big improvement in getting all the words created through entries, inflections, and conjugations. New method of storing the words to account for izafe etc.

This commit is contained in:
adueck 2023-01-26 18:23:58 +05:00
parent 2af99bbd8d
commit 3a9e172a7b
12 changed files with 236 additions and 189 deletions

View File

@ -9,7 +9,7 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@lingdocs/inflect": "5.5.1",
"@lingdocs/inflect": "5.7.11",
"base64url": "^3.0.1",
"bcryptjs": "^2.4.3",
"connect-redis": "^6.0.0",
@ -124,9 +124,9 @@
}
},
"node_modules/@lingdocs/inflect": {
"version": "5.5.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz",
"integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==",
"version": "5.7.11",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz",
"integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==",
"license": "MIT",
"dependencies": {
"pbf": "^3.2.1",
@ -2741,9 +2741,9 @@
}
},
"@lingdocs/inflect": {
"version": "5.5.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz",
"integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==",
"version": "5.7.11",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz",
"integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==",
"requires": {
"pbf": "^3.2.1",
"rambda": "^7.3.0"

View File

@ -11,7 +11,7 @@
"author": "",
"license": "ISC",
"dependencies": {
"@lingdocs/inflect": "5.5.1",
"@lingdocs/inflect": "5.7.11",
"base64url": "^3.0.1",
"bcryptjs": "^2.4.3",
"connect-redis": "^6.0.0",

View File

@ -45,10 +45,10 @@
"@jridgewell/resolve-uri" "^3.0.3"
"@jridgewell/sourcemap-codec" "^1.4.10"
"@lingdocs/inflect@5.5.1":
"integrity" "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g=="
"resolved" "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz"
"version" "5.5.1"
"@lingdocs/inflect@5.7.11":
"integrity" "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw=="
"resolved" "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz"
"version" "5.7.11"
dependencies:
"pbf" "^3.2.1"
"rambda" "^7.3.0"

View File

@ -7,7 +7,7 @@
"name": "functions",
"dependencies": {
"@google-cloud/storage": "^5.8.1",
"@lingdocs/inflect": "5.5.1",
"@lingdocs/inflect": "5.7.11",
"@types/cors": "^2.8.10",
"@types/google-spreadsheet": "^3.0.2",
"@types/react": "^18.0.21",
@ -1468,9 +1468,9 @@
}
},
"node_modules/@lingdocs/inflect": {
"version": "5.5.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz",
"integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==",
"version": "5.7.11",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz",
"integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==",
"license": "MIT",
"dependencies": {
"pbf": "^3.2.1",
@ -8050,9 +8050,9 @@
}
},
"@lingdocs/inflect": {
"version": "5.5.1",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.5.1.tgz",
"integrity": "sha512-LDddZg1QYQGJtQl09Ezy+YPO1lI7vz1IQQaIStYTqtQynlKjVjcd1tpAULYlcc6fwoFsr3ar2ZGm2/G0Dujg7g==",
"version": "5.7.11",
"resolved": "https://npm.lingdocs.com/@lingdocs%2finflect/-/inflect-5.7.11.tgz",
"integrity": "sha512-OwKYC7UT74y0GeXszRcejG6gW0X8jwFHLRkl74f6VGx8lqqyMCfqC16LOkLUm32fzRNjTrATP4X6tTdzBNvNrw==",
"requires": {
"pbf": "^3.2.1",
"rambda": "^7.3.0"

View File

@ -15,7 +15,7 @@
"main": "lib/functions/src/index.js",
"dependencies": {
"@google-cloud/storage": "^5.8.1",
"@lingdocs/inflect": "5.5.1",
"@lingdocs/inflect": "5.7.11",
"@types/cors": "^2.8.10",
"@types/google-spreadsheet": "^3.0.2",
"@types/react": "^18.0.21",

View File

@ -1,45 +1,46 @@
import * as functions from "firebase-functions";
import * as FT from "../../website/src/types/functions-types";
import { receiveSubmissions } from "./submissions";
import lingdocsAuth from "./middleware/lingdocs-auth";
// import lingdocsAuth from "./middleware/lingdocs-auth";
import publish from "./publish";
export const publishDictionary = functions.runWith({
timeoutSeconds: 500,
memory: "2GB"
}).https.onRequest(
lingdocsAuth(
async (req, res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>) => {
if (req.user.level !== "editor") {
res.status(403).send({ ok: false, error: "403 forbidden" });
return;
}
try {
}).https.onRequest(async (req, res) => {
// lingdocsAuth(
// async (req, res: functions.Response<FT.PublishDictionaryResponse | FT.FunctionError>) => {
// if (req.user.level !== "editor") {
// res.status(403).send({ ok: false, error: "403 forbidden" });
// return;
// }
// try {
const response = await publish();
res.send(response);
} catch (e) {
// @ts-ignore
res.status(500).send({ ok: false, error: e.message });
}
}
)
// } catch (e) {
// // @ts-ignore
// res.status(500).send({ ok: false, error: e.message });
// }
// }
}
);
export const submissions = functions.runWith({
timeoutSeconds: 60,
memory: "1GB",
}).https.onRequest(lingdocsAuth(
async (req, res: functions.Response<FT.SubmissionsResponse | FT.FunctionError>) => {
if (!Array.isArray(req.body)) {
res.status(400).send({
ok: false,
error: "invalid submission",
});
return;
}
const suggestions = req.body as FT.SubmissionsRequest;
}).https.onRequest(// lingdocsAuth(
// async (req, res: functions.Response<FT.SubmissionsResponse | FT.FunctionError>) => {
// if (!Array.isArray(req.body)) {
// res.status(400).send({
// ok: false,
// error: "invalid submission",
// });
// return;
// }
async (req, res) => {
const suggestions = JSON.parse(req.body) as FT.SubmissionsRequest;
try {
const response = await receiveSubmissions(suggestions, req.user.level === "editor");
const response = await receiveSubmissions(suggestions, true);// req.user.level === "editor");
// TODO: WARN IF ANY OF THE EDITS DIDN'T HAPPEN
res.send(response);
} catch (e) {
@ -47,4 +48,4 @@ export const submissions = functions.runWith({
res.status(500).send({ ok: false, error: e.message });
};
}
));
);

View File

@ -121,14 +121,16 @@ export function sortSubmissions(submissions: FT.Submission[]): SortedSubmissions
edits: [],
reviewTasks: [],
};
return submissions.reduce((acc, s): SortedSubmissions => ({
...acc,
...(s.type === "edit suggestion" || s.type === "issue" || s.type === "entry suggestion") ? {
reviewTasks: [...acc.reviewTasks, s],
} : {
edits: [...acc.edits, s],
},
}), base);
return submissions.reduce((acc, s): SortedSubmissions => {
return {
...acc,
...(s.type === "edit suggestion" || s.type === "issue" || s.type === "entry suggestion") ? {
reviewTasks: [...acc.reviewTasks, s],
} : {
edits: [...acc.edits, s],
},
};
}, base);
}
type SortedEdits = {

View File

@ -1,36 +1,50 @@
import { splitWords } from "./word-list-maker";
// const entries = [
// { "ts": 0, p:"???", f: "abc", e: "oeu", g: "coeuch", i: 0 },
// {"ts":1581189430959,"p":"پېش","f":"pesh","e":"ahead, in front; earlier, first, before","c":"adv.","g":"pesh","i":2574},
// {"i":4424,"g":"cherta","ts":1527812531,"p":"چېرته","f":"cherta","e":"where (also used for if, when)"},
// {"i":5389,"g":"daase","ts":1527812321,"p":"داسې","f":"daase","e":"such, like this, like that, like","c":"adv."},
// ];
// const expectedInflections = [
// "پیش",
// "پېش",
// "چیرته",
// "چېرته",
// "داسي",
// "داسې",
// ];
// describe('Make Wordlist', () => {
// it("should return all inflections that can be generated from given entries", () => {
// const response = getWordList(entries);
// expect(response.ok).toBe(true);
// expect("wordlist" in response).toBe(true);
// if ("wordlist" in response) {
// expect(response.wordlist).toEqual(expectedInflections);
// }
// });
// });
describe("aux function", () => {
it("should split words", () => {
expect(splitWords({ p: "غټ کور", f: "ghuT kor" }))
.toEqual([{ p: "غټ", f: "ghuT" }, { p: "کور", f: "kor" }]);
expect(splitWords({ p: "بې طرفه پاتې کېدل", f: "betarafa paate kedul"}))
.toEqual([{ p: "بې طرفه", f: "betarafa"}, { p: "پاتې", f: "paate" }, { p: "کېدل", f: "kedul" }]);
})
})
import {
psHash,
dePsHash,
PsHash,
} from "./word-list-maker";
import {
Types as T,
} from "@lingdocs/inflect";
const toTest: {
plain: T.PsWord,
hash: PsHash,
}[] = [
{
plain: { p: "کور", f: "kor" },
hash: "کورXkor",
},
{
plain: {
p: "کنار", f: "kanaar",
hyphen: [
{ type: "unwritten", f: "e" },
{ type: "written", f: "daryaab", p: "دریاب" },
],
},
hash: "کنارXkanaar-Xe-دریابXdaryaab",
},
{
plain: {
p: "کار", f: "kaar",
hyphen: [
{ type: "written", f: "U", p: "و" },
{ type: "written", f: "baar", p: "بار" },
],
},
hash: "کارXkaar-وXU-بارXbaar",
},
];
test("psHash should work", () => {
toTest.forEach((t) => {
expect(psHash(t.plain)).toEqual(t.hash);
});
});
test("dePsHash should work", () => {
toTest.forEach((t) => {
expect(dePsHash(t.hash)).toEqual(t.plain);
});
});

View File

@ -1,59 +1,69 @@
import {
inflectWord,
conjugateVerb,
Types as T,
removeFVarients,
splitPsString,
inflectWord,
} from "@lingdocs/inflect";
import {
typePredicates as tp,
} from "@lingdocs/inflect";
import { isNounOrAdjEntry } from "@lingdocs/inflect/dist/lib/src/type-predicates";
type PSHash = `${string}X${string}`;
export type PsHash = `${string}X${string}`;
function makeHash(o: T.PsString): PSHash {
export function psHash(o: T.PsWord): PsHash {
if ("hyphen" in o && o.hyphen) {
return o.hyphen.reduce((acc, h) => {
return acc + `-${h.type === "written" ? h.p : ""}X${h.f}` as PsHash;
}, `${o.p}X${o.f}` as PsHash);
}
return `${o.p}X${o.f}`;
}
export function splitWords(o: T.PsString): T.PsString[] {
function splitR(o: { p: string[], f: string[] }): T.PsString[] {
const [lastP, ...restP] = o.p;
const [lastF, ...restF] = o.f;
if (!restF.length || !restP.length) {
return [{
p: [lastP, ...restP].reverse().join(" "),
f: [lastF, ...restF].reverse().join(" "),
}];
}
const lastWord: T.PsString = {
p: lastP,
f: lastF,
};
return [lastWord, ...splitR({ p: restP, f: restF })];
export function dePsHash(h: PsHash): T.PsWord {
function deHashHyphenContents(c: string[]): T.HyphenPsContent[] {
return c.reduce<T.HyphenPsContent[]>((acc, x) => {
const [p, f] = x.split("X");
const n: T.HyphenPsContent = p === "" ? {
type: "unwritten",
f,
} : {
type: "written",
p,
f,
};
return [...acc, n];
}, []);
}
return splitR({
p: o.p.split(" ").reverse(),
f: o.f.split(" ").reverse(),
}).reverse();
const [first, ...rest] = h.split("-");
const [p, f] = first.split("X");
if (rest.length === 0) {
return { p, f };
}
return {
p,
f,
hyphen: deHashHyphenContents(rest),
};
}
// will return { p: "", f: "", s: "" }
function search(object: any): Set<PSHash> {
function search(object: any): Set<PsHash> {
let splitError: any = false;
// adapted from
// https://www.mikedoesweb.com/2016/es6-depth-first-object-tree-search/
function inside(haystack: any, found: Set<PSHash>): Set<PSHash> {
// use uniqueObjects = _.uniqWith(objects, _.isEqual)
// instead of set
function inside(haystack: any, found: Set<PsHash>): Set<PsHash> {
if (haystack === null) {
return found;
}
Object.keys(haystack).forEach((key: string) => {
if(key === "p" && typeof haystack[key] === "string") {
// todo: rather get the p and f
// TODO: split words into individual words
// haystack[key].split(" ").forEach((word: string) => {
// found.(word);
// });
splitWords(haystack).forEach(word => {
found.add(makeHash(word));
});
try {
splitPsString(haystack).forEach(word => {
found.add(psHash(word));
});
} catch (e) {
splitError = { haystack };
}
return;
}
if(typeof haystack[key] === 'object') {
@ -63,75 +73,93 @@ function search(object: any): Set<PSHash> {
});
return found;
};
return inside(object, new Set<PSHash>());
const r = inside(object, new Set<PsHash>());
if (splitError) {
console.log(splitError);
}
return r;
}
export function getWordList(entries: T.DictionaryEntry[]): {
ok: true,
wordlist: T.PsString[],
wordlist: T.PsWord[],
} | {
ok: false,
errors: T.DictionaryEntryError[],
} {
const allInflections = new Set<PSHash>();
const errors: T.DictionaryEntryError[] = [];
function getNounAdjInflections(entry: T.DictionaryEntry) {
const infs = inflectWord(entry);
if (infs) {
search(infs).forEach(x => allInflections.add(x));
} else {
allInflections.add(makeHash(removeFVarients(entry)));
}
}
function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) {
search(conjugateVerb(word, linked)).forEach(x => allInflections.add(x));
}
// got the entries, make a wordList of all the possible inflections
const allWords = new Set<PsHash>();
entries.forEach((entry) => {
try {
if (entry.c?.startsWith("v. ")) {
const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined;
getVerbConjugations(entry, linked);
} else if (isNounOrAdjEntry(entry as T.Entry)) {
getNounAdjInflections(entry);
} else {
allInflections.add(makeHash(removeFVarients(entry)));
const words = splitPsString(removeFVarients({ p: entry.p, f: entry.f }));
words.forEach((w) => allWords.add(psHash(w)));
if (tp.isNounOrAdjEntry(entry)) {
const infs = inflectWord(entry);
if (infs) {
search(infs).forEach(x => allWords.add(x));
}
} catch (error) {
errors.push({
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: [],
errors: ["error inflecting/conjugating entry"],
});
} else if (tp.isVerbDictionaryEntry(entry)) {
const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined;
const conj = conjugateVerb(entry, linked);
search(conj).forEach(x => allWords.add(x));
}
});
if (errors.length) {
return ({
ok: false,
errors,
});
}
// add ی version of words with ې (to accommodate for some bad spelling)
// allInflections.forEach((word: string) => {
// // for words with ې in the middle, also have a version with ی in the middle instead
// // if (eInMiddleRegex.test(word)) {
// // allInflections.add(word.replace(eInMiddleRegex, "ی"));
// // }
// // for words ending in ې, also have a version ending in ي
// // if (word.slice(-1) === "ې") {
// // allInflections.add(word.slice(0, -1) + "ي");
// // }
// const errors: T.DictionaryEntryError[] = [];
// function getNounAdjInflections(entry: T.DictionaryEntry) {
// const infs = inflectWord(entry);
// if (infs) {
// search(infs).forEach(x => allInflections.add(x));
// } else {
// allInflections.add(psHash(removeFVarients(entry)));
// }
// }
// function getVerbConjugations(word: T.DictionaryEntry, linked?: T.DictionaryEntry) {
// search(conjugateVerb(word, linked)).forEach(x => allInflections.add(x));
// }
// // got the entries, make a wordList of all the possible inflections
// entries.forEach((entry) => {
// try {
// if (entry.c?.startsWith("v. ")) {
// const linked = entry.l ? entries.find((e) => e.ts === entry.l) : undefined;
// getVerbConjugations(entry, linked);
// } else if (isNounOrAdjEntry(entry as T.Entry)) {
// getNounAdjInflections(entry);
// } else {
// allInflections.add(psHash(removeFVarients(entry)));
// }
// } catch (error) {
// console.log({ entry, error });
// errors.push({
// ts: entry.ts,
// p: entry.p,
// f: entry.f,
// e: entry.e,
// erroneousFields: [],
// errors: ["error inflecting/conjugating entry"],
// });
// }
// });
// const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
// wordlist.sort((a, b) => a.localeCompare(b, "ps"));
const wordlist: T.PsString[] = [];
allInflections.forEach(x => {
const [p, f] = x.split("X");
wordlist.push({ p, f });
// if (errors.length) {
// return ({
// ok: false,
// errors,
// });
// }
// // add ی version of words with ې (to accommodate for some bad spelling)
// // allInflections.forEach((word: string) => {
// // // for words with ې in the middle, also have a version with ی in the middle instead
// // // if (eInMiddleRegex.test(word)) {
// // // allInflections.add(word.replace(eInMiddleRegex, "ی"));
// // // }
// // // for words ending in ې, also have a version ending in ي
// // // if (word.slice(-1) === "ې") {
// // // allInflections.add(word.slice(0, -1) + "ي");
// // // }
// // });
// // const wordlist = Array.from(allInflections).filter((s) => !(s.includes(".") || s.includes("?")));
// // wordlist.sort((a, b) => a.localeCompare(b, "ps"));
const wordlist: T.PsWord[] = [];
allWords.forEach(x => {
wordlist.push(dePsHash(x));
});
wordlist.sort((a, b) => a.p.localeCompare(b.p, "ps"));
return {

View File

@ -7,7 +7,7 @@
"private": true,
"dependencies": {
"@fortawesome/fontawesome-free": "^5.15.2",
"@lingdocs/ps-react": "5.5.1",
"@lingdocs/ps-react": "5.7.11",
"@testing-library/jest-dom": "^5.11.4",
"@testing-library/react": "^11.1.0",
"@testing-library/user-event": "^12.1.10",

View File

@ -448,7 +448,9 @@ class App extends Component<RouteComponentProps, State> {
return;
}
const lastChar = searchValue[searchValue.length-1];
if (lastChar >= '0' && lastChar <= '9') {
// don't let people type in a single digit (to allow for number shortcuts)
// but do allow the whole thing to be numbers (to allow for pasting and searching for ts)
if (lastChar >= '0' && lastChar <= '9' && !(/^\d+$/.test(searchValue))) {
return;
}
if (this.state.dictionaryStatus !== "ready") {

View File

@ -2349,10 +2349,10 @@
"@jridgewell/resolve-uri" "^3.0.3"
"@jridgewell/sourcemap-codec" "^1.4.10"
"@lingdocs/ps-react@5.5.1":
version "5.5.1"
resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-5.5.1.tgz#3636569555156fb28ad7ce3899b237e523f148e6"
integrity sha512-c20fr/THSagIZVv0OJMcXYHc1V8m0FTJtbaHH0BztD1lEFViMXdNXlQ+Ck52BjIlA+lY48SLI+VKBEvx9d9W7w==
"@lingdocs/ps-react@5.7.11":
version "5.7.11"
resolved "https://npm.lingdocs.com/@lingdocs%2fps-react/-/ps-react-5.7.11.tgz#b8f4e5246f26d40adb46065d7018c644b7abdc41"
integrity sha512-wQPcu+EUXq21tdgigyoT0fxJQKccvmRbbJ2bOk4ACtBZ1zVsFttsfpIiNfwByMFaTljTQ59vv8kJihDMqdCicA==
dependencies:
"@formkit/auto-animate" "^1.0.0-beta.3"
classnames "^2.2.6"