pashto-dictionary/functions/lib/publishing-helpers.ts

171 lines
4.9 KiB
TypeScript

import { Types as T, validateEntry } from "@lingdocs/inflect";
const title = "LingDocs Pashto Dictionary";
const license = `Copyright © ${new Date().getFullYear()} lingdocs.com All Rights Reserved - Licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License - https://creativecommons.org/licenses/by-nc-sa/4.0/`;
const baseUrl = `https://storage.lingdocs.com/dictionary/`;
export const dictionaryFilename = "dictionary";
export const dictionaryInfoFilename = "dictionary-info";
// const hunspellAffFileFilename = "ps_AFF.aff";
// const hunspellDicFileFilename = "ps_AFF.dic";
// const allWordsJsonFilename = "all-words-dictionary.json";
const url = `${baseUrl}${dictionaryFilename}`;
const infoUrl = `${baseUrl}${dictionaryInfoFilename}`;
export function makeDictionaryObject(
entries: T.DictionaryEntry[]
): T.Dictionary {
return {
info: {
title,
license,
url,
infoUrl,
release: new Date().getTime(),
numberOfEntries: entries.length,
},
entries,
};
}
export function checkForErrors(
entries: T.DictionaryEntry[]
): (T.DictionaryEntryError | { duplicates: number[] })[] {
// check for duplicates
const tsMap: Record<number, T.DictionaryEntry> = {};
const duplicates: number[] = [];
// making a map here based on ts with the entry will speed up the
// compliment checking process!!
for (var i = 0; i < entries.length; i++) {
const ts = entries[i].ts;
if (ts in tsMap) {
duplicates.push(ts);
} else {
tsMap[ts] = entries[i];
}
}
if (duplicates.length) {
return [{ duplicates }];
}
// check for errors
return entries.reduce(
(errors: T.DictionaryEntryError[], entry: T.DictionaryEntry) => {
const response = validateEntry(entry);
if ("errors" in response && response.errors.length) {
return [...errors, response];
}
if ("checkComplement" in response) {
if (!entry.l) {
const error: T.DictionaryEntryError = {
errors: ["complement needed"],
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: ["l"],
};
return [...errors, error];
}
const complement = tsMap[entry.l];
if (!complement) {
const error: T.DictionaryEntryError = {
errors: ["complement link not found in dictionary"],
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: ["l"],
};
return [...errors, error];
}
if (
!complement.c?.includes("n.") &&
!complement.c?.includes("adj.") &&
!complement.c?.includes("adv.")
) {
const error: T.DictionaryEntryError = {
errors: ["complement link to invalid complement"],
ts: entry.ts,
p: entry.p,
f: entry.f,
e: entry.e,
erroneousFields: ["l"],
};
return [...errors, error];
}
}
return errors;
},
[]
);
}
export function makeSitemap(dictionary: T.Dictionary): string {
function tsToDate(ts: number): string {
if (ts < 10000000000) {
// approximate date for old-style timestamps
return "2021-01-01";
}
return getDateString(new Date(ts));
}
function getDateString(d: Date): string {
return d.toISOString().split("T")[0];
}
const pages = [
"",
"about",
"settings",
"account",
"phrase-builder",
"new-entries",
];
const currentDate = getDateString(new Date());
return `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
${pages
.map(
(page) =>
`
<url>
<loc>https://dictionary.lingdocs.com/${page}</loc>
<lastmod>${currentDate}</lastmod>
</url>`
)
.join("")}
${dictionary.entries
.map(
(entry) =>
`
<url>
<loc>https://dictionary.lingdocs.com/word?id=${entry.ts}</loc>
<lastmod>${tsToDate(entry.ts)}</lastmod>
</url>`
)
.join("")}
</urlset>
`;
}
// FOR HUNSPELL
// const hunspellAffFileFilename = "ps_AFF.aff";
// const hunspellDicFileFilename = "ps_AFF.dic";
// async function doHunspellEtc(
// info: T.DictionaryInfo,
// entries: T.DictionaryEntry[]
// ) {
// const wordlistResponse = getWordList(entries);
// if (!wordlistResponse.ok) {
// throw new Error(JSON.stringify(wordlistResponse.errors));
// }
// // const hunspell = makeHunspell(wordlistResponse.wordlist);
// // await uploadHunspellToStorage(hunspell);
// await uploadAllWordsToStoarage(info, wordlistResponse.wordlist);
// }
// function makeHunspell(wordlist: string[]) {
// return {
// dicContent: wordlist.reduce((acc, word) => acc + word + "\n", wordlist.length + "\n"),
// affContent: "SET UTF-8\nCOMPLEXPREFIXES\nIGNORE ۱۲۳۴۵۶۷۸۹۰-=ًٌٍَُِّْ؛:؟.،,،؟\n",
// };
// }