add checking for spacing and hyphen discrepancies in entries

This commit is contained in:
adueck 2023-01-25 18:57:52 +05:00
parent 35d8346682
commit b59709bc1c
6 changed files with 135 additions and 31 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{
"name": "pashto-inflector",
"version": "5.5.1",
"version": "5.6.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "pashto-inflector",
"version": "5.5.1",
"version": "5.6.0",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "pashto-inflector",
"version": "5.5.1",
"version": "5.6.0",
"author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com",

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/ps-react",
"version": "5.5.1",
"version": "5.6.0",
"description": "Pashto inflector library module with React components",
"main": "dist/components/library.js",
"module": "dist/components/library.js",

View File

@ -1,6 +1,6 @@
{
"name": "@lingdocs/inflect",
"version": "5.5.1",
"version": "5.6.0",
"description": "Pashto inflector library",
"main": "dist/index.js",
"types": "dist/lib/library.d.ts",

View File

@ -175,6 +175,84 @@ const toTest: {
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
output: { ok: true },
},
{
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
output: { ok: true },
},
{
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
output: {
errors: ["spacing discrepency between p and f"],
p: "بې چاره",
f: "bechaara",
e: "poor thing, pitiful",
ts: 1527812488,
erroneousFields: ["p", "f"],
},
},
{
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
output: {
errors: ["spacing discrepency between p and f"],
p: "بېچاره",
f: "be chaara",
e: "poor thing, pitiful",
ts: 1527812488,
erroneousFields: ["p", "f"],
},
},
{
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
output: { ok: true }
},
{
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"},
output: {
errors: ["spacing discrepency between app and apf"],
p: "مکتب",
f: "maktab",
e: "school",
ts: 1527814265,
erroneousFields: ["app", "apf"],
},
},
{
input: {"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: { ok: true },
},
{
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaaU-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: {
errors: ["hyphen/spacing discrepency between p and f"],
p: "خوا و شا",
f: "khwaaU-shaa",
e: "around, in the area",
ts: 1594909066356,
erroneousFields: ["p", "f"],
},
},
{
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: {
errors: ["hyphen/spacing discrepency between p and f"],
p: "خواو شا",
f: "khwaa-U-shaa",
e: "around, in the area",
ts: 1594909066356,
erroneousFields: ["p", "f"],
},
},
{
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: {
errors: ["presence of both hyphen and space in f"],
p: "خواو شا",
f: "khwaa U-shaa",
e: "around, in the area",
ts: 1594909066356,
erroneousFields: ["f"],
},
},
];
test("validateEntry should work", () => {

View File

@ -7,6 +7,7 @@
*/
import * as T from "../../types";
import { removeFVarients } from "./accent-and-ps-utils";
import {
phoneticsToDiacritics,
} from "./phonetics-to-diacritics";
@ -48,15 +49,15 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
} | {
checkComplement: true,
} {
let errors: string[] = [];
const errors = new Set<string>();
const erroneousFields = new Set<T.DictionaryEntryField>();
requiredFields.forEach((field) => {
if (field !== "i" && !entry[field]) {
errors.push(`missing ${field}`);
errors.add(`missing ${field}`);
erroneousFields.add(field);
}
if (field === "i" && typeof entry[field] !== "number") {
errors.push(`missing ${field}`);
errors.add(`missing ${field}`);
erroneousFields.add(field);
}
});
@ -65,52 +66,77 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
const fField = pair[1];
const p = entry[pField];
const f = entry[fField];
if (!requiredFields.includes(pair[0])) {
if (!p && !f) {
return;
}
if (!p && f) {
errors.push(`missing ${pField}`);
erroneousFields.add(pField);
return;
}
if (p && !f) {
errors.push(`missing ${fField}`);
erroneousFields.add(fField);
return;
}
const isRequired = requiredFields.includes(pair[0]);
if (!isRequired && !p && !f) {
return;
}
if (p && f && (!phoneticsToDiacritics(p, f) && !entry.diacExcept)) {
errors.push(`script and phonetics do not match for ${pField} and ${fField}`);
if (!p && !f) {
errors.add(`missing ${pField}`);
errors.add(`missing ${fField}`);
erroneousFields.add(pField);
erroneousFields.add(fField);
return;
}
if (!f || !p) {
const errField = !p ? pField : fField;
errors.add(`missing ${errField}`);
erroneousFields.add(errField);
return;
}
if (!phoneticsToDiacritics(p, f) && !entry.diacExcept) {
errors.add(`script and phonetics do not match for ${pField} and ${fField}`);
erroneousFields.add(pField)
erroneousFields.add(fField);
}
const firstF = removeFVarients(f);
if (firstF.includes("-")) {
if (firstF.includes(" ")) {
errors.add(`presence of both hyphen and space in ${fField}`);
erroneousFields.add(fField);
}
const fWords = firstF.split("-");
const pWords = p.split(" ");
if (fWords.length !== pWords.length) {
errors.add(`hyphen/spacing discrepency between ${pField} and ${fField}`);
erroneousFields.add(pField);
erroneousFields.add(fField);
}
} else {
// check spacing
const fWords = firstF.split(" ");
const pWords = p.split(" ");
if (fWords.length !== pWords.length) {
errors.add(`spacing discrepency between ${pField} and ${fField}`);
erroneousFields.add(pField);
erroneousFields.add(fField);
}
}
});
if ((entry.separationAtP && !entry.separationAtF)) {
errors.push("missing separationAtF");
errors.add("missing separationAtF");
erroneousFields.add("separationAtF");
}
if ((!entry.separationAtP && entry.separationAtF)) {
errors.push("missing separationAtP");
errors.add("missing separationAtP");
erroneousFields.add("separationAtP");
}
if (entry.c && entry.c.slice(0, 2) === "v." && entry.c.includes("comp.") && !entry.l) {
errors.push("missing complement for compound verb");
errors.add("missing complement for compound verb");
erroneousFields.add("l");
}
if (entry.c && entry.c.includes("stat. comp. intrans.") && !entry.p.endsWith("ېدل")) {
errors.push("wrong ending for intrans. stat. comp");
errors.add("wrong ending for intrans. stat. comp");
erroneousFields.add("p");
erroneousFields.add("f");
}
if (entry.c && entry.c.includes("stat. comp. trans.") && !entry.p.endsWith("ول")) {
errors.push("wrong ending for trans. stat. comp");
errors.add("wrong ending for trans. stat. comp");
erroneousFields.add("p");
erroneousFields.add("f");
}
if (errors.length) {
if (errors.size) {
return {
errors,
errors: Array.from(errors),
p: entry.p || "",
f: entry.f || "",
e: entry.e || "",