add checking for spacing and hyphen discrepancies in entries

This commit is contained in:
adueck 2023-01-25 18:57:52 +05:00
parent 35d8346682
commit b59709bc1c
6 changed files with 135 additions and 31 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{ {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "5.5.1", "version": "5.6.0",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "5.5.1", "version": "5.6.0",
"hasInstallScript": true, "hasInstallScript": true,
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {

View File

@ -1,6 +1,6 @@
{ {
"name": "pashto-inflector", "name": "pashto-inflector",
"version": "5.5.1", "version": "5.6.0",
"author": "lingdocs.com", "author": "lingdocs.com",
"description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations", "description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations",
"homepage": "https://verbs.lingdocs.com", "homepage": "https://verbs.lingdocs.com",

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/ps-react", "name": "@lingdocs/ps-react",
"version": "5.5.1", "version": "5.6.0",
"description": "Pashto inflector library module with React components", "description": "Pashto inflector library module with React components",
"main": "dist/components/library.js", "main": "dist/components/library.js",
"module": "dist/components/library.js", "module": "dist/components/library.js",

View File

@ -1,6 +1,6 @@
{ {
"name": "@lingdocs/inflect", "name": "@lingdocs/inflect",
"version": "5.5.1", "version": "5.6.0",
"description": "Pashto inflector library", "description": "Pashto inflector library",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/lib/library.d.ts", "types": "dist/lib/library.d.ts",

View File

@ -175,6 +175,84 @@ const toTest: {
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true}, input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
output: { ok: true }, output: { ok: true },
}, },
{
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
output: { ok: true },
},
{
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
output: {
errors: ["spacing discrepency between p and f"],
p: "بې چاره",
f: "bechaara",
e: "poor thing, pitiful",
ts: 1527812488,
erroneousFields: ["p", "f"],
},
},
{
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
output: {
errors: ["spacing discrepency between p and f"],
p: "بېچاره",
f: "be chaara",
e: "poor thing, pitiful",
ts: 1527812488,
erroneousFields: ["p", "f"],
},
},
{
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
output: { ok: true }
},
{
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"},
output: {
errors: ["spacing discrepency between app and apf"],
p: "مکتب",
f: "maktab",
e: "school",
ts: 1527814265,
erroneousFields: ["app", "apf"],
},
},
{
input: {"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: { ok: true },
},
{
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaaU-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: {
errors: ["hyphen/spacing discrepency between p and f"],
p: "خوا و شا",
f: "khwaaU-shaa",
e: "around, in the area",
ts: 1594909066356,
erroneousFields: ["p", "f"],
},
},
{
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: {
errors: ["hyphen/spacing discrepency between p and f"],
p: "خواو شا",
f: "khwaa-U-shaa",
e: "around, in the area",
ts: 1594909066356,
erroneousFields: ["p", "f"],
},
},
{
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
output: {
errors: ["presence of both hyphen and space in f"],
p: "خواو شا",
f: "khwaa U-shaa",
e: "around, in the area",
ts: 1594909066356,
erroneousFields: ["f"],
},
},
]; ];
test("validateEntry should work", () => { test("validateEntry should work", () => {

View File

@ -7,6 +7,7 @@
*/ */
import * as T from "../../types"; import * as T from "../../types";
import { removeFVarients } from "./accent-and-ps-utils";
import { import {
phoneticsToDiacritics, phoneticsToDiacritics,
} from "./phonetics-to-diacritics"; } from "./phonetics-to-diacritics";
@ -48,15 +49,15 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
} | { } | {
checkComplement: true, checkComplement: true,
} { } {
let errors: string[] = []; const errors = new Set<string>();
const erroneousFields = new Set<T.DictionaryEntryField>(); const erroneousFields = new Set<T.DictionaryEntryField>();
requiredFields.forEach((field) => { requiredFields.forEach((field) => {
if (field !== "i" && !entry[field]) { if (field !== "i" && !entry[field]) {
errors.push(`missing ${field}`); errors.add(`missing ${field}`);
erroneousFields.add(field); erroneousFields.add(field);
} }
if (field === "i" && typeof entry[field] !== "number") { if (field === "i" && typeof entry[field] !== "number") {
errors.push(`missing ${field}`); errors.add(`missing ${field}`);
erroneousFields.add(field); erroneousFields.add(field);
} }
}); });
@ -65,52 +66,77 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
const fField = pair[1]; const fField = pair[1];
const p = entry[pField]; const p = entry[pField];
const f = entry[fField]; const f = entry[fField];
if (!requiredFields.includes(pair[0])) { const isRequired = requiredFields.includes(pair[0]);
if (!p && !f) { if (!isRequired && !p && !f) {
return; return;
}
if (!p && f) {
errors.push(`missing ${pField}`);
erroneousFields.add(pField);
return;
}
if (p && !f) {
errors.push(`missing ${fField}`);
erroneousFields.add(fField);
return;
}
} }
if (p && f && (!phoneticsToDiacritics(p, f) && !entry.diacExcept)) { if (!p && !f) {
errors.push(`script and phonetics do not match for ${pField} and ${fField}`); errors.add(`missing ${pField}`);
errors.add(`missing ${fField}`);
erroneousFields.add(pField);
erroneousFields.add(fField);
return;
}
if (!f || !p) {
const errField = !p ? pField : fField;
errors.add(`missing ${errField}`);
erroneousFields.add(errField);
return;
}
if (!phoneticsToDiacritics(p, f) && !entry.diacExcept) {
errors.add(`script and phonetics do not match for ${pField} and ${fField}`);
erroneousFields.add(pField) erroneousFields.add(pField)
erroneousFields.add(fField); erroneousFields.add(fField);
} }
const firstF = removeFVarients(f);
if (firstF.includes("-")) {
if (firstF.includes(" ")) {
errors.add(`presence of both hyphen and space in ${fField}`);
erroneousFields.add(fField);
}
const fWords = firstF.split("-");
const pWords = p.split(" ");
if (fWords.length !== pWords.length) {
errors.add(`hyphen/spacing discrepency between ${pField} and ${fField}`);
erroneousFields.add(pField);
erroneousFields.add(fField);
}
} else {
// check spacing
const fWords = firstF.split(" ");
const pWords = p.split(" ");
if (fWords.length !== pWords.length) {
errors.add(`spacing discrepency between ${pField} and ${fField}`);
erroneousFields.add(pField);
erroneousFields.add(fField);
}
}
}); });
if ((entry.separationAtP && !entry.separationAtF)) { if ((entry.separationAtP && !entry.separationAtF)) {
errors.push("missing separationAtF"); errors.add("missing separationAtF");
erroneousFields.add("separationAtF"); erroneousFields.add("separationAtF");
} }
if ((!entry.separationAtP && entry.separationAtF)) { if ((!entry.separationAtP && entry.separationAtF)) {
errors.push("missing separationAtP"); errors.add("missing separationAtP");
erroneousFields.add("separationAtP"); erroneousFields.add("separationAtP");
} }
if (entry.c && entry.c.slice(0, 2) === "v." && entry.c.includes("comp.") && !entry.l) { if (entry.c && entry.c.slice(0, 2) === "v." && entry.c.includes("comp.") && !entry.l) {
errors.push("missing complement for compound verb"); errors.add("missing complement for compound verb");
erroneousFields.add("l"); erroneousFields.add("l");
} }
if (entry.c && entry.c.includes("stat. comp. intrans.") && !entry.p.endsWith("ېدل")) { if (entry.c && entry.c.includes("stat. comp. intrans.") && !entry.p.endsWith("ېدل")) {
errors.push("wrong ending for intrans. stat. comp"); errors.add("wrong ending for intrans. stat. comp");
erroneousFields.add("p"); erroneousFields.add("p");
erroneousFields.add("f"); erroneousFields.add("f");
} }
if (entry.c && entry.c.includes("stat. comp. trans.") && !entry.p.endsWith("ول")) { if (entry.c && entry.c.includes("stat. comp. trans.") && !entry.p.endsWith("ول")) {
errors.push("wrong ending for trans. stat. comp"); errors.add("wrong ending for trans. stat. comp");
erroneousFields.add("p"); erroneousFields.add("p");
erroneousFields.add("f"); erroneousFields.add("f");
} }
if (errors.length) { if (errors.size) {
return { return {
errors, errors: Array.from(errors),
p: entry.p || "", p: entry.p || "",
f: entry.f || "", f: entry.f || "",
e: entry.e || "", e: entry.e || "",