add checking for spacing and hyphen discrepancies in entries
This commit is contained in:
parent
35d8346682
commit
b59709bc1c
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "pashto-inflector",
|
||||
"version": "5.5.1",
|
||||
"version": "5.6.0",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "pashto-inflector",
|
||||
"version": "5.5.1",
|
||||
"version": "5.6.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "pashto-inflector",
|
||||
"version": "5.5.1",
|
||||
"version": "5.6.0",
|
||||
"author": "lingdocs.com",
|
||||
"description": "A Pashto inflection and verb conjugation engine, including React components for displaying Pashto text, inflections, and conjugations",
|
||||
"homepage": "https://verbs.lingdocs.com",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@lingdocs/ps-react",
|
||||
"version": "5.5.1",
|
||||
"version": "5.6.0",
|
||||
"description": "Pashto inflector library module with React components",
|
||||
"main": "dist/components/library.js",
|
||||
"module": "dist/components/library.js",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@lingdocs/inflect",
|
||||
"version": "5.5.1",
|
||||
"version": "5.6.0",
|
||||
"description": "Pashto inflector library",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/lib/library.d.ts",
|
||||
|
|
|
@ -175,6 +175,84 @@ const toTest: {
|
|||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||
output: {
|
||||
errors: ["spacing discrepency between p and f"],
|
||||
p: "بې چاره",
|
||||
f: "bechaara",
|
||||
e: "poor thing, pitiful",
|
||||
ts: 1527812488,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||
output: {
|
||||
errors: ["spacing discrepency between p and f"],
|
||||
p: "بېچاره",
|
||||
f: "be chaara",
|
||||
e: "poor thing, pitiful",
|
||||
ts: 1527812488,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||
output: { ok: true }
|
||||
},
|
||||
{
|
||||
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"},
|
||||
output: {
|
||||
errors: ["spacing discrepency between app and apf"],
|
||||
p: "مکتب",
|
||||
f: "maktab",
|
||||
e: "school",
|
||||
ts: 1527814265,
|
||||
erroneousFields: ["app", "apf"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||
output: { ok: true },
|
||||
},
|
||||
{
|
||||
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaaU-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||
output: {
|
||||
errors: ["hyphen/spacing discrepency between p and f"],
|
||||
p: "خوا و شا",
|
||||
f: "khwaaU-shaa",
|
||||
e: "around, in the area",
|
||||
ts: 1594909066356,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||
output: {
|
||||
errors: ["hyphen/spacing discrepency between p and f"],
|
||||
p: "خواو شا",
|
||||
f: "khwaa-U-shaa",
|
||||
e: "around, in the area",
|
||||
ts: 1594909066356,
|
||||
erroneousFields: ["p", "f"],
|
||||
},
|
||||
},
|
||||
{
|
||||
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||
output: {
|
||||
errors: ["presence of both hyphen and space in f"],
|
||||
p: "خواو شا",
|
||||
f: "khwaa U-shaa",
|
||||
e: "around, in the area",
|
||||
ts: 1594909066356,
|
||||
erroneousFields: ["f"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
test("validateEntry should work", () => {
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
*/
|
||||
|
||||
import * as T from "../../types";
|
||||
import { removeFVarients } from "./accent-and-ps-utils";
|
||||
import {
|
||||
phoneticsToDiacritics,
|
||||
} from "./phonetics-to-diacritics";
|
||||
|
@ -48,15 +49,15 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
|
|||
} | {
|
||||
checkComplement: true,
|
||||
} {
|
||||
let errors: string[] = [];
|
||||
const errors = new Set<string>();
|
||||
const erroneousFields = new Set<T.DictionaryEntryField>();
|
||||
requiredFields.forEach((field) => {
|
||||
if (field !== "i" && !entry[field]) {
|
||||
errors.push(`missing ${field}`);
|
||||
errors.add(`missing ${field}`);
|
||||
erroneousFields.add(field);
|
||||
}
|
||||
if (field === "i" && typeof entry[field] !== "number") {
|
||||
errors.push(`missing ${field}`);
|
||||
errors.add(`missing ${field}`);
|
||||
erroneousFields.add(field);
|
||||
}
|
||||
});
|
||||
|
@ -65,52 +66,77 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
|
|||
const fField = pair[1];
|
||||
const p = entry[pField];
|
||||
const f = entry[fField];
|
||||
if (!requiredFields.includes(pair[0])) {
|
||||
const isRequired = requiredFields.includes(pair[0]);
|
||||
if (!isRequired && !p && !f) {
|
||||
return;
|
||||
}
|
||||
if (!p && !f) {
|
||||
return;
|
||||
}
|
||||
if (!p && f) {
|
||||
errors.push(`missing ${pField}`);
|
||||
errors.add(`missing ${pField}`);
|
||||
errors.add(`missing ${fField}`);
|
||||
erroneousFields.add(pField);
|
||||
return;
|
||||
}
|
||||
if (p && !f) {
|
||||
errors.push(`missing ${fField}`);
|
||||
erroneousFields.add(fField);
|
||||
return;
|
||||
}
|
||||
if (!f || !p) {
|
||||
const errField = !p ? pField : fField;
|
||||
errors.add(`missing ${errField}`);
|
||||
erroneousFields.add(errField);
|
||||
return;
|
||||
}
|
||||
if (p && f && (!phoneticsToDiacritics(p, f) && !entry.diacExcept)) {
|
||||
errors.push(`script and phonetics do not match for ${pField} and ${fField}`);
|
||||
if (!phoneticsToDiacritics(p, f) && !entry.diacExcept) {
|
||||
errors.add(`script and phonetics do not match for ${pField} and ${fField}`);
|
||||
erroneousFields.add(pField)
|
||||
erroneousFields.add(fField);
|
||||
}
|
||||
const firstF = removeFVarients(f);
|
||||
if (firstF.includes("-")) {
|
||||
if (firstF.includes(" ")) {
|
||||
errors.add(`presence of both hyphen and space in ${fField}`);
|
||||
erroneousFields.add(fField);
|
||||
}
|
||||
const fWords = firstF.split("-");
|
||||
const pWords = p.split(" ");
|
||||
if (fWords.length !== pWords.length) {
|
||||
errors.add(`hyphen/spacing discrepency between ${pField} and ${fField}`);
|
||||
erroneousFields.add(pField);
|
||||
erroneousFields.add(fField);
|
||||
}
|
||||
} else {
|
||||
// check spacing
|
||||
const fWords = firstF.split(" ");
|
||||
const pWords = p.split(" ");
|
||||
if (fWords.length !== pWords.length) {
|
||||
errors.add(`spacing discrepency between ${pField} and ${fField}`);
|
||||
erroneousFields.add(pField);
|
||||
erroneousFields.add(fField);
|
||||
}
|
||||
}
|
||||
});
|
||||
if ((entry.separationAtP && !entry.separationAtF)) {
|
||||
errors.push("missing separationAtF");
|
||||
errors.add("missing separationAtF");
|
||||
erroneousFields.add("separationAtF");
|
||||
}
|
||||
if ((!entry.separationAtP && entry.separationAtF)) {
|
||||
errors.push("missing separationAtP");
|
||||
errors.add("missing separationAtP");
|
||||
erroneousFields.add("separationAtP");
|
||||
}
|
||||
if (entry.c && entry.c.slice(0, 2) === "v." && entry.c.includes("comp.") && !entry.l) {
|
||||
errors.push("missing complement for compound verb");
|
||||
errors.add("missing complement for compound verb");
|
||||
erroneousFields.add("l");
|
||||
}
|
||||
if (entry.c && entry.c.includes("stat. comp. intrans.") && !entry.p.endsWith("ېدل")) {
|
||||
errors.push("wrong ending for intrans. stat. comp");
|
||||
errors.add("wrong ending for intrans. stat. comp");
|
||||
erroneousFields.add("p");
|
||||
erroneousFields.add("f");
|
||||
}
|
||||
if (entry.c && entry.c.includes("stat. comp. trans.") && !entry.p.endsWith("ول")) {
|
||||
errors.push("wrong ending for trans. stat. comp");
|
||||
errors.add("wrong ending for trans. stat. comp");
|
||||
erroneousFields.add("p");
|
||||
erroneousFields.add("f");
|
||||
}
|
||||
if (errors.length) {
|
||||
if (errors.size) {
|
||||
return {
|
||||
errors,
|
||||
errors: Array.from(errors),
|
||||
p: entry.p || "",
|
||||
f: entry.f || "",
|
||||
e: entry.e || "",
|
||||
|
|
Loading…
Reference in New Issue