add checking for spacing and hyphen discrepancies in entries
This commit is contained in:
parent
35d8346682
commit
b59709bc1c
|
@ -1,12 +1,12 @@
|
||||||
{
|
{
|
||||||
"name": "pashto-inflector",
|
"name": "pashto-inflector",
|
||||||
"version": "5.5.1",
|
"version": "5.6.0",
|
||||||
"lockfileVersion": 2,
|
"lockfileVersion": 2,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "pashto-inflector",
|
"name": "pashto-inflector",
|
||||||
"version": "5.5.1",
|
"version": "5.6.0",
|
||||||
"hasInstallScript": true,
|
"hasInstallScript": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "pashto-inflector",
|
"name": "pashto-inflector",
|
||||||
"version": "5.5.1",
|
"version": "5.6.0",
|
||||||
"author": "lingdocs.com",
|
"author": "lingdocs.com",
|
||||||
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
|
"description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations",
|
||||||
"homepage": "https://verbs.lingdocs.com",
|
"homepage": "https://verbs.lingdocs.com",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "@lingdocs/ps-react",
|
"name": "@lingdocs/ps-react",
|
||||||
"version": "5.5.1",
|
"version": "5.6.0",
|
||||||
"description": "Pashto inflector library module with React components",
|
"description": "Pashto inflector library module with React components",
|
||||||
"main": "dist/components/library.js",
|
"main": "dist/components/library.js",
|
||||||
"module": "dist/components/library.js",
|
"module": "dist/components/library.js",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "@lingdocs/inflect",
|
"name": "@lingdocs/inflect",
|
||||||
"version": "5.5.1",
|
"version": "5.6.0",
|
||||||
"description": "Pashto inflector library",
|
"description": "Pashto inflector library",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/lib/library.d.ts",
|
"types": "dist/lib/library.d.ts",
|
||||||
|
|
|
@ -175,6 +175,84 @@ const toTest: {
|
||||||
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
||||||
output: { ok: true },
|
output: { ok: true },
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true},
|
||||||
|
output: { ok: true },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||||
|
output: {
|
||||||
|
errors: ["spacing discrepency between p and f"],
|
||||||
|
p: "بې چاره",
|
||||||
|
f: "bechaara",
|
||||||
|
e: "poor thing, pitiful",
|
||||||
|
ts: 1527812488,
|
||||||
|
erroneousFields: ["p", "f"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||||
|
output: {
|
||||||
|
errors: ["spacing discrepency between p and f"],
|
||||||
|
p: "بېچاره",
|
||||||
|
f: "be chaara",
|
||||||
|
e: "poor thing, pitiful",
|
||||||
|
ts: 1527812488,
|
||||||
|
erroneousFields: ["p", "f"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."},
|
||||||
|
output: { ok: true }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"},
|
||||||
|
output: {
|
||||||
|
errors: ["spacing discrepency between app and apf"],
|
||||||
|
p: "مکتب",
|
||||||
|
f: "maktab",
|
||||||
|
e: "school",
|
||||||
|
ts: 1527814265,
|
||||||
|
erroneousFields: ["app", "apf"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||||
|
output: { ok: true },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaaU-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||||
|
output: {
|
||||||
|
errors: ["hyphen/spacing discrepency between p and f"],
|
||||||
|
p: "خوا و شا",
|
||||||
|
f: "khwaaU-shaa",
|
||||||
|
e: "around, in the area",
|
||||||
|
ts: 1594909066356,
|
||||||
|
erroneousFields: ["p", "f"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||||
|
output: {
|
||||||
|
errors: ["hyphen/spacing discrepency between p and f"],
|
||||||
|
p: "خواو شا",
|
||||||
|
f: "khwaa-U-shaa",
|
||||||
|
e: "around, in the area",
|
||||||
|
ts: 1594909066356,
|
||||||
|
erroneousFields: ["p", "f"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."},
|
||||||
|
output: {
|
||||||
|
errors: ["presence of both hyphen and space in f"],
|
||||||
|
p: "خواو شا",
|
||||||
|
f: "khwaa U-shaa",
|
||||||
|
e: "around, in the area",
|
||||||
|
ts: 1594909066356,
|
||||||
|
erroneousFields: ["f"],
|
||||||
|
},
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
test("validateEntry should work", () => {
|
test("validateEntry should work", () => {
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import * as T from "../../types";
|
import * as T from "../../types";
|
||||||
|
import { removeFVarients } from "./accent-and-ps-utils";
|
||||||
import {
|
import {
|
||||||
phoneticsToDiacritics,
|
phoneticsToDiacritics,
|
||||||
} from "./phonetics-to-diacritics";
|
} from "./phonetics-to-diacritics";
|
||||||
|
@ -48,15 +49,15 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
|
||||||
} | {
|
} | {
|
||||||
checkComplement: true,
|
checkComplement: true,
|
||||||
} {
|
} {
|
||||||
let errors: string[] = [];
|
const errors = new Set<string>();
|
||||||
const erroneousFields = new Set<T.DictionaryEntryField>();
|
const erroneousFields = new Set<T.DictionaryEntryField>();
|
||||||
requiredFields.forEach((field) => {
|
requiredFields.forEach((field) => {
|
||||||
if (field !== "i" && !entry[field]) {
|
if (field !== "i" && !entry[field]) {
|
||||||
errors.push(`missing ${field}`);
|
errors.add(`missing ${field}`);
|
||||||
erroneousFields.add(field);
|
erroneousFields.add(field);
|
||||||
}
|
}
|
||||||
if (field === "i" && typeof entry[field] !== "number") {
|
if (field === "i" && typeof entry[field] !== "number") {
|
||||||
errors.push(`missing ${field}`);
|
errors.add(`missing ${field}`);
|
||||||
erroneousFields.add(field);
|
erroneousFields.add(field);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -65,52 +66,77 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError
|
||||||
const fField = pair[1];
|
const fField = pair[1];
|
||||||
const p = entry[pField];
|
const p = entry[pField];
|
||||||
const f = entry[fField];
|
const f = entry[fField];
|
||||||
if (!requiredFields.includes(pair[0])) {
|
const isRequired = requiredFields.includes(pair[0]);
|
||||||
if (!p && !f) {
|
if (!isRequired && !p && !f) {
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
if (!p && f) {
|
|
||||||
errors.push(`missing ${pField}`);
|
|
||||||
erroneousFields.add(pField);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (p && !f) {
|
|
||||||
errors.push(`missing ${fField}`);
|
|
||||||
erroneousFields.add(fField);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (p && f && (!phoneticsToDiacritics(p, f) && !entry.diacExcept)) {
|
if (!p && !f) {
|
||||||
errors.push(`script and phonetics do not match for ${pField} and ${fField}`);
|
errors.add(`missing ${pField}`);
|
||||||
|
errors.add(`missing ${fField}`);
|
||||||
|
erroneousFields.add(pField);
|
||||||
|
erroneousFields.add(fField);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!f || !p) {
|
||||||
|
const errField = !p ? pField : fField;
|
||||||
|
errors.add(`missing ${errField}`);
|
||||||
|
erroneousFields.add(errField);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!phoneticsToDiacritics(p, f) && !entry.diacExcept) {
|
||||||
|
errors.add(`script and phonetics do not match for ${pField} and ${fField}`);
|
||||||
erroneousFields.add(pField)
|
erroneousFields.add(pField)
|
||||||
erroneousFields.add(fField);
|
erroneousFields.add(fField);
|
||||||
}
|
}
|
||||||
|
const firstF = removeFVarients(f);
|
||||||
|
if (firstF.includes("-")) {
|
||||||
|
if (firstF.includes(" ")) {
|
||||||
|
errors.add(`presence of both hyphen and space in ${fField}`);
|
||||||
|
erroneousFields.add(fField);
|
||||||
|
}
|
||||||
|
const fWords = firstF.split("-");
|
||||||
|
const pWords = p.split(" ");
|
||||||
|
if (fWords.length !== pWords.length) {
|
||||||
|
errors.add(`hyphen/spacing discrepency between ${pField} and ${fField}`);
|
||||||
|
erroneousFields.add(pField);
|
||||||
|
erroneousFields.add(fField);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// check spacing
|
||||||
|
const fWords = firstF.split(" ");
|
||||||
|
const pWords = p.split(" ");
|
||||||
|
if (fWords.length !== pWords.length) {
|
||||||
|
errors.add(`spacing discrepency between ${pField} and ${fField}`);
|
||||||
|
erroneousFields.add(pField);
|
||||||
|
erroneousFields.add(fField);
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
if ((entry.separationAtP && !entry.separationAtF)) {
|
if ((entry.separationAtP && !entry.separationAtF)) {
|
||||||
errors.push("missing separationAtF");
|
errors.add("missing separationAtF");
|
||||||
erroneousFields.add("separationAtF");
|
erroneousFields.add("separationAtF");
|
||||||
}
|
}
|
||||||
if ((!entry.separationAtP && entry.separationAtF)) {
|
if ((!entry.separationAtP && entry.separationAtF)) {
|
||||||
errors.push("missing separationAtP");
|
errors.add("missing separationAtP");
|
||||||
erroneousFields.add("separationAtP");
|
erroneousFields.add("separationAtP");
|
||||||
}
|
}
|
||||||
if (entry.c && entry.c.slice(0, 2) === "v." && entry.c.includes("comp.") && !entry.l) {
|
if (entry.c && entry.c.slice(0, 2) === "v." && entry.c.includes("comp.") && !entry.l) {
|
||||||
errors.push("missing complement for compound verb");
|
errors.add("missing complement for compound verb");
|
||||||
erroneousFields.add("l");
|
erroneousFields.add("l");
|
||||||
}
|
}
|
||||||
if (entry.c && entry.c.includes("stat. comp. intrans.") && !entry.p.endsWith("ېدل")) {
|
if (entry.c && entry.c.includes("stat. comp. intrans.") && !entry.p.endsWith("ېدل")) {
|
||||||
errors.push("wrong ending for intrans. stat. comp");
|
errors.add("wrong ending for intrans. stat. comp");
|
||||||
erroneousFields.add("p");
|
erroneousFields.add("p");
|
||||||
erroneousFields.add("f");
|
erroneousFields.add("f");
|
||||||
}
|
}
|
||||||
if (entry.c && entry.c.includes("stat. comp. trans.") && !entry.p.endsWith("ول")) {
|
if (entry.c && entry.c.includes("stat. comp. trans.") && !entry.p.endsWith("ول")) {
|
||||||
errors.push("wrong ending for trans. stat. comp");
|
errors.add("wrong ending for trans. stat. comp");
|
||||||
erroneousFields.add("p");
|
erroneousFields.add("p");
|
||||||
erroneousFields.add("f");
|
erroneousFields.add("f");
|
||||||
}
|
}
|
||||||
if (errors.length) {
|
if (errors.size) {
|
||||||
return {
|
return {
|
||||||
errors,
|
errors: Array.from(errors),
|
||||||
p: entry.p || "",
|
p: entry.p || "",
|
||||||
f: entry.f || "",
|
f: entry.f || "",
|
||||||
e: entry.e || "",
|
e: entry.e || "",
|
||||||
|
|
Loading…
Reference in New Issue