From b59709bc1ce56af14c8785c6fb0898a2eef7af90 Mon Sep 17 00:00:00 2001 From: adueck Date: Wed, 25 Jan 2023 18:57:52 +0500 Subject: [PATCH] add checking for spacing and hyphen discrepancies in entries --- package-lock.json | 4 +- package.json | 2 +- src/components/package.json | 2 +- src/lib/package.json | 2 +- src/lib/src/validate-entry.test.ts | 78 ++++++++++++++++++++++++++++++ src/lib/src/validate-entry.ts | 78 ++++++++++++++++++++---------- 6 files changed, 135 insertions(+), 31 deletions(-) diff --git a/package-lock.json b/package-lock.json index 84c00f6..0a0cc7b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "pashto-inflector", - "version": "5.5.1", + "version": "5.6.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "pashto-inflector", - "version": "5.5.1", + "version": "5.6.0", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 14eb9bc..3b1d0dd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pashto-inflector", - "version": "5.5.1", + "version": "5.6.0", "author": "lingdocs.com", "description": "A Pashto inflection and verb conjugation engine, inculding React components for displaying Pashto text, inflections, and conjugations", "homepage": "https://verbs.lingdocs.com", diff --git a/src/components/package.json b/src/components/package.json index 532a6a0..c2a6a31 100644 --- a/src/components/package.json +++ b/src/components/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/ps-react", - "version": "5.5.1", + "version": "5.6.0", "description": "Pashto inflector library module with React components", "main": "dist/components/library.js", "module": "dist/components/library.js", diff --git a/src/lib/package.json b/src/lib/package.json index e3ecbb5..7a42c80 100644 --- a/src/lib/package.json +++ b/src/lib/package.json @@ -1,6 +1,6 @@ { "name": "@lingdocs/inflect", - "version": "5.5.1", + "version": "5.6.0", "description": 
"Pashto inflector library", "main": "dist/index.js", "types": "dist/lib/library.d.ts", diff --git a/src/lib/src/validate-entry.test.ts b/src/lib/src/validate-entry.test.ts index 065f73c..8de1570 100644 --- a/src/lib/src/validate-entry.test.ts +++ b/src/lib/src/validate-entry.test.ts @@ -175,6 +175,84 @@ const toTest: { input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true}, output: { ok: true }, }, + { + input: {"i":12,"ts":1575058859661,"p":"آبدار","f":"aawdáar","e":"watery, damp, humid, juicy","c":"adj.","diacExcept":true}, + output: { ok: true }, + }, + { + input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"bechaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, + output: { + errors: ["spacing discrepency between p and f"], + p: "بې چاره", + f: "bechaara", + e: "poor thing, pitiful", + ts: 1527812488, + erroneousFields: ["p", "f"], + }, + }, + { + input: {"ts":1527812488,"i":1934,"p":"بېچاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, + output: { + errors: ["spacing discrepency between p and f"], + p: "بېچاره", + f: "be chaara", + e: "poor thing, pitiful", + ts: 1527812488, + erroneousFields: ["p", "f"], + }, + }, + { + input: {"ts":1527812488,"i":1934,"p":"بې چاره","f":"be chaara","g":"bechaara","e":"poor thing, pitiful","r":3,"c":"adj."}, + output: { ok: true } + }, + { + input: {"ts":1527814265,"i":12969,"p":"مکتب","f":"maktab","g":"maktab","e":"school","r":4,"c":"n. m.","app":"مکاتب","apf":"ma kaatib"}, + output: { + errors: ["spacing discrepency between app and apf"], + p: "مکتب", + f: "maktab", + e: "school", + ts: 1527814265, + erroneousFields: ["app", "apf"], + }, + }, + { + input: {"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. 
adv."}, + output: { ok: true }, + }, + { + input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خوا و شا","f":"khwaaU-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."}, + output: { + errors: ["hyphen/spacing discrepency between p and f"], + p: "خوا و شا", + f: "khwaaU-shaa", + e: "around, in the area", + ts: 1594909066356, + erroneousFields: ["p", "f"], + }, + }, + { + input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa-U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."}, + output: { + errors: ["hyphen/spacing discrepency between p and f"], + p: "خواو شا", + f: "khwaa-U-shaa", + e: "around, in the area", + ts: 1594909066356, + erroneousFields: ["p", "f"], + }, + }, + { + input: {"diacExcept": true,"ts":1594909066356,"i":5839,"p":"خواو شا","f":"khwaa U-shaa","g":"khwaaUshaa","e":"around, in the area","r":4,"c":"adj. / loc. adv."}, + output: { + errors: ["presence of both hyphen and space in f"], + p: "خواو شا", + f: "khwaa U-shaa", + e: "around, in the area", + ts: 1594909066356, + erroneousFields: ["f"], + }, + }, ]; test("validateEntry should work", () => { diff --git a/src/lib/src/validate-entry.ts b/src/lib/src/validate-entry.ts index bb5f77f..7b1c6a4 100644 --- a/src/lib/src/validate-entry.ts +++ b/src/lib/src/validate-entry.ts @@ -7,6 +7,7 @@ */ import * as T from "../../types"; +import { removeFVarients } from "./accent-and-ps-utils"; import { phoneticsToDiacritics, } from "./phonetics-to-diacritics"; @@ -48,15 +49,15 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError } | { checkComplement: true, } { - let errors: string[] = []; + const errors = new Set(); const erroneousFields = new Set(); requiredFields.forEach((field) => { if (field !== "i" && !entry[field]) { - errors.push(`missing ${field}`); + errors.add(`missing ${field}`); erroneousFields.add(field); } if (field === "i" && typeof entry[field] !== "number") { - 
errors.push(`missing ${field}`); + errors.add(`missing ${field}`); erroneousFields.add(field); } }); @@ -65,52 +66,77 @@ export function validateEntry(entry: T.DictionaryEntry): T.DictionaryEntryError const fField = pair[1]; const p = entry[pField]; const f = entry[fField]; - if (!requiredFields.includes(pair[0])) { - if (!p && !f) { - return; - } - if (!p && f) { - errors.push(`missing ${pField}`); - erroneousFields.add(pField); - return; - } - if (p && !f) { - errors.push(`missing ${fField}`); - erroneousFields.add(fField); - return; - } + const isRequired = requiredFields.includes(pair[0]); + if (!isRequired && !p && !f) { + return; } - if (p && f && (!phoneticsToDiacritics(p, f) && !entry.diacExcept)) { - errors.push(`script and phonetics do not match for ${pField} and ${fField}`); + if (!p && !f) { + errors.add(`missing ${pField}`); + errors.add(`missing ${fField}`); + erroneousFields.add(pField); + erroneousFields.add(fField); + return; + } + if (!f || !p) { + const errField = !p ? 
pField : fField; + errors.add(`missing ${errField}`); + erroneousFields.add(errField); + return; + } + if (!phoneticsToDiacritics(p, f) && !entry.diacExcept) { + errors.add(`script and phonetics do not match for ${pField} and ${fField}`); erroneousFields.add(pField) erroneousFields.add(fField); } + const firstF = removeFVarients(f); + if (firstF.includes("-")) { + if (firstF.includes(" ")) { + errors.add(`presence of both hyphen and space in ${fField}`); + erroneousFields.add(fField); + } + const fWords = firstF.split("-"); + const pWords = p.split(" "); + if (fWords.length !== pWords.length) { + errors.add(`hyphen/spacing discrepency between ${pField} and ${fField}`); + erroneousFields.add(pField); + erroneousFields.add(fField); + } + } else { + // check spacing + const fWords = firstF.split(" "); + const pWords = p.split(" "); + if (fWords.length !== pWords.length) { + errors.add(`spacing discrepency between ${pField} and ${fField}`); + erroneousFields.add(pField); + erroneousFields.add(fField); + } + } }); if ((entry.separationAtP && !entry.separationAtF)) { - errors.push("missing separationAtF"); + errors.add("missing separationAtF"); erroneousFields.add("separationAtF"); } if ((!entry.separationAtP && entry.separationAtF)) { - errors.push("missing separationAtP"); + errors.add("missing separationAtP"); erroneousFields.add("separationAtP"); } if (entry.c && entry.c.slice(0, 2) === "v." && entry.c.includes("comp.") && !entry.l) { - errors.push("missing complement for compound verb"); + errors.add("missing complement for compound verb"); erroneousFields.add("l"); } if (entry.c && entry.c.includes("stat. comp. intrans.") && !entry.p.endsWith("ېدل")) { - errors.push("wrong ending for intrans. stat. comp"); + errors.add("wrong ending for intrans. stat. comp"); erroneousFields.add("p"); erroneousFields.add("f"); } if (entry.c && entry.c.includes("stat. comp. trans.") && !entry.p.endsWith("ول")) { - errors.push("wrong ending for trans. stat. 
comp"); + errors.add("wrong ending for trans. stat. comp"); erroneousFields.add("p"); erroneousFields.add("f"); } - if (errors.length) { + if (errors.size) { return { - errors, + errors: Array.from(errors), p: entry.p || "", f: entry.f || "", e: entry.e || "",