pretty full noun recognition - plural suffixes just started
This commit is contained in:
parent
6aec2dfeb2
commit
b672e19c1a
|
@ -61,7 +61,6 @@ function NPNounPicker(props: {
|
|||
opts: T.TextOptions;
|
||||
phraseIsComplete: boolean;
|
||||
}) {
|
||||
console.log({ noun: props.noun });
|
||||
// const [patternFilter, setPatternFilter] = useState<FilterPattern | undefined>(undefined);
|
||||
// const [showFilter, setShowFilter] = useState<boolean>(false)
|
||||
// const nounsFiltered = props.nouns
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import * as T from "../../../types";
|
||||
import { endsInConsonant } from "../p-text-helpers";
|
||||
import {
|
||||
isPattern1Entry,
|
||||
isPattern2Entry,
|
||||
|
@ -7,18 +8,26 @@ import {
|
|||
isPattern5Entry,
|
||||
isPattern4Entry,
|
||||
isPattern6FemEntry,
|
||||
isFemNounEntry,
|
||||
isAdjectiveEntry,
|
||||
isUnisexNounEntry,
|
||||
isPluralNounEntry,
|
||||
isNounEntry,
|
||||
isAnimNounEntry,
|
||||
isMascNounEntry,
|
||||
} from "../type-predicates";
|
||||
import { equals } from "rambda";
|
||||
|
||||
export function getInflectionQueries(
|
||||
s: string,
|
||||
includeNouns: boolean
|
||||
noun: boolean
|
||||
): {
|
||||
search: Partial<T.DictionaryEntry>;
|
||||
details: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean;
|
||||
plural?: boolean;
|
||||
}[];
|
||||
}[] {
|
||||
const queries: {
|
||||
|
@ -26,6 +35,7 @@ export function getInflectionQueries(
|
|||
details: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
plural?: boolean;
|
||||
predicate: (e: T.NounEntry | T.AdjectiveEntry) => boolean;
|
||||
};
|
||||
}[] = [];
|
||||
|
@ -34,15 +44,111 @@ export function getInflectionQueries(
|
|||
details: {
|
||||
inflection: [0, 1, 2],
|
||||
gender: ["masc", "fem"],
|
||||
predicate: isPattern(0),
|
||||
predicate: (e) =>
|
||||
!(isNounEntry(e) && isPluralNounEntry(e)) &&
|
||||
isPattern(0)(e) &&
|
||||
isAdjectiveEntry(e),
|
||||
},
|
||||
});
|
||||
if (noun) {
|
||||
if (s.endsWith("ونه")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -3) },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["masc"],
|
||||
plural: true,
|
||||
predicate: (e) =>
|
||||
isNounEntry(e) &&
|
||||
!isPluralNounEntry(e) &&
|
||||
!isPattern2Entry(e) &&
|
||||
!isPattern3Entry(e) &&
|
||||
!isPattern4Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -3) + "ه" },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["masc"],
|
||||
plural: true,
|
||||
predicate: (e) =>
|
||||
isNounEntry(e) &&
|
||||
!isPluralNounEntry(e) &&
|
||||
!isPattern2Entry(e) &&
|
||||
!isPattern3Entry(e) &&
|
||||
!isPattern4Entry(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("ونو")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -3) },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
plural: true,
|
||||
predicate: (e) =>
|
||||
isNounEntry(e) &&
|
||||
!isPluralNounEntry(e) &&
|
||||
!isPattern2Entry(e) &&
|
||||
!isPattern3Entry(e) &&
|
||||
!isPattern4Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -3) + "ه" },
|
||||
details: {
|
||||
inflection: [1],
|
||||
gender: ["masc"],
|
||||
plural: true,
|
||||
predicate: (e) =>
|
||||
isNounEntry(e) &&
|
||||
!isPluralNounEntry(e) &&
|
||||
!isPattern2Entry(e) &&
|
||||
!isPattern3Entry(e) &&
|
||||
!isPattern4Entry(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("و")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["fem"],
|
||||
predicate: (e) =>
|
||||
isNounEntry(e) && isAnimNounEntry(e) && isFemNounEntry(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
predicate: (e) =>
|
||||
isNounEntry(e) && isFemNounEntry(e) && isPattern1Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0, 1],
|
||||
gender: ["fem"],
|
||||
predicate: (e) =>
|
||||
isNounEntry(e) && isAnimNounEntry(e) && isFemNounEntry(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0, 1],
|
||||
gender: ["masc"],
|
||||
predicate: isPattern1Entry,
|
||||
predicate: (e) =>
|
||||
!(isNounEntry(e) && isPluralNounEntry(e)) &&
|
||||
(isPattern1Entry(e) || isPattern(0)(e)),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
|
@ -65,6 +171,17 @@ export function getInflectionQueries(
|
|||
predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e),
|
||||
},
|
||||
});
|
||||
if (noun) {
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0],
|
||||
plural: true,
|
||||
gender: ["masc", "fem"],
|
||||
predicate: (e) => isNounEntry(e) && isPluralNounEntry(e),
|
||||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("ه")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) },
|
||||
|
@ -74,16 +191,6 @@ export function getInflectionQueries(
|
|||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
if (includeNouns) {
|
||||
queries.push({
|
||||
search: { p: s },
|
||||
details: {
|
||||
inflection: [0],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
}
|
||||
queries.push({
|
||||
search: { infbp: s.slice(0, -1) },
|
||||
details: {
|
||||
|
@ -101,7 +208,7 @@ export function getInflectionQueries(
|
|||
predicate: isPattern1Entry,
|
||||
},
|
||||
});
|
||||
if (includeNouns) {
|
||||
if (noun) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ه" },
|
||||
details: {
|
||||
|
@ -150,7 +257,7 @@ export function getInflectionQueries(
|
|||
details: {
|
||||
inflection: [2],
|
||||
gender: ["masc", "fem"],
|
||||
predicate: (e) => isPattern1Entry(e) || isPattern5Entry(e),
|
||||
predicate: (e) => isPattern1Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
|
@ -169,6 +276,48 @@ export function getInflectionQueries(
|
|||
predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e),
|
||||
},
|
||||
});
|
||||
if (noun) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ه" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["fem"],
|
||||
predicate: (e) => isPattern1Entry(e) || isFemNounEntry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ه" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["masc"],
|
||||
predicate: isMascNounEntry,
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ې" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["fem"],
|
||||
predicate: (e) => isNounEntry(e) || isFemNounEntry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ۍ" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["fem"],
|
||||
predicate: (e) => isFemNounEntry(e) && isPattern3Entry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ي" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern6FemEntry,
|
||||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("یو")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -2) + "ی" },
|
||||
|
@ -178,6 +327,24 @@ export function getInflectionQueries(
|
|||
predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e),
|
||||
},
|
||||
});
|
||||
if (noun) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -2) + "ۍ" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["fem"],
|
||||
predicate: (e) => isPattern3Entry(e) && isFemNounEntry(e),
|
||||
},
|
||||
});
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -2) + "ي" },
|
||||
details: {
|
||||
inflection: [2],
|
||||
gender: ["fem"],
|
||||
predicate: isPattern6FemEntry,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (s.endsWith("ۍ")) {
|
||||
queries.push({
|
||||
|
@ -188,7 +355,7 @@ export function getInflectionQueries(
|
|||
predicate: isPattern3Entry,
|
||||
},
|
||||
});
|
||||
if (includeNouns) {
|
||||
if (noun) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -1) + "ي" },
|
||||
details: {
|
||||
|
|
|
@ -1,8 +1,30 @@
|
|||
import nounsAdjs from "../../../nouns-adjs";
|
||||
import * as T from "../../../types";
|
||||
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
|
||||
|
||||
/**
 * Finds the entries of `nounsAdjs` that match a single search criterion.
 *
 * Only the first key/value pair of `s` is used; any further pairs are ignored.
 * An empty search object matches nothing.
 *
 * @param s - search criterion, e.g. `{ p: "کور" }`
 * @returns every entry whose value under that key is strictly equal to the
 *   given value
 */
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
  const criterion = Object.entries(s)[0];
  if (criterion === undefined) {
    // No criterion given: destructuring `undefined` here used to throw.
    return [];
  }
  const [key, value] = criterion;
  // @ts-ignore -- `key` is a plain string and cannot index the entry type
  return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
}
|
||||
|
||||
/**
 * Fetches one noun or adjective entry from `nounsAdjs`.
 *
 * The first entry whose `p`, `f` or `g` field equals `word` is used.
 *
 * @param word - value to match against the `p`, `f` or `g` field
 * @param type - the kind of entry the caller expects back
 * @returns the matching entry
 * @throws Error when no entry matches, or when the match is not of the
 *   requested kind
 */
export function wordQuery(word: string, type: "adj"): T.AdjectiveEntry;
export function wordQuery(word: string, type: "noun"): T.NounEntry;
export function wordQuery(
  word: string,
  type: "noun" | "adj"
): T.NounEntry | T.AdjectiveEntry {
  const entry = nounsAdjs.find(
    (x) => x.p === word || x.f === word || x.g === word
  );
  if (!entry) {
    throw new Error(`missing ${word} in word query`);
  }
  if (type === "noun" && !isNounEntry(entry)) {
    throw new Error(`${word} is not a noun`);
  }
  if (type === "adj" && !isAdjectiveEntry(entry)) {
    throw new Error(`${word} is not an adjective`);
  }
  return entry as T.NounEntry | T.AdjectiveEntry;
}
|
||||
|
|
|
@ -4,10 +4,10 @@ import { isAdjectiveEntry } from "../type-predicates";
|
|||
import { getInflectionQueries } from "./inflection-query";
|
||||
|
||||
export function parseAdjective(
|
||||
tokens: Readonly<string[]>,
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
|
||||
): [
|
||||
string[],
|
||||
T.Token[],
|
||||
{
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
|
@ -20,7 +20,7 @@ export function parseAdjective(
|
|||
return [];
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const queries = getInflectionQueries(first, false);
|
||||
const queries = getInflectionQueries(first.s, false);
|
||||
queries.forEach(({ search, details }) => {
|
||||
const wideMatches = lookup(search).filter(isAdjectiveEntry);
|
||||
details.forEach((deets) => {
|
||||
|
@ -33,7 +33,7 @@ export function parseAdjective(
|
|||
selection,
|
||||
inflection: deets.inflection,
|
||||
gender: deets.gender,
|
||||
given: first,
|
||||
given: first.s,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,291 +0,0 @@
|
|||
import { makeNounSelection } from "../phrase-building/make-selections";
|
||||
import * as T from "../../../types";
|
||||
import { lookup } from "./lookup";
|
||||
import { parseNoun } from "./parse-noun";
|
||||
|
||||
const sarey = {
|
||||
ts: 1527815251,
|
||||
i: 8163,
|
||||
p: "سړی",
|
||||
f: "saRáy",
|
||||
g: "saRay",
|
||||
e: "man",
|
||||
r: 4,
|
||||
c: "n. m.",
|
||||
ec: "man",
|
||||
ep: "men",
|
||||
} as T.NounEntry;
|
||||
const dostee = {
|
||||
ts: 1527811877,
|
||||
i: 6627,
|
||||
p: "دوستي",
|
||||
f: "dostee",
|
||||
g: "dostee",
|
||||
e: "friendship",
|
||||
r: 3,
|
||||
c: "n. f.",
|
||||
} as T.NounEntry;
|
||||
const wreejze = {
|
||||
ts: 1586551382412,
|
||||
i: 14985,
|
||||
p: "وریژې",
|
||||
f: "wreejze",
|
||||
g: "wreejze",
|
||||
e: "rice",
|
||||
r: 4,
|
||||
c: "n. f. pl.",
|
||||
} as T.NounEntry;
|
||||
const xudza = {
|
||||
ts: 1527812797,
|
||||
i: 9018,
|
||||
p: "ښځه",
|
||||
f: "xúdza",
|
||||
g: "xudza",
|
||||
e: "woman, wife",
|
||||
r: 4,
|
||||
c: "n. f.",
|
||||
ec: "woman",
|
||||
ep: "women",
|
||||
} as T.NounEntry;
|
||||
const kursuy = {
|
||||
ts: 1527814203,
|
||||
i: 10573,
|
||||
p: "کرسۍ",
|
||||
f: "kUrsúy",
|
||||
g: "kUrsuy",
|
||||
e: "chair, seat, stool",
|
||||
r: 3,
|
||||
c: "n. f.",
|
||||
} as T.NounEntry;
|
||||
const kor = {
|
||||
ts: 1527812828,
|
||||
i: 11022,
|
||||
p: "کور",
|
||||
f: "kor",
|
||||
g: "kor",
|
||||
e: "house, home",
|
||||
r: 4,
|
||||
c: "n. m.",
|
||||
} as T.NounEntry;
|
||||
const daktar = {
|
||||
ts: 1527816747,
|
||||
i: 6709,
|
||||
p: "ډاکټر",
|
||||
f: "DaakTar",
|
||||
g: "DaakTar",
|
||||
e: "doctor",
|
||||
r: 4,
|
||||
c: "n. m. anim. unisex",
|
||||
} as T.NounEntry;
|
||||
|
||||
// TODO: test unisex ملګری etc
|
||||
|
||||
const tests: {
|
||||
category: string;
|
||||
cases: {
|
||||
input: string;
|
||||
output: {
|
||||
inflected: boolean;
|
||||
selection: T.NounSelection;
|
||||
}[];
|
||||
}[];
|
||||
}[] = [
|
||||
{
|
||||
category: "pattern 1 nouns",
|
||||
cases: [
|
||||
{
|
||||
input: "کور",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: makeNounSelection(kor, undefined),
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "کورو",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeNounSelection(kor, undefined),
|
||||
number: "plural",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ډاکټره",
|
||||
output: [
|
||||
{
|
||||
inflected: false,
|
||||
selection: {
|
||||
...makeNounSelection(daktar, undefined),
|
||||
gender: "fem",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
input: "ډاکټرې",
|
||||
output: [
|
||||
{
|
||||
inflected: true,
|
||||
selection: {
|
||||
...makeNounSelection(daktar, undefined),
|
||||
gender: "fem",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
// {
|
||||
// input: "سړی",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(sarey, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "سړي",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(sarey, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "سړو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(sarey, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "سړیو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(sarey, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "دوستي",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(dostee, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "دوستۍ",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(dostee, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "دوستیو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(dostee, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "وریژې",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(wreejze, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "ښځه",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(xudza, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "ښځې",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(xudza, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "ښځو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(xudza, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "کرسۍ",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: false,
|
||||
// selection: makeNounSelection(kursuy, undefined),
|
||||
// },
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: makeNounSelection(kursuy, undefined),
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// {
|
||||
// input: "کرسیو",
|
||||
// output: [
|
||||
// {
|
||||
// inflected: true,
|
||||
// selection: {
|
||||
// ...makeNounSelection(kursuy, undefined),
|
||||
// number: "plural",
|
||||
// },
|
||||
// },
|
||||
// ],
|
||||
// },
|
||||
// ];
|
||||
|
||||
describe("parsing nouns", () => {
|
||||
tests.forEach(({ category, cases }) => {
|
||||
// eslint-disable-next-line jest/valid-title
|
||||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
expect(parseNoun(input, lookup)).toEqual(output);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
|
@ -1,16 +1,22 @@
|
|||
import * as T from "../../../types";
|
||||
import { getInflectionPattern } from "../inflection-pattern";
|
||||
import { makeNounSelection } from "../phrase-building/make-selections";
|
||||
import {
|
||||
isFemNounEntry,
|
||||
isMascNounEntry,
|
||||
isNounEntry,
|
||||
isPluralNounEntry,
|
||||
isUnisexNounEntry,
|
||||
} from "../type-predicates";
|
||||
import { getInflectionQueries } from "./inflection-query";
|
||||
import { parseAdjective } from "./parse-adjective";
|
||||
|
||||
// TODO:
|
||||
// - cleanup the workflow and make sure all nouns are covered and test
|
||||
// - add possessive parsing
|
||||
|
||||
export function parseNoun(
|
||||
tokens: Readonly<string[]>,
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||
adjectives: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
|
@ -19,10 +25,7 @@ export function parseNoun(
|
|||
selection: T.AdjectiveSelection;
|
||||
}[]
|
||||
): {
|
||||
success: [
|
||||
string[],
|
||||
{ inflection: (0 | 1 | 2)[]; selection: T.NounSelection }
|
||||
][];
|
||||
success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
|
||||
errors: string[];
|
||||
} {
|
||||
if (tokens.length === 0) {
|
||||
|
@ -31,15 +34,19 @@ export function parseNoun(
|
|||
errors: [],
|
||||
};
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
// TODO: add recognition of او between adjectives
|
||||
const adjRes = parseAdjective(tokens, lookup);
|
||||
const withAdj = adjRes.map(([tkns, adj]) =>
|
||||
parseNoun(tkns, lookup, [...adjectives, adj])
|
||||
);
|
||||
const success: ReturnType<typeof parseNoun>["success"] = [];
|
||||
const errors: string[] = [];
|
||||
const [first, ...rest] = tokens;
|
||||
// const possesor =
|
||||
// first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined;
|
||||
|
||||
const searches = getInflectionQueries(first.s, true);
|
||||
|
||||
const searches = getInflectionQueries(first, true);
|
||||
searches.forEach(({ search, details }) => {
|
||||
const nounEntries = lookup(search).filter(isNounEntry);
|
||||
details.forEach((deets) => {
|
||||
|
@ -47,65 +54,108 @@ export function parseNoun(
|
|||
fittingEntries.forEach((entry) => {
|
||||
if (isUnisexNounEntry(entry)) {
|
||||
deets.gender.forEach((gender) => {
|
||||
deets.inflection.forEach((inf) => {
|
||||
const { ok, error } = adjsMatch(
|
||||
adjectives,
|
||||
gender,
|
||||
deets.inflection
|
||||
inf,
|
||||
deets.plural
|
||||
);
|
||||
if (ok) {
|
||||
convertInflection(inf, entry, gender, deets.plural).forEach(
|
||||
({ inflected, number }) => {
|
||||
const selection = makeNounSelection(entry, undefined);
|
||||
success.push([
|
||||
rest,
|
||||
{
|
||||
inflection: deets.inflection,
|
||||
inflected,
|
||||
selection: {
|
||||
...makeNounSelection(entry, undefined),
|
||||
gender,
|
||||
...selection,
|
||||
gender: selection.genderCanChange
|
||||
? gender
|
||||
: selection.gender,
|
||||
number: selection.numberCanChange
|
||||
? number
|
||||
: selection.number,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
);
|
||||
} else {
|
||||
error.forEach((e) => {
|
||||
errors.push(e);
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
} else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
|
||||
const { ok, error } = adjsMatch(adjectives, "masc", deets.inflection);
|
||||
deets.inflection.forEach((inf) => {
|
||||
const { ok, error } = adjsMatch(
|
||||
adjectives,
|
||||
"masc",
|
||||
inf,
|
||||
deets.plural
|
||||
);
|
||||
if (ok) {
|
||||
convertInflection(inf, entry, "masc", deets.plural).forEach(
|
||||
({ inflected, number }) => {
|
||||
const selection = makeNounSelection(entry, undefined);
|
||||
success.push([
|
||||
rest,
|
||||
{
|
||||
inflection: deets.inflection,
|
||||
inflected,
|
||||
selection: {
|
||||
...makeNounSelection(entry, undefined),
|
||||
...selection,
|
||||
number: selection.numberCanChange
|
||||
? number
|
||||
: selection.number,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
);
|
||||
} else {
|
||||
error.forEach((e) => {
|
||||
errors.push(e);
|
||||
});
|
||||
}
|
||||
});
|
||||
} else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
|
||||
const { ok, error } = adjsMatch(adjectives, "fem", deets.inflection);
|
||||
deets.inflection.forEach((inf) => {
|
||||
const { ok, error } = adjsMatch(
|
||||
adjectives,
|
||||
"fem",
|
||||
inf,
|
||||
deets.plural
|
||||
);
|
||||
if (ok) {
|
||||
convertInflection(inf, entry, "fem", deets.plural).forEach(
|
||||
({ inflected, number }) => {
|
||||
const selection = makeNounSelection(entry, undefined);
|
||||
success.push([
|
||||
rest,
|
||||
{
|
||||
inflection: deets.inflection,
|
||||
inflected,
|
||||
selection: {
|
||||
...makeNounSelection(entry, undefined),
|
||||
...selection,
|
||||
number: selection.numberCanChange
|
||||
? number
|
||||
: selection.number,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
);
|
||||
} else {
|
||||
error.forEach((e) => {
|
||||
errors.push(e);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
|
@ -119,12 +169,14 @@ export function parseNoun(
|
|||
function adjsMatch(
|
||||
adjectives: Parameters<typeof parseNoun>[2],
|
||||
gender: T.Gender,
|
||||
inflection: (0 | 1 | 2)[]
|
||||
inf: 0 | 1 | 2,
|
||||
plural: boolean | undefined
|
||||
): { ok: boolean; error: string[] } {
|
||||
const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
|
||||
const unmatching = adjectives.filter(
|
||||
(adj) =>
|
||||
!adj.gender.includes(gender) ||
|
||||
!adj.inflection.some((i) => inflection.includes(i))
|
||||
!adj.inflection.some((i) => i === inflection)
|
||||
);
|
||||
if (unmatching.length) {
|
||||
return {
|
||||
|
@ -134,9 +186,7 @@ function adjsMatch(
|
|||
x.given === x.selection.entry.p
|
||||
? x.given
|
||||
: `${x.given} (${x.selection.entry.p})`;
|
||||
const inflectionIssue = !x.inflection.some((x) =>
|
||||
inflection.includes(x)
|
||||
)
|
||||
const inflectionIssue = !x.inflection.some((x) => x === inflection)
|
||||
? ` should be ${showInflection(inflection)}`
|
||||
: ``;
|
||||
return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`;
|
||||
|
@ -150,14 +200,63 @@ function adjsMatch(
|
|||
}
|
||||
}
|
||||
|
||||
function showInflection(inf: (0 | 1 | 2)[]): string {
|
||||
const [last, ...rest] = inf.reverse();
|
||||
const template = rest.length
|
||||
? `${rest.join(", ")}, or ${last}`
|
||||
: last.toString();
|
||||
console.log(template);
|
||||
return template
|
||||
.replace("0", "plain")
|
||||
.replace("1", "first inflection")
|
||||
.replace("2", "second inflection");
|
||||
/**
 * Lists the `{ inflected, number }` readings that a matched inflection slot
 * can stand for, given the entry, the gender it is read as, and whether the
 * match came from a plural-flagged query.
 *
 * @param inflection - inflection slot (0, 1 or 2) of the query that matched
 * @param entry - the dictionary entry that was matched
 * @param gender - gender the form is being read as
 * @param plural - truthy when the matching query was flagged as plural
 * @returns the readings that apply to this match
 */
function convertInflection(
  inflection: 0 | 1 | 2,
  entry: T.NounEntry | T.AdjectiveEntry,
  gender: T.Gender,
  plural: boolean | undefined
): {
  inflected: boolean;
  number: T.NounNumber;
}[] {
  const pattern = getInflectionPattern(entry);
  // A plural-flagged match is shifted up one slot, capped at slot 2.
  const inf = (plural && inflection < 2 ? inflection + 1 : inflection) as
    | 0
    | 1
    | 2;
  if (inf === 0) {
    return [
      {
        inflected: false,
        number: "singular",
      },
    ];
  } else if (inf === 1) {
    return [
      // Inflected-singular reading: not offered for plural-only nouns or
      // plural-flagged matches, nor for masculine pattern-4 entries ending
      // in "ه".
      ...(!((isNounEntry(entry) && isPluralNounEntry(entry)) || plural) &&
      !(pattern === 4 && entry.p.endsWith("ه") && gender === "masc")
        ? [
            {
              inflected: true,
              number: "singular" as T.NounNumber,
            },
          ]
        : []),
      // Plain-plural reading: offered for patterns above 1, for feminine
      // forms of any pattern above 0, for plural-only nouns, and for
      // plural-flagged matches.
      ...(pattern > 1 ||
      (pattern > 0 && gender === "fem") ||
      (isNounEntry(entry) && isPluralNounEntry(entry)) ||
      plural
        ? [
            {
              inflected: false,
              number: "plural" as T.NounNumber,
            },
          ]
        : []),
    ];
  }
  // Slot 2 is always read as an inflected plural.
  return [
    {
      inflected: true,
      number: "plural",
    },
  ];
}
|
||||
|
||||
/**
 * Gives the human-readable name of an inflection slot, as used in the
 * adjective agreement error messages.
 *
 * @param inf - inflection slot
 * @returns "plain" for 0, "first inflection" for 1, "second inflection" otherwise
 */
function showInflection(inf: 0 | 1 | 2): string {
  if (inf === 0) {
    return "plain";
  }
  if (inf === 1) {
    return "first inflection";
  }
  return "second inflection";
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import { parsePronoun } from "./parse-pronoun";
|
|||
import { parseNoun } from "./parse-noun";
|
||||
|
||||
export function parsePhrase(
|
||||
s: string[],
|
||||
s: T.Token[],
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
|
||||
): {
|
||||
success: any[];
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
import * as T from "../../../types";
|
||||
|
||||
export function parsePronoun(tokens: Readonly<string[]>): [
|
||||
string[],
|
||||
export function parsePronoun(tokens: Readonly<T.Token[]>): [
|
||||
T.Token[],
|
||||
{
|
||||
inflected: boolean[];
|
||||
selection: T.PronounSelection;
|
||||
}
|
||||
][] {
|
||||
const [first, ...rest] = tokens;
|
||||
const [{ s }, ...rest] = tokens;
|
||||
const w: ReturnType<typeof parsePronoun> = [];
|
||||
if (first === "زه") {
|
||||
if (s === "زه") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -32,7 +32,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "ته") {
|
||||
} else if (s === "ته") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -55,7 +55,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "هغه") {
|
||||
} else if (s === "هغه") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -78,7 +78,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "هغې") {
|
||||
} else if (s === "هغې") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -90,7 +90,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "دی") {
|
||||
} else if (s === "دی") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -102,7 +102,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "ده") {
|
||||
} else if (s === "ده") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -114,7 +114,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "دا") {
|
||||
} else if (s === "دا") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -126,7 +126,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (first === "دې") {
|
||||
} else if (s === "دې") {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -138,7 +138,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (["مونږ", "موږ"].includes(first)) {
|
||||
} else if (["مونږ", "موږ"].includes(s)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -161,7 +161,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (["تاسو", "تاسې"].includes(first)) {
|
||||
} else if (["تاسو", "تاسې"].includes(s)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -184,7 +184,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (["هغوي", "هغوی"].includes(first)) {
|
||||
} else if (["هغوي", "هغوی"].includes(s)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
@ -207,7 +207,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
|
|||
},
|
||||
},
|
||||
]);
|
||||
} else if (["دوي", "دوی"].includes(first)) {
|
||||
} else if (["دوي", "دوی"].includes(s)) {
|
||||
w.push([
|
||||
rest,
|
||||
{
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
export function tokenizer(s: string): string[] {
|
||||
return s.trim().split(" ");
|
||||
import { Token } from "../../../types";
|
||||
|
||||
/**
 * Splits a phrase into word tokens, each tagged with its position.
 *
 * Words are separated by any run of whitespace, so repeated spaces, tabs and
 * newlines never produce empty tokens in the middle of the list. A blank
 * input still yields a single token with an empty `s`, so callers always
 * receive at least one token.
 *
 * @param s - the raw phrase text
 * @returns the tokens in order, with `i` counting up from 0
 */
export function tokenizer(s: string): Token[] {
  return s
    .trim()
    .split(/\s+/)
    .map((word, i) => ({ i, s: word }));
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -256,7 +256,10 @@ export function inflectRegularYayUnisex(
|
|||
fem: [
|
||||
[{ p: `${baseP}ې`, f: `${baseF}e` }],
|
||||
[{ p: `${baseP}ې`, f: `${baseF}e` }],
|
||||
[{ p: `${baseP}و`, f: `${baseF}o` }],
|
||||
[
|
||||
{ p: `${baseP}یو`, f: `${baseF}iyo` },
|
||||
{ p: `${baseP}و`, f: `${baseF}o` },
|
||||
],
|
||||
],
|
||||
};
|
||||
}
|
||||
|
@ -291,7 +294,7 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections {
|
|||
[{ p, f }],
|
||||
[{ p: `${baseP}ي`, f: `${baseF}ée` }],
|
||||
[
|
||||
{ p: `${baseP}یو`, f: `${baseF}iyo` },
|
||||
{ p: `${baseP}یو`, f: `${baseF}íyo` },
|
||||
{ p: `${baseP}و`, f: `${baseF}ó` },
|
||||
],
|
||||
],
|
||||
|
@ -299,7 +302,7 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections {
|
|||
[{ p: `${baseP}ۍ`, f: `${baseF}úy` }],
|
||||
[{ p: `${baseP}ۍ`, f: `${baseF}úy` }],
|
||||
[
|
||||
{ p: `${baseP}یو`, f: `${baseF}úyo` },
|
||||
{ p: `${baseP}یو`, f: `${baseF}íyo` },
|
||||
{ p: `${baseP}و`, f: `${baseF}ó` },
|
||||
],
|
||||
],
|
||||
|
@ -360,8 +363,8 @@ function inflectRegularEmphasizedYayMasc(p: string, f: string): T.Inflections {
|
|||
[{ p, f }],
|
||||
[{ p: `${baseP}ي`, f: `${baseF}ée` }],
|
||||
[
|
||||
{ p: `${baseP}یو`, f: `${baseF}iyo` },
|
||||
{ p: `${baseP}و`, f: `${baseF}o` },
|
||||
{ p: `${baseP}یو`, f: `${baseF}íyo` },
|
||||
{ p: `${baseP}و`, f: `${baseF}ó` },
|
||||
],
|
||||
],
|
||||
};
|
||||
|
@ -453,8 +456,8 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections {
|
|||
[{ p, f: `${baseF}úy` }],
|
||||
[{ p, f: `${baseF}úy` }],
|
||||
[
|
||||
{ p: `${baseP}یو`, f: `${baseF}úyo` },
|
||||
{ p: `${baseP}و`, f: `${baseF}o` },
|
||||
{ p: `${baseP}یو`, f: `${baseF}íyo` },
|
||||
{ p: `${baseP}و`, f: `${baseF}ó` },
|
||||
],
|
||||
],
|
||||
};
|
||||
|
|
|
@ -1896,7 +1896,10 @@ const toTest = [
|
|||
fem: [
|
||||
[{ p: "ستړې", f: "stúRe" }],
|
||||
[{ p: "ستړې", f: "stúRe" }],
|
||||
[{ p: "ستړو", f: "stúRo" }],
|
||||
[
|
||||
{ p: "ستړیو", f: "stúRiyo" },
|
||||
{ p: "ستړو", f: "stúRo" },
|
||||
],
|
||||
],
|
||||
},
|
||||
},
|
||||
|
|
|
@ -1269,3 +1269,8 @@ export type OtherComp = {
|
|||
type: "Comp";
|
||||
ps: PsString;
|
||||
};
|
||||
|
||||
/** One word of an input phrase, as produced by `tokenizer`. */
export type Token = {
  /** Zero-based position of the word within the tokenized phrase. */
  i: number;
  /** The text of the word. */
  s: string;
};
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
module.exports = [{ ts: 1527815333, e: "oven" }];
|
|
@ -7,6 +7,7 @@
|
|||
*/
|
||||
|
||||
module.exports = [
|
||||
{ ts: 1527816747, e: "doctor" }, // ډاکټر
|
||||
{ ts: 1527815408, e: "asleep" }, // ویده - weedú
|
||||
{ ts: 1527812796, e: "good" }, // ښه - xu
|
||||
{ ts: 1527821744, e: "cook, chef" }, // آشپز - aashpáz
|
||||
|
@ -124,4 +125,4 @@ module.exports = [
|
|||
{ ts: 1527811544, e: "standing" }, // ولاړ - waláaR, wuláaR
|
||||
{ ts: 1527815498, e: "aforementioned" }, // یاد - yaad
|
||||
{ ts: 1527815434, e: "cold" }, // یخ - yakh, yukh
|
||||
];
|
||||
];
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
module.exports = [
|
||||
{
|
||||
ts: 1527815177,
|
||||
e: "father",
|
||||
},
|
||||
{
|
||||
ts: 1527815129,
|
||||
e: "water",
|
||||
},
|
||||
{
|
||||
ts: 1527817330,
|
||||
e: "wheat",
|
||||
},
|
||||
{
|
||||
ts: 1527815206,
|
||||
e: "judge",
|
||||
},
|
||||
{
|
||||
ts: 1527812342,
|
||||
e: "people", // خلک
|
||||
},
|
||||
];
|
|
@ -0,0 +1,4 @@
|
|||
module.exports = [
|
||||
{ ts: 1527811441, e: "door" }, // ور
|
||||
{ ts: 1527813593, e: "mountain" }, // غر
|
||||
];
|
Loading…
Reference in New Issue