pretty full noun recognition - plural suffixes just started

This commit is contained in:
adueck 2023-08-01 16:11:10 +04:00
parent 6aec2dfeb2
commit b672e19c1a
18 changed files with 3284 additions and 1756 deletions

View File

@ -61,7 +61,6 @@ function NPNounPicker(props: {
opts: T.TextOptions; opts: T.TextOptions;
phraseIsComplete: boolean; phraseIsComplete: boolean;
}) { }) {
console.log({ noun: props.noun });
// const [patternFilter, setPatternFilter] = useState<FilterPattern | undefined>(undefined); // const [patternFilter, setPatternFilter] = useState<FilterPattern | undefined>(undefined);
// const [showFilter, setShowFilter] = useState<boolean>(false) // const [showFilter, setShowFilter] = useState<boolean>(false)
// const nounsFiltered = props.nouns // const nounsFiltered = props.nouns

View File

@ -1,4 +1,5 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { endsInConsonant } from "../p-text-helpers";
import { import {
isPattern1Entry, isPattern1Entry,
isPattern2Entry, isPattern2Entry,
@ -7,18 +8,26 @@ import {
isPattern5Entry, isPattern5Entry,
isPattern4Entry, isPattern4Entry,
isPattern6FemEntry, isPattern6FemEntry,
isFemNounEntry,
isAdjectiveEntry,
isUnisexNounEntry,
isPluralNounEntry,
isNounEntry,
isAnimNounEntry,
isMascNounEntry,
} from "../type-predicates"; } from "../type-predicates";
import { equals } from "rambda"; import { equals } from "rambda";
export function getInflectionQueries( export function getInflectionQueries(
s: string, s: string,
includeNouns: boolean noun: boolean
): { ): {
search: Partial<T.DictionaryEntry>; search: Partial<T.DictionaryEntry>;
details: { details: {
inflection: (0 | 1 | 2)[]; inflection: (0 | 1 | 2)[];
gender: T.Gender[]; gender: T.Gender[];
predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean; predicate: (e: T.AdjectiveEntry | T.NounEntry) => boolean;
plural?: boolean;
}[]; }[];
}[] { }[] {
const queries: { const queries: {
@ -26,6 +35,7 @@ export function getInflectionQueries(
details: { details: {
inflection: (0 | 1 | 2)[]; inflection: (0 | 1 | 2)[];
gender: T.Gender[]; gender: T.Gender[];
plural?: boolean;
predicate: (e: T.NounEntry | T.AdjectiveEntry) => boolean; predicate: (e: T.NounEntry | T.AdjectiveEntry) => boolean;
}; };
}[] = []; }[] = [];
@ -34,15 +44,111 @@ export function getInflectionQueries(
details: { details: {
inflection: [0, 1, 2], inflection: [0, 1, 2],
gender: ["masc", "fem"], gender: ["masc", "fem"],
predicate: isPattern(0), predicate: (e) =>
!(isNounEntry(e) && isPluralNounEntry(e)) &&
isPattern(0)(e) &&
isAdjectiveEntry(e),
},
});
if (noun) {
if (s.endsWith("ونه")) {
queries.push({
search: { p: s.slice(0, -3) },
details: {
inflection: [0],
gender: ["masc"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
queries.push({
search: { p: s.slice(0, -3) + "ه" },
details: {
inflection: [0],
gender: ["masc"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("ونو")) {
queries.push({
search: { p: s.slice(0, -3) },
details: {
inflection: [1],
gender: ["masc"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
queries.push({
search: { p: s.slice(0, -3) + "ه" },
details: {
inflection: [1],
gender: ["masc"],
plural: true,
predicate: (e) =>
isNounEntry(e) &&
!isPluralNounEntry(e) &&
!isPattern2Entry(e) &&
!isPattern3Entry(e) &&
!isPattern4Entry(e),
},
});
}
if (s.endsWith("و")) {
queries.push({
search: { p: s.slice(0, -1) },
details: {
inflection: [2],
gender: ["fem"],
predicate: (e) =>
isNounEntry(e) && isAnimNounEntry(e) && isFemNounEntry(e),
},
});
}
queries.push({
search: { p: s },
details: {
inflection: [0],
gender: ["fem"],
predicate: (e) =>
isNounEntry(e) && isFemNounEntry(e) && isPattern1Entry(e),
}, },
}); });
queries.push({
search: { p: s },
details: {
inflection: [0, 1],
gender: ["fem"],
predicate: (e) =>
isNounEntry(e) && isAnimNounEntry(e) && isFemNounEntry(e),
},
});
}
queries.push({ queries.push({
search: { p: s }, search: { p: s },
details: { details: {
inflection: [0, 1], inflection: [0, 1],
gender: ["masc"], gender: ["masc"],
predicate: isPattern1Entry, predicate: (e) =>
!(isNounEntry(e) && isPluralNounEntry(e)) &&
(isPattern1Entry(e) || isPattern(0)(e)),
}, },
}); });
queries.push({ queries.push({
@ -65,6 +171,17 @@ export function getInflectionQueries(
predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e), predicate: (e) => isPattern4Entry(e) || isPattern5Entry(e),
}, },
}); });
if (noun) {
queries.push({
search: { p: s },
details: {
inflection: [0],
plural: true,
gender: ["masc", "fem"],
predicate: (e) => isNounEntry(e) && isPluralNounEntry(e),
},
});
}
if (s.endsWith("ه")) { if (s.endsWith("ه")) {
queries.push({ queries.push({
search: { p: s.slice(0, -1) }, search: { p: s.slice(0, -1) },
@ -74,16 +191,6 @@ export function getInflectionQueries(
predicate: isPattern1Entry, predicate: isPattern1Entry,
}, },
}); });
if (includeNouns) {
queries.push({
search: { p: s },
details: {
inflection: [0],
gender: ["fem"],
predicate: isPattern1Entry,
},
});
}
queries.push({ queries.push({
search: { infbp: s.slice(0, -1) }, search: { infbp: s.slice(0, -1) },
details: { details: {
@ -101,7 +208,7 @@ export function getInflectionQueries(
predicate: isPattern1Entry, predicate: isPattern1Entry,
}, },
}); });
if (includeNouns) { if (noun) {
queries.push({ queries.push({
search: { p: s.slice(0, -1) + "ه" }, search: { p: s.slice(0, -1) + "ه" },
details: { details: {
@ -150,7 +257,7 @@ export function getInflectionQueries(
details: { details: {
inflection: [2], inflection: [2],
gender: ["masc", "fem"], gender: ["masc", "fem"],
predicate: (e) => isPattern1Entry(e) || isPattern5Entry(e), predicate: (e) => isPattern1Entry(e),
}, },
}); });
queries.push({ queries.push({
@ -169,6 +276,48 @@ export function getInflectionQueries(
predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e), predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e),
}, },
}); });
if (noun) {
queries.push({
search: { p: s.slice(0, -1) + "ه" },
details: {
inflection: [2],
gender: ["fem"],
predicate: (e) => isPattern1Entry(e) || isFemNounEntry(e),
},
});
queries.push({
search: { p: s.slice(0, -1) + "ه" },
details: {
inflection: [2],
gender: ["masc"],
predicate: isMascNounEntry,
},
});
queries.push({
search: { p: s.slice(0, -1) + "ې" },
details: {
inflection: [2],
gender: ["fem"],
predicate: (e) => isNounEntry(e) || isFemNounEntry(e),
},
});
queries.push({
search: { p: s.slice(0, -1) + "ۍ" },
details: {
inflection: [2],
gender: ["fem"],
predicate: (e) => isFemNounEntry(e) && isPattern3Entry(e),
},
});
queries.push({
search: { p: s.slice(0, -1) + "ي" },
details: {
inflection: [2],
gender: ["fem"],
predicate: isPattern6FemEntry,
},
});
}
if (s.endsWith("یو")) { if (s.endsWith("یو")) {
queries.push({ queries.push({
search: { p: s.slice(0, -2) + "ی" }, search: { p: s.slice(0, -2) + "ی" },
@ -178,6 +327,24 @@ export function getInflectionQueries(
predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e), predicate: (e) => isPattern2Entry(e) || isPattern3Entry(e),
}, },
}); });
if (noun) {
queries.push({
search: { p: s.slice(0, -2) + "ۍ" },
details: {
inflection: [2],
gender: ["fem"],
predicate: (e) => isPattern3Entry(e) && isFemNounEntry(e),
},
});
queries.push({
search: { p: s.slice(0, -2) + "ي" },
details: {
inflection: [2],
gender: ["fem"],
predicate: isPattern6FemEntry,
},
});
}
} }
} else if (s.endsWith("ۍ")) { } else if (s.endsWith("ۍ")) {
queries.push({ queries.push({
@ -188,7 +355,7 @@ export function getInflectionQueries(
predicate: isPattern3Entry, predicate: isPattern3Entry,
}, },
}); });
if (includeNouns) { if (noun) {
queries.push({ queries.push({
search: { p: s.slice(0, -1) + "ي" }, search: { p: s.slice(0, -1) + "ي" },
details: { details: {

View File

@ -1,8 +1,30 @@
import nounsAdjs from "../../../nouns-adjs"; import nounsAdjs from "../../../nouns-adjs";
import * as T from "../../../types"; import * as T from "../../../types";
import { isAdjectiveEntry, isNounEntry } from "../type-predicates";
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] { export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
const [key, value] = Object.entries(s)[0]; const [key, value] = Object.entries(s)[0];
// @ts-ignore // @ts-ignore
return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[]; return nounsAdjs.filter((e) => e[key] === value) as T.DictionaryEntry[];
} }
/**
 * Fetches one noun or adjective entry from the bundled `nounsAdjs` list.
 *
 * `word` is compared against each entry's `p`, `f` and `g` fields. Among the
 * entries that match, the first one that is also of the requested `type` is
 * returned, so a homograph of the other type appearing earlier in the list
 * no longer causes a spurious "is not a noun / adjective" failure.
 *
 * @param word - text to look for in the `p`, `f` or `g` field of an entry
 * @param type - which kind of entry the caller needs
 * @returns the first matching entry of the requested kind
 * @throws Error `missing <word> in word query` when no entry matches `word`
 * @throws Error `<word> is not a noun` / `<word> is not an adjective` when
 *   entries match `word` but none of them is of the requested kind
 */
export function wordQuery(word: string, type: "adj"): T.AdjectiveEntry;
export function wordQuery(word: string, type: "noun"): T.NounEntry;
export function wordQuery(
  word: string,
  type: "noun" | "adj"
): T.NounEntry | T.AdjectiveEntry {
  const candidates = nounsAdjs.filter(
    (x) => x.p === word || x.f === word || x.g === word
  );
  if (candidates.length === 0) {
    throw new Error(`missing ${word} in word query`);
  }
  // Check the type on every candidate, not only on the first word match.
  const entry = candidates.find((x) =>
    type === "noun" ? isNounEntry(x) : isAdjectiveEntry(x)
  );
  if (!entry) {
    throw new Error(
      type === "noun" ? `${word} is not a noun` : `${word} is not an adjective`
    );
  }
  return entry as T.NounEntry | T.AdjectiveEntry;
}

View File

@ -4,10 +4,10 @@ import { isAdjectiveEntry } from "../type-predicates";
import { getInflectionQueries } from "./inflection-query"; import { getInflectionQueries } from "./inflection-query";
export function parseAdjective( export function parseAdjective(
tokens: Readonly<string[]>, tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[] lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): [ ): [
string[], T.Token[],
{ {
inflection: (0 | 1 | 2)[]; inflection: (0 | 1 | 2)[];
gender: T.Gender[]; gender: T.Gender[];
@ -20,7 +20,7 @@ export function parseAdjective(
return []; return [];
} }
const [first, ...rest] = tokens; const [first, ...rest] = tokens;
const queries = getInflectionQueries(first, false); const queries = getInflectionQueries(first.s, false);
queries.forEach(({ search, details }) => { queries.forEach(({ search, details }) => {
const wideMatches = lookup(search).filter(isAdjectiveEntry); const wideMatches = lookup(search).filter(isAdjectiveEntry);
details.forEach((deets) => { details.forEach((deets) => {
@ -33,7 +33,7 @@ export function parseAdjective(
selection, selection,
inflection: deets.inflection, inflection: deets.inflection,
gender: deets.gender, gender: deets.gender,
given: first, given: first.s,
}, },
]); ]);
}); });

File diff suppressed because it is too large Load Diff

View File

@ -1,291 +0,0 @@
import { makeNounSelection } from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup } from "./lookup";
import { parseNoun } from "./parse-noun";
// Hand-written dictionary entries used as fixtures for the noun-parsing cases
// in this file. Each literal is cast to T.NounEntry rather than being looked
// up, so the fields are not checked against the full entry type.
// Only `kor` and `daktar` are referenced by the active cases; the other
// fixtures are referenced solely from the commented-out cases further down.

// "man" — tagged "n. m."
const sarey = {
ts: 1527815251,
i: 8163,
p: "سړی",
f: "saRáy",
g: "saRay",
e: "man",
r: 4,
c: "n. m.",
ec: "man",
ep: "men",
} as T.NounEntry;
// "friendship" — tagged "n. f."
const dostee = {
ts: 1527811877,
i: 6627,
p: "دوستي",
f: "dostee",
g: "dostee",
e: "friendship",
r: 3,
c: "n. f.",
} as T.NounEntry;
// "rice" — tagged "n. f. pl."
const wreejze = {
ts: 1586551382412,
i: 14985,
p: "وریژې",
f: "wreejze",
g: "wreejze",
e: "rice",
r: 4,
c: "n. f. pl.",
} as T.NounEntry;
// "woman, wife" — tagged "n. f."
const xudza = {
ts: 1527812797,
i: 9018,
p: "ښځه",
f: "xúdza",
g: "xudza",
e: "woman, wife",
r: 4,
c: "n. f.",
ec: "woman",
ep: "women",
} as T.NounEntry;
// "chair, seat, stool" — tagged "n. f."
const kursuy = {
ts: 1527814203,
i: 10573,
p: "کرسۍ",
f: "kUrsúy",
g: "kUrsuy",
e: "chair, seat, stool",
r: 3,
c: "n. f.",
} as T.NounEntry;
// "house, home" — tagged "n. m."; used by the active "pattern 1 nouns" cases
const kor = {
ts: 1527812828,
i: 11022,
p: "کور",
f: "kor",
g: "kor",
e: "house, home",
r: 4,
c: "n. m.",
} as T.NounEntry;
// "doctor" — tagged "n. m. anim. unisex"; used by the active cases with the
// selection's gender overridden to "fem"
const daktar = {
ts: 1527816747,
i: 6709,
p: "ډاکټر",
f: "DaakTar",
g: "DaakTar",
e: "doctor",
r: 4,
c: "n. m. anim. unisex",
} as T.NounEntry;
// TODO: test unisex ملګری etc
// Table of parsing cases, grouped by category.
// - `category` becomes the title of one jest test.
// - each case's `input` is handed to parseNoun and the result is compared
//   (toEqual) with `output`.
// - `output` lists the expected readings: whether the form is inflected and
//   the noun selection it should resolve to.
const tests: {
category: string;
cases: {
input: string;
output: {
inflected: boolean;
selection: T.NounSelection;
}[];
}[];
}[] = [
{
category: "pattern 1 nouns",
cases: [
{
// plain form of `kor`
input: "کور",
output: [
{
inflected: false,
selection: makeNounSelection(kor, undefined),
},
],
},
{
// `kor` expected as inflected, with number forced to plural
input: "کورو",
output: [
{
inflected: true,
selection: {
...makeNounSelection(kor, undefined),
number: "plural",
},
},
],
},
{
// `daktar` expected as uninflected, with gender forced to fem
input: "ډاکټره",
output: [
{
inflected: false,
selection: {
...makeNounSelection(daktar, undefined),
gender: "fem",
},
},
],
},
{
// `daktar` expected as inflected, with gender forced to fem
input: "ډاکټرې",
output: [
{
inflected: true,
selection: {
...makeNounSelection(daktar, undefined),
gender: "fem",
},
},
],
},
],
},
];
// {
// input: "سړی",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(sarey, undefined),
// },
// ],
// },
// {
// input: "سړي",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(sarey, undefined),
// },
// ],
// },
// {
// input: "سړو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(sarey, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "سړیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(sarey, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "دوستي",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(dostee, undefined),
// },
// ],
// },
// {
// input: "دوستۍ",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(dostee, undefined),
// },
// ],
// },
// {
// input: "دوستیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(dostee, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "وریژې",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(wreejze, undefined),
// },
// ],
// },
// {
// input: "ښځه",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(xudza, undefined),
// },
// ],
// },
// {
// input: "ښځې",
// output: [
// {
// inflected: true,
// selection: makeNounSelection(xudza, undefined),
// },
// ],
// },
// {
// input: "ښځو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(xudza, undefined),
// number: "plural",
// },
// },
// ],
// },
// {
// input: "کرسۍ",
// output: [
// {
// inflected: false,
// selection: makeNounSelection(kursuy, undefined),
// },
// {
// inflected: true,
// selection: makeNounSelection(kursuy, undefined),
// },
// ],
// },
// {
// input: "کرسیو",
// output: [
// {
// inflected: true,
// selection: {
// ...makeNounSelection(kursuy, undefined),
// number: "plural",
// },
// },
// ],
// },
// ];
// Registers one jest test per category in `tests`; all cases of a category
// are asserted inside that single test.
// NOTE(review): parseNoun is called here with the raw input string and two
// arguments — confirm this matches the parseNoun signature in use.
describe("parsing nouns", () => {
  for (const { category, cases } of tests) {
    // eslint-disable-next-line jest/valid-title
    test(category, () => {
      for (const { input, output } of cases) {
        const parsed = parseNoun(input, lookup);
        expect(parsed).toEqual(output);
      }
    });
  }
});

View File

@ -1,16 +1,22 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { getInflectionPattern } from "../inflection-pattern";
import { makeNounSelection } from "../phrase-building/make-selections"; import { makeNounSelection } from "../phrase-building/make-selections";
import { import {
isFemNounEntry, isFemNounEntry,
isMascNounEntry, isMascNounEntry,
isNounEntry, isNounEntry,
isPluralNounEntry,
isUnisexNounEntry, isUnisexNounEntry,
} from "../type-predicates"; } from "../type-predicates";
import { getInflectionQueries } from "./inflection-query"; import { getInflectionQueries } from "./inflection-query";
import { parseAdjective } from "./parse-adjective"; import { parseAdjective } from "./parse-adjective";
// TODO:
// - cleanup the workflow and make sure all nouns are covered and test
// - add possessive parsing
export function parseNoun( export function parseNoun(
tokens: Readonly<string[]>, tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[], lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
adjectives: { adjectives: {
inflection: (0 | 1 | 2)[]; inflection: (0 | 1 | 2)[];
@ -19,10 +25,7 @@ export function parseNoun(
selection: T.AdjectiveSelection; selection: T.AdjectiveSelection;
}[] }[]
): { ): {
success: [ success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
string[],
{ inflection: (0 | 1 | 2)[]; selection: T.NounSelection }
][];
errors: string[]; errors: string[];
} { } {
if (tokens.length === 0) { if (tokens.length === 0) {
@ -31,15 +34,19 @@ export function parseNoun(
errors: [], errors: [],
}; };
} }
const [first, ...rest] = tokens;
// TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup); const adjRes = parseAdjective(tokens, lookup);
const withAdj = adjRes.map(([tkns, adj]) => const withAdj = adjRes.map(([tkns, adj]) =>
parseNoun(tkns, lookup, [...adjectives, adj]) parseNoun(tkns, lookup, [...adjectives, adj])
); );
const success: ReturnType<typeof parseNoun>["success"] = []; const success: ReturnType<typeof parseNoun>["success"] = [];
const errors: string[] = []; const errors: string[] = [];
const [first, ...rest] = tokens; // const possesor =
// first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined;
const searches = getInflectionQueries(first.s, true);
const searches = getInflectionQueries(first, true);
searches.forEach(({ search, details }) => { searches.forEach(({ search, details }) => {
const nounEntries = lookup(search).filter(isNounEntry); const nounEntries = lookup(search).filter(isNounEntry);
details.forEach((deets) => { details.forEach((deets) => {
@ -47,65 +54,108 @@ export function parseNoun(
fittingEntries.forEach((entry) => { fittingEntries.forEach((entry) => {
if (isUnisexNounEntry(entry)) { if (isUnisexNounEntry(entry)) {
deets.gender.forEach((gender) => { deets.gender.forEach((gender) => {
deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch( const { ok, error } = adjsMatch(
adjectives, adjectives,
gender, gender,
deets.inflection inf,
deets.plural
); );
if (ok) { if (ok) {
convertInflection(inf, entry, gender, deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([ success.push([
rest, rest,
{ {
inflection: deets.inflection, inflected,
selection: { selection: {
...makeNounSelection(entry, undefined), ...selection,
gender, gender: selection.genderCanChange
? gender
: selection.gender,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection), adjectives: adjectives.map((a) => a.selection),
}, },
}, },
]); ]);
}
);
} else { } else {
error.forEach((e) => { error.forEach((e) => {
errors.push(e); errors.push(e);
}); });
} }
}); });
});
} else if (isMascNounEntry(entry) && deets.gender.includes("masc")) { } else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
const { ok, error } = adjsMatch(adjectives, "masc", deets.inflection); deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
adjectives,
"masc",
inf,
deets.plural
);
if (ok) { if (ok) {
convertInflection(inf, entry, "masc", deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([ success.push([
rest, rest,
{ {
inflection: deets.inflection, inflected,
selection: { selection: {
...makeNounSelection(entry, undefined), ...selection,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection), adjectives: adjectives.map((a) => a.selection),
}, },
}, },
]); ]);
}
);
} else { } else {
error.forEach((e) => { error.forEach((e) => {
errors.push(e); errors.push(e);
}); });
} }
});
} else if (isFemNounEntry(entry) && deets.gender.includes("fem")) { } else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
const { ok, error } = adjsMatch(adjectives, "fem", deets.inflection); deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
adjectives,
"fem",
inf,
deets.plural
);
if (ok) { if (ok) {
convertInflection(inf, entry, "fem", deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([ success.push([
rest, rest,
{ {
inflection: deets.inflection, inflected,
selection: { selection: {
...makeNounSelection(entry, undefined), ...selection,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection), adjectives: adjectives.map((a) => a.selection),
}, },
}, },
]); ]);
}
);
} else { } else {
error.forEach((e) => { error.forEach((e) => {
errors.push(e); errors.push(e);
}); });
} }
});
} }
}); });
}); });
@ -119,12 +169,14 @@ export function parseNoun(
function adjsMatch( function adjsMatch(
adjectives: Parameters<typeof parseNoun>[2], adjectives: Parameters<typeof parseNoun>[2],
gender: T.Gender, gender: T.Gender,
inflection: (0 | 1 | 2)[] inf: 0 | 1 | 2,
plural: boolean | undefined
): { ok: boolean; error: string[] } { ): { ok: boolean; error: string[] } {
const inflection = (plural && inf < 2 ? inf + 1 : inf) as 0 | 1 | 2;
const unmatching = adjectives.filter( const unmatching = adjectives.filter(
(adj) => (adj) =>
!adj.gender.includes(gender) || !adj.gender.includes(gender) ||
!adj.inflection.some((i) => inflection.includes(i)) !adj.inflection.some((i) => i === inflection)
); );
if (unmatching.length) { if (unmatching.length) {
return { return {
@ -134,9 +186,7 @@ function adjsMatch(
x.given === x.selection.entry.p x.given === x.selection.entry.p
? x.given ? x.given
: `${x.given} (${x.selection.entry.p})`; : `${x.given} (${x.selection.entry.p})`;
const inflectionIssue = !x.inflection.some((x) => const inflectionIssue = !x.inflection.some((x) => x === inflection)
inflection.includes(x)
)
? ` should be ${showInflection(inflection)}` ? ` should be ${showInflection(inflection)}`
: ``; : ``;
return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`; return `Adjective agreement error: ${adjText} should be ${inflectionIssue} ${gender}.`;
@ -150,14 +200,63 @@ function adjsMatch(
} }
} }
function showInflection(inf: (0 | 1 | 2)[]): string { function convertInflection(
const [last, ...rest] = inf.reverse(); inflection: 0 | 1 | 2,
const template = rest.length entry: T.NounEntry | T.AdjectiveEntry,
? `${rest.join(", ")}, or ${last}` gender: T.Gender,
: last.toString(); plural: boolean | undefined
console.log(template); ): {
return template inflected: boolean;
.replace("0", "plain") number: T.NounNumber;
.replace("1", "first inflection") }[] {
.replace("2", "second inflection"); const pattern = getInflectionPattern(entry);
const inf = (plural && inflection < 2 ? inflection + 1 : inflection) as
| 0
| 1
| 2;
if (inf === 0) {
return [
{
inflected: false,
number: "singular",
},
];
} else if (inf === 1) {
return [
...(!((isNounEntry(entry) && isPluralNounEntry(entry)) || plural) &&
!(pattern === 4 && entry.p.endsWith("ه") && gender === "masc")
? [
{
inflected: true,
number: "singular" as T.NounNumber,
},
]
: []),
...(pattern > 1 ||
(pattern > 0 && gender === "fem") ||
(isNounEntry(entry) && isPluralNounEntry(entry)) ||
plural
? [
{
inflected: false,
number: "plural" as T.NounNumber,
},
]
: []),
];
}
return [
{
inflected: true,
number: "plural",
},
];
}
function showInflection(inf: 0 | 1 | 2): string {
return inf === 0
? "plain"
: inf === 1
? "first inflection"
: "second inflection";
} }

View File

@ -4,7 +4,7 @@ import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun"; import { parseNoun } from "./parse-noun";
export function parsePhrase( export function parsePhrase(
s: string[], s: T.Token[],
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[] lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
): { ): {
success: any[]; success: any[];

View File

@ -1,15 +1,15 @@
import * as T from "../../../types"; import * as T from "../../../types";
export function parsePronoun(tokens: Readonly<string[]>): [ export function parsePronoun(tokens: Readonly<T.Token[]>): [
string[], T.Token[],
{ {
inflected: boolean[]; inflected: boolean[];
selection: T.PronounSelection; selection: T.PronounSelection;
} }
][] { ][] {
const [first, ...rest] = tokens; const [{ s }, ...rest] = tokens;
const w: ReturnType<typeof parsePronoun> = []; const w: ReturnType<typeof parsePronoun> = [];
if (first === "زه") { if (s === "زه") {
w.push([ w.push([
rest, rest,
{ {
@ -32,7 +32,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (first === "ته") { } else if (s === "ته") {
w.push([ w.push([
rest, rest,
{ {
@ -55,7 +55,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (first === "هغه") { } else if (s === "هغه") {
w.push([ w.push([
rest, rest,
{ {
@ -78,7 +78,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (first === "هغې") { } else if (s === "هغې") {
w.push([ w.push([
rest, rest,
{ {
@ -90,7 +90,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (first === "دی") { } else if (s === "دی") {
w.push([ w.push([
rest, rest,
{ {
@ -102,7 +102,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (first === "ده") { } else if (s === "ده") {
w.push([ w.push([
rest, rest,
{ {
@ -114,7 +114,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (first === "دا") { } else if (s === "دا") {
w.push([ w.push([
rest, rest,
{ {
@ -126,7 +126,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (first === "دې") { } else if (s === "دې") {
w.push([ w.push([
rest, rest,
{ {
@ -138,7 +138,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (["مونږ", "موږ"].includes(first)) { } else if (["مونږ", "موږ"].includes(s)) {
w.push([ w.push([
rest, rest,
{ {
@ -161,7 +161,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (["تاسو", "تاسې"].includes(first)) { } else if (["تاسو", "تاسې"].includes(s)) {
w.push([ w.push([
rest, rest,
{ {
@ -184,7 +184,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (["هغوي", "هغوی"].includes(first)) { } else if (["هغوي", "هغوی"].includes(s)) {
w.push([ w.push([
rest, rest,
{ {
@ -207,7 +207,7 @@ export function parsePronoun(tokens: Readonly<string[]>): [
}, },
}, },
]); ]);
} else if (["دوي", "دوی"].includes(first)) { } else if (["دوي", "دوی"].includes(s)) {
w.push([ w.push([
rest, rest,
{ {

View File

@ -1,3 +1,10 @@
export function tokenizer(s: string): string[] { import { Token } from "../../../types";
return s.trim().split(" ");
/**
 * Splits raw input text into positioned tokens.
 *
 * Leading and trailing whitespace is dropped, then the text is split on runs
 * of whitespace, so doubled spaces, tabs or line breaks between words neither
 * produce empty tokens nor leave two words glued together.
 *
 * @param s - the text to tokenize
 * @returns one token per word; `i` is the word's 0-based position and `s` its
 *   text. Empty or whitespace-only input yields a single token with an empty
 *   `s` (unchanged from the previous behaviour), never an empty array.
 */
export function tokenizer(s: string): Token[] {
  return s
    .trim()
    .split(/\s+/)
    .map((word, i) => ({ i, s: word }));
}

File diff suppressed because it is too large Load Diff

View File

@ -256,7 +256,10 @@ export function inflectRegularYayUnisex(
fem: [ fem: [
[{ p: `${baseP}ې`, f: `${baseF}e` }], [{ p: `${baseP}ې`, f: `${baseF}e` }],
[{ p: `${baseP}ې`, f: `${baseF}e` }], [{ p: `${baseP}ې`, f: `${baseF}e` }],
[{ p: `${baseP}و`, f: `${baseF}o` }], [
{ p: `${baseP}یو`, f: `${baseF}iyo` },
{ p: `${baseP}و`, f: `${baseF}o` },
],
], ],
}; };
} }
@ -291,7 +294,7 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections {
[{ p, f }], [{ p, f }],
[{ p: `${baseP}ي`, f: `${baseF}ée` }], [{ p: `${baseP}ي`, f: `${baseF}ée` }],
[ [
{ p: `${baseP}یو`, f: `${baseF}iyo` }, { p: `${baseP}یو`, f: `${baseF}íyo` },
{ p: `${baseP}و`, f: `${baseF}ó` }, { p: `${baseP}و`, f: `${baseF}ó` },
], ],
], ],
@ -299,7 +302,7 @@ function inflectEmphasizedYayUnisex(p: string, f: string): T.UnisexInflections {
[{ p: `${baseP}ۍ`, f: `${baseF}úy` }], [{ p: `${baseP}ۍ`, f: `${baseF}úy` }],
[{ p: `${baseP}ۍ`, f: `${baseF}úy` }], [{ p: `${baseP}ۍ`, f: `${baseF}úy` }],
[ [
{ p: `${baseP}یو`, f: `${baseF}úyo` }, { p: `${baseP}یو`, f: `${baseF}íyo` },
{ p: `${baseP}و`, f: `${baseF}ó` }, { p: `${baseP}و`, f: `${baseF}ó` },
], ],
], ],
@ -360,8 +363,8 @@ function inflectRegularEmphasizedYayMasc(p: string, f: string): T.Inflections {
[{ p, f }], [{ p, f }],
[{ p: `${baseP}ي`, f: `${baseF}ée` }], [{ p: `${baseP}ي`, f: `${baseF}ée` }],
[ [
{ p: `${baseP}یو`, f: `${baseF}iyo` }, { p: `${baseP}یو`, f: `${baseF}íyo` },
{ p: `${baseP}و`, f: `${baseF}o` }, { p: `${baseP}و`, f: `${baseF}ó` },
], ],
], ],
}; };
@ -453,8 +456,8 @@ function inflectRegularUyFem(p: string, f: string): T.Inflections {
[{ p, f: `${baseF}úy` }], [{ p, f: `${baseF}úy` }],
[{ p, f: `${baseF}úy` }], [{ p, f: `${baseF}úy` }],
[ [
{ p: `${baseP}یو`, f: `${baseF}úyo` }, { p: `${baseP}یو`, f: `${baseF}íyo` },
{ p: `${baseP}و`, f: `${baseF}o` }, { p: `${baseP}و`, f: `${baseF}ó` },
], ],
], ],
}; };

View File

@ -1896,7 +1896,10 @@ const toTest = [
fem: [ fem: [
[{ p: "ستړې", f: "stúRe" }], [{ p: "ستړې", f: "stúRe" }],
[{ p: "ستړې", f: "stúRe" }], [{ p: "ستړې", f: "stúRe" }],
[{ p: "ستړو", f: "stúRo" }], [
{ p: "ستړیو", f: "stúRiyo" },
{ p: "ستړو", f: "stúRo" },
],
], ],
}, },
}, },

View File

@ -1269,3 +1269,8 @@ export type OtherComp = {
type: "Comp"; type: "Comp";
ps: PsString; ps: PsString;
}; };
/**
 * One word of parser input, as produced by `tokenizer` and consumed by the
 * parse functions (`parseNoun`, `parseAdjective`, `parsePronoun`, ...).
 */
export type Token = {
// 0-based position of the word within the tokenized input
i: number;
// the word's text
s: string;
};

View File

@ -0,0 +1 @@
// Single-item word list. `ts` and `e` use the same field names as dictionary
// entries (`e` being the English gloss).
// NOTE(review): presumably `ts` is what selects the entry and `e` is only a
// human-readable label — confirm against the code that consumes this list.
module.exports = [{ ts: 1527815333, e: "oven" }];

View File

@ -7,6 +7,7 @@
*/ */
module.exports = [ module.exports = [
{ ts: 1527816747, e: "doctor" }, // ډاکټر
{ ts: 1527815408, e: "asleep" }, // ویده - weedú { ts: 1527815408, e: "asleep" }, // ویده - weedú
{ ts: 1527812796, e: "good" }, // ښه - xu { ts: 1527812796, e: "good" }, // ښه - xu
{ ts: 1527821744, e: "cook, chef" }, // آشپز - aashpáz { ts: 1527821744, e: "cook, chef" }, // آشپز - aashpáz

View File

@ -0,0 +1,22 @@
// Word list: each item carries a `ts` number and an English gloss `e`, the
// same field names dictionary entries use.
// NOTE(review): presumably `ts` is what selects the entry and `e` is only a
// human-readable label — confirm against the code that consumes this list.
module.exports = [
{
ts: 1527815177,
e: "father",
},
{
ts: 1527815129,
e: "water",
},
{
ts: 1527817330,
e: "wheat",
},
{
ts: 1527815206,
e: "judge",
},
{
ts: 1527812342,
e: "people", // خلک
},
];

View File

@ -0,0 +1,4 @@
// Word list: each item carries a `ts` number and an English gloss `e`, the
// same field names dictionary entries use; the trailing comment gives the
// Pashto spelling.
// NOTE(review): presumably `ts` is what selects the entry and `e` is only a
// human-readable label — confirm against the code that consumes this list.
module.exports = [
{ ts: 1527811441, e: "door" }, // ور
{ ts: 1527813593, e: "mountain" }, // غر
];