possessives sort of working
This commit is contained in:
parent
f0624252bc
commit
a084433064
|
@ -1,5 +1,4 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { endsInConsonant } from "../p-text-helpers";
|
|
||||||
import {
|
import {
|
||||||
isPattern1Entry,
|
isPattern1Entry,
|
||||||
isPattern2Entry,
|
isPattern2Entry,
|
||||||
|
@ -110,7 +109,10 @@ export function getInflectionQueries(
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) {
|
if (
|
||||||
|
s.endsWith("ان") &&
|
||||||
|
!["ا", "و"].includes(s.charAt(s.length - 3) || "")
|
||||||
|
) {
|
||||||
queries.push({
|
queries.push({
|
||||||
search: { p: s.slice(0, -2) },
|
search: { p: s.slice(0, -2) },
|
||||||
details: {
|
details: {
|
||||||
|
@ -127,7 +129,10 @@ export function getInflectionQueries(
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) {
|
if (
|
||||||
|
s.endsWith("انې") &&
|
||||||
|
!["ا", "و"].includes(s.charAt(s.length - 4) || "")
|
||||||
|
) {
|
||||||
queries.push({
|
queries.push({
|
||||||
search: { p: s.slice(0, -3) },
|
search: { p: s.slice(0, -3) },
|
||||||
details: {
|
details: {
|
||||||
|
@ -144,7 +149,10 @@ export function getInflectionQueries(
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) {
|
if (
|
||||||
|
s.endsWith("ګان") &&
|
||||||
|
["ا", "و"].includes(s.charAt(s.length - 4) || "")
|
||||||
|
) {
|
||||||
queries.push({
|
queries.push({
|
||||||
search: { p: s.slice(0, -3) },
|
search: { p: s.slice(0, -3) },
|
||||||
details: {
|
details: {
|
||||||
|
@ -160,7 +168,10 @@ export function getInflectionQueries(
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) {
|
if (
|
||||||
|
s.endsWith("ګانې") &&
|
||||||
|
["ا", "و"].includes(s.charAt(s.length - 5) || "")
|
||||||
|
) {
|
||||||
queries.push({
|
queries.push({
|
||||||
search: { p: s.slice(0, -4) },
|
search: { p: s.slice(0, -4) },
|
||||||
details: {
|
details: {
|
||||||
|
@ -176,7 +187,7 @@ export function getInflectionQueries(
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) {
|
if (s.endsWith("وې") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
|
||||||
queries.push({
|
queries.push({
|
||||||
search: { p: s.slice(0, -2) },
|
search: { p: s.slice(0, -2) },
|
||||||
details: {
|
details: {
|
||||||
|
@ -192,7 +203,7 @@ export function getInflectionQueries(
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) {
|
if (s.endsWith("وو") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
|
||||||
queries.push({
|
queries.push({
|
||||||
search: { p: s.slice(0, -2) },
|
search: { p: s.slice(0, -2) },
|
||||||
details: {
|
details: {
|
||||||
|
@ -208,7 +219,10 @@ export function getInflectionQueries(
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) {
|
if (
|
||||||
|
s.endsWith("ګانو") &&
|
||||||
|
["ا", "و"].includes(s.charAt(s.length - 5) || "")
|
||||||
|
) {
|
||||||
queries.push({
|
queries.push({
|
||||||
search: { p: s.slice(0, -4) },
|
search: { p: s.slice(0, -4) },
|
||||||
details: {
|
details: {
|
||||||
|
|
|
@ -1301,7 +1301,7 @@ describe("parsing nouns", () => {
|
||||||
test(category, () => {
|
test(category, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const { success } = parseNoun(tokens, lookup, []);
|
const { success } = parseNoun(tokens, lookup, undefined);
|
||||||
const res = success.map(([tkns, r]) => r);
|
const res = success.map(([tkns, r]) => r);
|
||||||
expect(res).toEqual(output);
|
expect(res).toEqual(output);
|
||||||
});
|
});
|
||||||
|
@ -1408,7 +1408,8 @@ const adjsTests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
// TODO: WHY DOES ADDING زړو break this ???
|
// TODO: testing issue with the parser returning multiple options needs
|
||||||
|
// to be worked out to test double adjectives
|
||||||
{
|
{
|
||||||
input: "غټو کورونو",
|
input: "غټو کورونو",
|
||||||
output: [
|
output: [
|
||||||
|
@ -1435,9 +1436,9 @@ describe("parsing nouns with adjectives", () => {
|
||||||
test(category, () => {
|
test(category, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
expect(parseNoun(tokens, lookup, []).success.map((x) => x[1])).toEqual(
|
expect(
|
||||||
output
|
parseNoun(tokens, lookup, undefined).success.map((x) => x[1])
|
||||||
);
|
).toEqual(output);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -2,7 +2,6 @@ import * as T from "../../../types";
|
||||||
import { getInflectionPattern } from "../inflection-pattern";
|
import { getInflectionPattern } from "../inflection-pattern";
|
||||||
import { makeNounSelection } from "../phrase-building/make-selections";
|
import { makeNounSelection } from "../phrase-building/make-selections";
|
||||||
import {
|
import {
|
||||||
isFemNounEntry,
|
|
||||||
isMascNounEntry,
|
isMascNounEntry,
|
||||||
isNounEntry,
|
isNounEntry,
|
||||||
isPluralNounEntry,
|
isPluralNounEntry,
|
||||||
|
@ -18,6 +17,71 @@ import { parseAdjective } from "./parse-adjective";
|
||||||
export function parseNoun(
|
export function parseNoun(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||||
|
prevPossesor: T.NounSelection | undefined
|
||||||
|
): {
|
||||||
|
success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
|
||||||
|
errors: string[];
|
||||||
|
} {
|
||||||
|
if (tokens.length === 0) {
|
||||||
|
return {
|
||||||
|
success: [],
|
||||||
|
errors: [],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
const [first, ...rest] = tokens;
|
||||||
|
const possesor =
|
||||||
|
first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
|
||||||
|
if (possesor) {
|
||||||
|
const runsAfterPossesor: [
|
||||||
|
Readonly<T.Token[]>,
|
||||||
|
{ inflected: boolean; selection: T.NounSelection } | undefined
|
||||||
|
][] = possesor ? [...possesor.success] : [[tokens, undefined]];
|
||||||
|
// could be a case for a monad ??
|
||||||
|
return runsAfterPossesor.reduce<ReturnType<typeof parseNoun>>(
|
||||||
|
(acc, [tokens, possesor]) => {
|
||||||
|
if (possesor?.inflected === false) {
|
||||||
|
return {
|
||||||
|
success: [...acc.success],
|
||||||
|
errors: [...acc.errors, "possesor should be inflected"],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
const { success, errors } = parseNoun(
|
||||||
|
tokens,
|
||||||
|
lookup,
|
||||||
|
possesor
|
||||||
|
? {
|
||||||
|
...possesor.selection,
|
||||||
|
possesor: prevPossesor
|
||||||
|
? {
|
||||||
|
shrunken: false,
|
||||||
|
np: {
|
||||||
|
type: "NP",
|
||||||
|
selection: prevPossesor,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
: undefined,
|
||||||
|
}
|
||||||
|
: undefined
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
success: [...acc.success, ...success],
|
||||||
|
errors: [...acc.errors, ...errors],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
{ success: [], errors: [] }
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
return parseNounAfterPossesor(tokens, lookup, prevPossesor, []);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// create NP parsing function for that
|
||||||
|
// TODO with possesor, parse an NP not a noun
|
||||||
|
|
||||||
|
function parseNounAfterPossesor(
|
||||||
|
tokens: Readonly<T.Token[]>,
|
||||||
|
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||||
|
possesor: T.NounSelection | undefined,
|
||||||
adjectives: {
|
adjectives: {
|
||||||
inflection: (0 | 1 | 2)[];
|
inflection: (0 | 1 | 2)[];
|
||||||
gender: T.Gender[];
|
gender: T.Gender[];
|
||||||
|
@ -34,16 +98,14 @@ export function parseNoun(
|
||||||
errors: [],
|
errors: [],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
const [first, ...rest] = tokens;
|
|
||||||
// TODO: add recognition of او between adjectives
|
// TODO: add recognition of او between adjectives
|
||||||
const adjRes = parseAdjective(tokens, lookup);
|
const adjRes = parseAdjective(tokens, lookup);
|
||||||
const withAdj = adjRes.map(([tkns, adj]) =>
|
const withAdj = adjRes.map(([tkns, adj]) =>
|
||||||
parseNoun(tkns, lookup, [...adjectives, adj])
|
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
|
||||||
);
|
);
|
||||||
|
const [first, ...rest] = tokens;
|
||||||
const success: ReturnType<typeof parseNoun>["success"] = [];
|
const success: ReturnType<typeof parseNoun>["success"] = [];
|
||||||
const errors: string[] = [];
|
const errors: string[] = [];
|
||||||
// const possesor =
|
|
||||||
// first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined;
|
|
||||||
|
|
||||||
const searches = getInflectionQueries(first.s, true);
|
const searches = getInflectionQueries(first.s, true);
|
||||||
|
|
||||||
|
@ -52,8 +114,13 @@ export function parseNoun(
|
||||||
details.forEach((deets) => {
|
details.forEach((deets) => {
|
||||||
const fittingEntries = nounEntries.filter(deets.predicate);
|
const fittingEntries = nounEntries.filter(deets.predicate);
|
||||||
fittingEntries.forEach((entry) => {
|
fittingEntries.forEach((entry) => {
|
||||||
if (isUnisexNounEntry(entry)) {
|
const genders: T.Gender[] = isUnisexNounEntry(entry)
|
||||||
deets.gender.forEach((gender) => {
|
? ["masc", "fem"]
|
||||||
|
: isMascNounEntry(entry)
|
||||||
|
? ["masc"]
|
||||||
|
: ["fem"];
|
||||||
|
deets.gender.forEach((gender) => {
|
||||||
|
if (genders.includes(gender)) {
|
||||||
deets.inflection.forEach((inf) => {
|
deets.inflection.forEach((inf) => {
|
||||||
const { ok, error } = adjsMatch(
|
const { ok, error } = adjsMatch(
|
||||||
adjectives,
|
adjectives,
|
||||||
|
@ -78,6 +145,17 @@ export function parseNoun(
|
||||||
? number
|
? number
|
||||||
: selection.number,
|
: selection.number,
|
||||||
adjectives: adjectives.map((a) => a.selection),
|
adjectives: adjectives.map((a) => a.selection),
|
||||||
|
// TODO: could be nicer to validate that the possesor is inflected before
|
||||||
|
// and just pass in the selection
|
||||||
|
possesor: possesor
|
||||||
|
? {
|
||||||
|
shrunken: false,
|
||||||
|
np: {
|
||||||
|
type: "NP",
|
||||||
|
selection: possesor,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
: undefined,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
@ -89,74 +167,8 @@ export function parseNoun(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
}
|
||||||
} else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
|
});
|
||||||
deets.inflection.forEach((inf) => {
|
|
||||||
const { ok, error } = adjsMatch(
|
|
||||||
adjectives,
|
|
||||||
"masc",
|
|
||||||
inf,
|
|
||||||
deets.plural
|
|
||||||
);
|
|
||||||
if (ok) {
|
|
||||||
convertInflection(inf, entry, "masc", deets.plural).forEach(
|
|
||||||
({ inflected, number }) => {
|
|
||||||
const selection = makeNounSelection(entry, undefined);
|
|
||||||
success.push([
|
|
||||||
rest,
|
|
||||||
{
|
|
||||||
inflected,
|
|
||||||
selection: {
|
|
||||||
...selection,
|
|
||||||
number: selection.numberCanChange
|
|
||||||
? number
|
|
||||||
: selection.number,
|
|
||||||
adjectives: adjectives.map((a) => a.selection),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
error.forEach((e) => {
|
|
||||||
errors.push(e);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
|
|
||||||
deets.inflection.forEach((inf) => {
|
|
||||||
const { ok, error } = adjsMatch(
|
|
||||||
adjectives,
|
|
||||||
"fem",
|
|
||||||
inf,
|
|
||||||
deets.plural
|
|
||||||
);
|
|
||||||
if (ok) {
|
|
||||||
convertInflection(inf, entry, "fem", deets.plural).forEach(
|
|
||||||
({ inflected, number }) => {
|
|
||||||
const selection = makeNounSelection(entry, undefined);
|
|
||||||
success.push([
|
|
||||||
rest,
|
|
||||||
{
|
|
||||||
inflected,
|
|
||||||
selection: {
|
|
||||||
...selection,
|
|
||||||
number: selection.numberCanChange
|
|
||||||
? number
|
|
||||||
: selection.number,
|
|
||||||
adjectives: adjectives.map((a) => a.selection),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
error.forEach((e) => {
|
|
||||||
errors.push(e);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -167,7 +179,7 @@ export function parseNoun(
|
||||||
}
|
}
|
||||||
|
|
||||||
function adjsMatch(
|
function adjsMatch(
|
||||||
adjectives: Parameters<typeof parseNoun>[2],
|
adjectives: Parameters<typeof parseNounAfterPossesor>[3],
|
||||||
gender: T.Gender,
|
gender: T.Gender,
|
||||||
inf: 0 | 1 | 2,
|
inf: 0 | 1 | 2,
|
||||||
plural: boolean | undefined
|
plural: boolean | undefined
|
||||||
|
|
|
@ -12,7 +12,7 @@ export function parsePhrase(
|
||||||
} {
|
} {
|
||||||
const adjsRes = parseAdjective(s, lookup);
|
const adjsRes = parseAdjective(s, lookup);
|
||||||
const prnsRes = parsePronoun(s);
|
const prnsRes = parsePronoun(s);
|
||||||
const nounsRes = parseNoun(s, lookup, []);
|
const nounsRes = parseNoun(s, lookup, undefined);
|
||||||
|
|
||||||
const correct = [...adjsRes, ...prnsRes, ...nounsRes.success]
|
const correct = [...adjsRes, ...prnsRes, ...nounsRes.success]
|
||||||
.filter(([tkns]) => tkns.length === 0)
|
.filter(([tkns]) => tkns.length === 0)
|
||||||
|
|
Loading…
Reference in New Issue