possessives sort of working

This commit is contained in:
adueck 2023-08-01 20:19:03 +04:00
parent f0624252bc
commit a084433064
4 changed files with 117 additions and 90 deletions

View File

@ -1,5 +1,4 @@
import * as T from "../../../types";
import { endsInConsonant } from "../p-text-helpers";
import {
isPattern1Entry,
isPattern2Entry,
@ -110,7 +109,10 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) {
if (
s.endsWith("ان") &&
!["ا", "و"].includes(s.charAt(s.length - 3) || "")
) {
queries.push({
search: { p: s.slice(0, -2) },
details: {
@ -127,7 +129,10 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) {
if (
s.endsWith("انې") &&
!["ا", "و"].includes(s.charAt(s.length - 4) || "")
) {
queries.push({
search: { p: s.slice(0, -3) },
details: {
@ -144,7 +149,10 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) {
if (
s.endsWith("ګان") &&
["ا", "و"].includes(s.charAt(s.length - 4) || "")
) {
queries.push({
search: { p: s.slice(0, -3) },
details: {
@ -160,7 +168,10 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) {
if (
s.endsWith("ګانې") &&
["ا", "و"].includes(s.charAt(s.length - 5) || "")
) {
queries.push({
search: { p: s.slice(0, -4) },
details: {
@ -176,7 +187,7 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) {
if (s.endsWith("وې") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
queries.push({
search: { p: s.slice(0, -2) },
details: {
@ -192,7 +203,7 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) {
if (s.endsWith("وو") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
queries.push({
search: { p: s.slice(0, -2) },
details: {
@ -208,7 +219,10 @@ export function getInflectionQueries(
},
});
}
if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) {
if (
s.endsWith("ګانو") &&
["ا", "و"].includes(s.charAt(s.length - 5) || "")
) {
queries.push({
search: { p: s.slice(0, -4) },
details: {

View File

@ -1301,7 +1301,7 @@ describe("parsing nouns", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const { success } = parseNoun(tokens, lookup, []);
const { success } = parseNoun(tokens, lookup, undefined);
const res = success.map(([tkns, r]) => r);
expect(res).toEqual(output);
});
@ -1408,7 +1408,8 @@ const adjsTests: {
},
],
},
// TODO: WHY DOES ADDING زړو break this ???
// TODO: testing issue with the parser returning multiple options needs
// to be worked out to test double adjectives
{
input: "غټو کورونو",
output: [
@ -1435,9 +1436,9 @@ describe("parsing nouns with adjectives", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
expect(parseNoun(tokens, lookup, []).success.map((x) => x[1])).toEqual(
output
);
expect(
parseNoun(tokens, lookup, undefined).success.map((x) => x[1])
).toEqual(output);
});
});
});

View File

@ -2,7 +2,6 @@ import * as T from "../../../types";
import { getInflectionPattern } from "../inflection-pattern";
import { makeNounSelection } from "../phrase-building/make-selections";
import {
isFemNounEntry,
isMascNounEntry,
isNounEntry,
isPluralNounEntry,
@ -18,6 +17,71 @@ import { parseAdjective } from "./parse-adjective";
/**
 * Parses a noun from the front of `tokens`, optionally preceded by a
 * possessor phrase introduced by the token "د".
 *
 * When the first token is "د", the tokens after it are parsed as the
 * possessor noun. Parsing then continues on whatever tokens that parse left
 * over, with the possessor handed down as `prevPossesor` so it ends up
 * attached to the following noun. Every possible possessor parse is followed,
 * and the results of all of them are merged.
 *
 * @param tokens - the tokens still to be parsed
 * @param lookup - dictionary lookup used to find matching entries
 * @param prevPossesor - a possessor already parsed to the left of `tokens`,
 *   or `undefined` when there is none
 * @returns `success`: each possible parse paired with the tokens it left
 *   unconsumed; `errors`: the messages gathered along the way
 */
export function parseNoun(
  tokens: Readonly<T.Token[]>,
  lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
  prevPossesor: T.NounSelection | undefined
): {
  success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
  errors: string[];
} {
  if (tokens.length === 0) {
    return {
      success: [],
      errors: [],
    };
  }
  const [first, ...rest] = tokens;
  if (first.s !== "د") {
    // no possessor marker here, so parse the (adjectives +) noun directly
    return parseNounAfterPossesor(tokens, lookup, prevPossesor, []);
  }
  const possesorRes = parseNoun(rest, lookup, undefined);
  // could be a case for a monad ??
  return possesorRes.success.reduce<ReturnType<typeof parseNoun>>(
    (acc, [remaining, possesor]) => {
      if (!possesor.inflected) {
        return {
          success: acc.success,
          errors: [...acc.errors, "possesor should be inflected"],
        };
      }
      // Chain any earlier possessor underneath this one.
      // NOTE: a possessor already present on `possesor.selection` is
      // replaced by this assignment.
      const possesorSelection: T.NounSelection = {
        ...possesor.selection,
        possesor: prevPossesor
          ? {
              shrunken: false,
              np: {
                type: "NP",
                selection: prevPossesor,
              },
            }
          : undefined,
      };
      const { success, errors } = parseNoun(
        remaining,
        lookup,
        possesorSelection
      );
      return {
        success: [...acc.success, ...success],
        errors: [...acc.errors, ...errors],
      };
    },
    // start from the possessor parse's own errors so they are not lost
    { success: [], errors: [...possesorRes.errors] }
  );
}
// TODO: create an NP parsing function and use it for the possessor,
// so that the possessor is parsed as a full NP and not just a noun
function parseNounAfterPossesor(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
possesor: T.NounSelection | undefined,
adjectives: {
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
@ -34,16 +98,14 @@ export function parseNoun(
errors: [],
};
}
const [first, ...rest] = tokens;
// TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup);
const withAdj = adjRes.map(([tkns, adj]) =>
parseNoun(tkns, lookup, [...adjectives, adj])
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
);
const [first, ...rest] = tokens;
const success: ReturnType<typeof parseNoun>["success"] = [];
const errors: string[] = [];
// const possesor =
// first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined;
const searches = getInflectionQueries(first.s, true);
@ -52,8 +114,13 @@ export function parseNoun(
details.forEach((deets) => {
const fittingEntries = nounEntries.filter(deets.predicate);
fittingEntries.forEach((entry) => {
if (isUnisexNounEntry(entry)) {
deets.gender.forEach((gender) => {
const genders: T.Gender[] = isUnisexNounEntry(entry)
? ["masc", "fem"]
: isMascNounEntry(entry)
? ["masc"]
: ["fem"];
deets.gender.forEach((gender) => {
if (genders.includes(gender)) {
deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
adjectives,
@ -78,6 +145,17 @@ export function parseNoun(
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection),
// TODO: could be nicer to validate that the possesor is inflected before
// and just pass in the selection
possesor: possesor
? {
shrunken: false,
np: {
type: "NP",
selection: possesor,
},
}
: undefined,
},
},
]);
@ -89,74 +167,8 @@ export function parseNoun(
});
}
});
});
} else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
adjectives,
"masc",
inf,
deets.plural
);
if (ok) {
convertInflection(inf, entry, "masc", deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([
rest,
{
inflected,
selection: {
...selection,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection),
},
},
]);
}
);
} else {
error.forEach((e) => {
errors.push(e);
});
}
});
} else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
adjectives,
"fem",
inf,
deets.plural
);
if (ok) {
convertInflection(inf, entry, "fem", deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([
rest,
{
inflected,
selection: {
...selection,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection),
},
},
]);
}
);
} else {
error.forEach((e) => {
errors.push(e);
});
}
});
}
}
});
});
});
});
@ -167,7 +179,7 @@ export function parseNoun(
}
function adjsMatch(
adjectives: Parameters<typeof parseNoun>[2],
adjectives: Parameters<typeof parseNounAfterPossesor>[3],
gender: T.Gender,
inf: 0 | 1 | 2,
plural: boolean | undefined

View File

@ -12,7 +12,7 @@ export function parsePhrase(
} {
const adjsRes = parseAdjective(s, lookup);
const prnsRes = parsePronoun(s);
const nounsRes = parseNoun(s, lookup, []);
const nounsRes = parseNoun(s, lookup, undefined);
const correct = [...adjsRes, ...prnsRes, ...nounsRes.success]
.filter(([tkns]) => tkns.length === 0)