possessives sort of working

This commit is contained in:
adueck 2023-08-01 20:19:03 +04:00
parent f0624252bc
commit a084433064
4 changed files with 117 additions and 90 deletions

View File

@ -1,5 +1,4 @@
import * as T from "../../../types"; import * as T from "../../../types";
import { endsInConsonant } from "../p-text-helpers";
import { import {
isPattern1Entry, isPattern1Entry,
isPattern2Entry, isPattern2Entry,
@ -110,7 +109,10 @@ export function getInflectionQueries(
}, },
}); });
} }
if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) { if (
s.endsWith("ان") &&
!["ا", "و"].includes(s.charAt(s.length - 3) || "")
) {
queries.push({ queries.push({
search: { p: s.slice(0, -2) }, search: { p: s.slice(0, -2) },
details: { details: {
@ -127,7 +129,10 @@ export function getInflectionQueries(
}, },
}); });
} }
if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) { if (
s.endsWith("انې") &&
!["ا", "و"].includes(s.charAt(s.length - 4) || "")
) {
queries.push({ queries.push({
search: { p: s.slice(0, -3) }, search: { p: s.slice(0, -3) },
details: { details: {
@ -144,7 +149,10 @@ export function getInflectionQueries(
}, },
}); });
} }
if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) { if (
s.endsWith("ګان") &&
["ا", "و"].includes(s.charAt(s.length - 4) || "")
) {
queries.push({ queries.push({
search: { p: s.slice(0, -3) }, search: { p: s.slice(0, -3) },
details: { details: {
@ -160,7 +168,10 @@ export function getInflectionQueries(
}, },
}); });
} }
if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) { if (
s.endsWith("ګانې") &&
["ا", "و"].includes(s.charAt(s.length - 5) || "")
) {
queries.push({ queries.push({
search: { p: s.slice(0, -4) }, search: { p: s.slice(0, -4) },
details: { details: {
@ -176,7 +187,7 @@ export function getInflectionQueries(
}, },
}); });
} }
if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) { if (s.endsWith("وې") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
queries.push({ queries.push({
search: { p: s.slice(0, -2) }, search: { p: s.slice(0, -2) },
details: { details: {
@ -192,7 +203,7 @@ export function getInflectionQueries(
}, },
}); });
} }
if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) { if (s.endsWith("وو") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
queries.push({ queries.push({
search: { p: s.slice(0, -2) }, search: { p: s.slice(0, -2) },
details: { details: {
@ -208,7 +219,10 @@ export function getInflectionQueries(
}, },
}); });
} }
if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) { if (
s.endsWith("ګانو") &&
["ا", "و"].includes(s.charAt(s.length - 5) || "")
) {
queries.push({ queries.push({
search: { p: s.slice(0, -4) }, search: { p: s.slice(0, -4) },
details: { details: {

View File

@ -1301,7 +1301,7 @@ describe("parsing nouns", () => {
test(category, () => { test(category, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
const { success } = parseNoun(tokens, lookup, []); const { success } = parseNoun(tokens, lookup, undefined);
const res = success.map(([tkns, r]) => r); const res = success.map(([tkns, r]) => r);
expect(res).toEqual(output); expect(res).toEqual(output);
}); });
@ -1408,7 +1408,8 @@ const adjsTests: {
}, },
], ],
}, },
// TODO: WHY DOES ADDING زړو break this ??? // TODO: testing issue with the parser returning multiple options needs
// to be worked out to test double adjectives
{ {
input: "غټو کورونو", input: "غټو کورونو",
output: [ output: [
@ -1435,9 +1436,9 @@ describe("parsing nouns with adjectives", () => {
test(category, () => { test(category, () => {
cases.forEach(({ input, output }) => { cases.forEach(({ input, output }) => {
const tokens = tokenizer(input); const tokens = tokenizer(input);
expect(parseNoun(tokens, lookup, []).success.map((x) => x[1])).toEqual( expect(
output parseNoun(tokens, lookup, undefined).success.map((x) => x[1])
); ).toEqual(output);
}); });
}); });
}); });

View File

@ -2,7 +2,6 @@ import * as T from "../../../types";
import { getInflectionPattern } from "../inflection-pattern"; import { getInflectionPattern } from "../inflection-pattern";
import { makeNounSelection } from "../phrase-building/make-selections"; import { makeNounSelection } from "../phrase-building/make-selections";
import { import {
isFemNounEntry,
isMascNounEntry, isMascNounEntry,
isNounEntry, isNounEntry,
isPluralNounEntry, isPluralNounEntry,
@ -18,6 +17,71 @@ import { parseAdjective } from "./parse-adjective";
/**
 * Parses a noun from the front of `tokens`, optionally preceded by a
 * possessor introduced by the token "د".
 *
 * Behavior:
 * - Empty `tokens` yields no successes and no errors.
 * - If the first token is "د", the remaining tokens are first parsed (by a
 *   recursive call) as the possessor. Each possessor parse is then followed by
 *   another recursive call on the tokens left over, with that possessor's
 *   selection passed along as `prevPossesor`.
 * - Otherwise parsing is delegated to `parseNounAfterPossesor` with an empty
 *   list of adjectives.
 *
 * @param tokens - the tokens still to be consumed
 * @param lookup - dictionary lookup used to find candidate entries
 * @param prevPossesor - selection of a possessor that was already parsed
 *   before these tokens, or `undefined` if there was none
 * @returns `success`: every possible parse as a pair of
 *   [remaining tokens, { inflected, selection }]; `errors`: messages collected
 *   while parsing
 */
export function parseNoun( export function parseNoun(
tokens: Readonly<T.Token[]>, tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[], lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
prevPossesor: T.NounSelection | undefined
): {
success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
errors: string[];
} {
// nothing left to parse
if (tokens.length === 0) {
return {
success: [],
errors: [],
};
}
const [first, ...rest] = tokens;
// a leading "د" means the tokens after it are parsed as a possessor;
// that inner parse starts without any previous possessor
const possesor =
first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
if (possesor) {
// NOTE(review): `possesor` is always truthy inside this branch, so the
// `[[tokens, undefined]]` alternative below is never taken.
// NOTE(review): only `possesor.success` is used here; `possesor.errors`
// is not carried into the returned errors - confirm that is intended.
const runsAfterPossesor: [
Readonly<T.Token[]>,
{ inflected: boolean; selection: T.NounSelection } | undefined
][] = possesor ? [...possesor.success] : [[tokens, undefined]];
// could be a case for a monad ??
// fold over every possessor parse, accumulating the parses of what follows
return runsAfterPossesor.reduce<ReturnType<typeof parseNoun>>(
(acc, [tokens, possesor]) => {
// an uninflected possessor contributes an error and no successes
if (possesor?.inflected === false) {
return {
success: [...acc.success],
errors: [...acc.errors, "possesor should be inflected"],
};
}
// parse the tokens left after the possessor; the possessor's selection
// is handed down with its own `possesor` field set from `prevPossesor`
// (wrapped as a non-shrunken NP), or `undefined` when there was none
const { success, errors } = parseNoun(
tokens,
lookup,
possesor
? {
...possesor.selection,
possesor: prevPossesor
? {
shrunken: false,
np: {
type: "NP",
selection: prevPossesor,
},
}
: undefined,
}
: undefined
);
return {
success: [...acc.success, ...success],
errors: [...acc.errors, ...errors],
};
},
{ success: [], errors: [] }
);
} else {
// no possessor marker: parse adjectives + noun, starting with no adjectives
return parseNounAfterPossesor(tokens, lookup, prevPossesor, []);
}
}
// TODO: when there is a possessor, parse a full NP rather than just a noun;
// create an NP parsing function for that
function parseNounAfterPossesor(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
possesor: T.NounSelection | undefined,
adjectives: { adjectives: {
inflection: (0 | 1 | 2)[]; inflection: (0 | 1 | 2)[];
gender: T.Gender[]; gender: T.Gender[];
@ -34,16 +98,14 @@ export function parseNoun(
errors: [], errors: [],
}; };
} }
const [first, ...rest] = tokens;
// TODO: add recognition of او between adjectives // TODO: add recognition of او between adjectives
const adjRes = parseAdjective(tokens, lookup); const adjRes = parseAdjective(tokens, lookup);
const withAdj = adjRes.map(([tkns, adj]) => const withAdj = adjRes.map(([tkns, adj]) =>
parseNoun(tkns, lookup, [...adjectives, adj]) parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
); );
const [first, ...rest] = tokens;
const success: ReturnType<typeof parseNoun>["success"] = []; const success: ReturnType<typeof parseNoun>["success"] = [];
const errors: string[] = []; const errors: string[] = [];
// const possesor =
// first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined;
const searches = getInflectionQueries(first.s, true); const searches = getInflectionQueries(first.s, true);
@ -52,8 +114,13 @@ export function parseNoun(
details.forEach((deets) => { details.forEach((deets) => {
const fittingEntries = nounEntries.filter(deets.predicate); const fittingEntries = nounEntries.filter(deets.predicate);
fittingEntries.forEach((entry) => { fittingEntries.forEach((entry) => {
if (isUnisexNounEntry(entry)) { const genders: T.Gender[] = isUnisexNounEntry(entry)
deets.gender.forEach((gender) => { ? ["masc", "fem"]
: isMascNounEntry(entry)
? ["masc"]
: ["fem"];
deets.gender.forEach((gender) => {
if (genders.includes(gender)) {
deets.inflection.forEach((inf) => { deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch( const { ok, error } = adjsMatch(
adjectives, adjectives,
@ -78,6 +145,17 @@ export function parseNoun(
? number ? number
: selection.number, : selection.number,
adjectives: adjectives.map((a) => a.selection), adjectives: adjectives.map((a) => a.selection),
// TODO: could be nicer to validate that the possesor is inflected before
// and just pass in the selection
possesor: possesor
? {
shrunken: false,
np: {
type: "NP",
selection: possesor,
},
}
: undefined,
}, },
}, },
]); ]);
@ -89,74 +167,8 @@ export function parseNoun(
}); });
} }
}); });
}); }
} else if (isMascNounEntry(entry) && deets.gender.includes("masc")) { });
deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
adjectives,
"masc",
inf,
deets.plural
);
if (ok) {
convertInflection(inf, entry, "masc", deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([
rest,
{
inflected,
selection: {
...selection,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection),
},
},
]);
}
);
} else {
error.forEach((e) => {
errors.push(e);
});
}
});
} else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
deets.inflection.forEach((inf) => {
const { ok, error } = adjsMatch(
adjectives,
"fem",
inf,
deets.plural
);
if (ok) {
convertInflection(inf, entry, "fem", deets.plural).forEach(
({ inflected, number }) => {
const selection = makeNounSelection(entry, undefined);
success.push([
rest,
{
inflected,
selection: {
...selection,
number: selection.numberCanChange
? number
: selection.number,
adjectives: adjectives.map((a) => a.selection),
},
},
]);
}
);
} else {
error.forEach((e) => {
errors.push(e);
});
}
});
}
}); });
}); });
}); });
@ -167,7 +179,7 @@ export function parseNoun(
} }
function adjsMatch( function adjsMatch(
adjectives: Parameters<typeof parseNoun>[2], adjectives: Parameters<typeof parseNounAfterPossesor>[3],
gender: T.Gender, gender: T.Gender,
inf: 0 | 1 | 2, inf: 0 | 1 | 2,
plural: boolean | undefined plural: boolean | undefined

View File

@ -12,7 +12,7 @@ export function parsePhrase(
} { } {
const adjsRes = parseAdjective(s, lookup); const adjsRes = parseAdjective(s, lookup);
const prnsRes = parsePronoun(s); const prnsRes = parsePronoun(s);
const nounsRes = parseNoun(s, lookup, []); const nounsRes = parseNoun(s, lookup, undefined);
const correct = [...adjsRes, ...prnsRes, ...nounsRes.success] const correct = [...adjsRes, ...prnsRes, ...nounsRes.success]
.filter(([tkns]) => tkns.length === 0) .filter(([tkns]) => tkns.length === 0)