possesives sort of working
This commit is contained in:
parent
f0624252bc
commit
a084433064
|
@ -1,5 +1,4 @@
|
|||
import * as T from "../../../types";
|
||||
import { endsInConsonant } from "../p-text-helpers";
|
||||
import {
|
||||
isPattern1Entry,
|
||||
isPattern2Entry,
|
||||
|
@ -110,7 +109,10 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("ان") && !["ا", "و"].includes(s.at(-3) || "")) {
|
||||
if (
|
||||
s.endsWith("ان") &&
|
||||
!["ا", "و"].includes(s.charAt(s.length - 3) || "")
|
||||
) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -2) },
|
||||
details: {
|
||||
|
@ -127,7 +129,10 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("انې") && !["ا", "و"].includes(s.at(-4) || "")) {
|
||||
if (
|
||||
s.endsWith("انې") &&
|
||||
!["ا", "و"].includes(s.charAt(s.length - 4) || "")
|
||||
) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -3) },
|
||||
details: {
|
||||
|
@ -144,7 +149,10 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("ګان") && ["ا", "و"].includes(s.at(-4) || "")) {
|
||||
if (
|
||||
s.endsWith("ګان") &&
|
||||
["ا", "و"].includes(s.charAt(s.length - 4) || "")
|
||||
) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -3) },
|
||||
details: {
|
||||
|
@ -160,7 +168,10 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("ګانې") && ["ا", "و"].includes(s.at(-5) || "")) {
|
||||
if (
|
||||
s.endsWith("ګانې") &&
|
||||
["ا", "و"].includes(s.charAt(s.length - 5) || "")
|
||||
) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -4) },
|
||||
details: {
|
||||
|
@ -176,7 +187,7 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("وې") && ["ا", "و"].includes(s.at(-3) || "")) {
|
||||
if (s.endsWith("وې") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -2) },
|
||||
details: {
|
||||
|
@ -192,7 +203,7 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("وو") && ["ا", "و"].includes(s.at(-3) || "")) {
|
||||
if (s.endsWith("وو") && ["ا", "و"].includes(s.charAt(s.length - 3) || "")) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -2) },
|
||||
details: {
|
||||
|
@ -208,7 +219,10 @@ export function getInflectionQueries(
|
|||
},
|
||||
});
|
||||
}
|
||||
if (s.endsWith("ګانو") && ["ا", "و"].includes(s.at(-5) || "")) {
|
||||
if (
|
||||
s.endsWith("ګانو") &&
|
||||
["ا", "و"].includes(s.charAt(s.length - 5) || "")
|
||||
) {
|
||||
queries.push({
|
||||
search: { p: s.slice(0, -4) },
|
||||
details: {
|
||||
|
|
|
@ -1301,7 +1301,7 @@ describe("parsing nouns", () => {
|
|||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
const { success } = parseNoun(tokens, lookup, []);
|
||||
const { success } = parseNoun(tokens, lookup, undefined);
|
||||
const res = success.map(([tkns, r]) => r);
|
||||
expect(res).toEqual(output);
|
||||
});
|
||||
|
@ -1408,7 +1408,8 @@ const adjsTests: {
|
|||
},
|
||||
],
|
||||
},
|
||||
// TODO: WHY DOES ADDING زړو break this ???
|
||||
// TODO: testing issue with the parser returning multiple options needs
|
||||
// to be worked out to test double adjectives
|
||||
{
|
||||
input: "غټو کورونو",
|
||||
output: [
|
||||
|
@ -1435,9 +1436,9 @@ describe("parsing nouns with adjectives", () => {
|
|||
test(category, () => {
|
||||
cases.forEach(({ input, output }) => {
|
||||
const tokens = tokenizer(input);
|
||||
expect(parseNoun(tokens, lookup, []).success.map((x) => x[1])).toEqual(
|
||||
output
|
||||
);
|
||||
expect(
|
||||
parseNoun(tokens, lookup, undefined).success.map((x) => x[1])
|
||||
).toEqual(output);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -2,7 +2,6 @@ import * as T from "../../../types";
|
|||
import { getInflectionPattern } from "../inflection-pattern";
|
||||
import { makeNounSelection } from "../phrase-building/make-selections";
|
||||
import {
|
||||
isFemNounEntry,
|
||||
isMascNounEntry,
|
||||
isNounEntry,
|
||||
isPluralNounEntry,
|
||||
|
@ -18,6 +17,71 @@ import { parseAdjective } from "./parse-adjective";
|
|||
export function parseNoun(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||
prevPossesor: T.NounSelection | undefined
|
||||
): {
|
||||
success: [T.Token[], { inflected: boolean; selection: T.NounSelection }][];
|
||||
errors: string[];
|
||||
} {
|
||||
if (tokens.length === 0) {
|
||||
return {
|
||||
success: [],
|
||||
errors: [],
|
||||
};
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
const possesor =
|
||||
first.s === "د" ? parseNoun(rest, lookup, undefined) : undefined;
|
||||
if (possesor) {
|
||||
const runsAfterPossesor: [
|
||||
Readonly<T.Token[]>,
|
||||
{ inflected: boolean; selection: T.NounSelection } | undefined
|
||||
][] = possesor ? [...possesor.success] : [[tokens, undefined]];
|
||||
// could be a case for a monad ??
|
||||
return runsAfterPossesor.reduce<ReturnType<typeof parseNoun>>(
|
||||
(acc, [tokens, possesor]) => {
|
||||
if (possesor?.inflected === false) {
|
||||
return {
|
||||
success: [...acc.success],
|
||||
errors: [...acc.errors, "possesor should be inflected"],
|
||||
};
|
||||
}
|
||||
const { success, errors } = parseNoun(
|
||||
tokens,
|
||||
lookup,
|
||||
possesor
|
||||
? {
|
||||
...possesor.selection,
|
||||
possesor: prevPossesor
|
||||
? {
|
||||
shrunken: false,
|
||||
np: {
|
||||
type: "NP",
|
||||
selection: prevPossesor,
|
||||
},
|
||||
}
|
||||
: undefined,
|
||||
}
|
||||
: undefined
|
||||
);
|
||||
return {
|
||||
success: [...acc.success, ...success],
|
||||
errors: [...acc.errors, ...errors],
|
||||
};
|
||||
},
|
||||
{ success: [], errors: [] }
|
||||
);
|
||||
} else {
|
||||
return parseNounAfterPossesor(tokens, lookup, prevPossesor, []);
|
||||
}
|
||||
}
|
||||
|
||||
// create NP parsing function for that
|
||||
// TODO with possesor, parse an NP not a noun
|
||||
|
||||
function parseNounAfterPossesor(
|
||||
tokens: Readonly<T.Token[]>,
|
||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
||||
possesor: T.NounSelection | undefined,
|
||||
adjectives: {
|
||||
inflection: (0 | 1 | 2)[];
|
||||
gender: T.Gender[];
|
||||
|
@ -34,16 +98,14 @@ export function parseNoun(
|
|||
errors: [],
|
||||
};
|
||||
}
|
||||
const [first, ...rest] = tokens;
|
||||
// TODO: add recognition of او between adjectives
|
||||
const adjRes = parseAdjective(tokens, lookup);
|
||||
const withAdj = adjRes.map(([tkns, adj]) =>
|
||||
parseNoun(tkns, lookup, [...adjectives, adj])
|
||||
parseNounAfterPossesor(tkns, lookup, possesor, [...adjectives, adj])
|
||||
);
|
||||
const [first, ...rest] = tokens;
|
||||
const success: ReturnType<typeof parseNoun>["success"] = [];
|
||||
const errors: string[] = [];
|
||||
// const possesor =
|
||||
// first === "د" ? parseNoun(rest, lookup, adjectives).success : undefined;
|
||||
|
||||
const searches = getInflectionQueries(first.s, true);
|
||||
|
||||
|
@ -52,8 +114,13 @@ export function parseNoun(
|
|||
details.forEach((deets) => {
|
||||
const fittingEntries = nounEntries.filter(deets.predicate);
|
||||
fittingEntries.forEach((entry) => {
|
||||
if (isUnisexNounEntry(entry)) {
|
||||
const genders: T.Gender[] = isUnisexNounEntry(entry)
|
||||
? ["masc", "fem"]
|
||||
: isMascNounEntry(entry)
|
||||
? ["masc"]
|
||||
: ["fem"];
|
||||
deets.gender.forEach((gender) => {
|
||||
if (genders.includes(gender)) {
|
||||
deets.inflection.forEach((inf) => {
|
||||
const { ok, error } = adjsMatch(
|
||||
adjectives,
|
||||
|
@ -78,73 +145,17 @@ export function parseNoun(
|
|||
? number
|
||||
: selection.number,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
// TODO: could be nicer to validate that the possesor is inflected before
|
||||
// and just pass in the selection
|
||||
possesor: possesor
|
||||
? {
|
||||
shrunken: false,
|
||||
np: {
|
||||
type: "NP",
|
||||
selection: possesor,
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
);
|
||||
} else {
|
||||
error.forEach((e) => {
|
||||
errors.push(e);
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
} else if (isMascNounEntry(entry) && deets.gender.includes("masc")) {
|
||||
deets.inflection.forEach((inf) => {
|
||||
const { ok, error } = adjsMatch(
|
||||
adjectives,
|
||||
"masc",
|
||||
inf,
|
||||
deets.plural
|
||||
);
|
||||
if (ok) {
|
||||
convertInflection(inf, entry, "masc", deets.plural).forEach(
|
||||
({ inflected, number }) => {
|
||||
const selection = makeNounSelection(entry, undefined);
|
||||
success.push([
|
||||
rest,
|
||||
{
|
||||
inflected,
|
||||
selection: {
|
||||
...selection,
|
||||
number: selection.numberCanChange
|
||||
? number
|
||||
: selection.number,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
},
|
||||
},
|
||||
]);
|
||||
}
|
||||
);
|
||||
} else {
|
||||
error.forEach((e) => {
|
||||
errors.push(e);
|
||||
});
|
||||
}
|
||||
});
|
||||
} else if (isFemNounEntry(entry) && deets.gender.includes("fem")) {
|
||||
deets.inflection.forEach((inf) => {
|
||||
const { ok, error } = adjsMatch(
|
||||
adjectives,
|
||||
"fem",
|
||||
inf,
|
||||
deets.plural
|
||||
);
|
||||
if (ok) {
|
||||
convertInflection(inf, entry, "fem", deets.plural).forEach(
|
||||
({ inflected, number }) => {
|
||||
const selection = makeNounSelection(entry, undefined);
|
||||
success.push([
|
||||
rest,
|
||||
{
|
||||
inflected,
|
||||
selection: {
|
||||
...selection,
|
||||
number: selection.numberCanChange
|
||||
? number
|
||||
: selection.number,
|
||||
adjectives: adjectives.map((a) => a.selection),
|
||||
: undefined,
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
@ -160,6 +171,7 @@ export function parseNoun(
|
|||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
return {
|
||||
success: [...withAdj.map((x) => x.success).flat(), ...success],
|
||||
errors: [...withAdj.map((x) => x.errors).flat(), ...errors],
|
||||
|
@ -167,7 +179,7 @@ export function parseNoun(
|
|||
}
|
||||
|
||||
function adjsMatch(
|
||||
adjectives: Parameters<typeof parseNoun>[2],
|
||||
adjectives: Parameters<typeof parseNounAfterPossesor>[3],
|
||||
gender: T.Gender,
|
||||
inf: 0 | 1 | 2,
|
||||
plural: boolean | undefined
|
||||
|
|
|
@ -12,7 +12,7 @@ export function parsePhrase(
|
|||
} {
|
||||
const adjsRes = parseAdjective(s, lookup);
|
||||
const prnsRes = parsePronoun(s);
|
||||
const nounsRes = parseNoun(s, lookup, []);
|
||||
const nounsRes = parseNoun(s, lookup, undefined);
|
||||
|
||||
const correct = [...adjsRes, ...prnsRes, ...nounsRes.success]
|
||||
.filter(([tkns]) => tkns.length === 0)
|
||||
|
|
Loading…
Reference in New Issue