fix yoR issue

This commit is contained in:
adueck 2023-08-29 17:20:52 +04:00
parent 56890cf4b9
commit 035e59ae19
21 changed files with 200 additions and 116 deletions

View File

@ -130,20 +130,26 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
) : "verb" in res ? (
(() => {
const rendered = renderVP(res);
const compiled = compileVP(rendered, res.form);
return (
<div>
<CompiledPTextDisplay compiled={compiled} opts={opts} />
{compiled.e && (
<div className={`text-muted mt-2 text-center`}>
{compiled.e.map((e, i) => (
<div key={i}>{e}</div>
))}
</div>
)}
</div>
);
try {
const rendered = renderVP(res);
const compiled = compileVP(rendered, res.form);
return (
<div>
<CompiledPTextDisplay compiled={compiled} opts={opts} />
{compiled.e && (
<div className={`text-muted mt-2 text-center`}>
{compiled.e.map((e, i) => (
<div key={i}>{e}</div>
))}
</div>
)}
</div>
);
} catch (e) {
console.error(e);
console.log({ res });
return <div>ERROR</div>;
}
})()
) : (
<samp>

View File

@ -87,6 +87,9 @@ export function accentFSylsOnNFromEnd(
if (typeof syls === "string") {
return accentFSylsOnNFromEnd(splitUpSyllables(syls), n);
}
if (syls.length === 0) {
return "";
}
return [
...syls.slice(0, syls.length - (n + 1)), // before accent
accentLetter(syls[syls.length - (n + 1)]), // syllable to be accented
@ -114,6 +117,9 @@ const accentReplacer = [
];
export function accentLetter(s: string): string {
if (!s) {
console.log("will crash", s);
}
return s.replace(/a|ă|e|i|o|u|U/, (match) => {
const r = accentReplacer.find((x) => x.vowel === match);
/* istanbul ignore next */

View File

@ -0,0 +1 @@

View File

@ -7,7 +7,33 @@ import { splitVarients, undoAaXuPattern } from "../p-text-helpers";
import { arraysHaveCommon } from "../misc-helpers";
import { shortVerbEndConsonant } from "./misc";
/** Type of the unified {@link lookup} function, for passing it into the parsers. */
export type LookupFunction = typeof lookup;
/**
 * Single entry point for dictionary lookups used by the parsers.
 *
 * The `type` argument selects which lookup runs:
 * - `"nounAdj"` takes a partial dictionary entry as the query and is
 *   answered by `nounAdjLookup`
 * - `"verb"` takes a string and is answered by `verbLookup`
 * - `"participle"` takes a string and is answered by `participleLookup`
 *
 * @param s - the query: a partial entry for `"nounAdj"`, a string otherwise
 * @param type - which kind of lookup to perform
 * @returns the entries returned by the selected lookup
 * @throws Error if the shape of `s` does not fit the requested `type`
 */
export function lookup(
  s: Partial<T.DictionaryEntry>,
  type: "nounAdj"
): T.DictionaryEntry[];
export function lookup(s: string, type: "verb" | "participle"): T.VerbEntry[];
export function lookup(
  s: string | Partial<T.DictionaryEntry>,
  type: "nounAdj" | "verb" | "participle"
): T.DictionaryEntry[] | T.VerbEntry[] {
  if (type === "nounAdj") {
    // `typeof null` is "object", so null has to be ruled out explicitly;
    // otherwise it would slip past this guard into nounAdjLookup
    if (s === null || typeof s !== "object") {
      throw new Error("invalid query for noun / adj lookup");
    }
    return nounAdjLookup(s);
  }
  // verb and participle lookups only accept string queries
  if (typeof s !== "string") {
    throw new Error("invalid query");
  }
  if (type === "verb") {
    return verbLookup(s);
  }
  return participleLookup(s);
}
function nounAdjLookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
const [key, value] = Object.entries(s)[0];
// TODO: could make this more efficient - merging ppp and app queries?
if (key === "ppp") {
@ -42,7 +68,7 @@ export function shouldCheckTpp(s: string): boolean {
);
}
export function participleLookup(input: string): T.VerbEntry[] {
function participleLookup(input: string): T.VerbEntry[] {
if (input.endsWith("ل")) {
return verbs.filter((e) => e.entry.p === input);
}
@ -59,7 +85,7 @@ export function participleLookup(input: string): T.VerbEntry[] {
return [];
}
export function verbLookup(input: string): T.VerbEntry[] {
function verbLookup(input: string): T.VerbEntry[] {
// TODO:
// only look up forms if there's an ending
// or is third person thing

View File

@ -2,10 +2,11 @@ import * as T from "../../../types";
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
import { isAdjectiveEntry } from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { LookupFunction } from "./lookup";
export function parseAdjective(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
lookup: LookupFunction
): T.ParseResult<{
inflection: (0 | 1 | 2)[];
gender: T.Gender[];
@ -19,7 +20,7 @@ export function parseAdjective(
const [first, ...rest] = tokens;
const queries = getInflectionQueries(first.s, false);
queries.forEach(({ search, details }) => {
const wideMatches = lookup(search).filter(isAdjectiveEntry);
const wideMatches = lookup(search, "nounAdj").filter(isAdjectiveEntry);
details.forEach((deets) => {
const matches = wideMatches.filter(deets.predicate);
matches.forEach((m) => {

View File

@ -1,4 +1,5 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseKidsSection } from "./parse-kids-section";
import { parseNeg } from "./parse-negative";
import { parseNP } from "./parse-np";
@ -8,9 +9,7 @@ import { bindParseResult, returnParseResult } from "./utils";
export function parseBlocks(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
verbLookup: (s: string) => T.VerbEntry[],
participleLookup: (s: string) => T.VerbEntry[],
lookup: LookupFunction,
blocks: T.ParsedBlock[],
kids: T.ParsedKid[]
): T.ParseResult<{
@ -24,9 +23,9 @@ export function parseBlocks(
(b): b is T.ParsedPH => b.type === "PH"
);
const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
const np = prevPh ? [] : parseNP(tokens, lookup, participleLookup);
const np = prevPh ? [] : parseNP(tokens, lookup);
const ph = vbExists || prevPh ? [] : parsePH(tokens);
const vb = parseVerb(tokens, verbLookup);
const vb = parseVerb(tokens, lookup);
const neg = parseNeg(tokens);
const kidsR = parseKidsSection(tokens, []);
const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
@ -50,14 +49,7 @@ export function parseBlocks(
const errors: T.ParseError[] = [];
if (r.type === "kids") {
return {
next: parseBlocks(
tokens,
lookup,
verbLookup,
participleLookup,
blocks,
[...kids, ...r.kids]
),
next: parseBlocks(tokens, lookup, blocks, [...kids, ...r.kids]),
errors:
blocks.length !== 1
? [{ message: "kids' section out of place" }]
@ -78,14 +70,7 @@ export function parseBlocks(
return [];
}
return {
next: parseBlocks(
tokens,
lookup,
verbLookup,
participleLookup,
[...blocks, r],
kids
),
next: parseBlocks(tokens, lookup, [...blocks, r], kids),
errors,
};
});

View File

@ -3,7 +3,7 @@ import {
makeNounSelection,
} from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup, participleLookup, wordQuery } from "./lookup";
import { lookup, wordQuery } from "./lookup";
import { parseNoun } from "./parse-noun";
import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils";
@ -1371,9 +1371,7 @@ describe("parsing nouns", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup, participleLookup).map(
({ body }) => body
);
const res = parseNoun(tokens, lookup).map(({ body }) => body);
expect(res).toEqual(output);
});
});
@ -1505,7 +1503,7 @@ describe("parsing nouns with adjectives", () => {
test(category, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseNoun(tokens, lookup, participleLookup)
const res = parseNoun(tokens, lookup)
.filter(isCompleteResult)
.map(({ body }) => body);
expect(res).toEqual(output);

View File

@ -8,6 +8,7 @@ import {
isUnisexNounEntry,
} from "../type-predicates";
import { getInflectionQueries } from "./inflection-query";
import { LookupFunction } from "./lookup";
import { parseAdjective } from "./parse-adjective";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
@ -16,13 +17,12 @@ type NounResult = { inflected: boolean; selection: T.NounSelection };
export function parseNoun(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
pariticipleLookup: (s: string) => T.VerbEntry[]
lookup: LookupFunction
): T.ParseResult<NounResult>[] {
if (tokens.length === 0) {
return [];
}
const possesor = parsePossesor(tokens, lookup, pariticipleLookup, undefined);
const possesor = parsePossesor(tokens, lookup, undefined);
if (possesor.length) {
return bindParseResult(possesor, (tokens, p) => {
return parseNounAfterPossesor(tokens, lookup, p, []);
@ -33,7 +33,7 @@ export function parseNoun(
function parseNounAfterPossesor(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
lookup: LookupFunction,
possesor: T.PossesorSelection | undefined,
adjectives: {
inflection: (0 | 1 | 2)[];
@ -55,7 +55,7 @@ function parseNounAfterPossesor(
const w: ReturnType<typeof parseNoun> = [];
searches.forEach(({ search, details }) => {
const nounEntries = lookup(search).filter(isNounEntry);
const nounEntries = lookup(search, "nounAdj").filter(isNounEntry);
details.forEach((deets) => {
const fittingEntries = nounEntries.filter(deets.predicate);
fittingEntries.forEach((entry) => {

View File

@ -3,11 +3,11 @@ import { parsePronoun } from "./parse-pronoun";
import { parseNoun } from "./parse-noun";
import { fmapParseResult } from "../fp-ps";
import { parseParticiple } from "./parse-participle";
import { LookupFunction } from "./lookup";
export function parseNP(
s: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
participleLookup: (input: string) => T.VerbEntry[]
lookup: LookupFunction
): T.ParseResult<T.ParsedNP>[] {
if (s.length === 0) {
return [];
@ -40,7 +40,7 @@ export function parseNP(
return fmapParseResult(makeNPSl, [
...parsePronoun(s),
...parseNoun(s, lookup, participleLookup),
...parseParticiple(s, lookup, participleLookup),
...parseNoun(s, lookup),
...parseParticiple(s, lookup),
]);
}

View File

@ -4,7 +4,7 @@ import {
makePossesorSelection,
} from "../phrase-building/make-selections";
import * as T from "../../../types";
import { lookup, participleLookup, wordQuery } from "./lookup";
import { lookup, wordQuery } from "./lookup";
import { tokenizer } from "./tokenizer";
import { parseParticiple } from "./parse-participle";
@ -123,9 +123,7 @@ describe("parsing participles", () => {
test(label, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const res = parseParticiple(tokens, lookup, participleLookup).map(
({ body }) => body
);
const res = parseParticiple(tokens, lookup).map(({ body }) => body);
expect(res).toEqual(output);
});
});

View File

@ -1,4 +1,5 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parsePossesor } from "./parse-possesor";
import { bindParseResult } from "./utils";
@ -9,25 +10,24 @@ type ParticipleResult = {
/**
 * Parses a participle, optionally preceded by a possesor, from the front
 * of `tokens`.
 *
 * @param tokens - the tokens still to be parsed
 * @param lookup - the unified dictionary lookup; passed on to
 *   `parsePossesor` and `parseParticipleAfterPossesor`
 * @returns the results of `parseParticipleAfterPossesor`, or an empty array
 *   when there are no tokens
 */
export function parseParticiple(
  tokens: Readonly<T.Token[]>,
  lookup: LookupFunction
): T.ParseResult<ParticipleResult>[] {
  if (tokens.length === 0) {
    return [];
  }
  const possesor = parsePossesor(tokens, lookup, undefined);
  if (possesor.length) {
    // bindParseResult hands the callback the tokens and the parsed possesor
    // of each possesor result
    return bindParseResult(possesor, (tokens, p) => {
      return parseParticipleAfterPossesor(tokens, lookup, p);
    });
  }
  // no possesor was parsed: try the participle directly on the given tokens
  return parseParticipleAfterPossesor(tokens, lookup, undefined);
}
// TODO: should have adverbs with participle
function parseParticipleAfterPossesor(
tokens: Readonly<T.Token[]>,
participleLookup: (s: string) => T.VerbEntry[],
lookup: LookupFunction,
possesor: T.PossesorSelection | undefined
): T.ParseResult<ParticipleResult>[] {
if (tokens.length === 0) {
@ -38,7 +38,7 @@ function parseParticipleAfterPossesor(
return [];
}
const inflected = first.s.endsWith("و");
const matches = participleLookup(first.s);
const matches = lookup(first.s, "participle");
return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({
tokens: rest,
body: {

View File

@ -14,6 +14,7 @@ const phs = [
"را",
"ور",
"پرا",
"لا",
];
export function parsePH(

View File

@ -1,5 +1,5 @@
import * as T from "../../../types";
import { verbLookup, lookup, participleLookup } from "./lookup";
import { lookup } from "./lookup";
import { parseNP } from "./parse-np";
import { parseVP } from "./parse-vp";
@ -17,11 +17,9 @@ export function parsePhrase(s: T.Token[]): {
errors: string[];
} {
const res = [
...parseNP(s, lookup, participleLookup).filter(
({ tokens }) => !tokens.length
),
...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
// ...parseVerb(s, verbLookup),
...parseVP(s, lookup, verbLookup, participleLookup),
...parseVP(s, lookup),
];
const success = res.map((x) => x.body);

View File

@ -5,7 +5,7 @@ import {
makeNounSelection,
makePronounSelection,
} from "../phrase-building/make-selections";
import { lookup, participleLookup, wordQuery } from "./lookup";
import { lookup, wordQuery } from "./lookup";
import { parsePossesor } from "./parse-possesor";
import { tokenizer } from "./tokenizer";
import { isCompleteResult } from "./utils";
@ -110,12 +110,12 @@ const tests: {
test("parse possesor", () => {
tests.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const parsed = parsePossesor(tokens, lookup, participleLookup, undefined);
const parsed = parsePossesor(tokens, lookup, undefined);
if (output === "error") {
expect(parsed.some((x) => x.errors.length)).toBe(true);
} else {
expect(
parsePossesor(tokens, lookup, participleLookup, undefined)
parsePossesor(tokens, lookup, undefined)
.filter(isCompleteResult)
.map((x) => x.body.np.selection)
).toEqual(output);

View File

@ -1,4 +1,5 @@
import * as T from "../../../types";
import { LookupFunction } from "./lookup";
import { parseNP } from "./parse-np";
import { bindParseResult } from "./utils";
// TODO: maybe contractions should just be male to cut down on the
@ -18,8 +19,7 @@ const contractions: [string[], T.Person[]][] = [
export function parsePossesor(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
participleLookup: (s: string) => T.VerbEntry[],
lookup: LookupFunction,
prevPossesor: T.PossesorSelection | undefined
): T.ParseResult<T.PossesorSelection>[] {
if (tokens.length === 0) {
@ -43,14 +43,14 @@ export function parsePossesor(
? [{ message: "a pronoun cannot have a possesor" }]
: [];
return contractions
.flatMap((p) => parsePossesor(rest, lookup, participleLookup, p))
.flatMap((p) => parsePossesor(rest, lookup, p))
.map((x) => ({
...x,
errors: [...errors, ...x.errors],
}));
}
if (first.s === "د") {
const np = parseNP(rest, lookup, participleLookup);
const np = parseNP(rest, lookup);
return bindParseResult(np, (tokens, body) => {
const possesor: T.PossesorSelection = {
shrunken: false,
@ -63,12 +63,7 @@ export function parsePossesor(
[{ message: `possesor should be inflected` }]
: [],
// add and check error - can't add possesor to pronoun
next: parsePossesor(
tokens,
lookup,
participleLookup,
addPoss(prevPossesor, possesor)
),
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
};
});
}

View File

@ -8,10 +8,10 @@ import {
wartlul,
raatlul,
} from "./irreg-verbs";
import { verbLookup, wordQuery } from "./lookup";
import { lookup, wordQuery } from "./lookup";
import { parseVerb } from "./parse-verb";
import { tokenizer } from "./tokenizer";
import { removeKeys } from "./utils";
import { getPeople, removeKeys } from "./utils";
const wahul = wordQuery("وهل", "verb");
const leekul = wordQuery("لیکل", "verb");
@ -615,6 +615,18 @@ const tests: {
},
],
},
{
input: "لووت",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: alwatul,
},
],
},
{
input: "سم",
output: [
@ -627,6 +639,53 @@ const tests: {
},
],
},
{
input: "ړلم",
output: [
{
root: {
persons: getPeople(1, "sing"),
aspects: ["perfective"],
},
verb: wurul,
},
{
root: {
persons: getPeople(1, "sing"),
aspects: ["perfective"],
},
verb: tlul,
},
],
},
{
input: "ړ",
output: [
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: wurul,
},
{
root: {
persons: [T.Person.ThirdSingMale],
aspects: ["perfective"],
},
verb: tlul,
},
],
},
// should not match with the prefix for perfective
{
input: "یوړله",
output: [],
},
{
input: "یوړ",
output: [],
},
],
},
{
@ -930,7 +989,7 @@ tests.forEach(({ label, cases }) => {
test(label, () => {
cases.forEach(({ input, output }) => {
const tokens = tokenizer(input);
const vbs = parseVerb(tokens, verbLookup).map((r) => r.body);
const vbs = parseVerb(tokens, lookup).map((r) => r.body);
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
return [
...acc,

View File

@ -9,6 +9,7 @@ import {
tlul,
wartlul,
} from "./irreg-verbs";
import { LookupFunction } from "./lookup";
import { shortVerbEndConsonant } from "./misc";
// big problem ما سړی یوړ crashes it !!
@ -20,9 +21,11 @@ import { shortVerbEndConsonant } from "./misc";
// check څاته
// laaRa shum etc
// TODO: هغه لاړ
export function parseVerb(
tokens: Readonly<T.Token[]>,
verbLookup: (s: string) => T.VerbEntry[]
lookup: LookupFunction
): T.ParseResult<T.ParsedVBE>[] {
if (tokens.length === 0) {
return [];
@ -39,7 +42,7 @@ export function parseVerb(
const people = getVerbEnding(first.s);
// First do rough verb lookup, grab wide pool of possible verbs (low searching complexity for fast lookup)
// TODO: can optimize this to not have to look for possible stems/roots if none
const verbs = verbLookup(first.s);
const verbs = lookup(first.s, "verb");
// if (first.s === "سم") {
// console.log({ verbs: JSON.stringify(verbs) });
// }
@ -197,15 +200,26 @@ function matchVerbs(
const oEnd = s.at(-1) === "و";
const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
const tppMatches = {
imperfective: entries.filter(
({ entry: e }) =>
!e.c.includes("comp") &&
(isInVarients(e.tppp, s) ||
(oEnd && [e.p, e.p.slice(0, -1)].includes(base)) ||
(lastVowelNotA(e.g.slice(0, -2)) &&
(hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1)))
imperfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
const e = entry.entry;
if (e.c.includes("comp")) {
return acc;
}
if (!e.prp && isInVarients(e.tppp, s)) {
return [...acc, entry];
}
if (oEnd && matchShortOrLong(base, e.p)) {
return [...acc, entry];
}
if (
lastVowelNotA(e.g.slice(0, -2)) &&
(hamzaEnd ? base : abruptEnd ? s : "XX") === e.p.slice(0, -1)
) {
return [...acc, entry];
}
// TODO: if check for modified aaXu thing!
),
return acc;
}, []),
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
const e = entry.entry;
if (e.c.includes("comp")) {
@ -243,10 +257,12 @@ function matchVerbs(
}
}
}
if (isInVarients(e.tppp, s)) {
return [...acc, entry];
} else if (isInVarients(e.tppp, "ا" + s)) {
return [...acc, entry];
if (!e.separationAtP) {
if (isInVarients(e.tppp, s)) {
return [...acc, entry];
} else if (isInVarients(e.tppp, "ا" + s)) {
return [...acc, entry];
}
}
return acc;
}, []),

View File

@ -9,7 +9,7 @@ import {
makeNounSelection,
makePronounSelection,
} from "../phrase-building/make-selections";
import { lookup, participleLookup, verbLookup, wordQuery } from "./lookup";
import { lookup, wordQuery } from "./lookup";
import { parseVP } from "./parse-vp";
import { tokenizer } from "./tokenizer";
import { tlul } from "./irreg-verbs";
@ -1382,7 +1382,7 @@ tests.forEach(({ label, cases }) => {
test(label, () => {
cases.forEach(({ input, output, error }) => {
const tokens = tokenizer(input);
const parsed = parseVP(tokens, lookup, verbLookup, participleLookup);
const parsed = parseVP(tokens, lookup);
if (error) {
expect(parsed.filter((x) => x.errors.length).length).toBeTruthy();
} else {

View File

@ -12,6 +12,7 @@ import {
import { parseBlocks } from "./parse-blocks";
import { makePronounSelection } from "../phrase-building/make-selections";
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
import { LookupFunction } from "./lookup";
// to hide equatives type-doubling issue
// this should also conjugate to
@ -30,21 +31,12 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
export function parseVP(
tokens: Readonly<T.Token[]>,
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
verbLookup: (s: string) => T.VerbEntry[],
participleLookup: (s: string) => T.VerbEntry[]
lookup: LookupFunction
): T.ParseResult<T.VPSelectionComplete>[] {
if (tokens.length === 0) {
return [];
}
const blocks = parseBlocks(
tokens,
lookup,
verbLookup,
participleLookup,
[],
[]
);
const blocks = parseBlocks(tokens, lookup, [], []);
return bindParseResult(blocks, (tokens, { blocks, kids }) => {
const phIndex = blocks.findIndex((x) => x.type === "PH");
const vbeIndex = blocks.findIndex((x) => x.type === "VB");

View File

@ -13,7 +13,7 @@ import {
} from "../type-predicates";
import { shortVerbEndConsonant } from "../parsing/misc";
import { removeL } from "../new-verb-engine/rs-helpers";
import { applySingleOrLengthOpts, fmapSingleOrLengthOpts } from "../fp-ps";
import { applySingleOrLengthOpts } from "../fp-ps";
import { accentOnNFromEnd } from "../accent-helpers";
// TODO: can have subject and objects in possesors!!

View File

@ -1031,7 +1031,9 @@ export type Rendered<
type: "undefined";
ps: PsString;
}
: ReplaceKey<
: // TODO: this will be a problem (removing the change gender etc)
// if we want to make the sentence diagram interactive
ReplaceKey<
Omit<
T,
| "changeGender"
@ -1043,7 +1045,7 @@ export type Rendered<
"e",
string
> & {
ps: PsString[];
ps: SingleOrLengthOpts<PsString[]>;
e?: string;
inflected: boolean;
person: Person;