fix yoR issue
This commit is contained in:
parent
56890cf4b9
commit
035e59ae19
|
@ -130,6 +130,7 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
|
||||||
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
<NPDisplay NP={res.selection} inflected={res.inflected} opts={opts} />
|
||||||
) : "verb" in res ? (
|
) : "verb" in res ? (
|
||||||
(() => {
|
(() => {
|
||||||
|
try {
|
||||||
const rendered = renderVP(res);
|
const rendered = renderVP(res);
|
||||||
const compiled = compileVP(rendered, res.form);
|
const compiled = compileVP(rendered, res.form);
|
||||||
return (
|
return (
|
||||||
|
@ -144,6 +145,11 @@ function ParserDemo({ opts }: { opts: T.TextOptions }) {
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
} catch (e) {
|
||||||
|
console.error(e);
|
||||||
|
console.log({ res });
|
||||||
|
return <div>ERROR</div>;
|
||||||
|
}
|
||||||
})()
|
})()
|
||||||
) : (
|
) : (
|
||||||
<samp>
|
<samp>
|
||||||
|
|
|
@ -87,6 +87,9 @@ export function accentFSylsOnNFromEnd(
|
||||||
if (typeof syls === "string") {
|
if (typeof syls === "string") {
|
||||||
return accentFSylsOnNFromEnd(splitUpSyllables(syls), n);
|
return accentFSylsOnNFromEnd(splitUpSyllables(syls), n);
|
||||||
}
|
}
|
||||||
|
if (syls.length === 0) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
return [
|
return [
|
||||||
...syls.slice(0, syls.length - (n + 1)), // before accent
|
...syls.slice(0, syls.length - (n + 1)), // before accent
|
||||||
accentLetter(syls[syls.length - (n + 1)]), // syllable to be accented
|
accentLetter(syls[syls.length - (n + 1)]), // syllable to be accented
|
||||||
|
@ -114,6 +117,9 @@ const accentReplacer = [
|
||||||
];
|
];
|
||||||
|
|
||||||
export function accentLetter(s: string): string {
|
export function accentLetter(s: string): string {
|
||||||
|
if (!s) {
|
||||||
|
console.log("will crash", s);
|
||||||
|
}
|
||||||
return s.replace(/a|ă|e|i|o|u|U/, (match) => {
|
return s.replace(/a|ă|e|i|o|u|U/, (match) => {
|
||||||
const r = accentReplacer.find((x) => x.vowel === match);
|
const r = accentReplacer.find((x) => x.vowel === match);
|
||||||
/* istanbul ignore next */
|
/* istanbul ignore next */
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
|
|
@ -7,7 +7,33 @@ import { splitVarients, undoAaXuPattern } from "../p-text-helpers";
|
||||||
import { arraysHaveCommon } from "../misc-helpers";
|
import { arraysHaveCommon } from "../misc-helpers";
|
||||||
import { shortVerbEndConsonant } from "./misc";
|
import { shortVerbEndConsonant } from "./misc";
|
||||||
|
|
||||||
export function lookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
|
export type LookupFunction = typeof lookup;
|
||||||
|
|
||||||
|
export function lookup(
|
||||||
|
s: Partial<T.DictionaryEntry>,
|
||||||
|
type: "nounAdj"
|
||||||
|
): T.DictionaryEntry[];
|
||||||
|
export function lookup(s: string, type: "verb" | "participle"): T.VerbEntry[];
|
||||||
|
export function lookup(
|
||||||
|
s: string | Partial<T.DictionaryEntry>,
|
||||||
|
type: "nounAdj" | "verb" | "participle"
|
||||||
|
): T.DictionaryEntry[] | T.VerbEntry[] {
|
||||||
|
if (type === "nounAdj") {
|
||||||
|
if (typeof s !== "object") {
|
||||||
|
throw new Error("invalid query for noun / adj lookup");
|
||||||
|
}
|
||||||
|
return nounAdjLookup(s);
|
||||||
|
}
|
||||||
|
if (typeof s === "object") {
|
||||||
|
throw new Error("invalid query");
|
||||||
|
}
|
||||||
|
if (type === "verb") {
|
||||||
|
return verbLookup(s);
|
||||||
|
}
|
||||||
|
return participleLookup(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
function nounAdjLookup(s: Partial<T.DictionaryEntry>): T.DictionaryEntry[] {
|
||||||
const [key, value] = Object.entries(s)[0];
|
const [key, value] = Object.entries(s)[0];
|
||||||
// TODO: could make this more efficient - merging ppp and app queries?
|
// TODO: could make this more efficient - merging ppp and app queries?
|
||||||
if (key === "ppp") {
|
if (key === "ppp") {
|
||||||
|
@ -42,7 +68,7 @@ export function shouldCheckTpp(s: string): boolean {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function participleLookup(input: string): T.VerbEntry[] {
|
function participleLookup(input: string): T.VerbEntry[] {
|
||||||
if (input.endsWith("ل")) {
|
if (input.endsWith("ل")) {
|
||||||
return verbs.filter((e) => e.entry.p === input);
|
return verbs.filter((e) => e.entry.p === input);
|
||||||
}
|
}
|
||||||
|
@ -59,7 +85,7 @@ export function participleLookup(input: string): T.VerbEntry[] {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
export function verbLookup(input: string): T.VerbEntry[] {
|
function verbLookup(input: string): T.VerbEntry[] {
|
||||||
// TODO:
|
// TODO:
|
||||||
// only look up forms if there's an ending
|
// only look up forms if there's an ending
|
||||||
// or is third person thing
|
// or is third person thing
|
||||||
|
|
|
@ -2,10 +2,11 @@ import * as T from "../../../types";
|
||||||
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
|
import { makeAdjectiveSelection } from "../phrase-building/make-selections";
|
||||||
import { isAdjectiveEntry } from "../type-predicates";
|
import { isAdjectiveEntry } from "../type-predicates";
|
||||||
import { getInflectionQueries } from "./inflection-query";
|
import { getInflectionQueries } from "./inflection-query";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
|
|
||||||
export function parseAdjective(
|
export function parseAdjective(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[]
|
lookup: LookupFunction
|
||||||
): T.ParseResult<{
|
): T.ParseResult<{
|
||||||
inflection: (0 | 1 | 2)[];
|
inflection: (0 | 1 | 2)[];
|
||||||
gender: T.Gender[];
|
gender: T.Gender[];
|
||||||
|
@ -19,7 +20,7 @@ export function parseAdjective(
|
||||||
const [first, ...rest] = tokens;
|
const [first, ...rest] = tokens;
|
||||||
const queries = getInflectionQueries(first.s, false);
|
const queries = getInflectionQueries(first.s, false);
|
||||||
queries.forEach(({ search, details }) => {
|
queries.forEach(({ search, details }) => {
|
||||||
const wideMatches = lookup(search).filter(isAdjectiveEntry);
|
const wideMatches = lookup(search, "nounAdj").filter(isAdjectiveEntry);
|
||||||
details.forEach((deets) => {
|
details.forEach((deets) => {
|
||||||
const matches = wideMatches.filter(deets.predicate);
|
const matches = wideMatches.filter(deets.predicate);
|
||||||
matches.forEach((m) => {
|
matches.forEach((m) => {
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
import { parseKidsSection } from "./parse-kids-section";
|
import { parseKidsSection } from "./parse-kids-section";
|
||||||
import { parseNeg } from "./parse-negative";
|
import { parseNeg } from "./parse-negative";
|
||||||
import { parseNP } from "./parse-np";
|
import { parseNP } from "./parse-np";
|
||||||
|
@ -8,9 +9,7 @@ import { bindParseResult, returnParseResult } from "./utils";
|
||||||
|
|
||||||
export function parseBlocks(
|
export function parseBlocks(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: LookupFunction,
|
||||||
verbLookup: (s: string) => T.VerbEntry[],
|
|
||||||
participleLookup: (s: string) => T.VerbEntry[],
|
|
||||||
blocks: T.ParsedBlock[],
|
blocks: T.ParsedBlock[],
|
||||||
kids: T.ParsedKid[]
|
kids: T.ParsedKid[]
|
||||||
): T.ParseResult<{
|
): T.ParseResult<{
|
||||||
|
@ -24,9 +23,9 @@ export function parseBlocks(
|
||||||
(b): b is T.ParsedPH => b.type === "PH"
|
(b): b is T.ParsedPH => b.type === "PH"
|
||||||
);
|
);
|
||||||
const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
|
const vbExists = blocks.some((b) => "type" in b && b.type === "VB");
|
||||||
const np = prevPh ? [] : parseNP(tokens, lookup, participleLookup);
|
const np = prevPh ? [] : parseNP(tokens, lookup);
|
||||||
const ph = vbExists || prevPh ? [] : parsePH(tokens);
|
const ph = vbExists || prevPh ? [] : parsePH(tokens);
|
||||||
const vb = parseVerb(tokens, verbLookup);
|
const vb = parseVerb(tokens, lookup);
|
||||||
const neg = parseNeg(tokens);
|
const neg = parseNeg(tokens);
|
||||||
const kidsR = parseKidsSection(tokens, []);
|
const kidsR = parseKidsSection(tokens, []);
|
||||||
const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
|
const allResults: T.ParseResult<T.ParsedBlock | T.ParsedKidsSection>[] = [
|
||||||
|
@ -50,14 +49,7 @@ export function parseBlocks(
|
||||||
const errors: T.ParseError[] = [];
|
const errors: T.ParseError[] = [];
|
||||||
if (r.type === "kids") {
|
if (r.type === "kids") {
|
||||||
return {
|
return {
|
||||||
next: parseBlocks(
|
next: parseBlocks(tokens, lookup, blocks, [...kids, ...r.kids]),
|
||||||
tokens,
|
|
||||||
lookup,
|
|
||||||
verbLookup,
|
|
||||||
participleLookup,
|
|
||||||
blocks,
|
|
||||||
[...kids, ...r.kids]
|
|
||||||
),
|
|
||||||
errors:
|
errors:
|
||||||
blocks.length !== 1
|
blocks.length !== 1
|
||||||
? [{ message: "kids' section out of place" }]
|
? [{ message: "kids' section out of place" }]
|
||||||
|
@ -78,14 +70,7 @@ export function parseBlocks(
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
next: parseBlocks(
|
next: parseBlocks(tokens, lookup, [...blocks, r], kids),
|
||||||
tokens,
|
|
||||||
lookup,
|
|
||||||
verbLookup,
|
|
||||||
participleLookup,
|
|
||||||
[...blocks, r],
|
|
||||||
kids
|
|
||||||
),
|
|
||||||
errors,
|
errors,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
|
@ -3,7 +3,7 @@ import {
|
||||||
makeNounSelection,
|
makeNounSelection,
|
||||||
} from "../phrase-building/make-selections";
|
} from "../phrase-building/make-selections";
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { lookup, participleLookup, wordQuery } from "./lookup";
|
import { lookup, wordQuery } from "./lookup";
|
||||||
import { parseNoun } from "./parse-noun";
|
import { parseNoun } from "./parse-noun";
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { isCompleteResult } from "./utils";
|
import { isCompleteResult } from "./utils";
|
||||||
|
@ -1371,9 +1371,7 @@ describe("parsing nouns", () => {
|
||||||
test(category, () => {
|
test(category, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const res = parseNoun(tokens, lookup, participleLookup).map(
|
const res = parseNoun(tokens, lookup).map(({ body }) => body);
|
||||||
({ body }) => body
|
|
||||||
);
|
|
||||||
expect(res).toEqual(output);
|
expect(res).toEqual(output);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -1505,7 +1503,7 @@ describe("parsing nouns with adjectives", () => {
|
||||||
test(category, () => {
|
test(category, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const res = parseNoun(tokens, lookup, participleLookup)
|
const res = parseNoun(tokens, lookup)
|
||||||
.filter(isCompleteResult)
|
.filter(isCompleteResult)
|
||||||
.map(({ body }) => body);
|
.map(({ body }) => body);
|
||||||
expect(res).toEqual(output);
|
expect(res).toEqual(output);
|
||||||
|
|
|
@ -8,6 +8,7 @@ import {
|
||||||
isUnisexNounEntry,
|
isUnisexNounEntry,
|
||||||
} from "../type-predicates";
|
} from "../type-predicates";
|
||||||
import { getInflectionQueries } from "./inflection-query";
|
import { getInflectionQueries } from "./inflection-query";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
import { parseAdjective } from "./parse-adjective";
|
import { parseAdjective } from "./parse-adjective";
|
||||||
import { parsePossesor } from "./parse-possesor";
|
import { parsePossesor } from "./parse-possesor";
|
||||||
import { bindParseResult } from "./utils";
|
import { bindParseResult } from "./utils";
|
||||||
|
@ -16,13 +17,12 @@ type NounResult = { inflected: boolean; selection: T.NounSelection };
|
||||||
|
|
||||||
export function parseNoun(
|
export function parseNoun(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: LookupFunction
|
||||||
pariticipleLookup: (s: string) => T.VerbEntry[]
|
|
||||||
): T.ParseResult<NounResult>[] {
|
): T.ParseResult<NounResult>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const possesor = parsePossesor(tokens, lookup, pariticipleLookup, undefined);
|
const possesor = parsePossesor(tokens, lookup, undefined);
|
||||||
if (possesor.length) {
|
if (possesor.length) {
|
||||||
return bindParseResult(possesor, (tokens, p) => {
|
return bindParseResult(possesor, (tokens, p) => {
|
||||||
return parseNounAfterPossesor(tokens, lookup, p, []);
|
return parseNounAfterPossesor(tokens, lookup, p, []);
|
||||||
|
@ -33,7 +33,7 @@ export function parseNoun(
|
||||||
|
|
||||||
function parseNounAfterPossesor(
|
function parseNounAfterPossesor(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: LookupFunction,
|
||||||
possesor: T.PossesorSelection | undefined,
|
possesor: T.PossesorSelection | undefined,
|
||||||
adjectives: {
|
adjectives: {
|
||||||
inflection: (0 | 1 | 2)[];
|
inflection: (0 | 1 | 2)[];
|
||||||
|
@ -55,7 +55,7 @@ function parseNounAfterPossesor(
|
||||||
|
|
||||||
const w: ReturnType<typeof parseNoun> = [];
|
const w: ReturnType<typeof parseNoun> = [];
|
||||||
searches.forEach(({ search, details }) => {
|
searches.forEach(({ search, details }) => {
|
||||||
const nounEntries = lookup(search).filter(isNounEntry);
|
const nounEntries = lookup(search, "nounAdj").filter(isNounEntry);
|
||||||
details.forEach((deets) => {
|
details.forEach((deets) => {
|
||||||
const fittingEntries = nounEntries.filter(deets.predicate);
|
const fittingEntries = nounEntries.filter(deets.predicate);
|
||||||
fittingEntries.forEach((entry) => {
|
fittingEntries.forEach((entry) => {
|
||||||
|
|
|
@ -3,11 +3,11 @@ import { parsePronoun } from "./parse-pronoun";
|
||||||
import { parseNoun } from "./parse-noun";
|
import { parseNoun } from "./parse-noun";
|
||||||
import { fmapParseResult } from "../fp-ps";
|
import { fmapParseResult } from "../fp-ps";
|
||||||
import { parseParticiple } from "./parse-participle";
|
import { parseParticiple } from "./parse-participle";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
|
|
||||||
export function parseNP(
|
export function parseNP(
|
||||||
s: Readonly<T.Token[]>,
|
s: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: LookupFunction
|
||||||
participleLookup: (input: string) => T.VerbEntry[]
|
|
||||||
): T.ParseResult<T.ParsedNP>[] {
|
): T.ParseResult<T.ParsedNP>[] {
|
||||||
if (s.length === 0) {
|
if (s.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
|
@ -40,7 +40,7 @@ export function parseNP(
|
||||||
|
|
||||||
return fmapParseResult(makeNPSl, [
|
return fmapParseResult(makeNPSl, [
|
||||||
...parsePronoun(s),
|
...parsePronoun(s),
|
||||||
...parseNoun(s, lookup, participleLookup),
|
...parseNoun(s, lookup),
|
||||||
...parseParticiple(s, lookup, participleLookup),
|
...parseParticiple(s, lookup),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@ import {
|
||||||
makePossesorSelection,
|
makePossesorSelection,
|
||||||
} from "../phrase-building/make-selections";
|
} from "../phrase-building/make-selections";
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { lookup, participleLookup, wordQuery } from "./lookup";
|
import { lookup, wordQuery } from "./lookup";
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { parseParticiple } from "./parse-participle";
|
import { parseParticiple } from "./parse-participle";
|
||||||
|
|
||||||
|
@ -123,9 +123,7 @@ describe("parsing participles", () => {
|
||||||
test(label, () => {
|
test(label, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const res = parseParticiple(tokens, lookup, participleLookup).map(
|
const res = parseParticiple(tokens, lookup).map(({ body }) => body);
|
||||||
({ body }) => body
|
|
||||||
);
|
|
||||||
expect(res).toEqual(output);
|
expect(res).toEqual(output);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
import { parsePossesor } from "./parse-possesor";
|
import { parsePossesor } from "./parse-possesor";
|
||||||
import { bindParseResult } from "./utils";
|
import { bindParseResult } from "./utils";
|
||||||
|
|
||||||
|
@ -9,25 +10,24 @@ type ParticipleResult = {
|
||||||
|
|
||||||
export function parseParticiple(
|
export function parseParticiple(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: LookupFunction
|
||||||
participleLookup: (s: string) => T.VerbEntry[]
|
|
||||||
): T.ParseResult<ParticipleResult>[] {
|
): T.ParseResult<ParticipleResult>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const possesor = parsePossesor(tokens, lookup, participleLookup, undefined);
|
const possesor = parsePossesor(tokens, lookup, undefined);
|
||||||
if (possesor.length) {
|
if (possesor.length) {
|
||||||
return bindParseResult(possesor, (tokens, p) => {
|
return bindParseResult(possesor, (tokens, p) => {
|
||||||
return parseParticipleAfterPossesor(tokens, participleLookup, p);
|
return parseParticipleAfterPossesor(tokens, lookup, p);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return parseParticipleAfterPossesor(tokens, participleLookup, undefined);
|
return parseParticipleAfterPossesor(tokens, lookup, undefined);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: should have adverbs with participle
|
// TODO: should have adverbs with participle
|
||||||
function parseParticipleAfterPossesor(
|
function parseParticipleAfterPossesor(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
participleLookup: (s: string) => T.VerbEntry[],
|
lookup: LookupFunction,
|
||||||
possesor: T.PossesorSelection | undefined
|
possesor: T.PossesorSelection | undefined
|
||||||
): T.ParseResult<ParticipleResult>[] {
|
): T.ParseResult<ParticipleResult>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
|
@ -38,7 +38,7 @@ function parseParticipleAfterPossesor(
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const inflected = first.s.endsWith("و");
|
const inflected = first.s.endsWith("و");
|
||||||
const matches = participleLookup(first.s);
|
const matches = lookup(first.s, "participle");
|
||||||
return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({
|
return matches.map<T.ParseResult<ParticipleResult>>((verb) => ({
|
||||||
tokens: rest,
|
tokens: rest,
|
||||||
body: {
|
body: {
|
||||||
|
|
|
@ -14,6 +14,7 @@ const phs = [
|
||||||
"را",
|
"را",
|
||||||
"ور",
|
"ور",
|
||||||
"پرا",
|
"پرا",
|
||||||
|
"لا",
|
||||||
];
|
];
|
||||||
|
|
||||||
export function parsePH(
|
export function parsePH(
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
import { verbLookup, lookup, participleLookup } from "./lookup";
|
import { lookup } from "./lookup";
|
||||||
import { parseNP } from "./parse-np";
|
import { parseNP } from "./parse-np";
|
||||||
import { parseVP } from "./parse-vp";
|
import { parseVP } from "./parse-vp";
|
||||||
|
|
||||||
|
@ -17,11 +17,9 @@ export function parsePhrase(s: T.Token[]): {
|
||||||
errors: string[];
|
errors: string[];
|
||||||
} {
|
} {
|
||||||
const res = [
|
const res = [
|
||||||
...parseNP(s, lookup, participleLookup).filter(
|
...parseNP(s, lookup).filter(({ tokens }) => !tokens.length),
|
||||||
({ tokens }) => !tokens.length
|
|
||||||
),
|
|
||||||
// ...parseVerb(s, verbLookup),
|
// ...parseVerb(s, verbLookup),
|
||||||
...parseVP(s, lookup, verbLookup, participleLookup),
|
...parseVP(s, lookup),
|
||||||
];
|
];
|
||||||
|
|
||||||
const success = res.map((x) => x.body);
|
const success = res.map((x) => x.body);
|
||||||
|
|
|
@ -5,7 +5,7 @@ import {
|
||||||
makeNounSelection,
|
makeNounSelection,
|
||||||
makePronounSelection,
|
makePronounSelection,
|
||||||
} from "../phrase-building/make-selections";
|
} from "../phrase-building/make-selections";
|
||||||
import { lookup, participleLookup, wordQuery } from "./lookup";
|
import { lookup, wordQuery } from "./lookup";
|
||||||
import { parsePossesor } from "./parse-possesor";
|
import { parsePossesor } from "./parse-possesor";
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { isCompleteResult } from "./utils";
|
import { isCompleteResult } from "./utils";
|
||||||
|
@ -110,12 +110,12 @@ const tests: {
|
||||||
test("parse possesor", () => {
|
test("parse possesor", () => {
|
||||||
tests.forEach(({ input, output }) => {
|
tests.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const parsed = parsePossesor(tokens, lookup, participleLookup, undefined);
|
const parsed = parsePossesor(tokens, lookup, undefined);
|
||||||
if (output === "error") {
|
if (output === "error") {
|
||||||
expect(parsed.some((x) => x.errors.length)).toBe(true);
|
expect(parsed.some((x) => x.errors.length)).toBe(true);
|
||||||
} else {
|
} else {
|
||||||
expect(
|
expect(
|
||||||
parsePossesor(tokens, lookup, participleLookup, undefined)
|
parsePossesor(tokens, lookup, undefined)
|
||||||
.filter(isCompleteResult)
|
.filter(isCompleteResult)
|
||||||
.map((x) => x.body.np.selection)
|
.map((x) => x.body.np.selection)
|
||||||
).toEqual(output);
|
).toEqual(output);
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import * as T from "../../../types";
|
import * as T from "../../../types";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
import { parseNP } from "./parse-np";
|
import { parseNP } from "./parse-np";
|
||||||
import { bindParseResult } from "./utils";
|
import { bindParseResult } from "./utils";
|
||||||
// TODO: maybe contractions should just be male to cut down on the
|
// TODO: maybe contractions should just be male to cut down on the
|
||||||
|
@ -18,8 +19,7 @@ const contractions: [string[], T.Person[]][] = [
|
||||||
|
|
||||||
export function parsePossesor(
|
export function parsePossesor(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: LookupFunction,
|
||||||
participleLookup: (s: string) => T.VerbEntry[],
|
|
||||||
prevPossesor: T.PossesorSelection | undefined
|
prevPossesor: T.PossesorSelection | undefined
|
||||||
): T.ParseResult<T.PossesorSelection>[] {
|
): T.ParseResult<T.PossesorSelection>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
|
@ -43,14 +43,14 @@ export function parsePossesor(
|
||||||
? [{ message: "a pronoun cannot have a possesor" }]
|
? [{ message: "a pronoun cannot have a possesor" }]
|
||||||
: [];
|
: [];
|
||||||
return contractions
|
return contractions
|
||||||
.flatMap((p) => parsePossesor(rest, lookup, participleLookup, p))
|
.flatMap((p) => parsePossesor(rest, lookup, p))
|
||||||
.map((x) => ({
|
.map((x) => ({
|
||||||
...x,
|
...x,
|
||||||
errors: [...errors, ...x.errors],
|
errors: [...errors, ...x.errors],
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
if (first.s === "د") {
|
if (first.s === "د") {
|
||||||
const np = parseNP(rest, lookup, participleLookup);
|
const np = parseNP(rest, lookup);
|
||||||
return bindParseResult(np, (tokens, body) => {
|
return bindParseResult(np, (tokens, body) => {
|
||||||
const possesor: T.PossesorSelection = {
|
const possesor: T.PossesorSelection = {
|
||||||
shrunken: false,
|
shrunken: false,
|
||||||
|
@ -63,12 +63,7 @@ export function parsePossesor(
|
||||||
[{ message: `possesor should be inflected` }]
|
[{ message: `possesor should be inflected` }]
|
||||||
: [],
|
: [],
|
||||||
// add and check error - can't add possesor to pronoun
|
// add and check error - can't add possesor to pronoun
|
||||||
next: parsePossesor(
|
next: parsePossesor(tokens, lookup, addPoss(prevPossesor, possesor)),
|
||||||
tokens,
|
|
||||||
lookup,
|
|
||||||
participleLookup,
|
|
||||||
addPoss(prevPossesor, possesor)
|
|
||||||
),
|
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,10 +8,10 @@ import {
|
||||||
wartlul,
|
wartlul,
|
||||||
raatlul,
|
raatlul,
|
||||||
} from "./irreg-verbs";
|
} from "./irreg-verbs";
|
||||||
import { verbLookup, wordQuery } from "./lookup";
|
import { lookup, wordQuery } from "./lookup";
|
||||||
import { parseVerb } from "./parse-verb";
|
import { parseVerb } from "./parse-verb";
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { removeKeys } from "./utils";
|
import { getPeople, removeKeys } from "./utils";
|
||||||
|
|
||||||
const wahul = wordQuery("وهل", "verb");
|
const wahul = wordQuery("وهل", "verb");
|
||||||
const leekul = wordQuery("لیکل", "verb");
|
const leekul = wordQuery("لیکل", "verb");
|
||||||
|
@ -615,6 +615,18 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
input: "لووت",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb: alwatul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
input: "سم",
|
input: "سم",
|
||||||
output: [
|
output: [
|
||||||
|
@ -627,6 +639,53 @@ const tests: {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
input: "ړلم",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: getPeople(1, "sing"),
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb: wurul,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: getPeople(1, "sing"),
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb: tlul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "ړ",
|
||||||
|
output: [
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb: wurul,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
root: {
|
||||||
|
persons: [T.Person.ThirdSingMale],
|
||||||
|
aspects: ["perfective"],
|
||||||
|
},
|
||||||
|
verb: tlul,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
// should not match with the prefix for perfective
|
||||||
|
{
|
||||||
|
input: "یوړله",
|
||||||
|
output: [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "یوړ",
|
||||||
|
output: [],
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -930,7 +989,7 @@ tests.forEach(({ label, cases }) => {
|
||||||
test(label, () => {
|
test(label, () => {
|
||||||
cases.forEach(({ input, output }) => {
|
cases.forEach(({ input, output }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const vbs = parseVerb(tokens, verbLookup).map((r) => r.body);
|
const vbs = parseVerb(tokens, lookup).map((r) => r.body);
|
||||||
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
|
const madeVbsS = output.reduce<T.ParsedVBE[]>((acc, o) => {
|
||||||
return [
|
return [
|
||||||
...acc,
|
...acc,
|
||||||
|
|
|
@ -9,6 +9,7 @@ import {
|
||||||
tlul,
|
tlul,
|
||||||
wartlul,
|
wartlul,
|
||||||
} from "./irreg-verbs";
|
} from "./irreg-verbs";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
import { shortVerbEndConsonant } from "./misc";
|
import { shortVerbEndConsonant } from "./misc";
|
||||||
|
|
||||||
// big problem ما سړی یوړ crashes it !!
|
// big problem ما سړی یوړ crashes it !!
|
||||||
|
@ -20,9 +21,11 @@ import { shortVerbEndConsonant } from "./misc";
|
||||||
// check څاته
|
// check څاته
|
||||||
// laaRa shum etc
|
// laaRa shum etc
|
||||||
|
|
||||||
|
// TODO: هغه لاړ
|
||||||
|
|
||||||
export function parseVerb(
|
export function parseVerb(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
verbLookup: (s: string) => T.VerbEntry[]
|
lookup: LookupFunction
|
||||||
): T.ParseResult<T.ParsedVBE>[] {
|
): T.ParseResult<T.ParsedVBE>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
|
@ -39,7 +42,7 @@ export function parseVerb(
|
||||||
const people = getVerbEnding(first.s);
|
const people = getVerbEnding(first.s);
|
||||||
// First do rough verb lookup, grab wide pool of possible verbs (low searching complexity for fast lookup)
|
// First do rough verb lookup, grab wide pool of possible verbs (low searching complexity for fast lookup)
|
||||||
// TODO: can optimize this to not have to look for possible stems/roots if none
|
// TODO: can optimize this to not have to look for possible stems/roots if none
|
||||||
const verbs = verbLookup(first.s);
|
const verbs = lookup(first.s, "verb");
|
||||||
// if (first.s === "سم") {
|
// if (first.s === "سم") {
|
||||||
// console.log({ verbs: JSON.stringify(verbs) });
|
// console.log({ verbs: JSON.stringify(verbs) });
|
||||||
// }
|
// }
|
||||||
|
@ -197,15 +200,26 @@ function matchVerbs(
|
||||||
const oEnd = s.at(-1) === "و";
|
const oEnd = s.at(-1) === "و";
|
||||||
const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
|
const abruptEnd = shortVerbEndConsonant.includes(s.slice(-1));
|
||||||
const tppMatches = {
|
const tppMatches = {
|
||||||
imperfective: entries.filter(
|
imperfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
||||||
({ entry: e }) =>
|
const e = entry.entry;
|
||||||
!e.c.includes("comp") &&
|
if (e.c.includes("comp")) {
|
||||||
(isInVarients(e.tppp, s) ||
|
return acc;
|
||||||
(oEnd && [e.p, e.p.slice(0, -1)].includes(base)) ||
|
}
|
||||||
(lastVowelNotA(e.g.slice(0, -2)) &&
|
if (!e.prp && isInVarients(e.tppp, s)) {
|
||||||
(hamzaEnd ? base : abruptEnd ? s : "") === e.p.slice(0, -1)))
|
return [...acc, entry];
|
||||||
|
}
|
||||||
|
if (oEnd && matchShortOrLong(base, e.p)) {
|
||||||
|
return [...acc, entry];
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
lastVowelNotA(e.g.slice(0, -2)) &&
|
||||||
|
(hamzaEnd ? base : abruptEnd ? s : "XX") === e.p.slice(0, -1)
|
||||||
|
) {
|
||||||
|
return [...acc, entry];
|
||||||
|
}
|
||||||
// TODO: if check for modified aaXu thing!
|
// TODO: if check for modified aaXu thing!
|
||||||
),
|
return acc;
|
||||||
|
}, []),
|
||||||
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
perfective: entries.reduce<T.VerbEntry[]>((acc, entry) => {
|
||||||
const e = entry.entry;
|
const e = entry.entry;
|
||||||
if (e.c.includes("comp")) {
|
if (e.c.includes("comp")) {
|
||||||
|
@ -243,11 +257,13 @@ function matchVerbs(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!e.separationAtP) {
|
||||||
if (isInVarients(e.tppp, s)) {
|
if (isInVarients(e.tppp, s)) {
|
||||||
return [...acc, entry];
|
return [...acc, entry];
|
||||||
} else if (isInVarients(e.tppp, "ا" + s)) {
|
} else if (isInVarients(e.tppp, "ا" + s)) {
|
||||||
return [...acc, entry];
|
return [...acc, entry];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return acc;
|
return acc;
|
||||||
}, []),
|
}, []),
|
||||||
};
|
};
|
||||||
|
|
|
@ -9,7 +9,7 @@ import {
|
||||||
makeNounSelection,
|
makeNounSelection,
|
||||||
makePronounSelection,
|
makePronounSelection,
|
||||||
} from "../phrase-building/make-selections";
|
} from "../phrase-building/make-selections";
|
||||||
import { lookup, participleLookup, verbLookup, wordQuery } from "./lookup";
|
import { lookup, wordQuery } from "./lookup";
|
||||||
import { parseVP } from "./parse-vp";
|
import { parseVP } from "./parse-vp";
|
||||||
import { tokenizer } from "./tokenizer";
|
import { tokenizer } from "./tokenizer";
|
||||||
import { tlul } from "./irreg-verbs";
|
import { tlul } from "./irreg-verbs";
|
||||||
|
@ -1382,7 +1382,7 @@ tests.forEach(({ label, cases }) => {
|
||||||
test(label, () => {
|
test(label, () => {
|
||||||
cases.forEach(({ input, output, error }) => {
|
cases.forEach(({ input, output, error }) => {
|
||||||
const tokens = tokenizer(input);
|
const tokens = tokenizer(input);
|
||||||
const parsed = parseVP(tokens, lookup, verbLookup, participleLookup);
|
const parsed = parseVP(tokens, lookup);
|
||||||
if (error) {
|
if (error) {
|
||||||
expect(parsed.filter((x) => x.errors.length).length).toBeTruthy();
|
expect(parsed.filter((x) => x.errors.length).length).toBeTruthy();
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -12,6 +12,7 @@ import {
|
||||||
import { parseBlocks } from "./parse-blocks";
|
import { parseBlocks } from "./parse-blocks";
|
||||||
import { makePronounSelection } from "../phrase-building/make-selections";
|
import { makePronounSelection } from "../phrase-building/make-selections";
|
||||||
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
||||||
|
import { LookupFunction } from "./lookup";
|
||||||
// to hide equatives type-doubling issue
|
// to hide equatives type-doubling issue
|
||||||
|
|
||||||
// this should also conjugate to
|
// this should also conjugate to
|
||||||
|
@ -30,21 +31,12 @@ import { isFirstOrSecondPersPronoun } from "../phrase-building/render-vp";
|
||||||
|
|
||||||
export function parseVP(
|
export function parseVP(
|
||||||
tokens: Readonly<T.Token[]>,
|
tokens: Readonly<T.Token[]>,
|
||||||
lookup: (s: Partial<T.DictionaryEntry>) => T.DictionaryEntry[],
|
lookup: LookupFunction
|
||||||
verbLookup: (s: string) => T.VerbEntry[],
|
|
||||||
participleLookup: (s: string) => T.VerbEntry[]
|
|
||||||
): T.ParseResult<T.VPSelectionComplete>[] {
|
): T.ParseResult<T.VPSelectionComplete>[] {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const blocks = parseBlocks(
|
const blocks = parseBlocks(tokens, lookup, [], []);
|
||||||
tokens,
|
|
||||||
lookup,
|
|
||||||
verbLookup,
|
|
||||||
participleLookup,
|
|
||||||
[],
|
|
||||||
[]
|
|
||||||
);
|
|
||||||
return bindParseResult(blocks, (tokens, { blocks, kids }) => {
|
return bindParseResult(blocks, (tokens, { blocks, kids }) => {
|
||||||
const phIndex = blocks.findIndex((x) => x.type === "PH");
|
const phIndex = blocks.findIndex((x) => x.type === "PH");
|
||||||
const vbeIndex = blocks.findIndex((x) => x.type === "VB");
|
const vbeIndex = blocks.findIndex((x) => x.type === "VB");
|
||||||
|
|
|
@ -13,7 +13,7 @@ import {
|
||||||
} from "../type-predicates";
|
} from "../type-predicates";
|
||||||
import { shortVerbEndConsonant } from "../parsing/misc";
|
import { shortVerbEndConsonant } from "../parsing/misc";
|
||||||
import { removeL } from "../new-verb-engine/rs-helpers";
|
import { removeL } from "../new-verb-engine/rs-helpers";
|
||||||
import { applySingleOrLengthOpts, fmapSingleOrLengthOpts } from "../fp-ps";
|
import { applySingleOrLengthOpts } from "../fp-ps";
|
||||||
import { accentOnNFromEnd } from "../accent-helpers";
|
import { accentOnNFromEnd } from "../accent-helpers";
|
||||||
|
|
||||||
// TODO: can have subject and objects in possesors!!
|
// TODO: can have subject and objects in possesors!!
|
||||||
|
|
|
@ -1031,7 +1031,9 @@ export type Rendered<
|
||||||
type: "undefined";
|
type: "undefined";
|
||||||
ps: PsString;
|
ps: PsString;
|
||||||
}
|
}
|
||||||
: ReplaceKey<
|
: // TODO: this will be a problem (removing the change gender etc)
|
||||||
|
// if we want to make the sentence diagram interactive
|
||||||
|
ReplaceKey<
|
||||||
Omit<
|
Omit<
|
||||||
T,
|
T,
|
||||||
| "changeGender"
|
| "changeGender"
|
||||||
|
@ -1043,7 +1045,7 @@ export type Rendered<
|
||||||
"e",
|
"e",
|
||||||
string
|
string
|
||||||
> & {
|
> & {
|
||||||
ps: PsString[];
|
ps: SingleOrLengthOpts<PsString[]>;
|
||||||
e?: string;
|
e?: string;
|
||||||
inflected: boolean;
|
inflected: boolean;
|
||||||
person: Person;
|
person: Person;
|
||||||
|
|
Loading…
Reference in New Issue