pashto-dictionary/website/src/lib/scriptToPhonetics.ts

115 lines
3.9 KiB
TypeScript

import { conjugateVerb, getVerbInfo, inflectWord, isNounAdjOrVerb, isPashtoScript, removeFVarients, standardizePashto } from "@lingdocs/ps-react";
import { dictionary, allEntries } from "./dictionary";
import {
Types as T,
} from "@lingdocs/ps-react";
import {
InflectionSearchResult,
} from "../types/dictionary-types";
import { searchPile } from "./search-pile";
/**
 * Converts a passage of text containing Pashto script into phonetics.
 *
 * The passage is cut into chunks by `splitWords`, every chunk is handed to
 * `wordToPhonetics` along with the list returned by `allEntries()`, and the
 * converted chunks are glued back together without any separator.
 *
 * @param p - the text to convert
 * @returns the converted chunks concatenated in their original order
 */
export function scriptToPhonetics(p: string): string {
    const chunks = splitWords(p);
    // Fetched once and shared by every chunk lookup below.
    const dictionaryEntries = allEntries();
    let phonetics = "";
    for (const chunk of chunks) {
        phonetics += wordToPhonetics(chunk, dictionaryEntries);
    }
    return phonetics;
}
/**
 * Produces the phonetic rendering of a single chunk of text.
 *
 * A chunk rejected by `isPashtoScript` is returned untouched. Otherwise
 * candidates are collected from two sources, in this order:
 *   1. entries returned by `dictionary.exactPashtoSearch`, whose `f` field is
 *      passed through `removeFVarients`;
 *   2. matching forms returned by `searchAllInflectionsCore`, using the `f` of
 *      each match.
 * Duplicates are dropped (first occurrence wins) and the remaining candidates
 * are joined with "/". When nothing was found the chunk itself is returned.
 *
 * @param p - the chunk to convert
 * @param entries - dictionary entries to scan for inflected/conjugated forms
 * @returns the "/"-separated candidates, or `p` when there are none
 */
function wordToPhonetics(p: string, entries: T.DictionaryEntry[]): string {
    if (!isPashtoScript(p)) {
        return p;
    }
    // A Set keeps insertion order, so direct dictionary hits stay ahead of inflections.
    const candidates = new Set<string>();
    for (const entry of dictionary.exactPashtoSearch(p)) {
        candidates.add(removeFVarients(entry.f));
    }
    // TODO: also add directional prefix stuff
    for (const result of searchAllInflectionsCore(entries, p)) {
        for (const form of result.forms) {
            for (const match of form.matches) {
                candidates.add(match.ps.f);
            }
        }
    }
    return candidates.size === 0 ? p : [...candidates].join("/");
}
/**
 * Scans dictionary entries for inflected or conjugated forms whose Pashto
 * spelling equals `searchValue`.
 *
 * Each entry is classified with `isNounAdjOrVerb`; entries without a `c` field
 * are skipped.
 *  - Verbs: the complement entry is looked up via `dictionary.findOneByTs` when
 *    the entry has `l` and its `c` contains "comp.". The verb info is first
 *    searched for anything sharing the first two characters of `searchValue`;
 *    only if that finds something is the verb conjugated and searched for exact
 *    matches. Any error thrown along the way is logged and the entry is skipped.
 *  - Nouns/adjectives: the result of `inflectWord` (if any) is searched for
 *    exact matches.
 *
 * @param allDocs - the entries to scan
 * @param searchValue - the Pashto spelling to look for
 * @returns one `{ entry, forms }` record per entry with at least one matching
 *          form, in the order the entries appear in `allDocs`
 */
export function searchAllInflectionsCore(allDocs: T.DictionaryEntry[], searchValue: string): InflectionSearchResult[] {
    const sharesOpening = (ps: T.PsString) => ps.p.slice(0, 2) === searchValue.slice(0, 2);
    const isExactMatch = (ps: T.PsString) => ps.p === searchValue;
    const found: InflectionSearchResult[] = [];

    allDocs.forEach((entry) => {
        const type = isNounAdjOrVerb(entry);
        if (!entry.c) {
            return;
        }

        if (type === "verb") {
            try {
                const complement = (entry.l && entry.c.includes("comp."))
                    ? dictionary.findOneByTs(entry.l)
                    : undefined;
                const verbInfo = getVerbInfo(entry, complement);
                // Pre-check on the verb info before building the full conjugation.
                if (searchPile(verbInfo as any, sharesOpening).length === 0) {
                    return;
                }
                const conjugation = conjugateVerb(entry, complement);
                const forms = searchPile(conjugation as any, isExactMatch);
                if (forms.length) {
                    found.push({ entry, forms });
                }
            } catch (e) {
                console.error(e);
                console.error("error inflecting", entry.p);
            }
            return;
        }

        if (type === "nounAdj") {
            const inflections = inflectWord(entry);
            if (!inflections) {
                return;
            }
            const forms = searchPile(inflections as any, isExactMatch);
            if (forms.length) {
                found.push({ entry, forms });
            }
        }
    });

    return found;
}
/**
 * Splits text into consecutive runs, alternating between runs of characters in
 * the ranges treated here as Pashto script (U+0621–U+065F, U+0670–U+06D3,
 * U+06D5) and runs of everything else (spaces, punctuation, Latin text, ...).
 *
 * Every character of the input lands in exactly one run, so the runs
 * concatenate back to the original text before `standardizePashto` is applied.
 * No empty runs are produced: empty input gives an empty array, and text that
 * starts with a non-Pashto character does not get a leading "" entry.
 *
 * @param p - the text to split
 * @returns the runs in input order, each passed through `standardizePashto`
 */
function splitWords(p: string): string[] {
    // One alternative matches a maximal Pashto-script run, the other a maximal
    // run of anything else (a negated class also covers line breaks).
    const runPattern = /[\u0621-\u065f\u0670-\u06d3\u06d5]+|[^\u0621-\u065f\u0670-\u06d3\u06d5]+/g;
    // `match` returns null when nothing matched, i.e. for empty input.
    const runs: string[] = p.match(runPattern) || [];
    return runs.map(standardizePashto);
}