rough try of text conversion thing

This commit is contained in:
adueck 2023-01-19 17:24:11 +05:00
parent 27141ecf90
commit 40641faf7a
3 changed files with 182 additions and 1 deletions

View File

@ -81,6 +81,7 @@ import { searchAllInflections } from "./lib/search-all-inflections";
import {
addToWordlist,
} from "./lib/wordlist-database";
import ScriptToPhonetics from "./screens/ScriptToPhonetics";
// to allow Mousetrap key combos even when input fields are in focus
Mousetrap.prototype.stopCallback = function () {
@ -97,7 +98,7 @@ if (prod) {
const possibleLandingPages = [
"/", "/about", "/settings", "/word", "/account", "/new-entries", "/share-target", "/phrase-builder",
"/privacy",
"/privacy", "/script-to-phonetics"
];
const editorOnlyPages = [
"/edit", "/review-tasks",
@ -656,6 +657,9 @@ class App extends Component<RouteComponentProps, State> {
loadUser={this.handleLoadUser}
/>
</Route>
<Route path="/script-to-phonetics">
<ScriptToPhonetics />
</Route>
{this.state.user?.level === "editor" && <Route path="/edit">
<EntryEditor
isolatedEntry={this.state.isolatedEntry}

View File

@ -0,0 +1,115 @@
import { conjugateVerb, getVerbInfo, inflectWord, isNounAdjOrVerb, isPashtoScript, removeFVarients, standardizePashto } from "@lingdocs/ps-react";
import { dictionary, allEntries } from "./dictionary";
import {
Types as T,
} from "@lingdocs/ps-react";
import {
InflectionSearchResult,
} from "../types/dictionary-types";
import { searchPile } from "./search-pile";
/**
 * Converts a piece of text containing Pashto script into phonetics.
 *
 * The text is cut into tokens with `splitWords`, each token is run through
 * `wordToPhonetics` (which looks the word up in the dictionary and among the
 * inflections of the dictionary entries), and the converted tokens are put
 * back together.
 *
 * @param p - the text to convert
 * @returns the converted tokens joined with a single space between each
 */
export function scriptToPhonetics(p: string): string {
  const tokens = splitWords(p);
  // fetch the entry list once so that every token is checked against the same list
  const dictionaryEntries = allEntries();
  const converted: string[] = [];
  for (const token of tokens) {
    converted.push(wordToPhonetics(token, dictionaryEntries));
  }
  return converted.join(" ");
}
/**
 * Finds the phonetic candidates for a single token.
 *
 * Candidates come from two places: dictionary entries whose Pashto matches the
 * token exactly, and inflected/conjugated forms of the given entries that
 * match the token.
 *
 * @param p - the token to convert
 * @param entries - the dictionary entries whose inflections are searched
 * @returns `p` unchanged when `isPashtoScript` rejects it or when nothing was
 * found; otherwise the distinct candidates joined with "/"
 */
function wordToPhonetics(p: string, entries: T.DictionaryEntry[]): string {
  if (!isPashtoScript(p)) {
    return p;
  }
  const fromExactEntries = dictionary
    .exactPashtoSearch(p)
    .map(({ f }) => removeFVarients(f));
  // TODO: also add directional prefix stuff
  const fromInflections = searchAllInflectionsCore(entries, p).flatMap(
    ({ forms }) => forms.flatMap(form => form.matches.map(match => match.ps.f)),
  );
  // the Set drops duplicates while keeping first-seen order (exact entries first)
  const candidates = Array.from(new Set([...fromExactEntries, ...fromInflections]));
  return candidates.length > 0 ? candidates.join("/") : p;
}
/**
 * Searches the inflections/conjugations of the given entries for forms whose
 * Pashto is exactly `searchValue`.
 *
 * @param allDocs - the dictionary entries to search through
 * @param searchValue - the Pashto text that a form has to equal
 * @returns one result (the entry plus its matching forms) for every entry that
 * produced at least one match, in the order the entries were given
 */
export function searchAllInflectionsCore(allDocs: T.DictionaryEntry[], searchValue: string): InflectionSearchResult[] {
  const searchStart = searchValue.slice(0, 2);
  const sharesStart = (ps: T.PsString) => ps.p.slice(0, 2) === searchStart;
  const isExactMatch = (ps: T.PsString) => ps.p === searchValue;
  const found: InflectionSearchResult[] = [];
  for (const entry of allDocs) {
    const kind = isNounAdjOrVerb(entry);
    if (!entry.c) {
      continue;
    }
    if (kind === "verb") {
      try {
        const complement = entry.l && entry.c.includes("comp.")
          ? dictionary.findOneByTs(entry.l)
          : undefined;
        const verbInfo = getVerbInfo(entry, complement);
        // pre-filter: only conjugate the verb when something in its verb info
        // starts with the same two characters as the search value
        if (searchPile(verbInfo as any, sharesStart).length === 0) {
          continue;
        }
        const conjugation = conjugateVerb(entry, complement);
        const forms = searchPile(conjugation as any, isExactMatch);
        if (forms.length > 0) {
          found.push({ entry, forms });
        }
      } catch (e) {
        // a verb that fails to conjugate is logged and skipped
        console.error(e);
        console.error("error inflecting", entry.p);
      }
      continue;
    }
    if (kind === "nounAdj") {
      const inflections = inflectWord(entry);
      if (!inflections) {
        continue;
      }
      const forms = searchPile(inflections as any, isExactMatch);
      if (forms.length > 0) {
        found.push({ entry, forms });
      }
    }
  }
  return found;
}
/**
 * Splits text into alternating runs of Pashto letters and of everything else
 * (spaces, punctuation, latin text, ...), keeping every character of the input.
 *
 * Each run is passed through `standardizePashto` before being returned.
 * Empty runs are never emitted: text that starts with a non-Pashto character
 * does not get an empty leading token, and empty input gives an empty array.
 *
 * @param p - the text to split
 * @returns the runs in their original order
 */
function splitWords(p: string): string[] {
  // built once per call instead of once per character
  const pashtoLetter = /[\u0621-\u065f\u0670-\u06d3\u06d5]/;
  const runs: string[] = [];
  let current = "";
  let currentIsPashto = true;
  for (const char of p) {
    const charIsPashto = pashtoLetter.test(char);
    if (charIsPashto !== currentIsPashto) {
      // the kind of character changed, so the run collected so far is complete
      if (current !== "") {
        runs.push(current);
      }
      current = "";
      currentIsPashto = charIsPashto;
    }
    current += char;
  }
  if (current !== "") {
    runs.push(current);
  }
  // explicit arrow so that map's index/array arguments are not passed along
  return runs.map(run => standardizePashto(run));
}

View File

@ -0,0 +1,62 @@
/**
* Copyright (c) 2021 lingdocs.com
*
* This source code is licensed under the GPL3 license found in the
* LICENSE file in the root directory of this source tree.
*
*/
import { FormEvent, useEffect, useRef, useState } from "react";
import { Helmet } from "react-helmet";
import { scriptToPhonetics } from "../lib/scriptToPhonetics";
// Inline style for the <pre> that shows the conversion result: long lines are
// wrapped (breaking inside words when needed) and horizontal overflow scrolls.
const preStyle: React.CSSProperties = {
  lineHeight: "1.5",
  whiteSpace: "pre-wrap",
  wordWrap: "break-word",
  overflowX: "auto",
};
/**
 * Screen with a form for converting Pashto script to phonetics.
 *
 * The user types or pastes text into the textarea; submitting the form runs
 * `scriptToPhonetics` on it and shows the output underneath.
 */
const ScriptToPhonetics = () => {
  const [text, setText] = useState<string>("");
  const [result, setResult] = useState<string>("");
  // handle of a conversion that has been scheduled but has not run yet
  const pendingConversion = useRef<ReturnType<typeof setTimeout> | undefined>(undefined);

  function cancelPendingConversion() {
    if (pendingConversion.current !== undefined) {
      clearTimeout(pendingConversion.current);
      pendingConversion.current = undefined;
    }
  }

  // drop a still-scheduled conversion when the screen goes away, so that
  // no state update is attempted on an unmounted component
  useEffect(() => {
    return () => {
      if (pendingConversion.current !== undefined) {
        clearTimeout(pendingConversion.current);
      }
    };
  }, []);

  function handleConversion(e: FormEvent<HTMLFormElement>) {
    e.preventDefault();
    // a new submit replaces a conversion that has not started yet
    cancelPendingConversion();
    setResult("Converting... Please wait...");
    // the conversion is synchronous; the short delay lets the waiting
    // message above get rendered before the work starts
    pendingConversion.current = setTimeout(() => {
      pendingConversion.current = undefined;
      setResult(scriptToPhonetics(text));
    }, 50);
  }

  return (
    <div className="width-limiter">
      <Helmet>
        <link rel="canonical" href="https://dictionary.lingdocs.com/script-to-phonetics" />
        <meta name="description" content="Convert Pashto Script to Phonetics" />
        <title>Script to Phonetics - LingDocs Pashto Dictionary</title>
      </Helmet>
      <h2>Script to Phonetics</h2>
      <form onSubmit={handleConversion}>
        <div className="form-group">
          <label htmlFor="pashto-text">Pashto Script</label>
          <textarea
            className="form-control"
            id="pashto-text"
            rows={4}
            value={text}
            onChange={e => setText(e.target.value)}
          />
        </div>
        <div>
          <button type="submit" className="btn btn-primary">
            <i className="fas fa-exchange-alt mr-2"/> Convert
          </button>
        </div>
        {result && <div className="mt-3">
          <label>Phonetics</label>
          <pre style={preStyle}>{result}</pre>
        </div>}
      </form>
    </div>
  );
};
export default ScriptToPhonetics;