rough try of text conversion thing
This commit is contained in:
parent
27141ecf90
commit
40641faf7a
|
@ -81,6 +81,7 @@ import { searchAllInflections } from "./lib/search-all-inflections";
|
|||
import {
|
||||
addToWordlist,
|
||||
} from "./lib/wordlist-database";
|
||||
import ScriptToPhonetics from "./screens/ScriptToPhonetics";
|
||||
|
||||
// to allow Mousetrap key combos even when input fields are in focus
|
||||
Mousetrap.prototype.stopCallback = function () {
|
||||
|
@ -97,7 +98,7 @@ if (prod) {
|
|||
|
||||
// Paths treated as possible landing pages. Each path is listed once;
// "/script-to-phonetics" is the path of the script-to-phonetics screen.
// NOTE(review): how this list is consumed is not visible in this hunk.
const possibleLandingPages = [
    "/", "/about", "/settings", "/word", "/account", "/new-entries", "/share-target", "/phrase-builder",
    "/privacy", "/script-to-phonetics",
];
|
||||
const editorOnlyPages = [
|
||||
"/edit", "/review-tasks",
|
||||
|
@ -656,6 +657,9 @@ class App extends Component<RouteComponentProps, State> {
|
|||
loadUser={this.handleLoadUser}
|
||||
/>
|
||||
</Route>
|
||||
<Route path="/script-to-phonetics">
|
||||
<ScriptToPhonetics />
|
||||
</Route>
|
||||
{this.state.user?.level === "editor" && <Route path="/edit">
|
||||
<EntryEditor
|
||||
isolatedEntry={this.state.isolatedEntry}
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
import { conjugateVerb, getVerbInfo, inflectWord, isNounAdjOrVerb, isPashtoScript, removeFVarients, standardizePashto } from "@lingdocs/ps-react";
|
||||
import { dictionary, allEntries } from "./dictionary";
|
||||
import {
|
||||
Types as T,
|
||||
} from "@lingdocs/ps-react";
|
||||
import {
|
||||
InflectionSearchResult,
|
||||
} from "../types/dictionary-types";
|
||||
import { searchPile } from "./search-pile";
|
||||
|
||||
/**
 * Converts Pashto-script text to phonetics.
 *
 * The text is cut into segments by `splitWords`, every segment is passed to
 * `wordToPhonetics` together with the entry list from `allEntries()`, and the
 * converted segments are concatenated back together in their original order.
 *
 * @param p - the text to convert
 * @returns the converted text; segments that `wordToPhonetics` hands back
 * unchanged (e.g. the whitespace and punctuation between words) appear as-is
 */
export function scriptToPhonetics(p: string): string {
    const segments = splitWords(p);
    const entries = allEntries();
    // The segments already include the runs of whitespace/punctuation that sat
    // between the words, so they are joined with no separator. Joining with " "
    // would turn every single space into three and pad punctuation with spaces.
    return segments
        .map((segment) => wordToPhonetics(segment, entries))
        .join("");
}
|
||||
|
||||
|
||||
/**
 * Finds the phonetics for a single segment of text.
 *
 * A segment that `isPashtoScript` rejects is returned untouched. Otherwise the
 * candidates are collected from two places:
 *  - exact dictionary matches (`dictionary.exactPashtoSearch`), using each
 *    entry's `f` passed through `removeFVarients`
 *  - the matches reported by `searchAllInflectionsCore` over `entries`
 *
 * @param p - the segment to convert
 * @param entries - dictionary entries searched for matching inflected forms
 * @returns the distinct candidates joined with "/", or `p` itself when there
 * are none
 */
function wordToPhonetics(p: string, entries: T.DictionaryEntry[]): string {
    if (!isPashtoScript(p)) {
        return p;
    }
    const fromEntries = dictionary
        .exactPashtoSearch(p)
        .map((entry) => removeFVarients(entry.f));
    // TODO: also add directional prefix stuff
    const fromInflections = searchAllInflectionsCore(entries, p)
        .flatMap((result) => result.forms)
        .flatMap((form) => form.matches.map((match) => match.ps.f));
    // The Set removes duplicates while keeping first-seen order
    // (dictionary matches first, then inflection matches).
    const possibilities = [...new Set([...fromEntries, ...fromInflections])];
    return possibilities.length === 0 ? p : possibilities.join("/");
}
|
||||
|
||||
/**
 * Scans dictionary entries for inflected or conjugated forms whose Pashto
 * script is exactly `searchValue`.
 *
 * Only entries that have a `c` value and that `isNounAdjOrVerb` classifies as
 * "verb" or "nounAdj" are examined:
 *  - verbs: the result of `getVerbInfo` is first searched for any form sharing
 *    the first two characters of `searchValue`; only when that finds something
 *    is the verb conjugated and searched for an exact match
 *  - nouns/adjectives: the result of `inflectWord` is searched for an exact match
 *
 * An exception raised while handling a verb is logged with `console.error` and
 * that entry is skipped.
 *
 * @param allDocs - the entries to scan
 * @param searchValue - the Pashto-script form to look for
 * @returns one `{ entry, forms }` item per entry with at least one matching
 * form, in the order the entries appear in `allDocs`
 */
export function searchAllInflectionsCore(allDocs: T.DictionaryEntry[], searchValue: string): InflectionSearchResult[] {
    const searchPrefix = searchValue.slice(0, 2);
    const sharesPrefix = (ps: T.PsString) => ps.p.slice(0, 2) === searchPrefix;
    const isExactMatch = (ps: T.PsString) => ps.p === searchValue;
    const found: InflectionSearchResult[] = [];
    for (const entry of allDocs) {
        const wordType = isNounAdjOrVerb(entry);
        if (!entry.c) {
            continue;
        }
        if (wordType === "verb") {
            try {
                const complement = (entry.l && entry.c.includes("comp."))
                    ? dictionary.findOneByTs(entry.l)
                    : undefined;
                const verbInfo = getVerbInfo(entry, complement);
                // pre-check on the verb info; presumably cheaper than building
                // the whole conjugation for every verb in the dictionary
                if (searchPile(verbInfo as any, sharesPrefix).length === 0) {
                    continue;
                }
                const conjugation = conjugateVerb(entry, complement);
                const forms = searchPile(conjugation as any, isExactMatch);
                if (forms.length) {
                    found.push({ entry, forms });
                }
            } catch (e) {
                console.error(e);
                console.error("error inflecting", entry.p);
            }
            continue;
        }
        if (wordType === "nounAdj") {
            const inflections = inflectWord(entry);
            if (!inflections) {
                continue;
            }
            const forms = searchPile(inflections as any, isExactMatch);
            if (forms.length) {
                found.push({ entry, forms });
            }
        }
    }
    return found;
}
|
||||
|
||||
/**
 * Cuts text into consecutive segments, alternating between runs of
 * Pashto-script characters and runs of everything else (spaces, punctuation,
 * Latin text, ...). Every character of the input lands in exactly one segment
 * and the segments keep the input order. No empty segments are produced, so
 * empty input gives an empty list.
 *
 * @param p - the text to split
 * @returns the segments, each passed through `standardizePashto`
 */
function splitWords(p: string): string[] {
    // the characters counted as Pashto script
    const pashtoChar = /[\u0621-\u065f\u0670-\u06d3\u06d5]/;
    const segments: string[] = [];
    let current = "";
    let currentIsPashto = false;
    for (const char of p) {
        const charIsPashto = pashtoChar.test(char);
        // close the running segment when switching between Pashto and non-Pashto
        if (current !== "" && charIsPashto !== currentIsPashto) {
            segments.push(current);
            current = "";
        }
        current += char;
        currentIsPashto = charIsPashto;
    }
    if (current !== "") {
        segments.push(current);
    }
    return segments.map(standardizePashto);
}
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright (c) 2021 lingdocs.com
|
||||
*
|
||||
* This source code is licensed under the GPL3 license found in the
|
||||
* LICENSE file in the root directory of this source tree.
|
||||
*
|
||||
*/
|
||||
|
||||
import { FormEvent, useState } from "react";
|
||||
import { Helmet } from "react-helmet";
|
||||
import { scriptToPhonetics } from "../lib/scriptToPhonetics";
|
||||
|
||||
// Inline style for the <pre> element that displays the conversion result.
const preStyle: React.CSSProperties = {
    overflowX: "auto",
    whiteSpace: "pre-wrap",
    wordWrap: "break-word",
    lineHeight: "1.5",
};
|
||||
|
||||
/**
 * Screen with a form for converting Pashto script to phonetics.
 *
 * The textarea content is held in `text`. Submitting the form runs
 * `scriptToPhonetics` on it and shows the outcome in a <pre> below the button;
 * the result area is only rendered once `result` is non-empty.
 */
const ScriptToPhonetics = () => {
    const [text, setText] = useState<string>("");
    const [result, setResult] = useState<string>("");

    function handleConversion(e: FormEvent<HTMLFormElement>) {
        // keep the browser from submitting the form
        e.preventDefault();
        setResult("Converting... Please wait...");
        // NOTE(review): presumably deferred so the message above can be shown
        // before the conversion runs - confirm
        setTimeout(() => {
            setResult(scriptToPhonetics(text));
        }, 50);
    }

    return <div className="width-limiter">
        <Helmet>
            <link rel="canonical" href="https://dictionary.lingdocs.com/script-to-phonetics" />
            <meta name="description" content="Convert Pashto Script to Phonetics" />
            <title>Script to Phonetics - LingDocs Pashto Dictionary</title>
        </Helmet>
        <h2>Script to Phonetics</h2>
        <form onSubmit={handleConversion}>
            <div className="form-group">
                <label htmlFor="pashto-text">Pashto Script</label>
                <textarea
                    className="form-control"
                    id="pashto-text"
                    rows={4}
                    value={text}
                    onChange={e => setText(e.target.value)}
                />
            </div>
            <div>
                <button type="submit" className="btn btn-primary">
                    <i className="fas fa-exchange-alt mr-2"/> Convert
                </button>
            </div>
            {result && <div className="mt-3">
                <label>Phonetics</label>
                <pre style={preStyle}>{result}</pre>
            </div>}
        </form>
    </div>
};

export default ScriptToPhonetics;
|
Loading…
Reference in New Issue