add extract_latex to backend; use it for latex build

This commit is contained in:
Damien Elmes 2020-02-11 14:20:07 +10:00
parent 79c1732b00
commit 7f365faf3f
5 changed files with 80 additions and 70 deletions

View file

@ -24,7 +24,7 @@ message BackendInput {
int64 local_minutes_west = 22;
string strip_av_tags = 23;
ExtractAVTagsIn extract_av_tags = 24;
string expand_clozes_to_reveal_latex = 25;
ExtractLatexIn extract_latex = 25;
AddFileToMediaFolderIn add_file_to_media_folder = 26;
SyncMediaIn sync_media = 27;
Empty check_media = 28;
@ -42,7 +42,7 @@ message BackendOutput {
sint32 local_minutes_west = 22;
string strip_av_tags = 23;
ExtractAVTagsOut extract_av_tags = 24;
string expand_clozes_to_reveal_latex = 25;
ExtractLatexOut extract_latex = 25;
string add_file_to_media_folder = 26;
Empty sync_media = 27;
MediaCheckOut check_media = 28;
@ -239,6 +239,21 @@ message TTSTag {
repeated string other_args = 5;
}
// Input for the extract_latex backend command.
message ExtractLatexIn {
// The text to process; the Python caller passes the rendered card HTML.
string text = 1;
// Forwarded unchanged to the backend; the Python caller takes it from the
// note type's "latexsvg" setting.
bool svg = 2;
}
// Output of the extract_latex backend command.
message ExtractLatexOut {
// The processed text; the Python wrapper exposes it as `html`.
string text = 1;
// The LaTeX entries returned alongside the text (presumably one per
// snippet found in the input -- confirm against the backend).
repeated ExtractedLatex latex = 2;
}
// A single LaTeX entry returned by extract_latex.
message ExtractedLatex {
// Media filename for the rendered image; the Python caller checks the media
// folder for it and copies the built image to this name.
string filename = 1;
// LaTeX source; the Python caller wraps it in the note type's header and
// footer before building.
string latex_body = 2;
}
message AddFileToMediaFolderIn {
string desired_name = 1;
bytes data = 2;

View file

@ -13,6 +13,7 @@ import anki
from anki import hooks
from anki.lang import _
from anki.models import NoteType
from anki.rsbackend import ExtractedLatex
from anki.template import TemplateRenderContext, TemplateRenderOutput
from anki.utils import call, checksum, isMac, namedtmp, stripHTML, tmpdir
@ -47,60 +48,27 @@ def on_card_did_render(output: TemplateRenderOutput, ctx: TemplateRenderContext)
def render_latex(html: str, model: NoteType, col: anki.storage._Collection,) -> str:
"Convert TEXT with embedded latex tags to image links."
# each "standard" match is replaced by the result of _imgLink on its first group
for match in regexps["standard"].finditer(html):
html = html.replace(match.group(), _imgLink(col, match.group(1), model))
# "expression" matches are wrapped in $...$ before being passed to _imgLink
for match in regexps["expression"].finditer(html):
html = html.replace(
match.group(), _imgLink(col, "$" + match.group(1) + "$", model)
)
# "math" matches are wrapped in a displaymath environment
for match in regexps["math"].finditer(html):
html = html.replace(
match.group(),
_imgLink(
col,
"\\begin{displaymath}" + match.group(1) + "\\end{displaymath}",
model,
),
)
# rendering options come from the note type
svg = model.get("latexsvg", False)
header = model["latexPre"]
footer = model["latexPost"]
# the backend returns the processed html plus the latex entries to build
out = col.backend.extract_latex(html, svg)
html = out.html
for latex in out.latex:
# don't need to render?
# (skipped when building is disabled or col.media.have() reports the file)
if not build or col.media.have(latex.filename):
continue
err = _save_latex_image(col, latex, header, footer, svg)
# a non-None result is an error message; append it to the returned html
if err is not None:
html += err
return html
def _imgLink(col, latex: str, model: NoteType) -> str:
    "Return an img link for LATEX, creating if necessary."
    source = _latexFromHtml(col, latex)
    extension = "svg" if model.get("latexsvg", False) else "png"
    # the image name is built from a checksum of the cleaned-up latex
    fname = "latex-%s.%s" % (checksum(source.encode("utf8")), extension)
    link = '<img class=latex src="%s">' % fname
    # reuse an image that is already present
    if os.path.exists(fname):
        return link
    # building disabled: hand back the original markup
    if not build:
        return "[latex]%s[/latex]" % latex
    # otherwise build the image; a truthy result is an error message
    failure = _buildImg(col, source, fname, model)
    return failure if failure else link
def _latexFromHtml(col, latex: str) -> str:
    "Turn <br>, <br /> and <div> into newlines, then pass the text through stripHTML."
    with_newlines = re.sub("<br( /)?>|<div>", "\n", latex)
    return stripHTML(with_newlines)
def _buildImg(col, latex: str, fname: str, model: NoteType) -> Optional[str]:
def _save_latex_image(col: anki.storage._Collection, extracted: ExtractedLatex, header: str, footer: str, svg: bool) -> Optional[str]:
# add header/footer
latex = model["latexPre"] + "\n" + latex + "\n" + model["latexPost"]
latex = header + "\n" + extracted.latex_body + "\n" + footer
# it's only really secure if run in a jail, but these are the most common
tmplatex = latex.replace("\\includegraphics", "")
for bad in (
@ -128,8 +96,8 @@ package in the LaTeX header instead."""
% bad
)
# commands to use?
if model.get("latexsvg", False):
# commands to use
if svg:
latexCmds = svgCommands
ext = "svg"
else:
@ -152,7 +120,7 @@ package in the LaTeX header instead."""
if call(latexCmd, stdout=log, stderr=log):
return _errMsg(latexCmd[0], texpath)
# add to media
shutil.copyfile(png, os.path.join(mdir, fname))
shutil.copyfile(png, os.path.join(mdir, extracted.filename))
return None
finally:
os.chdir(oldcwd)

View file

@ -155,12 +155,6 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
) -> List[str]:
l = []
model = self.col.models.get(mid)
if model["type"] == MODEL_CLOZE and "{{c" in string:
# if the field has clozes in it, we'll need to expand the
# possibilities so we can render latex
strings = self.col.backend.expand_clozes_to_reveal_latex(string)
else:
strings = string
# handle latex
string = render_latex(string, model, self.col)
# extract filenames

View file

@ -127,6 +127,18 @@ MediaSyncProgress = pb.MediaSyncProgress
MediaCheckOutput = pb.MediaCheckOut
# One LaTeX entry returned by the backend's extract_latex command.
# filename: media filename the rendered image is stored under.
# latex_body: LaTeX source, before the note type's header/footer are added.
@dataclass
class ExtractedLatex:
filename: str
latex_body: str
# Result of RustBackend.extract_latex().
# html: the processed text returned by the backend (its `text` field).
# latex: the ExtractedLatex entries returned alongside the text.
@dataclass
class ExtractedLatexOutput:
html: str
latex: List[ExtractedLatex]
class ProgressKind(enum.Enum):
MediaSync = 0
MediaCheck = 1
@ -268,10 +280,18 @@ class RustBackend:
return out.text, native_tags
def expand_clozes_to_reveal_latex(self, text: str) -> str:
    """Send TEXT to the backend's expand_clozes_to_reveal_latex command and
    return the string it replies with."""
    request = pb.BackendInput(expand_clozes_to_reveal_latex=text)
    reply = self._run_command(request)
    return reply.expand_clozes_to_reveal_latex
def extract_latex(self, text: str, svg: bool) -> ExtractedLatexOutput:
    """Run the backend's extract_latex command on TEXT.

    The protobuf reply is converted into plain dataclasses: the reply's
    `text` becomes `html`, and each reply entry becomes an ExtractedLatex.
    """
    request = pb.BackendInput(extract_latex=pb.ExtractLatexIn(text=text, svg=svg))
    reply = self._run_command(request).extract_latex
    html = reply.text
    entries = []
    for item in reply.latex:
        entries.append(
            ExtractedLatex(filename=item.filename, latex_body=item.latex_body)
        )
    return ExtractedLatexOutput(html=html, latex=entries)
def add_file_to_media_folder(self, desired_name: str, data: bytes) -> str:
return self._run_command(

View file

@ -4,8 +4,8 @@
use crate::backend_proto as pt;
use crate::backend_proto::backend_input::Value;
use crate::backend_proto::{Empty, RenderedTemplateReplacement, SyncMediaIn};
use crate::cloze::expand_clozes_to_reveal_latex;
use crate::err::{AnkiError, NetworkErrorKind, Result, SyncErrorKind};
use crate::latex::{extract_latex, ExtractedLatex};
use crate::media::check::MediaChecker;
use crate::media::sync::MediaSyncProgress;
use crate::media::MediaManager;
@ -178,9 +178,7 @@ impl Backend {
}
Value::StripAvTags(text) => OValue::StripAvTags(strip_av_tags(&text).into()),
Value::ExtractAvTags(input) => OValue::ExtractAvTags(self.extract_av_tags(input)),
Value::ExpandClozesToRevealLatex(input) => {
OValue::ExpandClozesToRevealLatex(expand_clozes_to_reveal_latex(&input))
}
Value::ExtractLatex(input) => OValue::ExtractLatex(self.extract_latex(input)),
Value::AddFileToMediaFolder(input) => {
OValue::AddFileToMediaFolder(self.add_file_to_media_folder(input)?)
}
@ -315,6 +313,21 @@ impl Backend {
}
}
/// Handles the `ExtractLatex` backend command.
///
/// Passes the request's text and `svg` flag to the crate-level
/// `extract_latex` function, then repackages the returned text and
/// extracted entries as the protobuf reply.
fn extract_latex(&self, input: pt::ExtractLatexIn) -> pt::ExtractLatexOut {
    let (text, found) = extract_latex(&input.text, input.svg);
    // Convert each internal entry into its protobuf counterpart.
    let latex = found
        .into_iter()
        .map(|item: ExtractedLatex| pt::ExtractedLatex {
            filename: item.fname,
            latex_body: item.latex,
        })
        .collect();
    pt::ExtractLatexOut { text, latex }
}
fn add_file_to_media_folder(&mut self, input: pt::AddFileToMediaFolderIn) -> Result<String> {
let mgr = MediaManager::new(&self.media_folder, &self.media_db)?;
let mut ctx = mgr.dbctx();