add extract_latex to backend; use it for latex build

This commit is contained in:
Damien Elmes 2020-02-11 14:20:07 +10:00
parent 79c1732b00
commit 7f365faf3f
5 changed files with 80 additions and 70 deletions

View file

@ -24,7 +24,7 @@ message BackendInput {
int64 local_minutes_west = 22; int64 local_minutes_west = 22;
string strip_av_tags = 23; string strip_av_tags = 23;
ExtractAVTagsIn extract_av_tags = 24; ExtractAVTagsIn extract_av_tags = 24;
string expand_clozes_to_reveal_latex = 25; ExtractLatexIn extract_latex = 25;
AddFileToMediaFolderIn add_file_to_media_folder = 26; AddFileToMediaFolderIn add_file_to_media_folder = 26;
SyncMediaIn sync_media = 27; SyncMediaIn sync_media = 27;
Empty check_media = 28; Empty check_media = 28;
@ -42,7 +42,7 @@ message BackendOutput {
sint32 local_minutes_west = 22; sint32 local_minutes_west = 22;
string strip_av_tags = 23; string strip_av_tags = 23;
ExtractAVTagsOut extract_av_tags = 24; ExtractAVTagsOut extract_av_tags = 24;
string expand_clozes_to_reveal_latex = 25; ExtractLatexOut extract_latex = 25;
string add_file_to_media_folder = 26; string add_file_to_media_folder = 26;
Empty sync_media = 27; Empty sync_media = 27;
MediaCheckOut check_media = 28; MediaCheckOut check_media = 28;
@ -239,6 +239,21 @@ message TTSTag {
repeated string other_args = 5; repeated string other_args = 5;
} }
// Input for the extract_latex backend command.
message ExtractLatexIn {
// Text to run LaTeX extraction over; the Python caller passes the rendered HTML.
string text = 1;
// Forwarded unchanged to the Rust extract_latex(); the Python caller passes the
// note type's "latexsvg" setting. Presumably selects SVG rather than PNG
// filenames - confirm in the latex module.
bool svg = 2;
}
// Result of the extract_latex backend command.
message ExtractLatexOut {
// Text returned by the Rust extract_latex(); the Python wrapper exposes it as `html`.
string text = 1;
// The LaTeX items that were returned alongside the text.
repeated ExtractedLatex latex = 2;
}
// A single LaTeX item returned by extract_latex.
message ExtractedLatex {
// Media filename for the rendered image. The Python side skips the build when
// the media folder already has this file, and copies the build output to it.
string filename = 1;
// LaTeX source. The Python side wraps it in the note type's latexPre/latexPost
// header and footer before building.
string latex_body = 2;
}
message AddFileToMediaFolderIn { message AddFileToMediaFolderIn {
string desired_name = 1; string desired_name = 1;
bytes data = 2; bytes data = 2;

View file

@ -13,6 +13,7 @@ import anki
from anki import hooks from anki import hooks
from anki.lang import _ from anki.lang import _
from anki.models import NoteType from anki.models import NoteType
from anki.rsbackend import ExtractedLatex
from anki.template import TemplateRenderContext, TemplateRenderOutput from anki.template import TemplateRenderContext, TemplateRenderOutput
from anki.utils import call, checksum, isMac, namedtmp, stripHTML, tmpdir from anki.utils import call, checksum, isMac, namedtmp, stripHTML, tmpdir
@ -47,60 +48,27 @@ def on_card_did_render(output: TemplateRenderOutput, ctx: TemplateRenderContext)
def render_latex(html: str, model: NoteType, col: anki.storage._Collection,) -> str: def render_latex(html: str, model: NoteType, col: anki.storage._Collection,) -> str:
"Convert TEXT with embedded latex tags to image links." "Convert TEXT with embedded latex tags to image links."
for match in regexps["standard"].finditer(html): svg = model.get("latexsvg", False)
html = html.replace(match.group(), _imgLink(col, match.group(1), model)) header = model["latexPre"]
for match in regexps["expression"].finditer(html): footer = model["latexPost"]
html = html.replace(
match.group(), _imgLink(col, "$" + match.group(1) + "$", model) out = col.backend.extract_latex(html, svg)
) html = out.html
for match in regexps["math"].finditer(html):
html = html.replace( for latex in out.latex:
match.group(), # don't need to render?
_imgLink( if not build or col.media.have(latex.filename):
col, continue
"\\begin{displaymath}" + match.group(1) + "\\end{displaymath}",
model, err = _save_latex_image(col, latex, header, footer, svg)
), if err is not None:
) html += err
return html return html
def _save_latex_image(col: anki.storage._Collection, extracted: ExtractedLatex, header: str, footer: str, svg: bool) -> Optional[str]:
def _imgLink(col, latex: str, model: NoteType) -> str:
"Return an img link for LATEX, creating the image if necessary."
txt = _latexFromHtml(col, latex)
# the note type's "latexsvg" setting picks the image extension
if model.get("latexsvg", False):
ext = "svg"
else:
ext = "png"
# is there an existing file?
# (the filename is derived from a checksum of the cleaned-up LaTeX)
fname = "latex-%s.%s" % (checksum(txt.encode("utf8")), ext)
link = '<img class=latex src="%s">' % fname
if os.path.exists(fname):
return link
# building disabled?
# (the original markup is handed back wrapped in [latex] tags)
if not build:
return "[latex]%s[/latex]" % latex
# on build failure the error message is returned in place of the link
err = _buildImg(col, txt, fname, model)
if err:
return err
else:
return link
def _latexFromHtml(col, latex: str) -> str:
"Convert entities and fix newlines."
# col is not used in this body
# <br>, <br /> and <div> each become a newline
latex = re.sub("<br( /)?>|<div>", "\n", latex)
# stripHTML comes from anki.utils; presumably it drops the remaining tags and
# decodes entities - confirm there
latex = stripHTML(latex)
return latex
def _buildImg(col, latex: str, fname: str, model: NoteType) -> Optional[str]:
# add header/footer # add header/footer
latex = model["latexPre"] + "\n" + latex + "\n" + model["latexPost"] latex = header + "\n" + extracted.latex_body + "\n" + footer
# it's only really secure if run in a jail, but these are the most common # it's only really secure if run in a jail, but these are the most common
tmplatex = latex.replace("\\includegraphics", "") tmplatex = latex.replace("\\includegraphics", "")
for bad in ( for bad in (
@ -128,8 +96,8 @@ package in the LaTeX header instead."""
% bad % bad
) )
# commands to use? # commands to use
if model.get("latexsvg", False): if svg:
latexCmds = svgCommands latexCmds = svgCommands
ext = "svg" ext = "svg"
else: else:
@ -152,7 +120,7 @@ package in the LaTeX header instead."""
if call(latexCmd, stdout=log, stderr=log): if call(latexCmd, stdout=log, stderr=log):
return _errMsg(latexCmd[0], texpath) return _errMsg(latexCmd[0], texpath)
# add to media # add to media
shutil.copyfile(png, os.path.join(mdir, fname)) shutil.copyfile(png, os.path.join(mdir, extracted.filename))
return None return None
finally: finally:
os.chdir(oldcwd) os.chdir(oldcwd)

View file

@ -155,12 +155,6 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
) -> List[str]: ) -> List[str]:
l = [] l = []
model = self.col.models.get(mid) model = self.col.models.get(mid)
if model["type"] == MODEL_CLOZE and "{{c" in string:
# if the field has clozes in it, we'll need to expand the
# possibilities so we can render latex
strings = self.col.backend.expand_clozes_to_reveal_latex(string)
else:
strings = string
# handle latex # handle latex
string = render_latex(string, model, self.col) string = render_latex(string, model, self.col)
# extract filenames # extract filenames

View file

@ -127,6 +127,18 @@ MediaSyncProgress = pb.MediaSyncProgress
MediaCheckOutput = pb.MediaCheckOut MediaCheckOutput = pb.MediaCheckOut
# One LaTeX item returned by RustBackend.extract_latex(); copied field by field
# from the protobuf ExtractedLatex message.
@dataclass
class ExtractedLatex:
# media filename reported by the backend for this item
filename: str
# LaTeX source reported by the backend for this item
latex_body: str
# Return value of RustBackend.extract_latex().
@dataclass
class ExtractedLatexOutput:
# filled from the `text` field of the backend's ExtractLatexOut reply
html: str
# the extracted items, in the order the backend returned them
latex: List[ExtractedLatex]
class ProgressKind(enum.Enum): class ProgressKind(enum.Enum):
MediaSync = 0 MediaSync = 0
MediaCheck = 1 MediaCheck = 1
@ -268,10 +280,18 @@ class RustBackend:
return out.text, native_tags return out.text, native_tags
def expand_clozes_to_reveal_latex(self, text: str) -> str: def extract_latex(self, text: str, svg: bool) -> ExtractedLatexOutput:
return self._run_command( out = self._run_command(
pb.BackendInput(expand_clozes_to_reveal_latex=text) pb.BackendInput(extract_latex=pb.ExtractLatexIn(text=text, svg=svg))
).expand_clozes_to_reveal_latex ).extract_latex
return ExtractedLatexOutput(
html=out.text,
latex=[
ExtractedLatex(filename=l.filename, latex_body=l.latex_body)
for l in out.latex
],
)
def add_file_to_media_folder(self, desired_name: str, data: bytes) -> str: def add_file_to_media_folder(self, desired_name: str, data: bytes) -> str:
return self._run_command( return self._run_command(

View file

@ -4,8 +4,8 @@
use crate::backend_proto as pt; use crate::backend_proto as pt;
use crate::backend_proto::backend_input::Value; use crate::backend_proto::backend_input::Value;
use crate::backend_proto::{Empty, RenderedTemplateReplacement, SyncMediaIn}; use crate::backend_proto::{Empty, RenderedTemplateReplacement, SyncMediaIn};
use crate::cloze::expand_clozes_to_reveal_latex;
use crate::err::{AnkiError, NetworkErrorKind, Result, SyncErrorKind}; use crate::err::{AnkiError, NetworkErrorKind, Result, SyncErrorKind};
use crate::latex::{extract_latex, ExtractedLatex};
use crate::media::check::MediaChecker; use crate::media::check::MediaChecker;
use crate::media::sync::MediaSyncProgress; use crate::media::sync::MediaSyncProgress;
use crate::media::MediaManager; use crate::media::MediaManager;
@ -178,9 +178,7 @@ impl Backend {
} }
Value::StripAvTags(text) => OValue::StripAvTags(strip_av_tags(&text).into()), Value::StripAvTags(text) => OValue::StripAvTags(strip_av_tags(&text).into()),
Value::ExtractAvTags(input) => OValue::ExtractAvTags(self.extract_av_tags(input)), Value::ExtractAvTags(input) => OValue::ExtractAvTags(self.extract_av_tags(input)),
Value::ExpandClozesToRevealLatex(input) => { Value::ExtractLatex(input) => OValue::ExtractLatex(self.extract_latex(input)),
OValue::ExpandClozesToRevealLatex(expand_clozes_to_reveal_latex(&input))
}
Value::AddFileToMediaFolder(input) => { Value::AddFileToMediaFolder(input) => {
OValue::AddFileToMediaFolder(self.add_file_to_media_folder(input)?) OValue::AddFileToMediaFolder(self.add_file_to_media_folder(input)?)
} }
@ -315,6 +313,21 @@ impl Backend {
} }
} }
/// Run LaTeX extraction over `input.text` and package the result as a protobuf reply.
///
/// Delegates to `crate::latex::extract_latex`, passing `input.svg` through unchanged,
/// then copies each extracted item's `fname`/`latex` into the reply's
/// `filename`/`latex_body` fields.
fn extract_latex(&self, input: pt::ExtractLatexIn) -> pt::ExtractLatexOut {
    let (text, extracted) = extract_latex(&input.text, input.svg);

    // Field-by-field conversion from the latex module's type to the protobuf type.
    let to_proto = |item: ExtractedLatex| pt::ExtractedLatex {
        filename: item.fname,
        latex_body: item.latex,
    };
    let latex = extracted.into_iter().map(to_proto).collect();

    pt::ExtractLatexOut { text, latex }
}
fn add_file_to_media_folder(&mut self, input: pt::AddFileToMediaFolderIn) -> Result<String> { fn add_file_to_media_folder(&mut self, input: pt::AddFileToMediaFolderIn) -> Result<String> {
let mgr = MediaManager::new(&self.media_folder, &self.media_db)?; let mgr = MediaManager::new(&self.media_folder, &self.media_db)?;
let mut ctx = mgr.dbctx(); let mut ctx = mgr.dbctx();