Anki/pylib/anki/latex.py
Damien Elmes 616db33c0e refactor protobuf handling for split/import
In order to split backend.proto into a more manageable size, the protobuf
handling needed to be updated. This took more time than I would have
liked, as each language handles protobuf differently:

- The Python Protobuf code ignores "package" directives, and relies
solely on how the files are laid out on disk. While it would have been
nice to keep the generated files in a private subpackage, Protobuf gets
confused if the files are located in a location that does not match
their original .proto layout, so the old approach of storing them in
_backend/ will not work. They now clutter up pylib/anki instead. I'm
rather annoyed by that, but alternatives seem to be having to add an extra
level to the Protobuf path, making the other languages suffer, or trying
to hack around the issue by munging sys.modules.
- Protobufjs fails to expose packages if they don't start with a capital
letter, despite the fact that lowercase packages are the norm in most
languages :-( This required a patch to fix.
- Rust was the easiest, as Prost is relatively straightforward compared
to Google's tools.

The Protobuf files are now stored in /proto/anki, with a separate package
for each file. I've split backend.proto into a few files as a test, but
the majority of that work is still to come.

The Python Protobuf building is a bit of a hack at the moment, hard-coding
"proto" as the top level folder, but it seems to get the job done for now.

Also changed the workspace name, as there seems to be a number of Bazel
repos moving away from the more awkward reverse DNS naming style.
2021-07-10 19:17:05 +10:00

183 lines
5 KiB
Python

# Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
from __future__ import annotations
import html
import os
import re
from dataclasses import dataclass
from typing import Any, List, Optional, Tuple
import anki
import anki.backend_pb2 as _pb
from anki import hooks
from anki.models import NotetypeDict
from anki.template import TemplateRenderContext, TemplateRenderOutput
from anki.utils import call, isMac, namedtmp, tmpdir
pngCommands = [
["latex", "-interaction=nonstopmode", "tmp.tex"],
["dvipng", "-D", "200", "-T", "tight", "tmp.dvi", "-o", "tmp.png"],
]
svgCommands = [
["latex", "-interaction=nonstopmode", "tmp.tex"],
["dvisvgm", "--no-fonts", "--exact", "-Z", "2", "tmp.dvi", "-o", "tmp.svg"],
]
build = True # if off, use existing media but don't create new
# add standard tex install location to osx
if isMac:
os.environ["PATH"] += ":/usr/texbin:/Library/TeX/texbin"
@dataclass
class ExtractedLatex:
filename: str
latex_body: str
@dataclass
class ExtractedLatexOutput:
html: str
latex: List[ExtractedLatex]
@staticmethod
def from_proto(proto: _pb.ExtractLatexResponse) -> ExtractedLatexOutput:
return ExtractedLatexOutput(
html=proto.text,
latex=[
ExtractedLatex(filename=l.filename, latex_body=l.latex_body)
for l in proto.latex
],
)
def on_card_did_render(
output: TemplateRenderOutput, ctx: TemplateRenderContext
) -> None:
output.question_text = render_latex(
output.question_text, ctx.note_type(), ctx.col()
)
output.answer_text = render_latex(output.answer_text, ctx.note_type(), ctx.col())
def render_latex(
html: str, model: NotetypeDict, col: anki.collection.Collection
) -> str:
"Convert embedded latex tags in text to image links."
html, err = render_latex_returning_errors(html, model, col)
if err:
html += "\n".join(err)
return html
def render_latex_returning_errors(
html: str,
model: NotetypeDict,
col: anki.collection.Collection,
expand_clozes: bool = False,
) -> Tuple[str, List[str]]:
"""Returns (text, errors).
errors will be non-empty if LaTeX failed to render."""
svg = model.get("latexsvg", False)
header = model["latexPre"]
footer = model["latexPost"]
proto = col._backend.extract_latex(text=html, svg=svg, expand_clozes=expand_clozes)
out = ExtractedLatexOutput.from_proto(proto)
errors = []
html = out.html
for latex in out.latex:
# don't need to render?
if not build or col.media.have(latex.filename):
continue
err = _save_latex_image(col, latex, header, footer, svg)
if err is not None:
errors.append(err)
return html, errors
def _save_latex_image(
col: anki.collection.Collection,
extracted: ExtractedLatex,
header: str,
footer: str,
svg: bool,
) -> Optional[str]:
# add header/footer
latex = f"{header}\n{extracted.latex_body}\n{footer}"
# it's only really secure if run in a jail, but these are the most common
tmplatex = latex.replace("\\includegraphics", "")
for bad in (
"\\write18",
"\\readline",
"\\input",
"\\include",
"\\catcode",
"\\openout",
"\\write",
"\\loop",
"\\def",
"\\shipout",
):
# don't mind if the sequence is only part of a command
bad_re = f"\\{bad}[^a-zA-Z]"
if re.search(bad_re, tmplatex):
return col.tr.media_for_security_reasons_is_not(val=bad)
# commands to use
if svg:
latexCmds = svgCommands
ext = "svg"
else:
latexCmds = pngCommands
ext = "png"
# write into a temp file
log = open(namedtmp("latex_log.txt"), "w")
texpath = namedtmp("tmp.tex")
texfile = open(texpath, "w", encoding="utf8")
texfile.write(latex)
texfile.close()
oldcwd = os.getcwd()
png_or_svg = namedtmp(f"tmp.{ext}")
try:
# generate png/svg
os.chdir(tmpdir())
for latexCmd in latexCmds:
if call(latexCmd, stdout=log, stderr=log):
return _errMsg(col, latexCmd[0], texpath)
# add to media
with open(png_or_svg, "rb") as file:
data = file.read()
col.media.write_data(extracted.filename, data)
os.unlink(png_or_svg)
return None
finally:
os.chdir(oldcwd)
log.close()
def _errMsg(col: anki.collection.Collection, type: str, texpath: str) -> Any:
msg = f"{col.tr.media_error_executing(val=type)}<br>"
msg += f"{col.tr.media_generated_file(val=texpath)}<br>"
try:
with open(namedtmp("latex_log.txt", rm=False)) as f:
log = f.read()
if not log:
raise Exception()
msg += f"<small><pre>{html.escape(log)}</pre></small>"
except:
msg += col.tr.media_have_you_installed_latex_and_dvipngdvisvgm()
return msg
def setup_hook() -> None:
hooks.card_did_render.append(on_card_did_render)