Keep the LaTeX text as unicode until it has been concatenated with the preamble

This commit is contained in:
Damien Elmes 2011-10-29 21:36:29 +09:00
parent 82f9d484da
commit e99e7baa3c

View file

@ -46,12 +46,12 @@ def mungeQA(html, type, fields, model, data, deck):
def _imgLink(deck, latex, model): def _imgLink(deck, latex, model):
"Return an img link for LATEX, creating if necesssary." "Return an img link for LATEX, creating if necesssary."
txt = _latexFromHtml(deck, latex) txt = _latexFromHtml(deck, latex)
fname = "latex-%s.png" % checksum(txt) fname = "latex-%s.png" % checksum(txt.encode("utf8"))
link = '<img src="%s">' % fname link = '<img src="%s">' % fname
if os.path.exists(fname): if os.path.exists(fname):
return link return link
elif not build: elif not build:
return "[latex]"+latex+"[/latex]" return u"[latex]%s[/latex]" % latex
else: else:
err = _buildImg(deck, txt, fname, model) err = _buildImg(deck, txt, fname, model)
if err: if err:
@ -60,20 +60,20 @@ def _imgLink(deck, latex, model):
return link return link
def _latexFromHtml(deck, latex):
    """Convert entities and fix newlines.

    Named HTML entities found in ``entitydefs`` are replaced by their
    definitions, ``<br>`` / ``<br />`` become newlines, and the result is
    passed through ``stripHTML``.  The text is returned without being
    encoded; ``deck`` is accepted but not used here.
    """
    def _decode(match):
        # Names missing from entitydefs are left exactly as written.
        return entitydefs.get(match.group(1), match.group())

    # Decode every entity in one pass.  Replacing them one at a time on the
    # already-modified string decoded text twice: "&amp;lt; &lt;" turned
    # into "< <" instead of "&lt; <".  Digits are allowed after the first
    # letter so names such as "sup2" or "frac12" are recognised too.
    # NOTE(review): on Python 2 the entitydefs values are presumably Latin-1
    # byte strings -- confirm they combine safely with unicode input.
    entity = re.compile("&([a-z][a-z0-9]*);", re.IGNORECASE)
    latex = entity.sub(_decode, latex)
    latex = re.sub("<br( /)?>", "\n", latex)
    # NOTE(review): tags are stripped after entities are decoded, so a decoded
    # "<" may be seen as markup by stripHTML -- confirm this order is intended.
    latex = stripHTML(latex)
    return latex
def _buildImg(deck, latex, fname, model): def _buildImg(deck, latex, fname, model):
# add header/footer # add header/footer & convert to utf8
latex = (model["latexPre"] + "\n" + latex = (model["latexPre"] + "\n" +
latex + "\n" + latex + "\n" +
model["latexPost"]) model["latexPost"])
latex = latex.encode("utf8")
# it's only really secure if run in a jail, but these are the most common # it's only really secure if run in a jail, but these are the most common
for bad in ("write18", "\\readline", "\\input", "\\include", "\\catcode", for bad in ("write18", "\\readline", "\\input", "\\include", "\\catcode",
"\\openout", "\\write", "\\loop", "\\def", "\\shipout"): "\\openout", "\\write", "\\loop", "\\def", "\\shipout"):