mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 06:22:22 -04:00
Document newly found bug in _removeFormattingFromMathjax
Also adds some comments I wrote to help me understand what's going on in the code. I hope to fix this bug myself, but I think it might be beyond what can be done with Python regexes and might require writing a proper parser. So, as step 1, I'm adding a couple of comments explaining that the bug exists and how to reproduce it.
This commit is contained in:
parent
59ce08bc4e
commit
331781cf45
3 changed files with 53 additions and 1 deletions
|
@ -4,6 +4,9 @@ from typing import Any, Callable, Dict, Pattern
|
||||||
from anki.hooks import runFilter
|
from anki.hooks import runFilter
|
||||||
from anki.utils import stripHTML, stripHTMLMedia
|
from anki.utils import stripHTML, stripHTMLMedia
|
||||||
|
|
||||||
|
# The (?si) flags make the regex match case-insensitively and make . match any
|
||||||
|
# character including newlines.
|
||||||
|
# See: https://docs.python.org/3/howto/regex.html#compilation-flags
|
||||||
clozeReg = r"(?si)\{\{(c)%s::(.*?)(::(.*?))?\}\}"
|
clozeReg = r"(?si)\{\{(c)%s::(.*?)(::(.*?))?\}\}"
|
||||||
|
|
||||||
modifiers: Dict[str, Callable] = {}
|
modifiers: Dict[str, Callable] = {}
|
||||||
|
@ -34,6 +37,7 @@ def get_or_attr(obj, name, default=None) -> Any:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Template:
|
class Template:
|
||||||
# The regular expression used to find a #section
|
# The regular expression used to find a #section
|
||||||
section_re: Pattern = None
|
section_re: Pattern = None
|
||||||
|
@ -197,6 +201,7 @@ class Template:
|
||||||
def clozeText(self, txt, ord, type) -> str:
|
def clozeText(self, txt, ord, type) -> str:
|
||||||
reg = clozeReg
|
reg = clozeReg
|
||||||
if not re.search(reg%ord, txt):
|
if not re.search(reg%ord, txt):
|
||||||
|
# No Cloze deletion was found in txt.
|
||||||
return ""
|
return ""
|
||||||
txt = self._removeFormattingFromMathjax(txt, ord)
|
txt = self._removeFormattingFromMathjax(txt, ord)
|
||||||
def repl(m):
|
def repl(m):
|
||||||
|
@ -216,13 +221,31 @@ class Template:
|
||||||
# and display other clozes normally
|
# and display other clozes normally
|
||||||
return re.sub(reg%r"\d+", "\\2", txt)
|
return re.sub(reg%r"\d+", "\\2", txt)
|
||||||
|
|
||||||
# look for clozes wrapped in mathjax, and change {{cx to {{Cx
|
|
||||||
def _removeFormattingFromMathjax(self, txt, ord) -> str:
|
def _removeFormattingFromMathjax(self, txt, ord) -> str:
|
||||||
|
"""Marks all clozes within MathJax to prevent formatting them.
|
||||||
|
|
||||||
|
Active Cloze deletions within MathJax should not be wrapped inside
|
||||||
|
a Cloze <span>, as that would interfere with MathJax.
|
||||||
|
|
||||||
|
This method finds all Cloze deletions numbered `ord` in `txt` which are
|
||||||
|
inside MathJax inline or display formulas, and replaces their opening
|
||||||
|
'{{c123' with a '{{C123'. The clozeText method interprets the upper-case
|
||||||
|
C as "don't wrap this Cloze in a <span>".
|
||||||
|
"""
|
||||||
|
# TODO: There is a bug in this method.
|
||||||
|
# Say txt = r'\(a\) {{c1::b}} \[ {{c1::c}} \]', ord = 1.
|
||||||
|
#
|
||||||
|
# This method should return '\(a\) {{c1::b}} \[ {{C1::c}} \]',
|
||||||
|
# since the {{c1::c}} occurs within a MathJax display formula.
|
||||||
|
# However, it returns '\(a\) {{c1::b}} \[ {{c1::c}} \]'.
|
||||||
|
# This causes the Cloze within the MathJax display formula
|
||||||
|
# to be erroneously formatted with a <span>.
|
||||||
opening = ["\\(", "\\["]
|
opening = ["\\(", "\\["]
|
||||||
closing = ["\\)", "\\]"]
|
closing = ["\\)", "\\]"]
|
||||||
# flags in middle of expression deprecated
|
# flags in middle of expression deprecated
|
||||||
creg = clozeReg.replace("(?si)", "")
|
creg = clozeReg.replace("(?si)", "")
|
||||||
regex = r"(?si)(\\[([])(.*?)"+(creg%ord)+r"(.*?)(\\[\])])"
|
regex = r"(?si)(\\[([])(.*?)"+(creg%ord)+r"(.*?)(\\[\])])"
|
||||||
|
|
||||||
def repl(m):
|
def repl(m):
|
||||||
enclosed = True
|
enclosed = True
|
||||||
for s in closing:
|
for s in closing:
|
||||||
|
|
|
@ -199,6 +199,19 @@ def test_cloze_mathjax():
|
||||||
assert "class=cloze" in f.cards()[3].q()
|
assert "class=cloze" in f.cards()[3].q()
|
||||||
assert "class=cloze" in f.cards()[4].q()
|
assert "class=cloze" in f.cards()[4].q()
|
||||||
|
|
||||||
|
def test_cloze_mathjax_bug():
|
||||||
|
d = getEmptyCol()
|
||||||
|
d.models.setCurrent(d.models.byName("Cloze"))
|
||||||
|
|
||||||
|
f = d.newNote()
|
||||||
|
f['Text'] = r'\(a\) {{c1::b}} \[ {{c1::c}} \]'
|
||||||
|
assert d.addNote(f)
|
||||||
|
assert len(f.cards()) == 1
|
||||||
|
|
||||||
|
# TODO: The following assertion should work, but currently fails due
|
||||||
|
# to a bug in _removeFormattingFromMathjax.
|
||||||
|
# assert f.cards()[0].q() == r'\(a\) <span class=cloze>[...]</span> \[ [...] \]'
|
||||||
|
|
||||||
def test_chained_mods():
|
def test_chained_mods():
|
||||||
d = getEmptyCol()
|
d = getEmptyCol()
|
||||||
d.models.setCurrent(d.models.byName("Cloze"))
|
d.models.setCurrent(d.models.byName("Cloze"))
|
||||||
|
|
16
tests/test_template.py
Normal file
16
tests/test_template.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
from anki.template import Template
|
||||||
|
|
||||||
|
|
||||||
|
def test_remove_formatting_from_mathjax():
|
||||||
|
t = Template('')
|
||||||
|
assert t._removeFormattingFromMathjax(r'\(2^{{c3::2}}\)', 3) == r'\(2^{{C3::2}}\)'
|
||||||
|
|
||||||
|
txt = (r'{{c1::ok}} \(2^2\) {{c2::not ok}} \(2^{{c3::2}}\) \(x^3\) '
|
||||||
|
r'{{c4::blah}} {{c5::text with \(x^2\) jax}}')
|
||||||
|
# Cloze 2 is not in MathJax, so it should not get protected against
|
||||||
|
# formatting.
|
||||||
|
assert t._removeFormattingFromMathjax(txt, 2) == txt
|
||||||
|
|
||||||
|
# TODO: r'\(a\) {{c1::b}} \[ {{c1::c}} \]', ord=1 should return
|
||||||
|
# r'\(a\) {{c1::b}} \[ {{C1::c}} \]', but actually fails to mark the cloze
|
||||||
|
# as not-to-be-formatted.
|
Loading…
Reference in a new issue