From 331781cf453d0e6511e2bf340829eabe1ab6b56f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Pokorn=C3=BD=20=28Rai=29?= Date: Sun, 22 Dec 2019 04:28:29 +0100 Subject: [PATCH] Document newly found bug in _removeFormattingFromMathjax Also adds some comments I wrote to help me understand what's going on in the code. I hope to fix this bug myself, but I think it might be beyond what you can do with Python regexes and might require writing a proper parser. So, as step 1, I'm adding in a couple comments explaining that the bug exists and how to reproduce it. --- anki/template/template.py | 25 ++++++++++++++++++++++++- tests/test_models.py | 13 +++++++++++++ tests/test_template.py | 16 ++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 tests/test_template.py diff --git a/anki/template/template.py b/anki/template/template.py index e5f780df6..baa2c34dd 100644 --- a/anki/template/template.py +++ b/anki/template/template.py @@ -4,6 +4,9 @@ from typing import Any, Callable, Dict, Pattern from anki.hooks import runFilter from anki.utils import stripHTML, stripHTMLMedia +# The (?si) flags make the regex match case-insensitively and make . match any +# character including newlines. +# See: https://docs.python.org/3/howto/regex.html#compilation-flags clozeReg = r"(?si)\{\{(c)%s::(.*?)(::(.*?))?\}\}" modifiers: Dict[str, Callable] = {} @@ -34,6 +37,7 @@ def get_or_attr(obj, name, default=None) -> Any: return default + class Template: # The regular expression used to find a #section section_re: Pattern = None @@ -197,6 +201,7 @@ class Template: def clozeText(self, txt, ord, type) -> str: reg = clozeReg if not re.search(reg%ord, txt): + # No Cloze deletion was found in txt. 
return "" txt = self._removeFormattingFromMathjax(txt, ord) def repl(m): @@ -216,13 +221,31 @@ class Template: # and display other clozes normally return re.sub(reg%r"\d+", "\\2", txt) - # look for clozes wrapped in mathjax, and change {{cx to {{Cx def _removeFormattingFromMathjax(self, txt, ord) -> str: + """Marks all clozes within MathJax to prevent formatting them. + + Active Cloze deletions within MathJax should not be wrapped inside + a Cloze <span>, as that would interfere with MathJax. + + This method finds all Cloze deletions number `ord` in `txt` which are + inside MathJax inline or display formulas, and replaces their opening + '{{c123' with a '{{C123'. The clozeText method interprets the upper-case + C as "don't wrap this Cloze in a <span>". + """ + # TODO: There is a bug in this method. + # Say txt = r'\(a\) {{c1::b}} \[ {{c1::c}} \]', ord = 1. + # + # This method should return: '\(a\) {{c1::b}} \[ {{C1::c}} \]'. + # Since the {{c1::c}} occurs within a MathJax display formula. + # However, it returns '\(a\) {{c1::b}} \[ {{c1::c}} \]'. + # This causes the Cloze within the MathJax display formula + # to be erroneously formatted with a <span>. 
opening = ["\\(", "\\["] closing = ["\\)", "\\]"] # flags in middle of expression deprecated creg = clozeReg.replace("(?si)", "") regex = r"(?si)(\\[([])(.*?)"+(creg%ord)+r"(.*?)(\\[\])])" + def repl(m): enclosed = True for s in closing: diff --git a/tests/test_models.py b/tests/test_models.py index b1cfbce5f..a85230531 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -199,6 +199,19 @@ def test_cloze_mathjax(): assert "class=cloze" in f.cards()[3].q() assert "class=cloze" in f.cards()[4].q() +def test_cloze_mathjax_bug(): + d = getEmptyCol() + d.models.setCurrent(d.models.byName("Cloze")) + + f = d.newNote() + f['Text'] = r'\(a\) {{c1::b}} \[ {{c1::c}} \]' + assert d.addNote(f) + assert len(f.cards()) == 1 + + # TODO: The following assertion should work, but currently fails due + # to a bug in _removeFormattingFromMathjax. + # assert f.cards()[0].q() == '\(a\) [...] \[ [...] \]' + def test_chained_mods(): d = getEmptyCol() d.models.setCurrent(d.models.byName("Cloze")) diff --git a/tests/test_template.py b/tests/test_template.py new file mode 100644 index 000000000..59f935736 --- /dev/null +++ b/tests/test_template.py @@ -0,0 +1,16 @@ +from anki.template import Template + + +def test_remove_formatting_from_mathjax(): + t = Template('') + assert t._removeFormattingFromMathjax(r'\(2^{{c3::2}}\)', 3) == r'\(2^{{C3::2}}\)' + + txt = (r'{{c1::ok}} \(2^2\) {{c2::not ok}} \(2^{{c3::2}}\) \(x^3\) ' + r'{{c4::blah}} {{c5::text with \(x^2\) jax}}') + # Cloze 2 is not in MathJax, so it should not get protected against + # formatting. + assert t._removeFormattingFromMathjax(txt, 2) == txt + + # TODO: r'\(a\) {{c1::b}} \[ {{c1::c}} \]', ord=1 should return + # r'\(a\) {{c1::b}} \[ {{C1::c}} \]', but actually fails to mark the cloze + # as not-to-be-formatted.