diff --git a/anki/storage.py b/anki/storage.py index e70383431..86461255d 100644 --- a/anki/storage.py +++ b/anki/storage.py @@ -420,6 +420,13 @@ def _postSchemaUpgrade(deck): "revCardsDue", "revCardsRandom", "acqCardsRandom", "acqCardsOld", "acqCardsNew"): deck.db.execute("drop view if exists %s" % v) + # minimize qt's bold/italics/underline cruft. we may need to use lxml to + # do this properly + from anki.utils import minimizeHTML + r = [(minimizeHTML(x[2]), x[0], x[1]) for x in deck.db.execute( + "select fid, fmid, val from fdata")] + deck.db.executemany("update fdata set val = ? where fid = ? and fmid = ?", + r) # ensure all templates use the new style field format, and update cach for m in deck.allModels(): for t in m.templates: diff --git a/anki/utils.py b/anki/utils.py index f717ee328..4daa82440 100644 --- a/anki/utils.py +++ b/anki/utils.py @@ -139,6 +139,16 @@ def stripHTMLMedia(s): s = re.sub("]+)[\"']? ?/?>", " \\1 ", s) return stripHTML(s) +def minimizeHTML(s): + "Correct Qt's verbose bold/underline/etc." + s = re.sub('(.*?)', '\\1', + s) + s = re.sub('(.*?)', '\\1', + s) + s = re.sub('(.*?)', + '\\1', s) + return s + def tidyHTML(html): "Remove cruft like body tags and return just the important part." # contents of body - no head or html tags @@ -160,6 +170,7 @@ def tidyHTML(html): html = re.sub(u"^
(.*)
$", u"\\1", html) # this is being added by qt's html editor, and leads to unwanted spaces html = re.sub(u"^

(.*?)

$", u'\\1', html) + html = minimizeHTML(html) return html def entsToTxt(html): diff --git a/tests/support/anki12.anki b/tests/support/anki12.anki index f39aeda6c..226abcab8 100644 Binary files a/tests/support/anki12.anki and b/tests/support/anki12.anki differ