diff --git a/anki/storage.py b/anki/storage.py
index e70383431..86461255d 100644
--- a/anki/storage.py
+++ b/anki/storage.py
@@ -420,6 +420,13 @@ def _postSchemaUpgrade(deck):
"revCardsDue", "revCardsRandom", "acqCardsRandom",
"acqCardsOld", "acqCardsNew"):
deck.db.execute("drop view if exists %s" % v)
+ # minimize qt's bold/italics/underline cruft. we may need to use lxml to
+ # do this properly
+ from anki.utils import minimizeHTML
+ r = [(minimizeHTML(x[2]), x[0], x[1]) for x in deck.db.execute(
+ "select fid, fmid, val from fdata")]
+ deck.db.executemany("update fdata set val = ? where fid = ? and fmid = ?",
+ r)
# ensure all templates use the new style field format, and update cach
for m in deck.allModels():
for t in m.templates:
diff --git a/anki/utils.py b/anki/utils.py
index f717ee328..4daa82440 100644
--- a/anki/utils.py
+++ b/anki/utils.py
@@ -139,6 +139,16 @@ def stripHTMLMedia(s):
s = re.sub("]+)[\"']? ?/?>", " \\1 ", s)
return stripHTML(s)
+def minimizeHTML(s):
+    "Rewrite Qt's styled <span> bold/italic/underline as <b>, <i>, <u>."
+    s = re.sub('<span style="font-weight:600;">(.*?)</span>', '<b>\\1</b>',
+               s)
+    s = re.sub('<span style="font-style:italic;">(.*?)</span>', '<i>\\1</i>',
+               s)
+    s = re.sub('<span style="text-decoration: underline;">(.*?)</span>',
+               '<u>\\1</u>', s)
+    return s
+
def tidyHTML(html):
"Remove cruft like body tags and return just the important part."
# contents of body - no head or html tags
@@ -160,6 +170,7 @@ def tidyHTML(html):
html = re.sub(u"^
| (.*) |
(.*?)
$", u'\\1', html) + html = minimizeHTML(html) return html def entsToTxt(html): diff --git a/tests/support/anki12.anki b/tests/support/anki12.anki index f39aeda6c..226abcab8 100644 Binary files a/tests/support/anki12.anki and b/tests/support/anki12.anki differ