simplify bold/italic/underline tags from qt in upgrade

This commit is contained in:
Damien Elmes 2011-03-07 17:40:15 +09:00
parent e21c944aeb
commit 3cb4ade4a1
3 changed files with 18 additions and 0 deletions

View file

@ -420,6 +420,13 @@ def _postSchemaUpgrade(deck):
"revCardsDue", "revCardsRandom", "acqCardsRandom",
"acqCardsOld", "acqCardsNew"):
deck.db.execute("drop view if exists %s" % v)
# minimize qt's bold/italics/underline cruft. we may need to use lxml to
# do this properly
from anki.utils import minimizeHTML
r = [(minimizeHTML(x[2]), x[0], x[1]) for x in deck.db.execute(
"select fid, fmid, val from fdata")]
deck.db.executemany("update fdata set val = ? where fid = ? and fmid = ?",
r)
# ensure all templates use the new style field format, and update cach
for m in deck.allModels():
for t in m.templates:

View file

@ -139,6 +139,16 @@ def stripHTMLMedia(s):
s = re.sub("<img src=[\"']?([^\"'>]+)[\"']? ?/?>", " \\1 ", s)
return stripHTML(s)
def minimizeHTML(s):
    """Replace Qt's verbose styled <span> wrappers with <b>/<i>/<u> tags.

    Each rewrite is applied in turn over the whole string: bold first,
    then italic, then underline. The captured group is non-greedy, so a
    match ends at the first closing </span> that follows the opening tag.
    No regex flags are passed, so "." does not match a newline and a span
    whose content crosses a line break is left untouched.

    s -- the HTML text to simplify.
    Returns the rewritten string; input without matching spans is
    returned unchanged.
    """
    # (pattern, replacement) pairs, kept in the order they must be applied.
    rewrites = (
        ('<span style="font-weight:600;">(.*?)</span>', '<b>\\1</b>'),
        ('<span style="font-style:italic;">(.*?)</span>', '<i>\\1</i>'),
        ('<span style="text-decoration: underline;">(.*?)</span>',
         '<u>\\1</u>'),
    )
    for pattern, replacement in rewrites:
        s = re.sub(pattern, replacement, s)
    return s
def tidyHTML(html):
"Remove cruft like body tags and return just the important part."
# contents of body - no head or html tags
@ -160,6 +170,7 @@ def tidyHTML(html):
html = re.sub(u"^<table><tr><td style=\"border: none;\">(.*)<br></td></tr></table>$", u"\\1", html)
# this is being added by qt's html editor, and leads to unwanted spaces
html = re.sub(u"^<p dir='rtl'>(.*?)</p>$", u'\\1', html)
html = minimizeHTML(html)
return html
def entsToTxt(html):

Binary file not shown.