mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 22:12:21 -04:00
always keep as unicode
This commit is contained in:
parent
fbcdae4c51
commit
8f983d5bcc
2 changed files with 12 additions and 22 deletions
10
anki/db.py
10
anki/db.py
|
@@ -40,16 +40,6 @@ except ImportError:
|
|||
from sqlalchemy import Unicode
|
||||
UnicodeText = Unicode
|
||||
|
||||
# dump location of non-unicode string
|
||||
from sqlalchemy import util
|
||||
if getattr(util, 'warn', None):
|
||||
import traceback
|
||||
oldWarn = util.warn
|
||||
def newWarn(*args, **kwargs):
|
||||
traceback.print_stack()
|
||||
oldWarn(*args, **kwargs)
|
||||
util.warn = newWarn
|
||||
|
||||
# shared metadata
|
||||
metadata = MetaData()
|
||||
|
||||
|
|
|
@@ -146,25 +146,25 @@ def stripHTML(s):
|
|||
def tidyHTML(html):
|
||||
"Remove cruft like body tags and return just the important part."
|
||||
# contents of body - no head or html tags
|
||||
html = re.sub(".*<body.*?>(.*)</body></html>",
|
||||
html = re.sub(u".*<body.*?>(.*)</body></html>",
|
||||
"\\1", html.replace("\n", u""))
|
||||
# strip superfluous Qt formatting
|
||||
html = re.sub("margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
|
||||
html = re.sub(u"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
|
||||
"margin-right:\d+px; -qt-block-indent:0; "
|
||||
"text-indent:0px;", "", html)
|
||||
html = re.sub("-qt-paragraph-type:empty;", "", html)
|
||||
"text-indent:0px;", u"", html)
|
||||
html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
|
||||
# collapse multiple spaces into one
|
||||
html = re.sub(" +", " ", html)
|
||||
html = re.sub(u" +", u" ", html)
|
||||
# strip leading space in style statements, and remove if no contents
|
||||
html = re.sub('style=" ', 'style="', html)
|
||||
html = re.sub(' style=""', "", html)
|
||||
html = re.sub(u'style=" ', u'style="', html)
|
||||
html = re.sub(u' style=""', u"", html)
|
||||
# convert P tags into SPAN and/or BR
|
||||
html = re.sub('<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
|
||||
html = re.sub('<p>(.*?)</p>', u'\\1<br>', html)
|
||||
html = re.sub('<br>$', u'', html)
|
||||
html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
|
||||
html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
|
||||
html = re.sub(u'<br>$', u'', html)
|
||||
# remove leading or trailing whitespace
|
||||
html = re.sub('^ +', u'', html)
|
||||
html = re.sub(' +$', u'', html)
|
||||
html = re.sub(u'^ +', u'', html)
|
||||
html = re.sub(u' +$', u'', html)
|
||||
return html
|
||||
|
||||
def genID(static=[]):
|
||||
|
|
Loading…
Reference in a new issue