mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 14:32:22 -04:00
always keep as unicode
This commit is contained in:
parent
fbcdae4c51
commit
8f983d5bcc
2 changed files with 12 additions and 22 deletions
10
anki/db.py
10
anki/db.py
|
@@ -40,16 +40,6 @@ except ImportError:
|
||||||
from sqlalchemy import Unicode
|
from sqlalchemy import Unicode
|
||||||
UnicodeText = Unicode
|
UnicodeText = Unicode
|
||||||
|
|
||||||
# dump location of non-unicode string
|
|
||||||
from sqlalchemy import util
|
|
||||||
if getattr(util, 'warn', None):
|
|
||||||
import traceback
|
|
||||||
oldWarn = util.warn
|
|
||||||
def newWarn(*args, **kwargs):
|
|
||||||
traceback.print_stack()
|
|
||||||
oldWarn(*args, **kwargs)
|
|
||||||
util.warn = newWarn
|
|
||||||
|
|
||||||
# shared metadata
|
# shared metadata
|
||||||
metadata = MetaData()
|
metadata = MetaData()
|
||||||
|
|
||||||
|
|
|
@@ -146,25 +146,25 @@ def stripHTML(s):
|
||||||
def tidyHTML(html):
|
def tidyHTML(html):
|
||||||
"Remove cruft like body tags and return just the important part."
|
"Remove cruft like body tags and return just the important part."
|
||||||
# contents of body - no head or html tags
|
# contents of body - no head or html tags
|
||||||
html = re.sub(".*<body.*?>(.*)</body></html>",
|
html = re.sub(u".*<body.*?>(.*)</body></html>",
|
||||||
"\\1", html.replace("\n", u""))
|
"\\1", html.replace("\n", u""))
|
||||||
# strip superfluous Qt formatting
|
# strip superfluous Qt formatting
|
||||||
html = re.sub("margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
|
html = re.sub(u"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
|
||||||
"margin-right:\d+px; -qt-block-indent:0; "
|
"margin-right:\d+px; -qt-block-indent:0; "
|
||||||
"text-indent:0px;", "", html)
|
"text-indent:0px;", u"", html)
|
||||||
html = re.sub("-qt-paragraph-type:empty;", "", html)
|
html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
|
||||||
# collapse multiple spaces into one
|
# collapse multiple spaces into one
|
||||||
html = re.sub(" +", " ", html)
|
html = re.sub(u" +", u" ", html)
|
||||||
# strip leading space in style statements, and remove if no contents
|
# strip leading space in style statements, and remove if no contents
|
||||||
html = re.sub('style=" ', 'style="', html)
|
html = re.sub(u'style=" ', u'style="', html)
|
||||||
html = re.sub(' style=""', "", html)
|
html = re.sub(u' style=""', u"", html)
|
||||||
# convert P tags into SPAN and/or BR
|
# convert P tags into SPAN and/or BR
|
||||||
html = re.sub('<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
|
html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
|
||||||
html = re.sub('<p>(.*?)</p>', u'\\1<br>', html)
|
html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
|
||||||
html = re.sub('<br>$', u'', html)
|
html = re.sub(u'<br>$', u'', html)
|
||||||
# remove leading or trailing whitespace
|
# remove leading or trailing whitespace
|
||||||
html = re.sub('^ +', u'', html)
|
html = re.sub(u'^ +', u'', html)
|
||||||
html = re.sub(' +$', u'', html)
|
html = re.sub(u' +$', u'', html)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def genID(static=[]):
|
def genID(static=[]):
|
||||||
|
|
Loading…
Reference in a new issue