From 8f983d5bccd4d48bc3f4543a563b1da0c2bd9a0c Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Fri, 21 Nov 2008 23:41:14 +0900 Subject: [PATCH] always keep as unicode --- anki/db.py | 10 ---------- anki/utils.py | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/anki/db.py b/anki/db.py index 7ce0465f8..5ae209a19 100644 --- a/anki/db.py +++ b/anki/db.py @@ -40,16 +40,6 @@ except ImportError: from sqlalchemy import Unicode UnicodeText = Unicode -# dump location of non-unicode string -from sqlalchemy import util -if getattr(util, 'warn', None): - import traceback - oldWarn = util.warn - def newWarn(*args, **kwargs): - traceback.print_stack() - oldWarn(*args, **kwargs) - util.warn = newWarn - # shared metadata metadata = MetaData() diff --git a/anki/utils.py b/anki/utils.py index 6991ead46..8348913be 100644 --- a/anki/utils.py +++ b/anki/utils.py @@ -146,25 +146,25 @@ def stripHTML(s): def tidyHTML(html): "Remove cruft like body tags and return just the important part." # contents of body - no head or html tags - html = re.sub(".*(.*)", + html = re.sub(u".*(.*)", "\\1", html.replace("\n", u"")) # strip superfluous Qt formatting - html = re.sub("margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; " + html = re.sub(u"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; " "margin-right:\d+px; -qt-block-indent:0; " - "text-indent:0px;", "", html) - html = re.sub("-qt-paragraph-type:empty;", "", html) + "text-indent:0px;", u"", html) + html = re.sub(u"-qt-paragraph-type:empty;", u"", html) # collapse multiple spaces into one - html = re.sub(" +", " ", html) + html = re.sub(u" +", u" ", html) # strip leading space in style statements, and remove if no contents - html = re.sub('style=" ', 'style="', html) - html = re.sub(' style=""', "", html) + html = re.sub(u'style=" ', u'style="', html) + html = re.sub(u' style=""', u"", html) # convert P tags into SPAN and/or BR - html = re.sub('(.*?)

', u'\\2
', html) - html = re.sub('

(.*?)

', u'\\1
', html) - html = re.sub('
$', u'', html) + html = re.sub(u'(.*?)

', u'\\2
', html) + html = re.sub(u'

(.*?)

', u'\\1
', html) + html = re.sub(u'
$', u'', html) # remove leading or trailing whitespace - html = re.sub('^ +', u'', html) - html = re.sub(' +$', u'', html) + html = re.sub(u'^ +', u'', html) + html = re.sub(u' +$', u'', html) return html def genID(static=[]):