always keep as unicode

2025-12-31 07:43:02 -05:00 · 2008-11-21 23:41:14 +09:00 · 2008-11-21 23:41:14 +09:00 · 8f983d5bcc
commit 8f983d5bcc
parent fbcdae4c51
2 changed files with 12 additions and 22 deletions
--- a/anki/db.py
+++ b/anki/db.py
@@ -40,16 +40,6 @@ except ImportError:
    from sqlalchemy import Unicode
    UnicodeText = Unicode
 # dump location of non-unicode string
 from sqlalchemy import util
 if getattr(util, 'warn', None):
    import traceback
    oldWarn = util.warn
    def newWarn(*args, **kwargs):
        traceback.print_stack()
        oldWarn(*args, **kwargs)
    util.warn = newWarn
 # shared metadata
 metadata = MetaData()
--- a/anki/utils.py
+++ b/anki/utils.py
@@ -146,25 +146,25 @@ def stripHTML(s):
 def tidyHTML(html):
    "Remove cruft like body tags and return just the important part."
    # contents of body - no head or html tags
-    html = re.sub(".*<body.*?>(.*)</body></html>",
+    html = re.sub(u".*<body.*?>(.*)</body></html>",
                  "\\1", html.replace("\n", u""))
    # strip superfluous Qt formatting
-    html = re.sub("margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
+    html = re.sub(u"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
                  "margin-right:\d+px; -qt-block-indent:0; "
-                  "text-indent:0px;", "", html)
+                  "text-indent:0px;", u"", html)
-    html = re.sub("-qt-paragraph-type:empty;", "", html)
+    html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
    # collapse multiple spaces into one
-    html = re.sub("  +", " ", html)
+    html = re.sub(u"  +", u" ", html)
    # strip leading space in style statements, and remove if no contents
-    html = re.sub('style=" ', 'style="', html)
+    html = re.sub(u'style=" ', u'style="', html)
-    html = re.sub(' style=""', "", html)
+    html = re.sub(u' style=""', u"", html)
    # convert P tags into SPAN and/or BR
-    html = re.sub('<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
+    html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
-    html = re.sub('<p>(.*?)</p>', u'\\1<br>', html)
+    html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
-    html = re.sub('<br>$', u'', html)
+    html = re.sub(u'<br>$', u'', html)
    # remove leading or trailing whitespace
-    html = re.sub('^ +', u'', html)
+    html = re.sub(u'^ +', u'', html)
-    html = re.sub(' +$', u'', html)
+    html = re.sub(u' +$', u'', html)
    return html
 def genID(static=[]):