make it easier to get media dir; remove tidyHTML()

This commit is contained in:
Damien Elmes 2011-04-05 20:28:21 +09:00
parent 6ed48171fc
commit c682080890
4 changed files with 4 additions and 26 deletions

View file

@ -4,7 +4,7 @@
import time, os, random, re, stat, simplejson, datetime, copy
from anki.lang import _, ngettext
from anki.utils import parseTags, tidyHTML, ids2str, hexifyID, \
from anki.utils import parseTags, ids2str, hexifyID, \
checksum, fieldChecksum, addTags, delTags, stripHTML, intTime, \
splitFields
from anki.hooks import runHook, runFilter

View file

@ -6,7 +6,6 @@ import codecs, csv, re
from anki.importing import Importer, ForeignCard
from anki.lang import _
from anki.errors import *
from anki.utils import tidyHTML
class TextImporter(Importer):

View file

@ -20,8 +20,11 @@ class MediaRegistry(object):
self._updateDir()
def dir(self, create=False):
    """Return the media directory stored in ``self._dir``.

    A truthy cached ``self._dir`` is returned straight away, whatever
    ``create`` is. Otherwise ``create`` selects the behaviour:

    * ``None``  -- return the result of ``self._updateDir(None)``.
    * truthy    -- call ``self._updateDir(True)``, then return ``self._dir``.
    * falsy     -- (the default) return the unset ``self._dir`` as it is.

    Call with create=None to retrieve dir without creating.

    NOTE(review): ``_updateDir`` is defined outside this view; presumably
    it refreshes ``self._dir`` and returns the path -- confirm there.
    """
    if self._dir:
        return self._dir
    # None is a sentinel here, so test identity: "== None" would also be
    # true for any object whose __eq__ claims equality with None.
    if create is None:
        return self._updateDir(create)
    if create:
        self._updateDir(True)
    return self._dir

View file

@ -149,30 +149,6 @@ def minimizeHTML(s):
'<u>\\1</u>', s)
return s
def tidyHTML(html):
    """Remove cruft like body tags and return just the important part.

    The input is rewritten by a fixed sequence of regex substitutions and
    then passed through ``minimizeHTML``; the resulting string is returned.
    The order of the substitutions matters, since later patterns rely on
    the output of earlier ones.
    """
    # contents of body - no head or html tags; newlines are dropped first
    html = re.sub(u".*<body.*?>(.*)</body></html>",
                  "\\1", html.replace("\n", u""))
    # strip superfluous Qt formatting; the fragments containing \d are raw
    # strings so the backslash reaches the regex engine without relying on
    # Python's handling of unrecognised string escapes
    html = re.sub(u"(?:-qt-table-type: root; )?"
                  r"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
                  r"margin-right:\d+px;(?: -qt-block-indent:0; "
                  "text-indent:0px;)?", u"", html)
    html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
    # strip leading space in style attributes, and remove them if empty
    html = re.sub(u'style=" ', u'style="', html)
    html = re.sub(u' style=""', u"", html)
    # convert P tags into SPAN and/or BR
    html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
    html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
    # drop the trailing <br> left behind by the conversion above
    html = re.sub(u'<br>$', u'', html)
    # unwrap a single borderless table cell that encloses the whole content
    html = re.sub(u"^<table><tr><td style=\"border: none;\">(.*)<br></td></tr></table>$", u"\\1", html)
    # this is being added by qt's html editor, and leads to unwanted spaces
    html = re.sub(u"^<p dir='rtl'>(.*?)</p>$", u'\\1', html)
    html = minimizeHTML(html)
    return html
def entsToTxt(html):
def fixup(m):
text = m.group(0)