mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 01:06:35 -04:00
make it easier to get media dir; remove tidyHTML()
This commit is contained in:
parent
6ed48171fc
commit
c682080890
4 changed files with 4 additions and 26 deletions
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
import time, os, random, re, stat, simplejson, datetime, copy
|
import time, os, random, re, stat, simplejson, datetime, copy
|
||||||
from anki.lang import _, ngettext
|
from anki.lang import _, ngettext
|
||||||
from anki.utils import parseTags, tidyHTML, ids2str, hexifyID, \
|
from anki.utils import parseTags, ids2str, hexifyID, \
|
||||||
checksum, fieldChecksum, addTags, delTags, stripHTML, intTime, \
|
checksum, fieldChecksum, addTags, delTags, stripHTML, intTime, \
|
||||||
splitFields
|
splitFields
|
||||||
from anki.hooks import runHook, runFilter
|
from anki.hooks import runHook, runFilter
|
||||||
|
|
|
@ -6,7 +6,6 @@ import codecs, csv, re
|
||||||
from anki.importing import Importer, ForeignCard
|
from anki.importing import Importer, ForeignCard
|
||||||
from anki.lang import _
|
from anki.lang import _
|
||||||
from anki.errors import *
|
from anki.errors import *
|
||||||
from anki.utils import tidyHTML
|
|
||||||
|
|
||||||
class TextImporter(Importer):
|
class TextImporter(Importer):
|
||||||
|
|
||||||
|
|
|
@ -20,8 +20,11 @@ class MediaRegistry(object):
|
||||||
self._updateDir()
|
self._updateDir()
|
||||||
|
|
||||||
def dir(self, create=False):
|
def dir(self, create=False):
|
||||||
|
"Call with create=None to retrieve dir without creating."
|
||||||
if self._dir:
|
if self._dir:
|
||||||
return self._dir
|
return self._dir
|
||||||
|
elif create == None:
|
||||||
|
return self._updateDir(create)
|
||||||
elif create:
|
elif create:
|
||||||
self._updateDir(True)
|
self._updateDir(True)
|
||||||
return self._dir
|
return self._dir
|
||||||
|
|
|
@ -149,30 +149,6 @@ def minimizeHTML(s):
|
||||||
'<u>\\1</u>', s)
|
'<u>\\1</u>', s)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def tidyHTML(html):
|
|
||||||
"Remove cruft like body tags and return just the important part."
|
|
||||||
# contents of body - no head or html tags
|
|
||||||
html = re.sub(u".*<body.*?>(.*)</body></html>",
|
|
||||||
"\\1", html.replace("\n", u""))
|
|
||||||
# strip superfluous Qt formatting
|
|
||||||
html = re.sub(u"(?:-qt-table-type: root; )?"
|
|
||||||
"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
|
|
||||||
"margin-right:\d+px;(?: -qt-block-indent:0; "
|
|
||||||
"text-indent:0px;)?", u"", html)
|
|
||||||
html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
|
|
||||||
# strip leading space in style attributes, and remove if no contents
|
|
||||||
html = re.sub(u'style=" ', u'style="', html)
|
|
||||||
html = re.sub(u' style=""', u"", html)
|
|
||||||
# convert P tags into SPAN and/or BR
|
|
||||||
html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
|
|
||||||
html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
|
|
||||||
html = re.sub(u'<br>$', u'', html)
|
|
||||||
html = re.sub(u"^<table><tr><td style=\"border: none;\">(.*)<br></td></tr></table>$", u"\\1", html)
|
|
||||||
# this is being added by qt's html editor, and leads to unwanted spaces
|
|
||||||
html = re.sub(u"^<p dir='rtl'>(.*?)</p>$", u'\\1', html)
|
|
||||||
html = minimizeHTML(html)
|
|
||||||
return html
|
|
||||||
|
|
||||||
def entsToTxt(html):
|
def entsToTxt(html):
|
||||||
def fixup(m):
|
def fixup(m):
|
||||||
text = m.group(0)
|
text = m.group(0)
|
||||||
|
|
Loading…
Reference in a new issue