mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 22:42:25 -04:00
use html parser on text export instead of fragile regex
This commit is contained in:
parent
28ff71a031
commit
b4f1d6622a
1 changed file with 12 additions and 5 deletions
|
@ -17,6 +17,7 @@ from anki.lang import _
|
||||||
from anki.utils import findTag, parseTags, stripHTML, ids2str
|
from anki.utils import findTag, parseTags, stripHTML, ids2str
|
||||||
from anki.tags import tagIds
|
from anki.tags import tagIds
|
||||||
from anki.db import *
|
from anki.db import *
|
||||||
|
from BeautifulSoup import BeautifulSoup as BS
|
||||||
|
|
||||||
class Exporter(object):
|
class Exporter(object):
|
||||||
def __init__(self, deck):
|
def __init__(self, deck):
|
||||||
|
@ -29,11 +30,16 @@ class Exporter(object):
|
||||||
self.doExport(file)
|
self.doExport(file)
|
||||||
file.close()
|
file.close()
|
||||||
|
|
||||||
def escapeText(self, text):
|
def escapeText(self, text, removeFields=False):
|
||||||
"Escape newlines and tabs, and strip Anki HTML."
|
"Escape newlines and tabs, and strip Anki HTML."
|
||||||
text = text.replace("\n", "<br>")
|
text = text.replace("\n", "<br>")
|
||||||
text = text.replace("\t", " " * 8)
|
text = text.replace("\t", " " * 8)
|
||||||
text = re.sub('<span class="fm.*?">(.*?)</span>', '\\1', text)
|
if removeFields:
|
||||||
|
s = BS(text)
|
||||||
|
all = s('span', {'class': re.compile("fm.*")})
|
||||||
|
for e in all:
|
||||||
|
e.replaceWith("".join([unicode(x) for x in e.contents]))
|
||||||
|
text = unicode(s)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def cardIds(self):
|
def cardIds(self):
|
||||||
|
@ -180,9 +186,10 @@ select cards.id, cards.tags || "," || facts.tags from cards, facts
|
||||||
where cards.factId = facts.id
|
where cards.factId = facts.id
|
||||||
and cards.id in %s
|
and cards.id in %s
|
||||||
order by cards.created""" % strids))
|
order by cards.created""" % strids))
|
||||||
out = u"\n".join(["%s\t%s%s" % (self.escapeText(c[0]),
|
out = u"\n".join(["%s\t%s%s" % (
|
||||||
self.escapeText(c[1]),
|
self.escapeText(c[0], removeFields=True),
|
||||||
self.tags(c[2]))
|
self.escapeText(c[1], removeFields=True),
|
||||||
|
self.tags(c[2]))
|
||||||
for c in cards])
|
for c in cards])
|
||||||
if out:
|
if out:
|
||||||
out += "\n"
|
out += "\n"
|
||||||
|
|
Loading…
Reference in a new issue