Add option to strip HTML in export

This commit is contained in:
Damien Elmes 2019-03-05 08:57:53 +10:00
parent 46fa0c2d7b
commit b168ce6eed
4 changed files with 46 additions and 6 deletions

View file

@ -6,11 +6,13 @@ import re, os, zipfile, shutil, unicodedata
import json
from anki.lang import _
from anki.utils import ids2str, splitFields, namedtmp
from anki.utils import ids2str, splitFields, namedtmp, stripHTML
from anki.hooks import runHook
from anki import Collection
class Exporter:
includeHTML = None
def __init__(self, col, did=None):
self.col = col
self.did = did
@ -24,6 +26,14 @@ class Exporter:
self.doExport(file)
file.close()
def processText(self, text):
    "Prepare a field for export: optionally strip HTML, then escape it."
    # Only an explicit False strips markup; the class-level default of
    # None is treated the same as True here.
    html_disabled = self.includeHTML is False
    cleaned = self.stripHTML(text) if html_disabled else text
    return self.escapeText(cleaned)
def escapeText(self, text):
"Escape newlines, tabs, CSS and quotechar."
# fixme: we should probably quote fields with newlines
@ -36,6 +46,16 @@ class Exporter:
text = "\"" + text.replace("\"", "\"\"") + "\""
return text
def stripHTML(self, text):
    """Return a rough plain-text rendering of an HTML field.

    Line-break and block-opening tags (<br>, <div>, <p>) are replaced
    with a space so the words on either side stay separated, [sound:...]
    references are dropped, the remainder is passed through the
    module-level stripHTML() helper (presumably removing the remaining
    markup -- confirm against anki.utils), and runs of spaces, newlines
    and tabs are collapsed into a single space.
    """
    # very basic conversion to text
    s = text
    # Also match tags that carry attributes (<div class="x">,
    # <br clear="all">); previously only the bare forms were matched, so
    # adjacent words were fused once the markup was stripped.  Requiring
    # whitespace, "/" or ">" right after the tag name keeps <pre>,
    # <param> etc. from matching.
    s = re.sub(r"(?i)<(br|div|p)(\s[^>]*)?/?>", " ", s)
    s = re.sub(r"\[sound:[^]]+\]", "", s)
    # the global stripHTML() helper, not this method
    s = stripHTML(s)
    s = re.sub(r"[ \n\t]+", " ", s)
    return s.strip()
def cardIds(self):
if not self.did:
cids = self.col.db.list("select id from cards")
@ -51,6 +71,7 @@ class TextCardExporter(Exporter):
key = _("Cards in Plain Text")
ext = ".txt"
includeHTML = True
def __init__(self, col):
Exporter.__init__(self, col)
@ -61,7 +82,7 @@ class TextCardExporter(Exporter):
def esc(s):
# strip off the repeated question in answer if exists
s = re.sub("(?si)^.*<hr id=answer>\n*", "", s)
return self.escapeText(s)
return self.processText(s)
out = ""
for cid in ids:
c = self.col.getCard(cid)
@ -77,6 +98,7 @@ class TextNoteExporter(Exporter):
key = _("Notes in Plain Text")
ext = ".txt"
includeTags = True
includeHTML = True
def __init__(self, col):
Exporter.__init__(self, col)
@ -95,7 +117,7 @@ where cards.id in %s)""" % ids2str(cardIds)):
if self.includeID:
row.append(str(id))
# fields
row.extend([self.escapeText(f) for f in splitFields(flds)])
row.extend([self.processText(f) for f in splitFields(flds)])
# tags
if self.includeTags:
row.append(tags.strip())

View file

@ -61,6 +61,12 @@ class ExportDialog(QDialog):
getattr(self.exporter, "includeMedia", None) is not None)
self.frm.includeTags.setVisible(
getattr(self.exporter, "includeTags", None) is not None)
html = getattr(self.exporter, "includeHTML", None)
if html is not None:
self.frm.includeHTML.setVisible(True)
self.frm.includeHTML.setChecked(html)
else:
self.frm.includeHTML.setVisible(False)
# show deck list?
self.frm.deck.setVisible(not self.isVerbatim)
@ -71,6 +77,8 @@ class ExportDialog(QDialog):
self.frm.includeMedia.isChecked())
self.exporter.includeTags = (
self.frm.includeTags.isChecked())
self.exporter.includeHTML = (
self.frm.includeHTML.isChecked())
if not self.frm.deck.currentIndex():
self.exporter.did = None
else:

View file

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>295</width>
<height>202</height>
<height>223</height>
</rect>
</property>
<property name="windowTitle">
@ -76,6 +76,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeHTML">
<property name="text">
<string>Include HTML and media references</string>
</property>
</widget>
</item>
</layout>
</item>
<item>

View file

@ -14,7 +14,7 @@ def setup1():
global deck
deck = getEmptyCol()
f = deck.newNote()
f['Front'] = "foo"; f['Back'] = "bar"; f.tags = ["tag", "tag2"]
f['Front'] = "foo"; f['Back'] = "bar<br>"; f.tags = ["tag", "tag2"]
deck.addNote(f)
# with a different deck
f = deck.newNote()
@ -129,8 +129,11 @@ def test_export_textnote():
os.close(fd)
os.unlink(f)
e.exportInto(f)
e.includeTags = True
assert open(f).readline() == "foo\tbar<br>\ttag tag2\n"
e.includeTags = False
e.includeHTML = False
e.exportInto(f)
assert open(f).readline() == "foo\tbar\n"
def test_exporters():
assert "*.apkg" in str(exporters())