Add option to strip HTML in export

This commit is contained in:
Damien Elmes 2019-03-05 08:57:53 +10:00
parent 46fa0c2d7b
commit b168ce6eed
4 changed files with 46 additions and 6 deletions

View file

@ -6,11 +6,13 @@ import re, os, zipfile, shutil, unicodedata
import json import json
from anki.lang import _ from anki.lang import _
from anki.utils import ids2str, splitFields, namedtmp from anki.utils import ids2str, splitFields, namedtmp, stripHTML
from anki.hooks import runHook from anki.hooks import runHook
from anki import Collection from anki import Collection
class Exporter: class Exporter:
includeHTML = None
def __init__(self, col, did=None): def __init__(self, col, did=None):
self.col = col self.col = col
self.did = did self.did = did
@ -24,6 +26,14 @@ class Exporter:
self.doExport(file) self.doExport(file)
file.close() file.close()
def processText(self, text):
    "Prepare TEXT for export: strip HTML if it was turned off, then escape."
    # includeHTML defaults to None on the base class; only an explicit
    # False asks for the markup to be removed.
    wantsPlain = self.includeHTML is False
    cleaned = self.stripHTML(text) if wantsPlain else text
    return self.escapeText(cleaned)
def escapeText(self, text): def escapeText(self, text):
"Escape newlines, tabs, CSS and quotechar." "Escape newlines, tabs, CSS and quotechar."
# fixme: we should probably quote fields with newlines # fixme: we should probably quote fields with newlines
@ -36,6 +46,16 @@ class Exporter:
text = "\"" + text.replace("\"", "\"\"") + "\"" text = "\"" + text.replace("\"", "\"\"") + "\""
return text return text
def stripHTML(self, text):
    "Return TEXT as a single line of plain text without markup or sound tags."
    # very basic conversion to text
    # Line-break and block-level tags become a space so the words they
    # separate do not run together once the markup is removed. Closing
    # tags and tags carrying attributes (</div>, <div style="...">,
    # <br clear="all">) are covered as well; \b stops longer tag names
    # such as <pre> from being treated as <p>.
    s = re.sub(r"(?i)</?(?:br|div|p)\b[^>]*>", " ", text)
    # drop [sound:...] media references
    s = re.sub(r"\[sound:[^]]+\]", "", s)
    # this is the module-level stripHTML imported from anki.utils, not a
    # recursive call to this method
    s = stripHTML(s)
    # collapse runs of spaces, newlines and tabs into one space
    s = re.sub(r"[ \n\t]+", " ", s)
    return s.strip()
def cardIds(self): def cardIds(self):
if not self.did: if not self.did:
cids = self.col.db.list("select id from cards") cids = self.col.db.list("select id from cards")
@ -51,6 +71,7 @@ class TextCardExporter(Exporter):
key = _("Cards in Plain Text") key = _("Cards in Plain Text")
ext = ".txt" ext = ".txt"
includeHTML = True
def __init__(self, col): def __init__(self, col):
Exporter.__init__(self, col) Exporter.__init__(self, col)
@ -61,7 +82,7 @@ class TextCardExporter(Exporter):
def esc(s): def esc(s):
# strip off the repeated question in answer if exists # strip off the repeated question in answer if exists
s = re.sub("(?si)^.*<hr id=answer>\n*", "", s) s = re.sub("(?si)^.*<hr id=answer>\n*", "", s)
return self.escapeText(s) return self.processText(s)
out = "" out = ""
for cid in ids: for cid in ids:
c = self.col.getCard(cid) c = self.col.getCard(cid)
@ -77,6 +98,7 @@ class TextNoteExporter(Exporter):
key = _("Notes in Plain Text") key = _("Notes in Plain Text")
ext = ".txt" ext = ".txt"
includeTags = True includeTags = True
includeHTML = True
def __init__(self, col): def __init__(self, col):
Exporter.__init__(self, col) Exporter.__init__(self, col)
@ -95,7 +117,7 @@ where cards.id in %s)""" % ids2str(cardIds)):
if self.includeID: if self.includeID:
row.append(str(id)) row.append(str(id))
# fields # fields
row.extend([self.escapeText(f) for f in splitFields(flds)]) row.extend([self.processText(f) for f in splitFields(flds)])
# tags # tags
if self.includeTags: if self.includeTags:
row.append(tags.strip()) row.append(tags.strip())

View file

@ -61,6 +61,12 @@ class ExportDialog(QDialog):
getattr(self.exporter, "includeMedia", None) is not None) getattr(self.exporter, "includeMedia", None) is not None)
self.frm.includeTags.setVisible( self.frm.includeTags.setVisible(
getattr(self.exporter, "includeTags", None) is not None) getattr(self.exporter, "includeTags", None) is not None)
html = getattr(self.exporter, "includeHTML", None)
if html is not None:
self.frm.includeHTML.setVisible(True)
self.frm.includeHTML.setChecked(html)
else:
self.frm.includeHTML.setVisible(False)
# show deck list? # show deck list?
self.frm.deck.setVisible(not self.isVerbatim) self.frm.deck.setVisible(not self.isVerbatim)
@ -71,6 +77,8 @@ class ExportDialog(QDialog):
self.frm.includeMedia.isChecked()) self.frm.includeMedia.isChecked())
self.exporter.includeTags = ( self.exporter.includeTags = (
self.frm.includeTags.isChecked()) self.frm.includeTags.isChecked())
self.exporter.includeHTML = (
self.frm.includeHTML.isChecked())
if not self.frm.deck.currentIndex(): if not self.frm.deck.currentIndex():
self.exporter.did = None self.exporter.did = None
else: else:

View file

@ -7,7 +7,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>295</width> <width>295</width>
<height>202</height> <height>223</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -76,6 +76,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="includeHTML">
<property name="text">
<string>Include HTML and media references</string>
</property>
</widget>
</item>
</layout> </layout>
</item> </item>
<item> <item>

View file

@ -14,7 +14,7 @@ def setup1():
global deck global deck
deck = getEmptyCol() deck = getEmptyCol()
f = deck.newNote() f = deck.newNote()
f['Front'] = "foo"; f['Back'] = "bar"; f.tags = ["tag", "tag2"] f['Front'] = "foo"; f['Back'] = "bar<br>"; f.tags = ["tag", "tag2"]
deck.addNote(f) deck.addNote(f)
# with a different deck # with a different deck
f = deck.newNote() f = deck.newNote()
@ -129,8 +129,11 @@ def test_export_textnote():
os.close(fd) os.close(fd)
os.unlink(f) os.unlink(f)
e.exportInto(f) e.exportInto(f)
e.includeTags = True assert open(f).readline() == "foo\tbar<br>\ttag tag2\n"
e.includeTags = False
e.includeHTML = False
e.exportInto(f) e.exportInto(f)
assert open(f).readline() == "foo\tbar\n"
def test_exporters(): def test_exporters():
assert "*.apkg" in str(exporters()) assert "*.apkg" in str(exporters())