From 1006943d8eb503a21fb1c9e9d83d5612da91f685 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Sun, 26 Jul 2009 12:38:36 +0900 Subject: [PATCH] SuperMemo import fix from Petr Fix SuperMemo import of the Q&A part in escaped HTML. SuperMemo 2004/2006 exports can contain unescaped > or < characters that confuse the BeautifulSoup library. Switch automatic creation of tags from all titles to True. --- anki/importing/supermemo_xml.py | 14 ++++++++++---- .../supermemo_ENGLISHFORBEGGINERS_oem_1250.xml | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/anki/importing/supermemo_xml.py b/anki/importing/supermemo_xml.py index b84fcc014..09086cc18 100644 --- a/anki/importing/supermemo_xml.py +++ b/anki/importing/supermemo_xml.py @@ -111,7 +111,7 @@ class SupermemoXmlImporter(Importer): self.META.resetLearningData = False # implemented self.META.onlyMemorizedItems = False # implemented self.META.loggerLevel = 2 # implemented 0no,1info,2error,3debug - self.META.tagAllTopics = False + self.META.tagAllTopics = True self.META.pathsToBeTagged = ['English for begginers', 'Advanced English 97', 'Phrasal Verbs'] # path patterns to be tagged - in gui entered like 'Advanced English 97|My Vocablary' self.META.tagMemorizedItems = True # implemented self.META.logToStdOutput = False # implemented @@ -132,10 +132,16 @@ class SupermemoXmlImporter(Importer): def _decode_htmlescapes(self,s): """Unescape HTML code.""" - from BeautifulSoup import BeautifulStoneSoup - #my sm2004 also ecaped & chars in escaped sequences. + #In case of bad formated html you can import MinimalSoup etc.. see btflsoup source code + from BeautifulSoup import BeautifulStoneSoup as btflsoup + + #my sm2004 also ecaped & char in escaped sequences. 
s = re.sub(u'&amp;',u'&',s) - return unicode(BeautifulStoneSoup(s,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )) + #unescaped solitary chars < or > that were ok for minidom confuse btfl soup + s = re.sub(u'>',u'&gt;',s) + s = re.sub(u'<',u'&lt;',s) + + return unicode(btflsoup(s,convertEntities=btflsoup.HTML_ENTITIES )) def _unescape(self,s,initilize): diff --git a/tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml b/tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml index 19f45a47a..1728c61a7 100644 --- a/tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml +++ b/tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml @@ -202,7 +202,7 @@ Item - rozum&#283;t + <><TEST<TEST>TES>T>TESTTEST rozum&#283;t understand [,and&#273;r'st&#281;nd] @@ -926,4 +926,4 @@ - \ No newline at end of file +