SuperMemo import fix from Petr

Fix SuperMemo import of the Q&A part in escaped HTML. SuperMemo 2004/2006 exports
can contain unescaped > or < characters that confuse the BeautifulSoup library. Also
switch automatic tag creation from all titles (tagAllTopics) to True.
This commit is contained in:
Damien Elmes 2009-07-26 12:38:36 +09:00
parent 4be3de322c
commit 1006943d8e
2 changed files with 12 additions and 6 deletions

View file

@ -111,7 +111,7 @@ class SupermemoXmlImporter(Importer):
self.META.resetLearningData = False # implemented
self.META.onlyMemorizedItems = False # implemented
self.META.loggerLevel = 2 # implemented 0no,1info,2error,3debug
self.META.tagAllTopics = False
self.META.tagAllTopics = True
self.META.pathsToBeTagged = ['English for begginers', 'Advanced English 97', 'Phrasal Verbs'] # path patterns to be tagged - in gui entered like 'Advanced English 97|My Vocablary'
self.META.tagMemorizedItems = True # implemented
self.META.logToStdOutput = False # implemented
@ -132,10 +132,16 @@ class SupermemoXmlImporter(Importer):
def _decode_htmlescapes(self,s):
"""Unescape HTML code."""
from BeautifulSoup import BeautifulStoneSoup
#my sm2004 also ecaped & chars in escaped sequences.
#In case of bad formated html you can import MinimalSoup etc.. see btflsoup source code
from BeautifulSoup import BeautifulStoneSoup as btflsoup
#my sm2004 also ecaped & char in escaped sequences.
s = re.sub(u'&amp;',u'&',s)
return unicode(BeautifulStoneSoup(s,convertEntities=BeautifulStoneSoup.HTML_ENTITIES ))
#unescaped solitary chars < or > that were ok for minidom confuse btfl soup
s = re.sub(u'>',u'&gt;',s)
s = re.sub(u'<',u'&lt;',s)
return unicode(btflsoup(s,convertEntities=btflsoup.HTML_ENTITIES ))
def _unescape(self,s,initilize):

View file

@ -202,7 +202,7 @@
<Type>Item</Type>
<Content>
<Question>rozum&amp;#283;t</Question>
<Question>&lt;&gt;&lt;TEST&lt;TEST>TES>T&gt;TESTTEST rozum&amp;#283;t</Question>
<Answer>understand [,and&amp;#273;r'st&amp;#281;nd]</Answer>
</Content>
@ -926,4 +926,4 @@
</SuperMemoElement>
</SuperMemoElement>
</SuperMemoElement>
</SuperMemoCollection>
</SuperMemoCollection>