From fe99ff751882d152a2ecdc03c7a47c39f90d9b13 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Thu, 9 Jul 2009 23:03:23 +0900 Subject: [PATCH] add supermemo importer from Petr Michalec --- anki/importing/__init__.py | 2 + anki/importing/supermemo_xml.py | 486 +++ .../supermemo_ENGLISHFORBEGGINERS_noOEM.xml | 2637 +++++++++++++++++ ...supermemo_ENGLISHFORBEGGINERS_oem_1250.xml | 929 ++++++ .../supermemo_ENGLISHVOCABULARYBUILDER.xml | 1 + .../supermemo_EnglishPronunciationTop100.xml | 1 + tests/test_importing.py | 40 +- 7 files changed, 4095 insertions(+), 1 deletion(-) create mode 100644 anki/importing/supermemo_xml.py create mode 100644 tests/importing/supermemo_ENGLISHFORBEGGINERS_noOEM.xml create mode 100644 tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml create mode 100644 tests/importing/supermemo_ENGLISHVOCABULARYBUILDER.xml create mode 100644 tests/importing/supermemo_EnglishPronunciationTop100.xml diff --git a/anki/importing/__init__.py b/anki/importing/__init__.py index 3a36c0e3a..9fdfe229b 100644 --- a/anki/importing/__init__.py +++ b/anki/importing/__init__.py @@ -260,10 +260,12 @@ from anki.importing.csvfile import TextImporter from anki.importing.anki10 import Anki10Importer from anki.importing.mnemosyne10 import Mnemosyne10Importer from anki.importing.wcu import WCUImporter +from anki.importing.supermemo_xml import SupermemoXmlImporter Importers = ( (_("Text separated by tabs or semicolons (*)"), TextImporter), (_("Anki Deck (*.anki)"), Anki10Importer), (_("Mnemosyne Deck (*.mem)"), Mnemosyne10Importer), (_("CueCard Deck (*.wcu)"), WCUImporter), + (_("Supermemo XML export (*.xml)"), SupermemoXmlImporter), ) diff --git a/anki/importing/supermemo_xml.py b/anki/importing/supermemo_xml.py new file mode 100644 index 000000000..af78e44b7 --- /dev/null +++ b/anki/importing/supermemo_xml.py @@ -0,0 +1,486 @@ +# -*- coding: utf-8 -*- +# Copyright: petr.michalec@gmail.com +# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html + +"""\ +Importing Supermemo XML decks +============================== +""" +__docformat__ = 'restructuredtext' + +import sys + +from anki.importing import Importer, ForeignCard +from anki.lang import _ +from anki.errors import * + +from xml.dom import minidom, Node +from types import DictType, InstanceType +from string import capwords, maketrans +import re, unicodedata, time +from BeautifulSoup import BeautifulStoneSoup +#import chardet + + +from anki.deck import Deck + +class SmartDict(dict): + """ + See http://www.peterbe.com/plog/SmartDict + Copyright 2005, Peter Bengtsson, peter@fry-it.com + + A smart dict can be instanciated either from a pythonic dict + or an instance object (eg. SQL recordsets) but it ensures that you can + do all the convenient lookups such as x.first_name, x['first_name'] or + x.get('first_name'). + """ + + def __init__(self, *a, **kw): + if a: + if type(a[0]) is DictType: + kw.update(a[0]) + elif type(a[0]) is InstanceType: + kw.update(a[0].__dict__) + elif hasattr(a[0], '__class__') and a[0].__class__.__name__=='SmartDict': + kw.update(a[0].__dict__) + + dict.__init__(self, **kw) + self.__dict__ = self + +class SuperMemoElement(SmartDict): + "SmartDict wrapper to store SM Element data" + + def __init__(self, *a, **kw): + SmartDict.__init__(self, *a, **kw) + #default content + self.__dict__['lTitle'] = None + self.__dict__['Title'] = None + self.__dict__['Question'] = None + self.__dict__['Answer'] = None + self.__dict__['Count'] = None + self.__dict__['Type'] = None + self.__dict__['ID'] = None + self.__dict__['Interval'] = None + self.__dict__['Lapses'] = None + self.__dict__['Repetitions'] = None + self.__dict__['LastRepetiton'] = None + self.__dict__['AFactor'] = None + self.__dict__['UFactor'] = None + + + +# This is an AnkiImporter +class SupermemoXmlImporter(Importer): + """ + Supermemo XML export's to Anki parser. + Goes through a SM collection and fetch all elements. + + My SM collection was a big mess where topics and items were mixed. + I was unable to parse my content in a regular way like for loop on + minidom.getElementsByTagName() etc. My collection had also an + limitation, topics were splited into branches with max 100 items + on each. Learning themes were in deep structure. I wanted to have + full title on each element to be stored in tags. + + Code should be upgrade to support importing of SM2006 exports. + """ + + def __init__(self, *args): + """Initialize internal varables. + Pameters to be exposed to GUI are stored in self.META""" + + Importer.__init__(self, *args) + self.lines = None + self.numFields=int(2) + + # SmXmlParse VARIABLES + self.xmldoc = None + self.pieces = [] + self.cntBuf = [] #to store last parsed data + self.cntElm = [] #to store SM Elements data + self.cntCol = [] #to store SM Colections data + + # store some meta info related to parse algorithm + # SmartDict works like dict / class wrapper + self.cntMeta = SmartDict() + self.cntMeta.popTitles = False + self.cntMeta.title = [] + + # META stores controls of import scritp, should be + # exposed to import dialog. These are default values. + self.META = SmartDict() + self.META.resetLearningData = False # implemented + self.META.onlyMemorizedItems = False # implemented + self.META.loggerLevel = 2 # implemented 0no,1info,2error,3debug + self.META.tagAllTopics = False + self.META.pathsToBeTagged = ['English for begginers', 'Advanced English 97', 'Phrasal Verbs'] # path patterns to be tagged - in gui entered like 'Advanced English 97|My Vocablary' + self.META.tagMemorizedItems = True # implemented + self.META.logToStdOutput = False # implemented + + self.cards = [] + +## TOOLS + + def _fudgeText(self, text): + "Replace sm syntax to Anki syntax" + text = text.replace("\n\r", u"
") + text = text.replace("\n", u"
") + return text + + def _unicode2ascii(self,str): + "Remove diacritic punctuation from strings (titles)" + return u"".join([ c for c in unicodedata.normalize('NFKD', str) if not unicodedata.combining(c)]) + + def _decode_htmlescapes(self,s): + """Unescape HTML code.""" + + #my sm2004 also ecaped & chars in escaped sequences. + s = re.sub(u'&',u'&',s) + return unicode(BeautifulStoneSoup(s,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )) + + + def _unescape(self,s,initilize): + """Note: This method is not used, BeautifulSoup does better job. + """ + + if self._unescape_trtable == None: + self._unescape_trtable = ( + ('€',u'€'), (' ',u' '), ('!',u'!'), ('"',u'"'), ('#',u'#'), ('$',u'$'), ('%',u'%'), ('&',u'&'), (''',u"'"), + ('(',u'('), (')',u')'), ('*',u'*'), ('+',u'+'), (',',u','), ('-',u'-'), ('.',u'.'), ('/',u'/'), ('0',u'0'), + ('1',u'1'), ('2',u'2'), ('3',u'3'), ('4',u'4'), ('5',u'5'), ('6',u'6'), ('7',u'7'), ('8',u'8'), ('9',u'9'), + (':',u':'), (';',u';'), ('<',u'<'), ('=',u'='), ('>',u'>'), ('?',u'?'), ('@',u'@'), ('A',u'A'), ('B',u'B'), + ('C',u'C'), ('D',u'D'), ('E',u'E'), ('F',u'F'), ('G',u'G'), ('H',u'H'), ('I',u'I'), ('J',u'J'), ('K',u'K'), + ('L',u'L'), ('M',u'M'), ('N',u'N'), ('O',u'O'), ('P',u'P'), ('Q',u'Q'), ('R',u'R'), ('S',u'S'), ('T',u'T'), + ('U',u'U'), ('V',u'V'), ('W',u'W'), ('X',u'X'), ('Y',u'Y'), ('Z',u'Z'), ('[',u'['), ('\',u'\\'), (']',u']'), + ('^',u'^'), ('_',u'_'), ('`',u'`'), ('a',u'a'), ('b',u'b'), ('c',u'c'), ('d',u'd'), ('e',u'e'), ('f',u'f'), + ('g',u'g'), ('h',u'h'), ('i',u'i'), ('j',u'j'), ('k',u'k'), ('l',u'l'), ('m',u'm'), ('n',u'n'), + ('o',u'o'), ('p',u'p'), ('q',u'q'), ('r',u'r'), ('s',u's'), ('t',u't'), ('u',u'u'), ('v',u'v'), + ('w',u'w'), ('x',u'x'), ('y',u'y'), ('z',u'z'), ('{',u'{'), ('|',u'|'), ('}',u'}'), ('~',u'~'), + (' ',u' '), ('¡',u'¡'), ('¢',u'¢'), ('£',u'£'), ('¤',u'¤'), ('¥',u'¥'), ('¦',u'¦'), ('§',u'§'), + ('¨',u'¨'), ('©',u'©'), ('ª',u'ª'), ('«',u'«'), ('¬',u'¬'), ('­',u'­'), ('®',u'®'), ('¯',u'¯'), + ('°',u'°'), ('±',u'±'), ('²',u'²'), ('³',u'³'), ('´',u'´'), ('µ',u'µ'), ('¶',u'¶'), ('·',u'·'), + ('¸',u'¸'), ('¹',u'¹'), ('º',u'º'), ('»',u'»'), ('¼',u'¼'), ('½',u'½'), ('¾',u'¾'), ('¿',u'¿'), + ('À',u'À'), ('Á',u'Á'), ('Â',u'Â'), ('Ã',u'Ã'), ('Ä',u'Ä'), ('Å',u'Å'), ('Å',u'Å'), ('Æ',u'Æ'), + ('Ç',u'Ç'), ('È',u'È'), ('É',u'É'), ('Ê',u'Ê'), ('Ë',u'Ë'), ('Ì',u'Ì'), ('Í',u'Í'), ('Î',u'Î'), + ('Ï',u'Ï'), ('Ð',u'Ð'), ('Ñ',u'Ñ'), ('Ò',u'Ò'), ('Ó',u'Ó'), ('Ô',u'Ô'), ('Õ',u'Õ'), ('Ö',u'Ö'), + ('×',u'×'), ('Ø',u'Ø'), ('Ù',u'Ù'), ('Ú',u'Ú'), ('Û',u'Û'), ('Ü',u'Ü'), ('Ý',u'Ý'), ('Þ',u'Þ'), + ('ß',u'ß'), ('à',u'à'), ('á',u'á'), ('â',u'â'), ('ã',u'ã'), ('ä',u'ä'), ('å',u'å'), ('æ',u'æ'), + ('ç',u'ç'), ('è',u'è'), ('é',u'é'), ('ê',u'ê'), ('ë',u'ë'), ('ì',u'ì'), ('í',u'í'), ('í',u'í'), + ('î',u'î'), ('ï',u'ï'), ('ð',u'ð'), ('ñ',u'ñ'), ('ò',u'ò'), ('ó',u'ó'), ('ô',u'ô'), ('õ',u'õ'), + ('ö',u'ö'), ('÷',u'÷'), ('ø',u'ø'), ('ù',u'ù'), ('ú',u'ú'), ('û',u'û'), ('ü',u'ü'), ('ý',u'ý'), + ('þ',u'þ'), ('ÿ',u'ÿ'), ('Ā',u'Ā'), ('ā',u'ā'), ('Ă',u'Ă'), ('ă',u'ă'), ('Ą',u'Ą'), ('ą',u'ą'), + ('Ć',u'Ć'), ('ć',u'ć'), ('Ĉ',u'Ĉ'), ('ĉ',u'ĉ'), ('Ċ',u'Ċ'), ('ċ',u'ċ'), ('Č',u'Č'), ('č',u'č'), + ('Ď',u'Ď'), ('ď',u'ď'), ('Đ',u'Đ'), ('đ',u'đ'), ('Ē',u'Ē'), ('ē',u'ē'), ('Ĕ',u'Ĕ'), ('ĕ',u'ĕ'), + ('Ė',u'Ė'), ('ė',u'ė'), ('Ę',u'Ę'), ('ę',u'ę'), ('Ě',u'Ě'), ('ě',u'ě'), ('Ĝ',u'Ĝ'), ('ĝ',u'ĝ'), + ('Ğ',u'Ğ'), ('ğ',u'ğ'), ('Ġ',u'Ġ'), ('ġ',u'ġ'), ('Ģ',u'Ģ'), ('ģ',u'ģ'), ('Ĥ',u'Ĥ'), ('ĥ',u'ĥ'), + ('Ħ',u'Ħ'), ('ħ',u'ħ'), ('Ĩ',u'Ĩ'), ('ĩ',u'ĩ'), ('Ī',u'Ī'), ('ī',u'ī'), ('Ĭ',u'Ĭ'), ('ĭ',u'ĭ'), + ('Į',u'Į'), ('į',u'į'), ('İ',u'İ'), ('ı',u'ı'), ('IJ',u'IJ'), ('ij',u'ij'), ('Ĵ',u'Ĵ'), ('ĵ',u'ĵ'), + ('Ķ',u'Ķ'), ('ķ',u'ķ'), ('ĸ',u'ĸ'), ('Ĺ',u'Ĺ'), ('ĺ',u'ĺ'), ('Ļ',u'Ļ'), ('ļ',u'ļ'), ('Ľ',u'Ľ'), + ('ľ',u'ľ'), ('Ŀ',u'Ŀ'), ('ŀ',u'ŀ'), ('Ł',u'Ł'), ('ł',u'ł'), ('Ń',u'Ń'), ('ń',u'ń'), ('Ņ',u'Ņ'), + ('ņ',u'ņ'), ('Ň',u'Ň'), ('ň',u'ň'), ('ʼn',u'ʼn'), ('Ŋ',u'Ŋ'), ('ŋ',u'ŋ'), ('Ō',u'Ō'), ('ō',u'ō'), + ('Ŏ',u'Ŏ'), ('ŏ',u'ŏ'), ('Ő',u'Ő'), ('ő',u'ő'), ('Œ',u'Œ'), ('œ',u'œ'), ('Ŕ',u'Ŕ'), ('ŕ',u'ŕ'), + ('Ŗ',u'Ŗ'), ('ŗ',u'ŗ'), ('Ř',u'Ř'), ('ř',u'ř'), ('Ś',u'Ś'), ('ś',u'ś'), ('Ŝ',u'Ŝ'), ('ŝ',u'ŝ'), + ('Ş',u'Ş'), ('ş',u'ş'), ('Š',u'Š'), ('š',u'š'), ('Ţ',u'Ţ'), ('ţ',u'ţ'), ('Ť',u'Ť'), ('ť',u'ť'), + ('Ŧ',u'Ŧ'), ('ŧ',u'ŧ'), ('Ũ',u'Ũ'), ('ũ',u'ũ'), ('Ū',u'Ū'), ('ū',u'ū'), ('Ŭ',u'Ŭ'), ('ŭ',u'ŭ'), + ('Ů',u'Ů'), ('ů',u'ů'), ('Ű',u'Ű'), ('ű',u'ű'), ('Ų',u'Ų'), ('ų',u'ų'), ('Ŵ',u'Ŵ'), ('ŵ',u'ŵ'), + ('Ŷ',u'Ŷ'), ('ŷ',u'ŷ'), ('Ÿ',u'Ÿ'), ('Ź',u'Ź'), ('ź',u'ź'), ('Ż',u'Ż'), ('ż',u'ż'), ('Ž',u'Ž'), + ('ž',u'ž'), ('ſ',u'ſ'), ('Ŕ',u'Ŕ'), ('ŕ',u'ŕ'), ('Ŗ',u'Ŗ'), ('ŗ',u'ŗ'), ('Ř',u'Ř'), ('ř',u'ř'), + ('Ś',u'Ś'), ('ś',u'ś'), ('Ŝ',u'Ŝ'), ('ŝ',u'ŝ'), ('Ş',u'Ş'), ('ş',u'ş'), ('Š',u'Š'), ('š',u'š'), + ('Ţ',u'Ţ'), ('ţ',u'ţ'), ('Ť',u'Ť'), ('Ɂ',u'ť'), ('Ŧ',u'Ŧ'), ('ŧ',u'ŧ'), ('Ũ',u'Ũ'), ('ũ',u'ũ'), + ('Ū',u'Ū'), ('ū',u'ū'), ('Ŭ',u'Ŭ'), ('ŭ',u'ŭ'), ('Ů',u'Ů'), ('ů',u'ů'), ('Ű',u'Ű'), ('ű',u'ű'), + ('Ų',u'Ų'), ('ų',u'ų'), ('Ŵ',u'Ŵ'), ('ŵ',u'ŵ'), ('Ŷ',u'Ŷ'), ('ŷ',u'ŷ'), ('Ÿ',u'Ÿ'), ('Ź',u'Ź'), + ('ź',u'ź'), ('Ż',u'Ż'), ('ż',u'ż'), ('Ž',u'Ž'), ('ž',u'ž'), ('ſ',u'ſ'), + ) + + + #m = re.match() + #s = s.replace(code[0], code[1]) + +## DEFAULT IMPORTER METHODS + + def foreignCards(self): + + # Load file and parse it by minidom + self.loadSource(self.file) + + # Migrating content / time consuming part + # addItemToCards is called for each sm element + self.logger(u'Parsing started.') + self.parse() + self.logger(u'Parsing done.') + + # Return imported cards + return self.cards + + def fields(self): + return 2 + +## PARSER METHODS + + def addItemToCards(self,item): + "This method actually do conversion" + + # new anki card + card = ForeignCard() + + # clean Q and A + card.fields.append(self._fudgeText(self._decode_htmlescapes(item.Question))) + card.fields.append(self._fudgeText(self._decode_htmlescapes(item.Answer))) + card.tags = u"" + + # pre-process scheduling data + tLastrep = time.mktime(time.strptime(item.LastRepetition, '%d.%m.%Y')) + tToday = time.time() + + # convert learning data + if not self.META.resetLearningData: + # migration of LearningData algorithm + card.interval = item.Interval + card.successive = item.Repetitions + ##card.due = tToday + (float(item.Interval) * 86400.0) - tLastrep + card.due = tLastrep + (float(item.Interval) * 86400.0) + card.lastDue = 0 + + card.factor = float(item.AFactor.replace(',','.')) + card.lastFactor = float(item.AFactor.replace(',','.')) + + # SM is not exporting all the information Anki keeps track off, so it + # needs to be fudged + card.youngEase0 = item.Lapses + card.youngEase3 = item.Repetitions + item.Lapses + card.yesCount = item.Repetitions + card.noCount = item.Lapses + card.reps = card.yesCount + card.noCount + card.spaceUntil = card.due + card.combinedDue = card.due + + # categories & tags + # it's worth to have every theme (tree structure of sm collection) stored in tags, but sometimes not + # you can deceide if you are going to tag all toppics or just that containing some pattern + tTaggTitle = False + for pattern in self.META.pathsToBeTagged: + if item.lTitle != None and pattern.lower() in u" ".join(item.lTitle).lower(): + tTaggTitle = True + break + if tTaggTitle or self.META.tagAllTopics: + # normalize - remove diacritic punctuation from unicode chars to ascii + item.lTitle = [ self._unicode2ascii(topic) for topic in item.lTitle] + + # Transfrom xyz / aaa / bbb / ccc on Title path to Tag xyzAaaBbbCcc + # clean things like [999] or [111-2222] from title path, example: xyz / [1000-1200] zyx / xyz + # clean whitespaces + # set Capital letters for first char of the word + tmp = list(set([ re.sub('(\[[0-9]+\])' , ' ' , i ).replace('_',' ') for i in item.lTitle ])) + tmp = list(set([ re.sub('(\W)',' ', i ) for i in tmp ])) + tmp = list(set([ re.sub( '^[0-9 ]+$','',i) for i in tmp ])) + tmp = list(set([ capwords(i).replace(' ','') for i in tmp ])) + tags = [ j[0].lower() + j[1:] for j in tmp if j.strip() <> ''] + + card.tags += u" ".join(tags) + + if self.META.tagMemorizedItems and item.Interval >0: + card.tags += " Memorized" + + self.logger(u'Element tags\t- ' + card.tags, level=3) + + self.cards.append(card) + + def logger(self,text,level=1): + "Wrapper for Anki logger" + + dLevels={0:'',1:u'Info',2:u'Verbose',3:u'Debug'} + if level<=self.META.loggerLevel: + self.deck.updateProgress(_(text)) + + if self.META.logToStdOutput: + print self.__class__.__name__+ u" - " + dLevels[level].ljust(9) +u' -\t'+ _(text) + + + # OPEN AND LOAD + def openAnything(self,source): + "Open any source / actually only openig of files is used" + + if source == "-": + return sys.stdin + + # try to open with urllib (if source is http, ftp, or file URL) + import urllib + try: + return urllib.urlopen(source) + except (IOError, OSError): + pass + + # try to open with native open function (if source is pathname) + try: + return open(source) + except (IOError, OSError): + pass + + # treat source as string + import StringIO + return StringIO.StringIO(str(source)) + + def loadSource(self, source): + """Load source file and parse with xml.dom.minidom""" + self.source = source + self.logger(u'Load started...') + sock = self.openAnything(self.source) + self.xmldoc = minidom.parse(sock).documentElement + sock.close() + self.logger(u'Load done.') + + + # PARSE + def parse(self, node=None): + "Parse method - parses document elements" + + if node==None and self.xmldoc<>None: + node = self.xmldoc + + _method = "parse_%s" % node.__class__.__name__ + if hasattr(self,_method): + parseMethod = getattr(self, _method) + parseMethod(node) + else: + self.logger(u'No handler for method %s' % _method, level=3) + + def parse_Document(self, node): + "Parse XML document" + + self.parse(node.documentElement) + + def parse_Element(self, node): + "Parse XML element" + + _method = "do_%s" % node.tagName + if hasattr(self,_method): + handlerMethod = getattr(self, _method) + handlerMethod(node) + else: + self.logger(u'No handler for method %s' % _method, level=3) + #print traceback.print_exc() + + def parse_Text(self, node): + "Parse text inside elements. Text is stored into local buffer." + + text = node.data + self.cntBuf.append(text) + + #def parse_Comment(self, node): + # """ + # Source can contain XML comments, but we ignore them + # """ + # pass + + + # DO + def do_SuperMemoCollection(self, node): + "Process SM Collection" + + for child in node.childNodes: self.parse(child) + + def do_SuperMemoElement(self, node): + "Process SM Element (Type - Title,Topics)" + + self.logger('='*45, level=3) + + self.cntElm.append(SuperMemoElement()) + self.cntElm[-1]['lTitle'] = self.cntMeta['title'] + + #parse all child elements + for child in node.childNodes: self.parse(child) + + #strip all saved strings, just for sure + for key in self.cntElm[-1].keys(): + if hasattr(self.cntElm[-1][key], 'strip'): + self.cntElm[-1][key]=self.cntElm[-1][key].strip() + + #pop current element + smel = self.cntElm.pop() + + # Process cntElm if is valid Item (and not an Topic etc..) + # if smel.Lapses != None and smel.Interval != None and smel.Question != None and smel.Answer != None: + if smel.Title == None and smel.Question != None and smel.Answer != None: + if smel.Answer.strip() !='' and smel.Question.strip() !='': + + # migrate only memorized otherway skip/continue + if self.META.onlyMemorizedItems and not(int(smel.Interval) > 0): + self.logger(u'Element skiped \t- not memorized ...', level=3) + else: + #import sm element data to Anki + self.addItemToCards(smel) + self.logger(u"Import element \t- " + smel['Question'], level=3) + + #print element + self.logger('-'*45, level=3) + for key in smel.keys(): + self.logger('\t%s %s' % ((key+':').ljust(15),smel[key]), level=3 ) + else: + self.logger(u'Element skiped \t- no valid Q and A ...', level=3) + + + else: + # now we know that item was topic + # parseing of whole node is now finished + + # test if it's really topic + if smel.Title != None: + # remove topic from title list + t = self.cntMeta['title'].pop() + self.logger(u'End of topic \t- %s' % (t), level=2) + + def do_Content(self, node): + "Process SM element Content" + + for child in node.childNodes: + if hasattr(child,'tagName') and child.firstChild != None: + self.cntElm[-1][child.tagName]=child.firstChild.data + + def do_LearningData(self, node): + "Process SM element LearningData" + + for child in node.childNodes: + if hasattr(child,'tagName') and child.firstChild != None: + self.cntElm[-1][child.tagName]=child.firstChild.data + + # It's being processed in do_Content now + #def do_Question(self, node): + # for child in node.childNodes: self.parse(child) + # self.cntElm[-1][node.tagName]=self.cntBuf.pop() + + # It's being processed in do_Content now + #def do_Answer(self, node): + # for child in node.childNodes: self.parse(child) + # self.cntElm[-1][node.tagName]=self.cntBuf.pop() + + def do_Title(self, node): + "Process SM element Title" + + t = self._decode_htmlescapes(node.firstChild.data) + self.cntElm[-1][node.tagName] = t + self.cntMeta['title'].append(t) + self.cntElm[-1]['lTitle'] = self.cntMeta['title'] + self.logger(u'Start of topic \t- ' + u" / ".join(self.cntMeta['title']), level=2) + + + def do_Type(self, node): + "Process SM element Type" + + if len(self.cntBuf) >=1 : + self.cntElm[-1][node.tagName]=self.cntBuf.pop() + + +if __name__ == '__main__': + + # for testing you can start it standalone + + #file = u'/home/epcim/hg2g/dev/python/sm2anki/ADVENG2EXP.xxe.esc.zaloha_FINAL.xml' + #file = u'/home/epcim/hg2g/dev/python/anki/libanki/tests/importing/supermemo/original_ENGLISHFORBEGGINERS_noOEM.xml' + #file = u'/home/epcim/hg2g/dev/python/anki/libanki/tests/importing/supermemo/original_ENGLISHFORBEGGINERS_oem_1250.xml' + file = str(sys.argv[1]) + impo = SupermemoXmlImporter(Deck(),file) + impo.foreignCards() + + sys.exit(1) + +# vim: ts=4 sts=2 ft=python diff --git a/tests/importing/supermemo_ENGLISHFORBEGGINERS_noOEM.xml b/tests/importing/supermemo_ENGLISHFORBEGGINERS_noOEM.xml new file mode 100644 index 000000000..717bb47c9 --- /dev/null +++ b/tests/importing/supermemo_ENGLISHFORBEGGINERS_noOEM.xml @@ -0,0 +1,2637 @@ + + + 3572 + + + 1 + + Topic + + + + + + + + + 40326 + + English for begginers - czech + + Topic + + + 40327 + + 1-400 + + Topic + + + 40615 + + dolů ... pohybovat + + Topic + + + 10247 + + Item + + + sedět + + sit [sit] + + + + 1844 + + 7 + + 0 + + 19.09.2002 + + 5,701 + + 2,452 + + + + + 40616 + + Item + + + dolů (např.: dívat se dolů) + + down [daun] (look down) + + + + 8201 + + 6 + + 0 + + 20.01.2004 + + 6,699 + + 6,635 + + + + + 40617 + + Item + + + létat, letět + + fly [flai] + + + + 4917 + + 7 + + 0 + + 28.12.2002 + + 6,416 + + 6,272 + + + + + 40618 + + Item + + + slyšet + + hear [hiđr] + + + + 6296 + + 7 + + 0 + + 21.01.2004 + + 6,416 + + 5,835 + + + + + 40619 + + Item + + + dívat se (na) + + look (at) [luk] + + + + 1662 + + 6 + + 0 + + 22.09.2002 + + 5,401 + + 2,115 + + + + + 40620 + + Item + + + mnoho + + many ['męni]<br>a lot of [đ lot đv] + + + + 1706 + + 8 + + 0 + + 04.07.2005 + + 3,805 + + 1,644 + + + + + 40621 + + Item + + + musím to udělat + + I must do it [ai mast du: it] + + + + 3500 + + 8 + + 0 + + 28.12.2002 + + 5,954 + + 5,564 + + + + + 40622 + + Item + + + nic + + nothing ['naőiű] + + + + 4897 + + 7 + + 0 + + 28.12.2002 + + 6,410 + + 6,302 + + + + + 40623 + + Item + + + prosím (např.: buďte potichu prosím) + + please [pli:z] (be quiet please) + + + + 5819 + + 6 + + 0 + + 28.12.2002 + + 6,698 + + 6,583 + + + + + 40624 + + Item + + + dát, dávat, položit + + put [put] + + + + 5000 + + 7 + + 0 + + 28.12.2002 + + 6,763 + + 6,906 + + + + + 40625 + + Item + + + věta (jako skupina výrazů) + + sentence ['sentđns] + + + + 4862 + + 7 + + 0 + + 28.12.2002 + + 6,416 + + 6,298 + + + + + 40626 + + Item + + + on by měl, on musí + + he should [- šud]<br>he ought to [- o:t + -] + + + + 1111 + + 8 + + 0 + + 31.12.2004 + + 3,893 + + 1,337 + + + + + 40627 + + Item + + + pravdivý, věrný (např.: pravdivá zpráva) + + true [tru:] (true message) + + + + 5051 + + 7 + + 0 + + 28.12.2002 + + 6,407 + + 6,290 + + + + + 40628 + + Item + + + (det) each [i:č] (in: each time I see you) + + every + + + + 2700 + + 9 + + 1 + + 01.10.2002 + + 5,854 + + 3,694 + + + + + 40629 + + Item + + + angličtina, Angličané + + English ['iűgliš] + + + + 4187 + + 7 + + 0 + + 28.12.2002 + + 6,208 + + 6,287 + + + + + 40630 + + Item + + + na shledanou, sbohem + + goodbye [gud'bai] + + + + 3314 + + 7 + + 0 + + 08.11.2004 + + 4,541 + + 2,245 + + + + + 40631 + + Item + + + je deset hodin + + it's ten o'clock [- - đ'klok] + + + + 4810 + + 7 + + 0 + + 28.12.2002 + + 6,409 + + 6,371 + + + + + 40632 + + Item + + + hrát, hrát si + + play [plei] + + + + 4508 + + 7 + + 0 + + 28.12.2002 + + 6,416 + + 6,296 + + + + + 40633 + + Item + + + cesta, způsob (např.: to je jediný způsob) + + way [wei] (it's the only way) + + + + 5478 + + 6 + + 0 + + 28.12.2002 + + 6,446 + + 6,297 + + + + + 40634 + + Item + + + pohybovat, přemisťovat, stěhovat + + move [mu:v] + + + + 6976 + + 6 + + 0 + + 21.01.2004 + + 6,446 + + 5,917 + + + + + + 40392 + + Osobní zájmena, členy, sloveso býti + + Topic + + + 38619 + + Item + + + ono, to + + it [it] + + + + 4478 + + 7 + + 0 + + 28.12.2002 + + 6,321 + + 6,289 + + + + + 40393 + + Item + + + + + I [ai] + + + + 4328 + + 7 + + 0 + + 28.12.2002 + + 6,249 + + 6,263 + + + + + 40394 + + Item + + + ty + + you [ju] + + + + 1861 + + 7 + + 0 + + 28.12.2002 + + 4,254 + + 2,275 + + + + + 40395 + + Item + + + on + + he [hi:] + + + + 3919 + + 7 + + 0 + + 19.01.2004 + + 5,059 + + 3,302 + + + + + 40396 + + Item + + + ona + + she [ši:] + + + + 1869 + + 7 + + 0 + + 28.12.2002 + + 4,254 + + 2,307 + + + + + 40397 + + Item + + + my + + we [wi:] + + + + 4860 + + 7 + + 0 + + 28.12.2002 + + 6,322 + + 6,320 + + + + + 40398 + + Item + + + vy + + you [ju:] + + + + 8075 + + 6 + + 0 + + 06.11.2004 + + 6,446 + + 5,482 + + + + + 40399 + + Item + + + oni + + they [÷ei] + + + + 6993 + + 6 + + 0 + + 20.01.2004 + + 6,446 + + 6,086 + + + + + 40400 + + Item + + + člen předcházející neznámým nebo + dříve<br>neuvedeným věcem + + a [đ]<br>an [đn] (před samohláskou) + + + + 6802 + + 6 + + 0 + + 20.01.2004 + + 6,446 + + 5,988 + + + + + 40401 + + Item + + + člen předcházející dříve uváděným podst. + jménům + + the [÷đ] + + + + 7480 + + 7 + + 0 + + 19.01.2004 + + 6,249 + + 6,302 + + + + + 40402 + + Item + + + být + + be [bi:] + + + + 2932 + + 7 + + 0 + + 01.10.2002 + + 6,076 + + 3,598 + + + + + 40403 + + Item + + + já jsem + + I am [ai đm]<br>I'm [aim] + + + + 5285 + + 7 + + 0 + + 28.12.2002 + + 6,249 + + 6,299 + + + + + 40404 + + Item + + + nejsem + + I am not [ai đm not]<br>I'm not [aim not] + + + + 5056 + + 7 + + 0 + + 28.12.2002 + + 6,317 + + 6,242 + + + + + 40405 + + Item + + + ty jsi + + you are [ju a:r]<br>you're [jđr] + + + + 4846 + + 7 + + 0 + + 28.12.2002 + + 6,249 + + 6,343 + + + + + 40406 + + Item + + + on (ona,ono) je + + he (she, it) is [hi: iz]<br>he's [hiz] + + + + 4934 + + 7 + + 0 + + 28.12.2002 + + 6,249 + + 6,342 + + + + + 40407 + + Item + + + my jsme + + we are [wi: a:r] + + + + 6831 + + 6 + + 0 + + 22.01.2004 + + 6,446 + + 5,740 + + + + + 40408 + + Item + + + vy jste + + you are [ju: a:r] + + + + 4367 + + 7 + + 0 + + 28.12.2002 + + 6,318 + + 6,320 + + + + + 40409 + + Item + + + oni jsou + + they are [÷ei a:r] + + + + 5326 + + 6 + + 0 + + 28.12.2002 + + 6,446 + + 6,281 + + + + + 40410 + + Item + + + zkrácený tvar: I am + + I'm [aim] + + + + 6890 + + 6 + + 0 + + 21.01.2004 + + 6,446 + + 5,864 + + + + + 40411 + + Item + + + zkrácený tvar: he is + + he's [hi:z] + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 3,000 + + 0,000 + + + + + 40412 + + Item + + + zkrácený tvar: she is + + she's [ši:z] + + + + 5296 + + 6 + + 0 + + 28.12.2002 + + 6,446 + + 6,245 + + + + + 40413 + + Item + + + zkrácený tvar: is not + + isn't ['izđnt] + + + + 6885 + + 6 + + 0 + + 19.01.2004 + + 6,446 + + 6,115 + + + + + 40414 + + Item + + + zkrácený tvar: are not + + aren't [a:nt] + + + + 5097 + + 7 + + 0 + + 28.12.2002 + + 6,322 + + 6,293 + + + + + 40415 + + Item + + + zkrácený tvar: you are + + you're [jđr] + + + + 7328 + + 7 + + 0 + + 19.01.2004 + + 6,249 + + 6,163 + + + + + Topic + + + Následuje prvých 400 položek <br>= dvojic otázka - + odpověď + + + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 3,000 + + 0,000 + + + + 40328 + + Topic + + + ano, ne, základní číslovky + + + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 3,000 + + 0,000 + + + + 40329 + + Item + + + ano + + yes [jes] + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 3,000 + + 0,000 + + + + + 40330 + + Item + + + ne + + no [nđu] + + + + 3627 + + 6 + + 0 + + 01.10.2002 + + 6,155 + + 4,450 + + + + + 40331 + + Item + + + nula + + zero ['zi:rđu] + + + + 5044 + + 7 + + 0 + + 28.12.2002 + + 6,247 + + 6,385 + + + + + 40332 + + Item + + + jeden + + one [wan] + + + + 4974 + + 7 + + 0 + + 28.12.2002 + + 6,247 + + 6,272 + + + + + 40333 + + Item + + + dva + + two [tu:] + + + + 5316 + + 6 + + 0 + + 28.12.2002 + + 6,444 + + 6,269 + + + + + 40334 + + Item + + + tři + + three [őri:] + + + + 5165 + + 7 + + 0 + + 28.12.2002 + + 6,416 + + 6,306 + + + + + 40335 + + Topic + + + Zobrazuje se vám správně přepis + výslovnosti?<br><br>Pokud ne, možná jste po + instalaci programu neprovedli reset počítače, aby se mohlo + použít nově nainstalované písmo Fonetik.ttf. Zkuste provést + reset a znovu spustit program. Nebo nemáte Fonetik.ttf (nebo + Eng_Cze.ttf) ve Windows nainstalován. Zkontrolujete to tak, že + vyberete Start: Nastavení: Ovládací panely: Písma; v seznamu + písem byste jej měli najít. Není-li tam, můžete jej nainstalovat + pomocí nabídky Soubor: Přidat nové písmo (Fonetik.ttf je na + CD-ROM). + + + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 1,010 + + 0,000 + + + + + 40336 + + Item + + + čtyři + + four [fo:r] + + + + 4382 + + 7 + + 0 + + 28.12.2002 + + 6,416 + + 6,342 + + + + + 40337 + + Item + + + pět + + five [faiv] + + + + 1844 + + 7 + + 0 + + 28.12.2002 + + 4,252 + + 2,355 + + + + + 40338 + + Item + + + šest + + six [siks] + + + + 5089 + + 7 + + 0 + + 28.12.2002 + + 6,247 + + 6,322 + + + + + 40339 + + Item + + + sedm + + seven ['sevđn] + + + + 4010 + + 7 + + 0 + + 28.12.2002 + + 6,247 + + 6,266 + + + + + 40340 + + Item + + + osm + + eight [eit] + + + + 4678 + + 7 + + 0 + + 28.12.2002 + + 6,416 + + 6,330 + + + + + 40341 + + Item + + + devět + + nine [nain] + + + + 3717 + + 7 + + 0 + + 20.01.2004 + + 5,051 + + 3,155 + + + + + 40342 + + Item + + + deset + + ten [ten] + + + + 6570 + + 6 + + 0 + + 22.01.2004 + + 6,444 + + 5,558 + + + + + 40343 + + Item + + + jedenáct + + eleven [i'levđn] + + + + 4556 + + 7 + + 0 + + 28.12.2002 + + 6,245 + + 6,293 + + + + + 40344 + + Item + + + dvanáct + + twelve [twelv] + + + + 7962 + + 6 + + 0 + + 21.01.2004 + + 6,703 + + 6,624 + + + + + 40345 + + Item + + + třináct + + thirteen [,őđ:r'ti:n] + + + + 4802 + + 7 + + 0 + + 28.12.2002 + + 6,416 + + 6,360 + + + + + 40346 + + Item + + + čtrnáct + + fourteen [,fo:r'ti:n] + + + + 4931 + + 7 + + 0 + + 28.12.2002 + + 6,413 + + 6,290 + + + + + 40347 + + Item + + + patnáct + + fifteen [fif'ti:n] + + + + 2731 + + 7 + + 0 + + 13.10.2002 + + 5,595 + + 3,646 + + + + + 40348 + + Item + + + šestnáct + + sixteen [,sik'sti:n] + + + + 7499 + + 6 + + 0 + + 20.01.2004 + + 6,442 + + 6,122 + + + + + 40349 + + Item + + + sedmnáct + + seventeen [,sevđn'ti:n] + + + + 1785 + + 7 + + 0 + + 14.11.2002 + + 4,314 + + 2,280 + + + + + 40350 + + Item + + + osmnáct + + eighteen [ei'ti:n] + + + + 6815 + + 6 + + 0 + + 22.01.2004 + + 6,442 + + 5,712 + + + + + 40351 + + Item + + + devatenáct + + nineteen [,nain'ti:n] + + + + 1671 + + 6 + + 0 + + 28.12.2002 + + 3,974 + + 2,008 + + + + + 40352 + + Item + + + dvacet + + twenty ['twenti] + + + + 4916 + + 7 + + 0 + + 28.12.2002 + + 6,305 + + 6,254 + + + + + 40353 + + Item + + + třicet + + thirty ['őđ:rti] + + + + 7717 + + 7 + + 0 + + 19.01.2004 + + 6,602 + + 6,658 + + + + + 40354 + + Item + + + čtyřicet + + forty ['fo:rti] + + + + 5537 + + 6 + + 0 + + 28.12.2002 + + 6,432 + + 6,314 + + + + + 40355 + + Item + + + padesát + + fifty ['fifti] + + + + 4939 + + 7 + + 0 + + 28.12.2002 + + 6,312 + + 6,348 + + + + + 40356 + + Item + + + šedesát + + sixty ['siksti] + + + + 4557 + + 7 + + 0 + + 28.12.2002 + + 6,313 + + 6,294 + + + + + 40357 + + Item + + + sedmdesát + + seventy ['sevđnti] + + + + 1293 + + 7 + + 0 + + 13.11.2002 + + 3,572 + + 1,671 + + + + + 40358 + + Item + + + osmdesát + + eighty ['eiti] + + + + 3778 + + 7 + + 0 + + 20.01.2004 + + 4,989 + + 3,167 + + + + + 40359 + + Item + + + devadesát + + ninety ['nainti] + + + + 2437 + + 7 + + 0 + + 01.10.2002 + + 5,701 + + 3,190 + + + + + 40360 + + Item + + + sto + + (one) hundred [- 'handrid] + + + + 4932 + + 7 + + 0 + + 28.12.2002 + + 6,315 + + 6,243 + + + + + 40361 + + Item + + + dvěstě + + two hundred [- -] + + + + 5162 + + 7 + + 0 + + 28.12.2002 + + 6,316 + + 6,303 + + + + + 40362 + + Item + + + tisíc + + (one) thousand ['őauzđnd] + + + + 5604 + + 6 + + 0 + + 28.12.2002 + + 6,552 + + 6,608 + + + + + 40363 + + Item + + + milion + + (one) million [- 'miljđn] + + + + 5305 + + 7 + + 0 + + 28.12.2002 + + 6,320 + + 6,323 + + + + + 40364 + + Item + + + miliarda + + (one) billion [- 'biljđn] + + + + 7132 + + 7 + + 0 + + 20.01.2004 + + 6,321 + + 5,988 + + + + + + 40719 + + dohromady ... řidič + + Topic + + + 40720 + + Item + + + dohromady, společně + + together [tđ'ge÷đr] + + + + 2389 + + 6 + + 0 + + 22.09.2002 + + 5,969 + + 2,997 + + + + + 40721 + + Item + + + strýc + + uncle ['aűkđl] + + + + 3331 + + 6 + + 0 + + 20.01.2004 + + 4,863 + + 2,797 + + + + + + + + 43617 + + Vocabulary + + Topic + + + 43832 + + gramatika + + Item + + + 43872 + + lekce9 + + Item + + + 16328 + + Item + + + tazaci dovetky + + let => shall ..<br>.. ?? + + + + 2768 + + 4 + + 1 + + 21.09.2005 + + 5,797 + + 3,647 + + + + + 43873 + + Item + + + Which pouzijeme mluvime li o ..<br>What + --//-- + + which - o omezene moznosti vyberu //... which do you + want?<br><br>what - mame na mysly neomez. vyber // + What car do you drive? + + + + 674 + + 4 + + 2 + + 08.11.2004 + + 4,912 + + 2,390 + + + + + + 43877 + + lekce10 + + Item + + + 40126 + + Item + + + Nechala by nas myt se ve studene vode. + + She WOULD make us wash in ice-cold water. + + + + 82 + + 4 + + 2 + + 27.09.2005 + + 5,470 + + 6,308 + + + + + 43878 + + Item + + + prislovce castosti deje..<br><br>- malokdy + casto obvykle zridka malokdy normalne<br>- chovani ve + vete?<br><br>dalsi: nekdy prilezitostne + + hardly frequently usually rarely seldom + normally<br><br>davaji se pred hlavni vyznamove + sloveso.. !!<br>We don't usually eat + fish.<br><br>ostani : sometimes + occasionally + + + + 69 + + 2 + + 3 + + 05.09.2005 + + 3,036 + + 1,971 + + + + + + 43849 + + lekce8 + + Item + + + 43850 + + Item + + + modalni slovesa pravdepodobnosti<br>- + nenistejsi<br>- mene jista + + - will a won't // I'll see you later.. urcite<br>- + might/could<br><br>Will a won't -> take, jme + presvedceni nebo povazujeme z a pravdive v soucasnosti + + + + 806 + + 5 + + 2 + + 16.11.2004 + + 5,072 + + 2,931 + + + + + 43851 + + Item + + + modalni slovesa pravdepodobnosti<br>- + nejlogictejsi, vyklad je mene jisty nez s will + + must a can't<br><br>He must be + joking!<br><br>can't -> jsme-li presvedceni ze se + to nestalo ci to nemuze byt log pravdive.. + + + + 1613 + + 9 + + 0 + + 22.01.2004 + + 5,706 + + 4,136 + + + + + + + 43760 + + testy + + Item + + + 43761 + + 01 + + Item + + + 16676 + + Item + + + Icouldn't get __ to Joe. I don't thing his phone's + working. + + get throught + + + + 1307 + + 7 + + 0 + + 22.01.2004 + + 5,223 + + 2,964 + + + + + 43762 + + Item + + + You always get __ of doing the washing-up. It's not + fair. + + get out + + + + 704 + + 4 + + 3 + + 04.07.2005 + + 3,080 + + 1,326 + + + + + + 43812 + + 03 + + Item + + + 38568 + + Item + + + Did you succeed __ telling ... + + succeed in + + + + 2861 + + 6 + + 1 + + 20.01.2004 + + 6,340 + + 6,011 + + + + + 43831 + + Item + + + + + + + + + + + 43778 + + 02 + + Item + + + 43524 + + Item + + + bez vasne xx + + dispassionate + + + + 2719 + + 7 + + 0 + + 21.11.2004 + + 5,712 + + 3,918 + + + + + 43811 + + Item + + + When She started her career, she ___ racism and + prejudice. + + ?? + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 3,000 + + 0,000 + + + + + + + \ No newline at end of file diff --git a/tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml b/tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml new file mode 100644 index 000000000..19f45a47a --- /dev/null +++ b/tests/importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml @@ -0,0 +1,929 @@ + + + 3572 + + + 1 + + Topic + + + + + + + + + 40326 + + English for begginers - czech + + Topic + + + 40327 + + 1-400 + + Topic + + + 40615 + + dolů ... pohybovat + + Topic + + + 40618 + + Item + + + sly&#353;et + + hear [hi&#273;r] + + + + 6296 + + 7 + + 0 + + 21.01.2004 + + 6,416 + + 5,835 + + + + + 40619 + + Item + + + d&#237;vat se (na) + + look (at) [luk] + + + + 1662 + + 6 + + 0 + + 22.09.2002 + + 5,401 + + 2,115 + + + + + Topic + + + N&#225;sleduje prv&#253;ch 400 polo&#382;ek = + dvojic ot&#225;zka - odpov&#283;&#271; + + + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 3,000 + + 0,000 + + + + 40677 + + pršet ... obvykle + + Topic + + + 40678 + + Item + + + pr&#353;et (nap&#345;.: + pr&#353;&#237;) + + rain [rein] (it's raining) + + + + 1734 + + 6 + + 0 + + 28.12.2002 + + 4,221 + + 2,206 + + + + + 40679 + + Item + + + sestra + + sister ['sist&#273;r] + + + + 4890 + + 5 + + 0 + + 28.12.2002 + + 6,322 + + 6,302 + + + + + 40680 + + Item + + + syn + + son [san] + + + + 5260 + + 6 + + 0 + + 28.12.2002 + + 6,238 + + 6,284 + + + + + 40681 + + Item + + + rozum&#283;t + + understand [,and&#273;r'st&#281;nd] + + + + 5115 + + 5 + + 0 + + 28.12.2002 + + 6,329 + + 6,292 + + + + + 40682 + + Item + + + nav&#353;t&#237;vit + + visit ['vizit] + + + + 4797 + + 5 + + 0 + + 28.12.2002 + + 6,329 + + 6,295 + + + + + 40683 + + Item + + + n&#225;v&#353;t&#283;va + + visit ['vizit] + + + + 4437 + + 5 + + 0 + + 15.10.2002 + + 6,256 + + 5,398 + + + + + 40684 + + Item + + + bez (nap&#345;.: bez n&#225;s) + + without [wi&#247;'aut] (without us) + + + + 4826 + + 6 + + 0 + + 28.12.2002 + + 6,188 + + 6,276 + + + + + 40685 + + Item + + + potkat + + meet [mi:t] + + + + 1164 + + 6 + + 1 + + 14.11.2002 + + 3,976 + + 1,953 + + + + + 40686 + + Item + + + postel, l&#367;&#382;ko + + bed [bed] + + + + 5023 + + 6 + + 0 + + 28.12.2002 + + 6,188 + + 6,302 + + + + + 40687 + + Item + + + tma + + darkness [da:rkn&#273;s] + + + + 1186 + + 8 + + 0 + + 28.12.2002 + + 3,801 + + 1,944 + + + + + 40688 + + Item + + + doktor, l&#233;ka&#345; + + doctor ['dokt&#273;r] + + + + 5237 + + 6 + + 0 + + 28.12.2002 + + 6,216 + + 6,310 + + + + + 40689 + + Item + + + hodina (60 minut) + + hour [au&#273;r] + + + + 1771 + + 6 + + 0 + + 28.12.2002 + + 4,193 + + 2,288 + + + + + 40690 + + Item + + + druh (typ) + + kind [kaind] sort [so:rt] (type) + + + + 1813 + + 6 + + 0 + + 06.09.2002 + + 5,705 + + 2,457 + + + + + 40691 + + Item + + + posledn&#237; + + last [la:st] + + + + 4914 + + 6 + + 0 + + 28.12.2002 + + 6,216 + + 6,332 + + + + + 40692 + + Item + + + cena + + price [prais] + + + + 5178 + + 6 + + 0 + + 28.12.2002 + + 6,202 + + 6,322 + + + + + 40693 + + Item + + + pamatovat + + remember [ri'memb&#273;r] + + + + 1843 + + 6 + + 0 + + 28.12.2002 + + 4,193 + + 2,267 + + + + + 40694 + + Item + + + bohat&#253; + + rich [ri&#269;] + + + + 3958 + + 8 + + 1 + + 28.12.2002 + + 6,373 + + 6,243 + + + + + 40695 + + Item + + + slunce + + sun [san] + + + + 1924 + + 6 + + 0 + + 28.12.2002 + + 4,193 + + 2,293 + + + + + 40696 + + Item + + + plavat (o &#269;lov&#283;ku nebo + zv&#237;&#345;eti) + + swim [swim] + + + + 4792 + + 6 + + 0 + + 28.12.2002 + + 6,238 + + 6,305 + + + + + 40697 + + Item + + + obvykle + + usually ['ju&#382;u&#273;li] + + + + 3770 + + 7 + + 1 + + 28.12.2002 + + 5,896 + + 5,610 + + + + + + + + 43617 + + Vocabulary + + Topic + + + 43832 + + gramatika + + Item + + + 43872 + + lekce9 + + Item + + + 43876 + + Item + + + Are you coming? - kratke odp. + + I thing so > Mysli ze ano I belive so I hope so I + don't thing so I hope not I thing not + + + + 525 + + 5 + + 1 + + 08.11.2004 + + 4,799 + + 1,888 + + + + + + 43877 + + lekce10 + + Item + + + 40126 + + Item + + + Nechala by nas myt se ve studene vode. + + She WOULD make us wash in ice-cold water. + + + + 82 + + 4 + + 2 + + 27.09.2005 + + 5,470 + + 6,308 + + + + + 43884 + + Item + + + Zvykam si na podnebi. + + I'm getting used to the climate. ----------- get > + zmena stavu + + + + 384 + + 5 + + 2 + + 05.09.2005 + + 3,699 + + 1,770 + + + + + + + + 43404 + + Vocabulary - books + + Item + + + 29049 + + Item + + + znep&#345;&#225;telit se + + fall out + + + + 2447 + + 6 + + 4 + + 13.10.2002 + + 6,079 + + 4,582 + + + + + 29101 + + Item + + + bla&#382;en&#253; + + blissful [blisfl] + + + + 1356 + + 6 + + 1 + + 14.11.2002 + + 4,236 + + 2,291 + + + + + 29964 + + Item + + + p&#345;evz&#237;t + + take over + + + + 1929 + + 7 + + 2 + + 04.07.2005 + + 4,877 + + 1,939 + + + + + 38828 + + Item + + + + + + + + + + 39795 + + Item + + + &#382;ivotopis + + curiculum vitae + + + + 1999 + + 6 + + 2 + + 13.10.2002 + + 5,705 + + 3,675 + + + + Item + + + BOOKs - in english + + + + + + 0 + + 0 + + 0 + + 04.08.2000 + + 3,000 + + 0,000 + + + + 43499 + + Item + + + p&#345;epojit + + put through + + + + 1739 + + 8 + + 1 + + 13.10.2002 + + 5,520 + + 3,257 + + + + + \ No newline at end of file diff --git a/tests/importing/supermemo_ENGLISHVOCABULARYBUILDER.xml b/tests/importing/supermemo_ENGLISHVOCABULARYBUILDER.xml new file mode 100644 index 000000000..3f44c2f65 --- /dev/null +++ b/tests/importing/supermemo_ENGLISHVOCABULARYBUILDER.xml @@ -0,0 +1 @@ +631TopicEnglish Vocabulary Builder00001.06.20093,0000,0002PrefixesTopic3Itemprefix: Latin: beforepre- (e.g. prehistoric)00001.06.20093,0000,0004ItemEnglish: pre- (e.g. to predict)(prefix)(Latin)before00001.06.20093,0000,0005Itemprefix: Latin: from / downde- (e.g. to detain / to devalue)00001.06.20093,0000,0006ItemEnglish: de- (e.g. to detain / to devalue)(prefix)(Latin)from / down00001.06.20093,0000,0007Itemprefix: Latin: betweeninter- (e.g. interstate)00001.06.20093,0000,0008ItemEnglish: inter- (e.g. interstate)(prefix)(Latin)between00001.06.20093,0000,0009Itemprefix: Latin: againstob- (e.g. to object)00001.06.20093,0000,00010ItemEnglish: ob- (e.g. to object)(prefix)(Latin)against00001.06.20093,0000,00011Itemprefix: Latin: notdis- / in- / non- (e.g. to disagree / inattention / a non-smoker)00001.06.20093,0000,00012ItemEnglish: dis- / in- / non- (e.g. to disagree / inattention / a non-smoker)(prefix)(Latin)not00001.06.20093,0000,00013Itemprefix: Greek: onemono- (e.g. monolingual)00001.06.20093,0000,00014ItemEnglish: mono- (e.g. monolingual)(prefix)(Greek)one00001.06.20093,0000,00015Itemprefix: Greek: uponepi- (e.g. (an) epilog)00001.06.20093,0000,00016ItemEnglish: epi- (e.g. (an) epilog)(prefix)(Greek)upon00001.06.20093,0000,00017Itemprefix: Latin: to/towardsad- (e.g. to adapt)00001.06.20093,0000,00018ItemEnglish: ad- (e.g. to adapt)(prefix)(Latin)to/towards00001.06.20093,0000,00019Itemprefix: Latin/Greek/French: notun- (e.g. unhappy)00001.06.20093,0000,00020ItemEnglish: un- (e.g. unhappy)(prefix)(Latin/Greek/French)not00001.06.20093,0000,00021Itemprefix: Latin: togethercom- (e.g. companion)00001.06.20093,0000,00022ItemEnglish: com- (e.g. companion)(prefix)(Latin)together00001.06.20093,0000,00023Itemprefix: Latin: former / out ofex- (e.g. an ex-grilfriend / to exhale)00001.06.20093,0000,00024ItemEnglish: ex- (e.g. an ex-grilfriend / to exhale)(prefix)(Latin)former / out of00001.06.20093,0000,00025Itemprefix: Latin: again / backre- (e.g. to repeat / to return)00001.06.20093,0000,00026ItemEnglish: re- (e.g. to repeat / to return)(prefix)(Latin)again / back00001.06.20093,0000,00027Itemprefix: Latin: in favorpro- (e.g. pro-American)00001.06.20093,0000,00028ItemEnglish: pro- (e.g. pro-American)(prefix)(Latin)in favor00001.06.20093,0000,00029Itemprefix: above / beyondover- (e.g. overhanging / to overcharge)00001.06.20093,0000,00030Itemover- (e.g. overhanging / to overcharge)(prefix)above / beyond00001.06.20093,0000,00031Itemprefix: Latin: under / belowsub- (e.g. submarine / subzero)00001.06.20093,0000,00032ItemEnglish: sub- (e.g. submarine / subzero)(prefix)(Latin)under / below00001.06.20093,0000,00033Itemprefix: bad(ly) / wrong(ly)mis- (e.g. (a) misfortune / to misunderstand)00001.06.20093,0000,00034Itemmis- (e.g. (a) misfortune / to misunderstand)(prefix)bad(ly) / wrong(ly)00001.06.20093,0000,00035Itemprefix: Latin: across / beyond / throughtrans- (e.g. transatlantic / transuranium / transcutaneous)00001.06.20093,0000,00036ItemEnglish: trans- (e.g. transatlantic / transuranium / transcutaneous)(prefix)(Latin)across / beyond / through00001.06.20093,0000,00037RootsTopic38Itemroot: Latin: to take / to holdcap (from capere; e.g. captive / capacious)00001.06.20093,0000,00039ItemEnglish: cap (from capere; e.g. captive / capacious)(root)(Latin)to take / to hold00001.06.20093,0000,00040Itemroot: Latin: to holdten (from tenere; e.g. tenant)00001.06.20093,0000,00041ItemEnglish: ten (from tenere; e.g. tenant)(root)(Latin)to hold00001.06.20093,0000,00042Itemroot: Latin: to sendmit (from mittere; e.g. to remit)00001.06.20093,0000,00043ItemEnglish: mit (from mittere; e.g. to remit)(root)(Latin)to send00001.06.20093,0000,00044Itemroot: Latin: to bear/to carryfer (from ferre; e.g. a referendum)00001.06.20093,0000,00045ItemEnglish: fer (from ferre; e.g. a referendum)(root)(Latin)to bear/to carry00001.06.20093,0000,00046Itemroot: Latin: to standsta (from stare; e.g. stable)00001.06.20093,0000,00047ItemEnglish: sta (from stare; e.g. stable)(root)(Latin)to stand00001.06.20093,0000,00048Itemroot: Latin: to writescrib (from scribere; e.g. to describe)00001.06.20093,0000,00049ItemEnglish: scrib (from scribere; e.g. to describe)(root)(Latin)to write00001.06.20093,0000,00050Itemroot: Greek: study (of)log (from logos; e.g. logic)00001.06.20093,0000,00051ItemEnglish: log (from logos; e.g. logic)(root)(Greek)study (of)00001.06.20093,0000,00052Itemroot: Latin: to look (at)spec (from specere; e.g. prospect)00001.06.20093,0000,00053ItemEnglish: spec (from specere; e.g. prospect)(root)(Latin)to look (at)00001.06.20093,0000,00054Itemroot: Latin: to foldplica (from plicare; e.g. to complicate)00001.06.20093,0000,00055ItemEnglish: plica (from plicare; e.g. to complicate)(root)(Latin)to fold00001.06.20093,0000,00056Itemroot: Latin: to stretchten (from tendere; e.g. to extend)00001.06.20093,0000,00057ItemEnglish: ten (from tendere; e.g. to extend)(root)(Latin)to stretch00001.06.20093,0000,00058Itemroot: Latin: to leadduc (from ducere; e.g. a duke)00001.06.20093,0000,00059ItemEnglish: duc (from ducere; e.g. a duke)(root)(Latin)to lead00001.06.20093,0000,00060Itemroot: Latin: to placepo (from ponere; e.g. a depot)00001.06.20093,0000,00061ItemEnglish: po (from ponere; e.g. a depot)(root)(Latin)to place00001.06.20093,0000,00062Itemroot: Latin: to make/to dofac (from facere; e.g. manufacture)00001.06.20093,0000,00063ItemEnglish: fac (from facere; e.g. manufacture)(root)(Latin)to make/to do00001.06.20093,0000,000 diff --git a/tests/importing/supermemo_EnglishPronunciationTop100.xml b/tests/importing/supermemo_EnglishPronunciationTop100.xml new file mode 100644 index 000000000..cea0dba28 --- /dev/null +++ b/tests/importing/supermemo_EnglishPronunciationTop100.xml @@ -0,0 +1 @@ +1031TopicEnglish Pronunciation00001.06.20091,7000,00052[1] English Pronunciation - Top 100Topic2Itempronounce: weight/weIt/00001.06.20093,0000,0003Itempronounce: quantity/kwontIti/00001.06.20093,0000,0004Itempronounce: quality/'kwolIti/00001.06.20093,0000,0005Itempronounce: several/sev..r..l/00001.06.20093,0000,0006Itempronounce: through/thru:/00001.06.20093,0000,0007Itempronounce: available/@'veil..b..l/00001.06.20093,0000,0008Itempronounce: thorough/'th^rOu/00001.06.20093,0000,0009Itempronounce: exceedingly/ik'si:diNli/00001.06.20093,0000,00010Itempronounce: scissors/siz..rz/00001.06.20093,0000,00011Itempronounce: readily/redIly/00001.06.20093,0000,00012Itempronounce: accidentally/@ksIdentli/00001.06.20093,0000,00013Itempronounce: adventurer/..d'ventS..r..(r)/00001.06.20093,0000,00014Itempronounce: virus/vaI..r..s/00001.06.20093,0000,00015Itempronounce: gallery/g@l..ri/00001.06.20093,0000,00016Itempronounce: fever/'fi:v..(r)/00001.06.20093,0000,00017Itempronounce: reply/ri'plai/00001.06.20093,0000,00018Itempronounce: unmovable/..nmu:v..b..l/00001.06.20093,0000,00019Itempronounce: image/'imidZ/00001.06.20093,0000,00020Itempronounce: anxiety/@N'zai..ti/00001.06.20093,0000,00021Itempronounce: peculiarly/pI'kju:li..(r)li/00001.06.20093,0000,00022Itempronounce: bankruptcy/'b@nkr^ptsi/00001.06.20093,0000,00023Itempronounce: budget/b^dZIt/00001.06.20093,0000,00024Itempronounce: knead/ni:d/00001.06.20093,0000,00025Itempronounce: injured/'indZ..d/00001.06.20093,0000,00026Itempronounce: path/p@:th/00001.06.20093,0000,00027Itempronounce: tree/tri:/00001.06.20093,0000,00028Itempronounce: three/thri:/00001.06.20093,0000,00029Itempronounce: free/fri:/00001.06.20093,0000,00030Itempronounce: nephew/'nefju:/00001.06.20093,0000,00031Itempronounce: hyperbole/hai'pe:rb..li/00001.06.20093,0000,00032Itempronounce: appreciation/..pri:Si'eiS..n/00001.06.20093,0000,00033Itempronounce: gourmet/'gu..mei/00001.06.20093,0000,00034Itempronounce: vulnerable/v^ln..r..b..l/00001.06.20093,0000,00035Itempronounce: trough/tro:f/00001.06.20093,0000,00036Itempronounce: invariably/in've..ri..bli/00001.06.20093,0000,00037Itempronounce: conscious/'konS..s/00001.06.20093,0000,00038Itempronounce: 6.89six point eight nine00001.06.20093,0000,00039Itempronounce: purpose/pe:rp..s/00001.06.20093,0000,00040Itempronounce: achieve/..'tSi:v/00001.06.20093,0000,00041Itempronounce: revealing/ri'vi:liN/00001.06.20093,0000,00042Itempronounce: cloak/kl..uk/00001.06.20093,0000,00043Itempronounce: buckle/b^k..l/00001.06.20093,0000,00044Itempronounce: eyrie/i..ri/00001.06.20093,0000,00045Itempronounce: bough/bau/00001.06.20093,0000,00046Itempronounce: fiery/'fai..ri/00001.06.20093,0000,00047Itempronounce: wholesome/'h..uls..m/00001.06.20093,0000,00048Itempronounce: foam/f..um/00001.06.20093,0000,00049Itempronounce: convict/k..n'vikt/00001.06.20093,0000,00050Itempronounce: humiliate/hju:'milieit/00001.06.20093,0000,00051Itempronounce: meanwhile/'mi:nwail/00001.06.20093,0000,00053[2] English Pronunciation - Top 100Topic54Itempronounce: detain/di'tein/00001.06.20093,0000,00055Itempronounce: designate/'dezigneit/00001.06.20093,0000,00056Itempronounce: outbreak/'autbreik/00001.06.20093,0000,00057Itempronounce: nope/n..up/ $ /noup/00001.06.20093,0000,00058Itempronounce: behest/bi'hest/00001.06.20093,0000,00059Itempronounce: awkward/'o:kw..(r)d/00001.06.20093,0000,00060Itempronounce: tension/'tenS..n/00001.06.20093,0000,00061Itempronounce: adolescence/@d..'les..ns/00001.06.20093,0000,00062Itempronounce: coverage/'k^v..ridZ/00001.06.20093,0000,00063Itempronounce: insight/'insait/00001.06.20093,0000,00064Itempronounce: doubt/daut/00001.06.20093,0000,00065Itempronounce: ash/@S/00001.06.20093,0000,00066Itempronounce: avalanche/'@v..la:ntS/00001.06.20093,0000,00067Itempronounce: convinced/k..n'vinst/00001.06.20093,0000,00068Itempronounce: draught/dra:ft/00001.06.20093,0000,00069Itempronounce: considerable/k..n'sid..r..b..l/00001.06.20093,0000,00070Itempronounce: behead/bi'hed/00001.06.20093,0000,00071Itempronounce: archaeologist/a:(r)ki'ol..dZist/00001.06.20093,0000,00072Itempronounce: sword/so:(r)d/00001.06.20093,0000,00073Itempronounce: emphasize/'emf..saiz/00001.06.20093,0000,00074Itempronounce: disposable/di'sp..uz..b..l/00001.06.20093,0000,00075Itempronounce: archive/'a:(r)kaiv/00001.06.20093,0000,00076Itempronounce: broadsheet/'bro:dSi:t/00001.06.20093,0000,00077Itempronounce: channel/'tS@nl/00001.06.20093,0000,00078Itempronounce: affair/..'fe../00001.06.20093,0000,00079Itempronounce: armor/'a:m../00001.06.20093,0000,00080Itempronounce: camouflage/'k@m..fla:Z/00001.06.20093,0000,00081Itempronounce: surgery/'se:dZ..ri/00001.06.20093,0000,00082Itempronounce: diversity/dai've:s..ti/00001.06.20093,0000,00083Itempronounce: variety/v..'rai..ti/00001.06.20093,0000,00084Itempronounce: lens/lenz/00001.06.20093,0000,00085Itempronounce: scar/ska:(r)/00001.06.20093,0000,00086Itempronounce: source/so:s/00001.06.20093,0000,00087Itempronounce: supervisor/'su:p..vaiz..(r)/00001.06.20093,0000,00088Itempronounce: exploit/ik'sploit/00001.06.20093,0000,00089Itempronounce: neglect/ni'glekt/00001.06.20093,0000,00090Itempronounce: unanimous/ju'n@nim..s/00001.06.20093,0000,00091Itempronounce: nuisance/'nju:sns/00001.06.20093,0000,00092Itempronounce: enhance/in'ha:ns/00001.06.20093,0000,00093Itempronounce: psychiatrist/sai'kai..trIst/00001.06.20093,0000,00094Itempronounce: overwhelming/..uv..(r)'welmiN/00001.06.20093,0000,00095Itempronounce: resentment/ri'zentm..nt/00001.06.20093,0000,00096Itempronounce: ironically/ai'ronikli/00001.06.20093,0000,00097Itempronounce: commit/k..'mit/00001.06.20093,0000,00098Itempronounce: feud/fju:d/00001.06.20093,0000,00099Itempronounce: discerning/di'se:(r)niN/00001.06.20093,0000,000100Itempronounce: eager/'i:g..(r)/00001.06.20093,0000,000101Itempronounce: queue/kju:/00001.06.20093,0000,000102Itempronounce: quay/ki:/00001.06.20093,0000,000103Itempronounce: majority/m..'dZorIti/00001.06.20093,0000,000 diff --git a/tests/test_importing.py b/tests/test_importing.py index 7731f0616..d79384d2f 100644 --- a/tests/test_importing.py +++ b/tests/test_importing.py @@ -5,7 +5,7 @@ from tests.shared import assertException from anki.errors import * from anki import DeckStorage -from anki.importing import anki10, csvfile, mnemosyne10 +from anki.importing import anki10, csvfile, mnemosyne10, supermemo_xml from anki.stdmodels import BasicModel from anki.facts import Fact from anki.sync import SyncClient, SyncServer @@ -45,6 +45,44 @@ def test_mnemosyne10(): assert i.total == 5 deck.s.close() +def test_supermemo_xml_01_unicode(): + deck = DeckStorage.Deck() + deck.addModel(BasicModel()) + file = unicode(os.path.join(testDir, "importing/supermemo_ENGLISHFORBEGGINERS_noOEM.xml")) + i = supermemo_xml.SupermemoXmlImporter(deck, file) + #i.META.logToStdOutput = True + i.doImport() + assert i.total == 92 + deck.s.close() + +def test_supermemo_xml_02_escaped(): + deck = DeckStorage.Deck() + deck.addModel(BasicModel()) + file = unicode(os.path.join(testDir, "importing/supermemo_ENGLISHFORBEGGINERS_oem_1250.xml")) + i = supermemo_xml.SupermemoXmlImporter(deck, file) + i.doImport() + assert i.total == 30 + deck.s.close() + +def test_supermemo_xml_03(): + deck = DeckStorage.Deck() + deck.addModel(BasicModel()) + file = unicode(os.path.join(testDir, "importing/supermemo_EnglishPronunciationTop100.xml")) + i = supermemo_xml.SupermemoXmlImporter(deck, file) + #i.META.logToStdOutput = True + i.doImport() + assert i.total == 100 + deck.s.close() + +def test_supermemo_xml_04(): + deck = DeckStorage.Deck() + deck.addModel(BasicModel()) + file = unicode(os.path.join(testDir, "importing/supermemo_ENGLISHVOCABULARYBUILDER.xml")) + i = supermemo_xml.SupermemoXmlImporter(deck, file) + i.doImport() + assert i.total == 60 + deck.s.close() + def test_anki10(): # though these are not modified, sqlite updates the mtime, so copy to tmp # first