mirror of https://github.com/ankitects/anki.git
synced 2025-09-19 14:32:22 -04:00
485 lines | 22 KiB | Python
# -*- coding: utf-8 -*-
|
||
# Copyright: petr.michalec@gmail.com
|
||
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
|
||
|
||
"""\
|
||
Importing Supermemo XML decks
|
||
==============================
|
||
"""
|
||
__docformat__ = 'restructuredtext'
|
||
|
||
import sys
|
||
|
||
from anki.importing import Importer, ForeignCard
|
||
from anki.lang import _
|
||
from anki.errors import *
|
||
|
||
from xml.dom import minidom, Node
|
||
from types import DictType, InstanceType
|
||
from string import capwords, maketrans
|
||
import re, unicodedata, time
|
||
#import chardet
|
||
|
||
|
||
from anki.deck import Deck
|
||
|
||
class SmartDict(dict):
    """
    See http://www.peterbe.com/plog/SmartDict
    Copyright 2005, Peter Bengtsson, peter@fry-it.com

    A smart dict can be instantiated either from a pythonic dict
    or an instance object (eg. SQL recordsets) but it ensures that you can
    do all the convenient lookups such as x.first_name, x['first_name'] or
    x.get('first_name').
    """

    def __init__(self, *a, **kw):
        """Build the mapping from an optional seed plus keyword arguments.

        *a* may hold one seed object: a dict (or dict subclass, including
        another SmartDict) or any object that keeps its data in __dict__
        (e.g. SQL recordsets). Seed values take precedence over keyword
        arguments with the same key, matching the historical behaviour.
        """
        if a:
            seed = a[0]
            # isinstance() replaces the Python-2-only types.DictType /
            # types.InstanceType checks: it also covers dict subclasses
            # (SmartDict itself) and new-style class instances.
            if isinstance(seed, dict):
                kw.update(seed)
            elif hasattr(seed, '__dict__'):
                kw.update(seed.__dict__)

        dict.__init__(self, **kw)
        # Alias __dict__ to the mapping itself so that attribute access
        # (x.name) and item access (x['name']) are interchangeable.
        self.__dict__ = self
|
||
|
||
class SuperMemoElement(SmartDict):
    "SmartDict wrapper to store SM Element data"

    def __init__(self, *a, **kw):
        SmartDict.__init__(self, *a, **kw)
        # Default content: pre-seed every field an SM element may carry so
        # attribute access returns None instead of raising AttributeError
        # when the XML export omitted the corresponding tag.
        # BUGFIX: the default key was misspelled 'LastRepetiton', while
        # addItemToCards() reads item.LastRepetition — elements without a
        # <LastRepetition> tag would crash the import.
        for field in ('lTitle', 'Title', 'Question', 'Answer', 'Count',
                      'Type', 'ID', 'Interval', 'Lapses', 'Repetitions',
                      'LastRepetition', 'AFactor', 'UFactor'):
            self.__dict__[field] = None
|
||
|
||
|
||
|
||
# This is an AnkiImporter
class SupermemoXmlImporter(Importer):
    """
    Parser for SuperMemo XML exports into Anki.

    Goes through a SM collection and fetches all elements.

    My SM collection was a big mess where topics and items were mixed.
    I was unable to parse my content in a regular way like a for loop on
    minidom.getElementsByTagName() etc. My collection also had a
    limitation: topics were split into branches with max 100 items
    on each. Learning themes were in a deep structure. I wanted to have
    the full title of each element stored in tags.

    Code should be upgraded to support importing of SM2006 exports.
    """

    def __init__(self, *args):
        """Initialize internal variables.

        Parameters to be exposed to the GUI are stored in self.META"""

        Importer.__init__(self, *args)
        self.lines = None
        # number of fields per imported card (question + answer)
        self.numFields=int(2)

        # SmXmlParse VARIABLES
        self.xmldoc = None   # root element of the parsed XML document
        self.pieces = []
        self.cntBuf = [] #to store last parsed data (text nodes, see parse_Text)
        self.cntElm = [] #to store SM Elements data (stack of SuperMemoElement)
        self.cntCol = [] #to store SM Colections data

        # store some meta info related to parse algorithm
        # SmartDict works like dict / class wrapper
        self.cntMeta = SmartDict()
        self.cntMeta.popTitles = False
        self.cntMeta.title = []  # current topic-title path, used as a stack

        # META stores controls of the import script, should be
        # exposed to the import dialog. These are default values.
        self.META = SmartDict()
        self.META.resetLearningData = False # implemented
        self.META.onlyMemorizedItems = False # implemented
        self.META.loggerLevel = 2 # implemented 0no,1info,2error,3debug
        self.META.tagAllTopics = False
        # path patterns to be tagged - in gui entered like 'Advanced English 97|My Vocablary'
        self.META.pathsToBeTagged = ['English for begginers', 'Advanced English 97', 'Phrasal Verbs']
        self.META.tagMemorizedItems = True # implemented
        self.META.logToStdOutput = False # implemented

        # ForeignCard objects accumulated by addItemToCards()
        self.cards = []
|
||
|
||
## TOOLS
|
||
|
||
def _fudgeText(self, text):
|
||
"Replace sm syntax to Anki syntax"
|
||
text = text.replace("\n\r", u"<br>")
|
||
text = text.replace("\n", u"<br>")
|
||
return text
|
||
|
||
def _unicode2ascii(self,str):
|
||
"Remove diacritic punctuation from strings (titles)"
|
||
return u"".join([ c for c in unicodedata.normalize('NFKD', str) if not unicodedata.combining(c)])
|
||
|
||
def _decode_htmlescapes(self,s):
|
||
"""Unescape HTML code."""
|
||
from BeautifulSoup import BeautifulStoneSoup
|
||
#my sm2004 also ecaped & chars in escaped sequences.
|
||
s = re.sub(u'&',u'&',s)
|
||
return unicode(BeautifulStoneSoup(s,convertEntities=BeautifulStoneSoup.HTML_ENTITIES ))
|
||
|
||
|
||
    def _unescape(self,s,initilize):
        """Note: This method is not used, BeautifulSoup does better job.

        Dead code kept for reference only. NOTE(review): the translation
        table below appears garbled — the left-hand entries were once HTML
        entity references (e.g. numeric character references) but have been
        entity-decoded in this copy, turning most pairs into no-ops, and the
        literal is no longer valid Python. Also note '_unescape_trtable' is
        read here before it is ever assigned anywhere, so a first call would
        raise AttributeError; the 'initilize' (sic) parameter is unused.
        """

        if self._unescape_trtable == None:
            self._unescape_trtable = (
                ('€',u'€'), (' ',u' '), ('!',u'!'), ('"',u'"'), ('#',u'#'), ('$',u'$'), ('%',u'%'), ('&',u'&'), (''',u"'"),
                ('(',u'('), (')',u')'), ('*',u'*'), ('+',u'+'), (',',u','), ('-',u'-'), ('.',u'.'), ('/',u'/'), ('0',u'0'),
                ('1',u'1'), ('2',u'2'), ('3',u'3'), ('4',u'4'), ('5',u'5'), ('6',u'6'), ('7',u'7'), ('8',u'8'), ('9',u'9'),
                (':',u':'), (';',u';'), ('<',u'<'), ('=',u'='), ('>',u'>'), ('?',u'?'), ('@',u'@'), ('A',u'A'), ('B',u'B'),
                ('C',u'C'), ('D',u'D'), ('E',u'E'), ('F',u'F'), ('G',u'G'), ('H',u'H'), ('I',u'I'), ('J',u'J'), ('K',u'K'),
                ('L',u'L'), ('M',u'M'), ('N',u'N'), ('O',u'O'), ('P',u'P'), ('Q',u'Q'), ('R',u'R'), ('S',u'S'), ('T',u'T'),
                ('U',u'U'), ('V',u'V'), ('W',u'W'), ('X',u'X'), ('Y',u'Y'), ('Z',u'Z'), ('[',u'['), ('\',u'\\'), (']',u']'),
                ('^',u'^'), ('_',u'_'), ('`',u'`'), ('a',u'a'), ('b',u'b'), ('c',u'c'), ('d',u'd'), ('e',u'e'), ('f',u'f'),
                ('g',u'g'), ('h',u'h'), ('i',u'i'), ('j',u'j'), ('k',u'k'), ('l',u'l'), ('m',u'm'), ('n',u'n'),
                ('o',u'o'), ('p',u'p'), ('q',u'q'), ('r',u'r'), ('s',u's'), ('t',u't'), ('u',u'u'), ('v',u'v'),
                ('w',u'w'), ('x',u'x'), ('y',u'y'), ('z',u'z'), ('{',u'{'), ('|',u'|'), ('}',u'}'), ('~',u'~'),
                (' ',u' '), ('¡',u'¡'), ('¢',u'¢'), ('£',u'£'), ('¤',u'¤'), ('¥',u'¥'), ('¦',u'¦'), ('§',u'§'),
                ('¨',u'¨'), ('©',u'©'), ('ª',u'ª'), ('«',u'«'), ('¬',u'¬'), ('­',u''), ('®',u'®'), ('¯',u'¯'),
                ('°',u'°'), ('±',u'±'), ('²',u'²'), ('³',u'³'), ('´',u'´'), ('µ',u'µ'), ('¶',u'¶'), ('·',u'·'),
                ('¸',u'¸'), ('¹',u'¹'), ('º',u'º'), ('»',u'»'), ('¼',u'¼'), ('½',u'½'), ('¾',u'¾'), ('¿',u'¿'),
                ('À',u'À'), ('Á',u'Á'), ('Â',u'Â'), ('Ã',u'Ã'), ('Ä',u'Ä'), ('Å',u'Å'), ('Å',u'Å'), ('Æ',u'Æ'),
                ('Ç',u'Ç'), ('È',u'È'), ('É',u'É'), ('Ê',u'Ê'), ('Ë',u'Ë'), ('Ì',u'Ì'), ('Í',u'Í'), ('Î',u'Î'),
                ('Ï',u'Ï'), ('Ð',u'Ð'), ('Ñ',u'Ñ'), ('Ò',u'Ò'), ('Ó',u'Ó'), ('Ô',u'Ô'), ('Õ',u'Õ'), ('Ö',u'Ö'),
                ('×',u'×'), ('Ø',u'Ø'), ('Ù',u'Ù'), ('Ú',u'Ú'), ('Û',u'Û'), ('Ü',u'Ü'), ('Ý',u'Ý'), ('Þ',u'Þ'),
                ('ß',u'ß'), ('à',u'à'), ('á',u'á'), ('â',u'â'), ('ã',u'ã'), ('ä',u'ä'), ('å',u'å'), ('æ',u'æ'),
                ('ç',u'ç'), ('è',u'è'), ('é',u'é'), ('ê',u'ê'), ('ë',u'ë'), ('ì',u'ì'), ('í',u'í'), ('í',u'í'),
                ('î',u'î'), ('ï',u'ï'), ('ð',u'ð'), ('ñ',u'ñ'), ('ò',u'ò'), ('ó',u'ó'), ('ô',u'ô'), ('õ',u'õ'),
                ('ö',u'ö'), ('÷',u'÷'), ('ø',u'ø'), ('ù',u'ù'), ('ú',u'ú'), ('û',u'û'), ('ü',u'ü'), ('ý',u'ý'),
                ('þ',u'þ'), ('ÿ',u'ÿ'), ('Ā',u'Ā'), ('ā',u'ā'), ('Ă',u'Ă'), ('ă',u'ă'), ('Ą',u'Ą'), ('ą',u'ą'),
                ('Ć',u'Ć'), ('ć',u'ć'), ('Ĉ',u'Ĉ'), ('ĉ',u'ĉ'), ('Ċ',u'Ċ'), ('ċ',u'ċ'), ('Č',u'Č'), ('č',u'č'),
                ('Ď',u'Ď'), ('ď',u'ď'), ('Đ',u'Đ'), ('đ',u'đ'), ('Ē',u'Ē'), ('ē',u'ē'), ('Ĕ',u'Ĕ'), ('ĕ',u'ĕ'),
                ('Ė',u'Ė'), ('ė',u'ė'), ('Ę',u'Ę'), ('ę',u'ę'), ('Ě',u'Ě'), ('ě',u'ě'), ('Ĝ',u'Ĝ'), ('ĝ',u'ĝ'),
                ('Ğ',u'Ğ'), ('ğ',u'ğ'), ('Ġ',u'Ġ'), ('ġ',u'ġ'), ('Ģ',u'Ģ'), ('ģ',u'ģ'), ('Ĥ',u'Ĥ'), ('ĥ',u'ĥ'),
                ('Ħ',u'Ħ'), ('ħ',u'ħ'), ('Ĩ',u'Ĩ'), ('ĩ',u'ĩ'), ('Ī',u'Ī'), ('ī',u'ī'), ('Ĭ',u'Ĭ'), ('ĭ',u'ĭ'),
                ('Į',u'Į'), ('į',u'į'), ('İ',u'İ'), ('ı',u'ı'), ('IJ',u'IJ'), ('ij',u'ij'), ('Ĵ',u'Ĵ'), ('ĵ',u'ĵ'),
                ('Ķ',u'Ķ'), ('ķ',u'ķ'), ('ĸ',u'ĸ'), ('Ĺ',u'Ĺ'), ('ĺ',u'ĺ'), ('Ļ',u'Ļ'), ('ļ',u'ļ'), ('Ľ',u'Ľ'),
                ('ľ',u'ľ'), ('Ŀ',u'Ŀ'), ('ŀ',u'ŀ'), ('Ł',u'Ł'), ('ł',u'ł'), ('Ń',u'Ń'), ('ń',u'ń'), ('Ņ',u'Ņ'),
                ('ņ',u'ņ'), ('Ň',u'Ň'), ('ň',u'ň'), ('ʼn',u'ʼn'), ('Ŋ',u'Ŋ'), ('ŋ',u'ŋ'), ('Ō',u'Ō'), ('ō',u'ō'),
                ('Ŏ',u'Ŏ'), ('ŏ',u'ŏ'), ('Ő',u'Ő'), ('ő',u'ő'), ('Œ',u'Œ'), ('œ',u'œ'), ('Ŕ',u'Ŕ'), ('ŕ',u'ŕ'),
                ('Ŗ',u'Ŗ'), ('ŗ',u'ŗ'), ('Ř',u'Ř'), ('ř',u'ř'), ('Ś',u'Ś'), ('ś',u'ś'), ('Ŝ',u'Ŝ'), ('ŝ',u'ŝ'),
                ('Ş',u'Ş'), ('ş',u'ş'), ('Š',u'Š'), ('š',u'š'), ('Ţ',u'Ţ'), ('ţ',u'ţ'), ('Ť',u'Ť'), ('ť',u'ť'),
                ('Ŧ',u'Ŧ'), ('ŧ',u'ŧ'), ('Ũ',u'Ũ'), ('ũ',u'ũ'), ('Ū',u'Ū'), ('ū',u'ū'), ('Ŭ',u'Ŭ'), ('ŭ',u'ŭ'),
                ('Ů',u'Ů'), ('ů',u'ů'), ('Ű',u'Ű'), ('ű',u'ű'), ('Ų',u'Ų'), ('ų',u'ų'), ('Ŵ',u'Ŵ'), ('ŵ',u'ŵ'),
                ('Ŷ',u'Ŷ'), ('ŷ',u'ŷ'), ('Ÿ',u'Ÿ'), ('Ź',u'Ź'), ('ź',u'ź'), ('Ż',u'Ż'), ('ż',u'ż'), ('Ž',u'Ž'),
                ('ž',u'ž'), ('ſ',u'ſ'), ('Ŕ',u'Ŕ'), ('ŕ',u'ŕ'), ('Ŗ',u'Ŗ'), ('ŗ',u'ŗ'), ('Ř',u'Ř'), ('ř',u'ř'),
                ('Ś',u'Ś'), ('ś',u'ś'), ('Ŝ',u'Ŝ'), ('ŝ',u'ŝ'), ('Ş',u'Ş'), ('ş',u'ş'), ('Š',u'Š'), ('š',u'š'),
                ('Ţ',u'Ţ'), ('ţ',u'ţ'), ('Ť',u'Ť'), ('Ɂ',u'ť'), ('Ŧ',u'Ŧ'), ('ŧ',u'ŧ'), ('Ũ',u'Ũ'), ('ũ',u'ũ'),
                ('Ū',u'Ū'), ('ū',u'ū'), ('Ŭ',u'Ŭ'), ('ŭ',u'ŭ'), ('Ů',u'Ů'), ('ů',u'ů'), ('Ű',u'Ű'), ('ű',u'ű'),
                ('Ų',u'Ų'), ('ų',u'ų'), ('Ŵ',u'Ŵ'), ('ŵ',u'ŵ'), ('Ŷ',u'Ŷ'), ('ŷ',u'ŷ'), ('Ÿ',u'Ÿ'), ('Ź',u'Ź'),
                ('ź',u'ź'), ('Ż',u'Ż'), ('ż',u'ż'), ('Ž',u'Ž'), ('ž',u'ž'), ('ſ',u'ſ'),
            )

        #m = re.match()
        #s = s.replace(code[0], code[1])
|
||
|
||
## DEFAULT IMPORTER METHODS
|
||
|
||
def foreignCards(self):
|
||
|
||
# Load file and parse it by minidom
|
||
self.loadSource(self.file)
|
||
|
||
# Migrating content / time consuming part
|
||
# addItemToCards is called for each sm element
|
||
self.logger(u'Parsing started.')
|
||
self.parse()
|
||
self.logger(u'Parsing done.')
|
||
|
||
# Return imported cards
|
||
return self.cards
|
||
|
||
def fields(self):
|
||
return 2
|
||
|
||
## PARSER METHODS
|
||
|
||
    def addItemToCards(self,item):
        """Convert one SuperMemoElement into a ForeignCard and collect it.

        Fills the Q/A fields, migrates SuperMemo scheduling data onto the
        card (unless META.resetLearningData is set), derives tags from the
        topic-title path, and appends the result to self.cards.
        """

        # new anki card
        card = ForeignCard()

        # clean Q and A
        card.fields.append(self._fudgeText(self._decode_htmlescapes(item.Question)))
        card.fields.append(self._fudgeText(self._decode_htmlescapes(item.Answer)))
        card.tags = u""

        # pre-process scheduling data
        # the SM export stores the last repetition as 'DD.MM.YYYY'
        tLastrep = time.mktime(time.strptime(item.LastRepetition, '%d.%m.%Y'))
        tToday = time.time()

        # convert learning data
        if not self.META.resetLearningData:
            # migration of LearningData algorithm
            card.interval = item.Interval
            card.successive = item.Repetitions
            ##card.due = tToday + (float(item.Interval) * 86400.0) - tLastrep
            # due = last repetition + interval (interval is in days)
            card.due = tLastrep + (float(item.Interval) * 86400.0)
            card.lastDue = 0

            # SM exports AFactor with a decimal comma (e.g. '2,5')
            card.factor = float(item.AFactor.replace(',','.'))
            card.lastFactor = float(item.AFactor.replace(',','.'))

            # SM is not exporting all the information Anki keeps track of, so it
            # needs to be fudged
            card.youngEase0 = item.Lapses
            card.youngEase3 = item.Repetitions + item.Lapses
            card.yesCount = item.Repetitions
            card.noCount = item.Lapses
            card.reps = card.yesCount + card.noCount
            card.spaceUntil = card.due
            card.combinedDue = card.due

        # categories & tags
        # it's worth having every theme (tree structure of the sm collection)
        # stored in tags, but sometimes not; you can decide whether to tag all
        # topics or just those containing some pattern
        tTaggTitle = False
        for pattern in self.META.pathsToBeTagged:
            if item.lTitle != None and pattern.lower() in u" ".join(item.lTitle).lower():
                tTaggTitle = True
                break
        if tTaggTitle or self.META.tagAllTopics:
            # normalize - remove diacritic punctuation from unicode chars to ascii
            item.lTitle = [ self._unicode2ascii(topic) for topic in item.lTitle]

            # Transform xyz / aaa / bbb / ccc on Title path to Tag xyzAaaBbbCcc
            # clean things like [999] or [111-2222] from title path, example: xyz / [1000-1200] zyx / xyz
            # clean whitespaces
            # set Capital letters for first char of the word
            tmp = list(set([ re.sub('(\[[0-9]+\])' , ' ' , i ).replace('_',' ') for i in item.lTitle ]))
            tmp = list(set([ re.sub('(\W)',' ', i ) for i in tmp ]))
            tmp = list(set([ re.sub( '^[0-9 ]+$','',i) for i in tmp ]))
            tmp = list(set([ capwords(i).replace(' ','') for i in tmp ]))
            # lowercase the first letter; drop empties (Python 2 '<>' operator)
            tags = [ j[0].lower() + j[1:] for j in tmp if j.strip() <> '']

            card.tags += u" ".join(tags)

        # NOTE(review): item.Interval is the raw XML string here; under
        # Python 2, str > int compares true for any non-None string, so this
        # effectively tests that Interval is present — confirm intended.
        if self.META.tagMemorizedItems and item.Interval >0:
            card.tags += " Memorized"

        self.logger(u'Element tags\t- ' + card.tags, level=3)

        self.cards.append(card)
|
||
|
||
    def logger(self,text,level=1):
        """Wrapper for the Anki logger.

        Messages at or below META.loggerLevel go to the deck's progress
        display and, optionally, to stdout. NOTE(review): these level names
        disagree with the META.loggerLevel comment ('2error') — kept as-is.
        """

        dLevels={0:'',1:u'Info',2:u'Verbose',3:u'Debug'}
        if level<=self.META.loggerLevel:
            # report through the deck's progress mechanism
            self.deck.updateProgress(_(text))

            if self.META.logToStdOutput:
                # Python 2 print statement (this file predates Python 3)
                print self.__class__.__name__+ u" - " + dLevels[level].ljust(9) +u' -\t'+ _(text)
|
||
|
||
|
||
# OPEN AND LOAD
|
||
    def openAnything(self,source):
        """Open any source; in practice only opening of files is used.

        Tries, in order: stdin ('-'), a URL via urllib, a filesystem path,
        and finally wraps the argument itself in a StringIO buffer.
        Returns a file-like object the caller must close.
        """

        if source == "-":
            return sys.stdin

        # try to open with urllib (if source is http, ftp, or file URL)
        # (Python 2 API: urllib.urlopen)
        import urllib
        try:
            return urllib.urlopen(source)
        except (IOError, OSError):
            pass

        # try to open with native open function (if source is pathname)
        try:
            return open(source)
        except (IOError, OSError):
            pass

        # treat source as string (Python 2 StringIO module)
        import StringIO
        return StringIO.StringIO(str(source))
|
||
|
||
def loadSource(self, source):
|
||
"""Load source file and parse with xml.dom.minidom"""
|
||
self.source = source
|
||
self.logger(u'Load started...')
|
||
sock = self.openAnything(self.source)
|
||
self.xmldoc = minidom.parse(sock).documentElement
|
||
sock.close()
|
||
self.logger(u'Load done.')
|
||
|
||
|
||
# PARSE
|
||
def parse(self, node=None):
|
||
"Parse method - parses document elements"
|
||
|
||
if node==None and self.xmldoc<>None:
|
||
node = self.xmldoc
|
||
|
||
_method = "parse_%s" % node.__class__.__name__
|
||
if hasattr(self,_method):
|
||
parseMethod = getattr(self, _method)
|
||
parseMethod(node)
|
||
else:
|
||
self.logger(u'No handler for method %s' % _method, level=3)
|
||
|
||
def parse_Document(self, node):
|
||
"Parse XML document"
|
||
|
||
self.parse(node.documentElement)
|
||
|
||
def parse_Element(self, node):
|
||
"Parse XML element"
|
||
|
||
_method = "do_%s" % node.tagName
|
||
if hasattr(self,_method):
|
||
handlerMethod = getattr(self, _method)
|
||
handlerMethod(node)
|
||
else:
|
||
self.logger(u'No handler for method %s' % _method, level=3)
|
||
#print traceback.print_exc()
|
||
|
||
def parse_Text(self, node):
|
||
"Parse text inside elements. Text is stored into local buffer."
|
||
|
||
text = node.data
|
||
self.cntBuf.append(text)
|
||
|
||
#def parse_Comment(self, node):
|
||
# """
|
||
# Source can contain XML comments, but we ignore them
|
||
# """
|
||
# pass
|
||
|
||
|
||
# DO
|
||
def do_SuperMemoCollection(self, node):
|
||
"Process SM Collection"
|
||
|
||
for child in node.childNodes: self.parse(child)
|
||
|
||
    def do_SuperMemoElement(self, node):
        """Process SM Element (Type - Title,Topics).

        Pushes a fresh SuperMemoElement on the stack, lets child handlers
        fill it, then either imports it as a card (items with Q and A) or
        treats it as a topic and pops its title from the title path.
        """

        self.logger('='*45, level=3)

        self.cntElm.append(SuperMemoElement())
        self.cntElm[-1]['lTitle'] = self.cntMeta['title']

        #parse all child elements
        for child in node.childNodes: self.parse(child)

        #strip all saved strings, just to be sure
        for key in self.cntElm[-1].keys():
            if hasattr(self.cntElm[-1][key], 'strip'):
                self.cntElm[-1][key]=self.cntElm[-1][key].strip()

        #pop current element
        smel = self.cntElm.pop()

        # Process cntElm if it is a valid Item (and not a Topic etc.)
        #if smel.Lapses != None and smel.Interval != None and smel.Question != None and smel.Answer != None:
        if smel.Title == None and smel.Question != None and smel.Answer != None:
            if smel.Answer.strip() !='' and smel.Question.strip() !='':

                # migrate only memorized, otherwise skip/continue
                if self.META.onlyMemorizedItems and not(int(smel.Interval) > 0):
                    self.logger(u'Element skiped \t- not memorized ...', level=3)
                else:
                    #import sm element data to Anki
                    self.addItemToCards(smel)
                    self.logger(u"Import element \t- " + smel['Question'], level=3)

                    #print element (debug dump of all fields)
                    self.logger('-'*45, level=3)
                    for key in smel.keys():
                        self.logger('\t%s %s' % ((key+':').ljust(15),smel[key]), level=3 )
            else:
                self.logger(u'Element skiped \t- no valid Q and A ...', level=3)

        else:
            # now we know that the item was a topic;
            # parsing of the whole node is now finished

            # test if it's really a topic
            if smel.Title != None:
                # remove topic from title list
                t = self.cntMeta['title'].pop()
                self.logger(u'End of topic \t- %s' % (t), level=2)
|
||
|
||
def do_Content(self, node):
|
||
"Process SM element Content"
|
||
|
||
for child in node.childNodes:
|
||
if hasattr(child,'tagName') and child.firstChild != None:
|
||
self.cntElm[-1][child.tagName]=child.firstChild.data
|
||
|
||
def do_LearningData(self, node):
|
||
"Process SM element LearningData"
|
||
|
||
for child in node.childNodes:
|
||
if hasattr(child,'tagName') and child.firstChild != None:
|
||
self.cntElm[-1][child.tagName]=child.firstChild.data
|
||
|
||
# It's being processed in do_Content now
|
||
#def do_Question(self, node):
|
||
# for child in node.childNodes: self.parse(child)
|
||
# self.cntElm[-1][node.tagName]=self.cntBuf.pop()
|
||
|
||
# It's being processed in do_Content now
|
||
#def do_Answer(self, node):
|
||
# for child in node.childNodes: self.parse(child)
|
||
# self.cntElm[-1][node.tagName]=self.cntBuf.pop()
|
||
|
||
def do_Title(self, node):
|
||
"Process SM element Title"
|
||
|
||
t = self._decode_htmlescapes(node.firstChild.data)
|
||
self.cntElm[-1][node.tagName] = t
|
||
self.cntMeta['title'].append(t)
|
||
self.cntElm[-1]['lTitle'] = self.cntMeta['title']
|
||
self.logger(u'Start of topic \t- ' + u" / ".join(self.cntMeta['title']), level=2)
|
||
|
||
|
||
def do_Type(self, node):
|
||
"Process SM element Type"
|
||
|
||
if len(self.cntBuf) >=1 :
|
||
self.cntElm[-1][node.tagName]=self.cntBuf.pop()
|
||
|
||
|
||
if __name__ == '__main__':

    # for testing you can start it standalone

    #file = u'/home/epcim/hg2g/dev/python/sm2anki/ADVENG2EXP.xxe.esc.zaloha_FINAL.xml'
    #file = u'/home/epcim/hg2g/dev/python/anki/libanki/tests/importing/supermemo/original_ENGLISHFORBEGGINERS_noOEM.xml'
    #file = u'/home/epcim/hg2g/dev/python/anki/libanki/tests/importing/supermemo/original_ENGLISHFORBEGGINERS_oem_1250.xml'
    # path of the SuperMemo XML export to import (note: shadows the builtin 'file')
    file = str(sys.argv[1])
    impo = SupermemoXmlImporter(Deck(),file)
    impo.foreignCards()

    # NOTE(review): exits with status 1 even on success — presumably
    # deliberate for this ad-hoc test harness; confirm before relying
    # on the exit code in scripts.
    sys.exit(1)
|
||
|
||
# vim: ts=4 sts=2 ft=python
|