mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 14:32:22 -04:00

SQLAlchemy is a great tool, but it wasn't a great fit for Anki: - We often had to drop down to raw SQL for performance reasons. - The DB cursors and results were wrapped, which incurred a sizable performance hit due to introspection. Operations like fetching 50k records from a hot cache were taking more than twice as long to complete. - We take advantage of sqlite-specific features, so SQL language abstraction is useless to us. - The anki schema is quite small, so manually saving and loading objects is not a big burden. In the process of porting to DBAPI, I've refactored the database schema: - App configuration data that we don't need in joins or bulk updates has been moved into JSON objects. This simplifies serializing, and means we won't need DB schema changes to store extra options in the future. This change obsoletes the deckVars table. - Renamed tables: -- fieldModels -> fields -- cardModels -> templates -- fields -> fdata - a number of attribute names have been shortened Classes like Card, Fact & Model remain. They maintain a reference to the deck. To write their state to the DB, call .flush(). Objects no longer have their modification time manually updated. Instead, the modification time is updated when they are flushed. This also applies to the deck. Decks will now save on close, because various operations that were done at deck load will be moved into deck close instead. Operations like undoing buried card are cheap on a hot cache, but expensive on startup. Programmatically you can call .close(save=False) to avoid a save and a modification bump. This will be useful for generating due counts. Because of the new saving behaviour, the save and save as options will be removed from the GUI in the future. The q/a cache and field cache generating has been centralized. Facts will automatically rebuild the cache on flush; models can do so with model.updateCache(). Media handling has also been reworked. It has moved into a MediaRegistry object, which the deck holds. 
Refcounting has been dropped - it meant we had to compare old and new value every time facts or models were changed, and existed for the sole purpose of not showing errors on a missing media download. Instead we just media.registerText(q+a) when it's updated. The download function will be expanded to ask the user if they want to continue after a certain number of files have failed to download, which should be an adequate alternative. And we now add the file into the media DB when it's copied to the media directory, not when the card is committed. This fixes duplicates a user would get if they added the same media to a card twice without adding the card. The old DeckStorage object had its upgrade code split in a previous commit; the opening and upgrading code has been merged back together, and put in a separate storage.py file. The correct way to open a deck now is import anki; d = anki.Deck(path). deck.getCard() -> deck.sched.getCard() same with answerCard deck.getCard(id) returns a Card object now. And the DB wrapper has had a few changes: - sql statements are a more standard DBAPI: - statement() -> execute() - statements() -> executemany() - called like execute(sql, 1, 2, 3) or execute(sql, a=1, b=2, c=3) - column0 -> list
485 lines
22 KiB
Python
485 lines
22 KiB
Python
# -*- coding: utf-8 -*-
|
||
# Copyright: petr.michalec@gmail.com
|
||
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
|
||
|
||
import sys
|
||
|
||
from anki.importing import Importer, ForeignCard
|
||
from anki.lang import _
|
||
from anki.errors import *
|
||
|
||
from xml.dom import minidom, Node
|
||
from types import DictType, InstanceType
|
||
from string import capwords, maketrans
|
||
import re, unicodedata, time
|
||
#import chardet
|
||
|
||
|
||
from anki import Deck
|
||
|
||
class SmartDict(dict):
    """
    See http://www.peterbe.com/plog/SmartDict
    Copyright 2005, Peter Bengtsson, peter@fry-it.com

    A smart dict can be instantiated either from a pythonic dict
    or an instance object (eg. SQL recordsets) but it ensures that you can
    do all the convenient lookups such as x.first_name, x['first_name'] or
    x.get('first_name').
    """

    def __init__(self, *a, **kw):
        # Fold an optional positional source (plain dict, old-style class
        # instance, or another SmartDict) into the keyword arguments.
        if a:
            src = a[0]
            if type(src) is DictType:
                kw.update(src)
            elif type(src) is InstanceType:
                kw.update(src.__dict__)
            elif hasattr(src, '__class__') and src.__class__.__name__ == 'SmartDict':
                kw.update(src.__dict__)

        dict.__init__(self, **kw)
        # Alias the attribute namespace to the dict itself so that
        # x.key and x['key'] address the same storage.
        self.__dict__ = self
|
||
|
||
class SuperMemoElement(SmartDict):
    "SmartDict wrapper to store SM Element data"

    def __init__(self, *a, **kw):
        SmartDict.__init__(self, *a, **kw)
        # default content -- every expected SM attribute starts as None.
        # 'LastRepetiton' keeps its historical misspelling; real values are
        # assigned dynamically from XML tag names elsewhere.
        for key in ('lTitle', 'Title', 'Question', 'Answer', 'Count',
                    'Type', 'ID', 'Interval', 'Lapses', 'Repetitions',
                    'LastRepetiton', 'AFactor', 'UFactor'):
            self.__dict__[key] = None
|
||
|
||
|
||
|
||
# This is an AnkiImporter
|
||
# This is an AnkiImporter
class SupermemoXmlImporter(Importer):
    """
    Supermemo XML export's to Anki parser.
    Goes through a SM collection and fetch all elements.

    My SM collection was a big mess where topics and items were mixed.
    I was unable to parse my content in a regular way like for loop on
    minidom.getElementsByTagName() etc. My collection had also an
    limitation, topics were splited into branches with max 100 items
    on each. Learning themes were in deep structure. I wanted to have
    full title on each element to be stored in tags.

    Code should be upgrade to support importing of SM2006 exports.
    """

    def __init__(self, *args):
        """Initialize internal variables.
        Parameters to be exposed to GUI are stored in self.META"""

        Importer.__init__(self, *args)
        self.lines = None
        self.numFields = 2  # was int(2); the plain literal is equivalent

        # SmXmlParse VARIABLES
        self.xmldoc = None
        self.pieces = []
        self.cntBuf = []  # to store last parsed data
        self.cntElm = []  # to store SM Elements data
        self.cntCol = []  # to store SM Collections data

        # store some meta info related to parse algorithm
        # SmartDict works like dict / class wrapper
        self.cntMeta = SmartDict()
        self.cntMeta.popTitles = False
        self.cntMeta.title = []

        # META stores controls of import script, should be
        # exposed to import dialog. These are default values.
        self.META = SmartDict()
        self.META.resetLearningData = False   # implemented
        self.META.onlyMemorizedItems = False  # implemented
        self.META.loggerLevel = 2             # implemented 0no,1info,2error,3debug
        self.META.tagAllTopics = True
        # path patterns to be tagged - in gui entered like
        # 'Advanced English 97|My Vocablary'
        self.META.pathsToBeTagged = ['English for begginers',
                                     'Advanced English 97',
                                     'Phrasal Verbs']
        self.META.tagMemorizedItems = True    # implemented
        self.META.logToStdOutput = False      # implemented

        # accumulates ForeignCard objects produced by addItemToCards()
        self.cards = []
||
## TOOLS
|
||
|
||
def _fudgeText(self, text):
|
||
"Replace sm syntax to Anki syntax"
|
||
text = text.replace("\n\r", u"<br>")
|
||
text = text.replace("\n", u"<br>")
|
||
return text
|
||
|
||
def _unicode2ascii(self,str):
|
||
"Remove diacritic punctuation from strings (titles)"
|
||
return u"".join([ c for c in unicodedata.normalize('NFKD', str) if not unicodedata.combining(c)])
|
||
|
||
def _decode_htmlescapes(self,s):
|
||
"""Unescape HTML code."""
|
||
#In case of bad formated html you can import MinimalSoup etc.. see btflsoup source code
|
||
from BeautifulSoup import BeautifulStoneSoup as btflsoup
|
||
|
||
#my sm2004 also ecaped & char in escaped sequences.
|
||
s = re.sub(u'&',u'&',s)
|
||
#unescaped solitary chars < or > that were ok for minidom confuse btfl soup
|
||
s = re.sub(u'>',u'>',s)
|
||
s = re.sub(u'<',u'<',s)
|
||
|
||
return unicode(btflsoup(s,convertEntities=btflsoup.HTML_ENTITIES ))
|
||
|
||
|
||
def _unescape(self,s,initilize):
|
||
"""Note: This method is not used, BeautifulSoup does better job.
|
||
"""
|
||
|
||
if self._unescape_trtable == None:
|
||
self._unescape_trtable = (
|
||
('€',u'€'), (' ',u' '), ('!',u'!'), ('"',u'"'), ('#',u'#'), ('$',u'$'), ('%',u'%'), ('&',u'&'), (''',u"'"),
|
||
('(',u'('), (')',u')'), ('*',u'*'), ('+',u'+'), (',',u','), ('-',u'-'), ('.',u'.'), ('/',u'/'), ('0',u'0'),
|
||
('1',u'1'), ('2',u'2'), ('3',u'3'), ('4',u'4'), ('5',u'5'), ('6',u'6'), ('7',u'7'), ('8',u'8'), ('9',u'9'),
|
||
(':',u':'), (';',u';'), ('<',u'<'), ('=',u'='), ('>',u'>'), ('?',u'?'), ('@',u'@'), ('A',u'A'), ('B',u'B'),
|
||
('C',u'C'), ('D',u'D'), ('E',u'E'), ('F',u'F'), ('G',u'G'), ('H',u'H'), ('I',u'I'), ('J',u'J'), ('K',u'K'),
|
||
('L',u'L'), ('M',u'M'), ('N',u'N'), ('O',u'O'), ('P',u'P'), ('Q',u'Q'), ('R',u'R'), ('S',u'S'), ('T',u'T'),
|
||
('U',u'U'), ('V',u'V'), ('W',u'W'), ('X',u'X'), ('Y',u'Y'), ('Z',u'Z'), ('[',u'['), ('\',u'\\'), (']',u']'),
|
||
('^',u'^'), ('_',u'_'), ('`',u'`'), ('a',u'a'), ('b',u'b'), ('c',u'c'), ('d',u'd'), ('e',u'e'), ('f',u'f'),
|
||
('g',u'g'), ('h',u'h'), ('i',u'i'), ('j',u'j'), ('k',u'k'), ('l',u'l'), ('m',u'm'), ('n',u'n'),
|
||
('o',u'o'), ('p',u'p'), ('q',u'q'), ('r',u'r'), ('s',u's'), ('t',u't'), ('u',u'u'), ('v',u'v'),
|
||
('w',u'w'), ('x',u'x'), ('y',u'y'), ('z',u'z'), ('{',u'{'), ('|',u'|'), ('}',u'}'), ('~',u'~'),
|
||
(' ',u' '), ('¡',u'¡'), ('¢',u'¢'), ('£',u'£'), ('¤',u'¤'), ('¥',u'¥'), ('¦',u'¦'), ('§',u'§'),
|
||
('¨',u'¨'), ('©',u'©'), ('ª',u'ª'), ('«',u'«'), ('¬',u'¬'), ('­',u''), ('®',u'®'), ('¯',u'¯'),
|
||
('°',u'°'), ('±',u'±'), ('²',u'²'), ('³',u'³'), ('´',u'´'), ('µ',u'µ'), ('¶',u'¶'), ('·',u'·'),
|
||
('¸',u'¸'), ('¹',u'¹'), ('º',u'º'), ('»',u'»'), ('¼',u'¼'), ('½',u'½'), ('¾',u'¾'), ('¿',u'¿'),
|
||
('À',u'À'), ('Á',u'Á'), ('Â',u'Â'), ('Ã',u'Ã'), ('Ä',u'Ä'), ('Å',u'Å'), ('Å',u'Å'), ('Æ',u'Æ'),
|
||
('Ç',u'Ç'), ('È',u'È'), ('É',u'É'), ('Ê',u'Ê'), ('Ë',u'Ë'), ('Ì',u'Ì'), ('Í',u'Í'), ('Î',u'Î'),
|
||
('Ï',u'Ï'), ('Ð',u'Ð'), ('Ñ',u'Ñ'), ('Ò',u'Ò'), ('Ó',u'Ó'), ('Ô',u'Ô'), ('Õ',u'Õ'), ('Ö',u'Ö'),
|
||
('×',u'×'), ('Ø',u'Ø'), ('Ù',u'Ù'), ('Ú',u'Ú'), ('Û',u'Û'), ('Ü',u'Ü'), ('Ý',u'Ý'), ('Þ',u'Þ'),
|
||
('ß',u'ß'), ('à',u'à'), ('á',u'á'), ('â',u'â'), ('ã',u'ã'), ('ä',u'ä'), ('å',u'å'), ('æ',u'æ'),
|
||
('ç',u'ç'), ('è',u'è'), ('é',u'é'), ('ê',u'ê'), ('ë',u'ë'), ('ì',u'ì'), ('í',u'í'), ('í',u'í'),
|
||
('î',u'î'), ('ï',u'ï'), ('ð',u'ð'), ('ñ',u'ñ'), ('ò',u'ò'), ('ó',u'ó'), ('ô',u'ô'), ('õ',u'õ'),
|
||
('ö',u'ö'), ('÷',u'÷'), ('ø',u'ø'), ('ù',u'ù'), ('ú',u'ú'), ('û',u'û'), ('ü',u'ü'), ('ý',u'ý'),
|
||
('þ',u'þ'), ('ÿ',u'ÿ'), ('Ā',u'Ā'), ('ā',u'ā'), ('Ă',u'Ă'), ('ă',u'ă'), ('Ą',u'Ą'), ('ą',u'ą'),
|
||
('Ć',u'Ć'), ('ć',u'ć'), ('Ĉ',u'Ĉ'), ('ĉ',u'ĉ'), ('Ċ',u'Ċ'), ('ċ',u'ċ'), ('Č',u'Č'), ('č',u'č'),
|
||
('Ď',u'Ď'), ('ď',u'ď'), ('Đ',u'Đ'), ('đ',u'đ'), ('Ē',u'Ē'), ('ē',u'ē'), ('Ĕ',u'Ĕ'), ('ĕ',u'ĕ'),
|
||
('Ė',u'Ė'), ('ė',u'ė'), ('Ę',u'Ę'), ('ę',u'ę'), ('Ě',u'Ě'), ('ě',u'ě'), ('Ĝ',u'Ĝ'), ('ĝ',u'ĝ'),
|
||
('Ğ',u'Ğ'), ('ğ',u'ğ'), ('Ġ',u'Ġ'), ('ġ',u'ġ'), ('Ģ',u'Ģ'), ('ģ',u'ģ'), ('Ĥ',u'Ĥ'), ('ĥ',u'ĥ'),
|
||
('Ħ',u'Ħ'), ('ħ',u'ħ'), ('Ĩ',u'Ĩ'), ('ĩ',u'ĩ'), ('Ī',u'Ī'), ('ī',u'ī'), ('Ĭ',u'Ĭ'), ('ĭ',u'ĭ'),
|
||
('Į',u'Į'), ('į',u'į'), ('İ',u'İ'), ('ı',u'ı'), ('IJ',u'IJ'), ('ij',u'ij'), ('Ĵ',u'Ĵ'), ('ĵ',u'ĵ'),
|
||
('Ķ',u'Ķ'), ('ķ',u'ķ'), ('ĸ',u'ĸ'), ('Ĺ',u'Ĺ'), ('ĺ',u'ĺ'), ('Ļ',u'Ļ'), ('ļ',u'ļ'), ('Ľ',u'Ľ'),
|
||
('ľ',u'ľ'), ('Ŀ',u'Ŀ'), ('ŀ',u'ŀ'), ('Ł',u'Ł'), ('ł',u'ł'), ('Ń',u'Ń'), ('ń',u'ń'), ('Ņ',u'Ņ'),
|
||
('ņ',u'ņ'), ('Ň',u'Ň'), ('ň',u'ň'), ('ʼn',u'ʼn'), ('Ŋ',u'Ŋ'), ('ŋ',u'ŋ'), ('Ō',u'Ō'), ('ō',u'ō'),
|
||
('Ŏ',u'Ŏ'), ('ŏ',u'ŏ'), ('Ő',u'Ő'), ('ő',u'ő'), ('Œ',u'Œ'), ('œ',u'œ'), ('Ŕ',u'Ŕ'), ('ŕ',u'ŕ'),
|
||
('Ŗ',u'Ŗ'), ('ŗ',u'ŗ'), ('Ř',u'Ř'), ('ř',u'ř'), ('Ś',u'Ś'), ('ś',u'ś'), ('Ŝ',u'Ŝ'), ('ŝ',u'ŝ'),
|
||
('Ş',u'Ş'), ('ş',u'ş'), ('Š',u'Š'), ('š',u'š'), ('Ţ',u'Ţ'), ('ţ',u'ţ'), ('Ť',u'Ť'), ('ť',u'ť'),
|
||
('Ŧ',u'Ŧ'), ('ŧ',u'ŧ'), ('Ũ',u'Ũ'), ('ũ',u'ũ'), ('Ū',u'Ū'), ('ū',u'ū'), ('Ŭ',u'Ŭ'), ('ŭ',u'ŭ'),
|
||
('Ů',u'Ů'), ('ů',u'ů'), ('Ű',u'Ű'), ('ű',u'ű'), ('Ų',u'Ų'), ('ų',u'ų'), ('Ŵ',u'Ŵ'), ('ŵ',u'ŵ'),
|
||
('Ŷ',u'Ŷ'), ('ŷ',u'ŷ'), ('Ÿ',u'Ÿ'), ('Ź',u'Ź'), ('ź',u'ź'), ('Ż',u'Ż'), ('ż',u'ż'), ('Ž',u'Ž'),
|
||
('ž',u'ž'), ('ſ',u'ſ'), ('Ŕ',u'Ŕ'), ('ŕ',u'ŕ'), ('Ŗ',u'Ŗ'), ('ŗ',u'ŗ'), ('Ř',u'Ř'), ('ř',u'ř'),
|
||
('Ś',u'Ś'), ('ś',u'ś'), ('Ŝ',u'Ŝ'), ('ŝ',u'ŝ'), ('Ş',u'Ş'), ('ş',u'ş'), ('Š',u'Š'), ('š',u'š'),
|
||
('Ţ',u'Ţ'), ('ţ',u'ţ'), ('Ť',u'Ť'), ('Ɂ',u'ť'), ('Ŧ',u'Ŧ'), ('ŧ',u'ŧ'), ('Ũ',u'Ũ'), ('ũ',u'ũ'),
|
||
('Ū',u'Ū'), ('ū',u'ū'), ('Ŭ',u'Ŭ'), ('ŭ',u'ŭ'), ('Ů',u'Ů'), ('ů',u'ů'), ('Ű',u'Ű'), ('ű',u'ű'),
|
||
('Ų',u'Ų'), ('ų',u'ų'), ('Ŵ',u'Ŵ'), ('ŵ',u'ŵ'), ('Ŷ',u'Ŷ'), ('ŷ',u'ŷ'), ('Ÿ',u'Ÿ'), ('Ź',u'Ź'),
|
||
('ź',u'ź'), ('Ż',u'Ż'), ('ż',u'ż'), ('Ž',u'Ž'), ('ž',u'ž'), ('ſ',u'ſ'),
|
||
)
|
||
|
||
|
||
#m = re.match()
|
||
#s = s.replace(code[0], code[1])
|
||
|
||
## DEFAULT IMPORTER METHODS
|
||
|
||
def foreignCards(self):
|
||
|
||
# Load file and parse it by minidom
|
||
self.loadSource(self.file)
|
||
|
||
# Migrating content / time consuming part
|
||
# addItemToCards is called for each sm element
|
||
self.logger(u'Parsing started.')
|
||
self.parse()
|
||
self.logger(u'Parsing done.')
|
||
|
||
# Return imported cards
|
||
return self.cards
|
||
|
||
def fields(self):
|
||
return 2
|
||
|
||
## PARSER METHODS
|
||
|
||
def addItemToCards(self,item):
|
||
"This method actually do conversion"
|
||
|
||
# new anki card
|
||
card = ForeignCard()
|
||
|
||
# clean Q and A
|
||
card.fields.append(self._fudgeText(self._decode_htmlescapes(item.Question)))
|
||
card.fields.append(self._fudgeText(self._decode_htmlescapes(item.Answer)))
|
||
card.tags = u""
|
||
|
||
# pre-process scheduling data
|
||
tLastrep = time.mktime(time.strptime(item.LastRepetition, '%d.%m.%Y'))
|
||
tToday = time.time()
|
||
|
||
# convert learning data
|
||
if not self.META.resetLearningData:
|
||
# migration of LearningData algorithm
|
||
card.interval = item.Interval
|
||
card.successive = item.Repetitions
|
||
##card.due = tToday + (float(item.Interval) * 86400.0) - tLastrep
|
||
card.due = tLastrep + (float(item.Interval) * 86400.0)
|
||
card.lastDue = 0
|
||
|
||
card.factor = float(item.AFactor.replace(',','.'))
|
||
card.lastFactor = float(item.AFactor.replace(',','.'))
|
||
|
||
# SM is not exporting all the information Anki keeps track off, so it
|
||
# needs to be fudged
|
||
card.youngEase0 = item.Lapses
|
||
card.youngEase3 = item.Repetitions + item.Lapses
|
||
card.yesCount = item.Repetitions
|
||
card.noCount = item.Lapses
|
||
card.reps = card.yesCount + card.noCount
|
||
card.spaceUntil = card.due
|
||
card.combinedDue = card.due
|
||
|
||
# categories & tags
|
||
# it's worth to have every theme (tree structure of sm collection) stored in tags, but sometimes not
|
||
# you can deceide if you are going to tag all toppics or just that containing some pattern
|
||
tTaggTitle = False
|
||
for pattern in self.META.pathsToBeTagged:
|
||
if item.lTitle != None and pattern.lower() in u" ".join(item.lTitle).lower():
|
||
tTaggTitle = True
|
||
break
|
||
if tTaggTitle or self.META.tagAllTopics:
|
||
# normalize - remove diacritic punctuation from unicode chars to ascii
|
||
item.lTitle = [ self._unicode2ascii(topic) for topic in item.lTitle]
|
||
|
||
# Transfrom xyz / aaa / bbb / ccc on Title path to Tag xyzAaaBbbCcc
|
||
# clean things like [999] or [111-2222] from title path, example: xyz / [1000-1200] zyx / xyz
|
||
# clean whitespaces
|
||
# set Capital letters for first char of the word
|
||
tmp = list(set([ re.sub('(\[[0-9]+\])' , ' ' , i ).replace('_',' ') for i in item.lTitle ]))
|
||
tmp = list(set([ re.sub('(\W)',' ', i ) for i in tmp ]))
|
||
tmp = list(set([ re.sub( '^[0-9 ]+$','',i) for i in tmp ]))
|
||
tmp = list(set([ capwords(i).replace(' ','') for i in tmp ]))
|
||
tags = [ j[0].lower() + j[1:] for j in tmp if j.strip() <> '']
|
||
|
||
card.tags += u" ".join(tags)
|
||
|
||
if self.META.tagMemorizedItems and item.Interval >0:
|
||
card.tags += " Memorized"
|
||
|
||
self.logger(u'Element tags\t- ' + card.tags, level=3)
|
||
|
||
self.cards.append(card)
|
||
|
||
def logger(self,text,level=1):
|
||
"Wrapper for Anki logger"
|
||
|
||
dLevels={0:'',1:u'Info',2:u'Verbose',3:u'Debug'}
|
||
if level<=self.META.loggerLevel:
|
||
self.deck.updateProgress(_(text))
|
||
|
||
if self.META.logToStdOutput:
|
||
print self.__class__.__name__+ u" - " + dLevels[level].ljust(9) +u' -\t'+ _(text)
|
||
|
||
|
||
# OPEN AND LOAD
|
||
def openAnything(self,source):
|
||
"Open any source / actually only openig of files is used"
|
||
|
||
if source == "-":
|
||
return sys.stdin
|
||
|
||
# try to open with urllib (if source is http, ftp, or file URL)
|
||
import urllib
|
||
try:
|
||
return urllib.urlopen(source)
|
||
except (IOError, OSError):
|
||
pass
|
||
|
||
# try to open with native open function (if source is pathname)
|
||
try:
|
||
return open(source)
|
||
except (IOError, OSError):
|
||
pass
|
||
|
||
# treat source as string
|
||
import StringIO
|
||
return StringIO.StringIO(str(source))
|
||
|
||
def loadSource(self, source):
|
||
"""Load source file and parse with xml.dom.minidom"""
|
||
self.source = source
|
||
self.logger(u'Load started...')
|
||
sock = self.openAnything(self.source)
|
||
self.xmldoc = minidom.parse(sock).documentElement
|
||
sock.close()
|
||
self.logger(u'Load done.')
|
||
|
||
|
||
# PARSE
|
||
def parse(self, node=None):
|
||
"Parse method - parses document elements"
|
||
|
||
if node==None and self.xmldoc<>None:
|
||
node = self.xmldoc
|
||
|
||
_method = "parse_%s" % node.__class__.__name__
|
||
if hasattr(self,_method):
|
||
parseMethod = getattr(self, _method)
|
||
parseMethod(node)
|
||
else:
|
||
self.logger(u'No handler for method %s' % _method, level=3)
|
||
|
||
def parse_Document(self, node):
|
||
"Parse XML document"
|
||
|
||
self.parse(node.documentElement)
|
||
|
||
def parse_Element(self, node):
|
||
"Parse XML element"
|
||
|
||
_method = "do_%s" % node.tagName
|
||
if hasattr(self,_method):
|
||
handlerMethod = getattr(self, _method)
|
||
handlerMethod(node)
|
||
else:
|
||
self.logger(u'No handler for method %s' % _method, level=3)
|
||
#print traceback.print_exc()
|
||
|
||
def parse_Text(self, node):
|
||
"Parse text inside elements. Text is stored into local buffer."
|
||
|
||
text = node.data
|
||
self.cntBuf.append(text)
|
||
|
||
#def parse_Comment(self, node):
|
||
# """
|
||
# Source can contain XML comments, but we ignore them
|
||
# """
|
||
# pass
|
||
|
||
|
||
# DO
|
||
def do_SuperMemoCollection(self, node):
|
||
"Process SM Collection"
|
||
|
||
for child in node.childNodes: self.parse(child)
|
||
|
||
def do_SuperMemoElement(self, node):
|
||
"Process SM Element (Type - Title,Topics)"
|
||
|
||
self.logger('='*45, level=3)
|
||
|
||
self.cntElm.append(SuperMemoElement())
|
||
self.cntElm[-1]['lTitle'] = self.cntMeta['title']
|
||
|
||
#parse all child elements
|
||
for child in node.childNodes: self.parse(child)
|
||
|
||
#strip all saved strings, just for sure
|
||
for key in self.cntElm[-1].keys():
|
||
if hasattr(self.cntElm[-1][key], 'strip'):
|
||
self.cntElm[-1][key]=self.cntElm[-1][key].strip()
|
||
|
||
#pop current element
|
||
smel = self.cntElm.pop()
|
||
|
||
# Process cntElm if is valid Item (and not an Topic etc..)
|
||
# if smel.Lapses != None and smel.Interval != None and smel.Question != None and smel.Answer != None:
|
||
if smel.Title == None and smel.Question != None and smel.Answer != None:
|
||
if smel.Answer.strip() !='' and smel.Question.strip() !='':
|
||
|
||
# migrate only memorized otherway skip/continue
|
||
if self.META.onlyMemorizedItems and not(int(smel.Interval) > 0):
|
||
self.logger(u'Element skiped \t- not memorized ...', level=3)
|
||
else:
|
||
#import sm element data to Anki
|
||
self.addItemToCards(smel)
|
||
self.logger(u"Import element \t- " + smel['Question'], level=3)
|
||
|
||
#print element
|
||
self.logger('-'*45, level=3)
|
||
for key in smel.keys():
|
||
self.logger('\t%s %s' % ((key+':').ljust(15),smel[key]), level=3 )
|
||
else:
|
||
self.logger(u'Element skiped \t- no valid Q and A ...', level=3)
|
||
|
||
|
||
else:
|
||
# now we know that item was topic
|
||
# parseing of whole node is now finished
|
||
|
||
# test if it's really topic
|
||
if smel.Title != None:
|
||
# remove topic from title list
|
||
t = self.cntMeta['title'].pop()
|
||
self.logger(u'End of topic \t- %s' % (t), level=2)
|
||
|
||
def do_Content(self, node):
|
||
"Process SM element Content"
|
||
|
||
for child in node.childNodes:
|
||
if hasattr(child,'tagName') and child.firstChild != None:
|
||
self.cntElm[-1][child.tagName]=child.firstChild.data
|
||
|
||
def do_LearningData(self, node):
|
||
"Process SM element LearningData"
|
||
|
||
for child in node.childNodes:
|
||
if hasattr(child,'tagName') and child.firstChild != None:
|
||
self.cntElm[-1][child.tagName]=child.firstChild.data
|
||
|
||
# It's being processed in do_Content now
|
||
#def do_Question(self, node):
|
||
# for child in node.childNodes: self.parse(child)
|
||
# self.cntElm[-1][node.tagName]=self.cntBuf.pop()
|
||
|
||
# It's being processed in do_Content now
|
||
#def do_Answer(self, node):
|
||
# for child in node.childNodes: self.parse(child)
|
||
# self.cntElm[-1][node.tagName]=self.cntBuf.pop()
|
||
|
||
def do_Title(self, node):
|
||
"Process SM element Title"
|
||
|
||
t = self._decode_htmlescapes(node.firstChild.data)
|
||
self.cntElm[-1][node.tagName] = t
|
||
self.cntMeta['title'].append(t)
|
||
self.cntElm[-1]['lTitle'] = self.cntMeta['title']
|
||
self.logger(u'Start of topic \t- ' + u" / ".join(self.cntMeta['title']), level=2)
|
||
|
||
|
||
def do_Type(self, node):
|
||
"Process SM element Type"
|
||
|
||
if len(self.cntBuf) >=1 :
|
||
self.cntElm[-1][node.tagName]=self.cntBuf.pop()
|
||
|
||
|
||
if __name__ == '__main__':

    # for testing you can start it standalone

    #file = u'/home/epcim/hg2g/dev/python/sm2anki/ADVENG2EXP.xxe.esc.zaloha_FINAL.xml'
    #file = u'/home/epcim/hg2g/dev/python/anki/libanki/tests/importing/supermemo/original_ENGLISHFORBEGGINERS_noOEM.xml'
    #file = u'/home/epcim/hg2g/dev/python/anki/libanki/tests/importing/supermemo/original_ENGLISHFORBEGGINERS_oem_1250.xml'

    # guard against a missing argument (used to raise a bare IndexError);
    # the local was also renamed -- 'file' shadowed the Python 2 builtin
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <supermemo-xml-export>\n" % sys.argv[0])
        sys.exit(2)
    path = str(sys.argv[1])
    impo = SupermemoXmlImporter(Deck(), path)
    impo.foreignCards()

    # NOTE(review): kept the original non-zero exit status; it looks
    # unintentional for a successful run -- confirm before changing to 0.
    sys.exit(1)
|
||
|
||
# vim: ts=4 sts=2 ft=python
|