implement field cache, fix unit tests, remove some importers

the field cache (fsums table) also needs to store the model id to preserve the
old behaviour of limiting duplicate checks to a given model, and to ensure
we're actually comparing against the same fields

removed the dingsbums and wcu importers; will accept them back if the authors
port them to the new codebase.
This commit is contained in:
Damien Elmes 2011-03-11 01:03:14 +09:00
parent 1078285f0f
commit 4becd8399c
8 changed files with 69 additions and 321 deletions

View file

@ -475,7 +475,7 @@ due > :now and due < :now""", now=time.time())
if isRandom:
due = random.randrange(0, 10000)
# flush the fact so we get its id
fact.flush(cache=False)
fact.flush()
for template in cms:
print "fixme:specify group on fact add"
group = self.groupForTemplate(template)
@ -560,7 +560,7 @@ where fid = :fid and tid = :cmid""",
return
strids = ids2str(ids)
self.db.execute("delete from facts where id in %s" % strids)
#self.db.execute("delete from fdata where fid in %s" % strids)
self.db.execute("delete from fsums where fid in %s" % strids)
def _deleteDanglingFacts(self):
"Delete any facts without cards. Don't call this directly."
@ -621,17 +621,20 @@ select id from facts where id not in (select distinct fid from cards)""")
# trash
sfids = ids2str(
self.db.list("select fid from cards where id in "+sids))
# need to handle delete of fsums/revlog remotely after sync
self.db.execute(
"update cards set crt = 0, mod = ? where id in "+sids,
intTime())
self.db.execute(
"update facts set crt = 0, mod = ? where id in "+sfids,
intTime())
self.db.execute("delete from fsums where fid in "+sfids)
self.db.execute("delete from revlog where cid in "+sids)
self.db.execute("update cards set crt = 0 where id in "+sids)
self.db.execute("update facts set crt = 0 where id in "+sfids)
self.db.execute("delete from fdata where fid in "+sfids)
self.finishProgress()
# Permanently remove everything that belongs to cards whose queue is -4:
# first their facts, fdata rows and revlog entries, then the cards themselves.
# The cards are deleted last because the three earlier statements select
# their targets from the cards table.
# NOTE(review): queue = -4 presumably marks trashed cards - confirm.
def emptyTrash(self):
self.db.executescript("""
delete from facts where id in (select fid from cards where queue = -4);
delete from fdata where fid in (select fid from cards where queue = -4);
delete from revlog where cid in (select id from cards where queue = -4);
delete from cards where queue = -4;""")
# Models

View file

@ -5,7 +5,7 @@
import time
from anki.errors import AnkiError
from anki.utils import stripHTMLMedia, fieldChecksum, intTime, \
addTags, deleteTags, parseTags
addTags, deleteTags, joinFields, splitFields, ids2str
class Fact(object):
@ -22,7 +22,6 @@ class Fact(object):
self.crt = intTime()
self.mod = self.crt
self.tags = ""
self.cache = ""
self._fields = [""] * len(self.model.fields)
self.data = ""
self._fmap = self.model.fieldMap()
@ -38,7 +37,7 @@ select mid, crt, mod, tags, flds, data from facts where id = ?""", self.id)
self._fields = self._field.split("\x1f")
self.model = self.deck.getModel(self.mid)
def flush(self, cache=True):
def flush(self):
self.mod = intTime()
# facts table
sfld = self._fields[self.model.sortField()]
@ -48,22 +47,22 @@ insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
self.mod, self.tags, self.joinedFields(),
sfld, self.data)
self.id = res.lastrowid
self.updateFieldChecksums()
def joinedFields(self):
return "\x1f".join(self._fields)
return joinFields(self._fields)
# # fdata table
# self.deck.db.execute("delete from fdata where fid = ?", self.id)
# d = []
# for (fmid, ord, conf) in self._fmap.values():
# val = self._fields[ord]
# d.append(dict(fid=self.id, fmid=fmid, ord=ord,
# val=val))
# d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags))
# self.deck.db.executemany("""
# insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
# # media and caches
# self.deck.updateCache([self.id], "fact")
def updateFieldChecksums(self):
    "Rebuild this fact's fsums rows from the fields marked as unique."
    db = self.deck.db
    # start from a clean slate so stale checksums never linger
    db.execute("delete from fsums where fid = ?", self.id)
    rows = []
    for (idx, fieldConf) in self._fmap.values():
        if fieldConf['uniq']:
            csum = fieldChecksum(self._fields[idx])
            # a falsy checksum gets no row
            if csum:
                rows.append((self.id, self.mid, csum))
    db.executemany("insert into fsums values (?, ?, ?)", rows)
def cards(self):
return [self.deck.getCard(id) for id in self.deck.db.list(
@ -111,14 +110,26 @@ insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
if not conf['uniq']:
return True
val = self[name]
if not val:
return True
csum = fieldChecksum(val)
if self.id:
lim = "and fid != :fid"
else:
lim = ""
return not self.deck.db.scalar(
"select 1 from fdata where csum = :c %s and val = :v" % lim,
c=csum, v=val, fid=self.id)
fids = self.deck.db.list(
"select fid from fsums where csum = ? and fid != ? and mid = ?",
csum, self.id or 0, self.mid)
if not fids:
return True
# grab facts with the same checksums, and see if they're actually
# duplicates
for flds in self.deck.db.list("select flds from facts where id in "+
ids2str(fids)):
fields = splitFields(flds)
if fields[ord] == val:
return False
return True
def fieldComplete(self, name, text=None):
(ord, conf) = self._fmap[name]

View file

@ -361,15 +361,11 @@ where factId in (%s)""" % ",".join([str(s) for s in factIds]))
from anki.importing.csvfile import TextImporter
from anki.importing.anki10 import Anki10Importer
from anki.importing.mnemosyne10 import Mnemosyne10Importer
from anki.importing.wcu import WCUImporter
from anki.importing.supermemo_xml import SupermemoXmlImporter
from anki.importing.dingsbums import DingsBumsImporter
Importers = (
(_("Text separated by tabs or semicolons (*)"), TextImporter),
(_("Anki Deck (*.anki)"), Anki10Importer),
(_("Mnemosyne Deck (*.mem)"), Mnemosyne10Importer),
(_("CueCard Deck (*.wcu)"), WCUImporter),
(_("Supermemo XML export (*.xml)"), SupermemoXmlImporter),
(_("DingsBums?! Deck (*.dbxml)"), DingsBumsImporter),
)

View file

@ -1,222 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright: rick@vanosten.net
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
"""\
Importing DingsBums?! decks (see dingsbums.vanosten.net)
========================================================
GENERAL:
* DingsBums?! files are xml with relational content.
* DingsBums?!'s data format is more relational than Anki's. Therefore some of the relations are denormalized.
* A stack in DingsBums?! is a deck in Anki
* An entry type in DingsBums?! is a model in Anki
* An entry type attribute in DingsBums?! is a field in Anki
* An entry type attribute item in DingsBums?! does not exist in Anki. It is just the contents of a field denormalized.
* There is no concept of units and categories in Anki.
* An entry in DingsBums?! is basically a fact in Anki
* There are no cards in DingsBums?!
* There is a special plugin in Anki for Pinyin. Therefore syllable settings from DingsBums?! are ignored.
* The locale settings in DingsBums?! have never been active and are therefore ignored.
* All statistics will get lost - i.e. no historic information about progress will be migrated to Anki.
* The DingsBums?! stack needs to end with *.xml in order to be recognizable in Anki import.
* The learning levels from DingsBums?! are not taken into account because they do not really match spaced repetition.
DESIGN OF MAPPING FROM DingsBums?! TO Anki
*
* The contents of units and categories are transferred as tags to Anki: unit/category label + "_" + unit/category name.
* If unit/category name has space, then it is replaced by "_"
* The fields "base", "target", "explanation", "example", "pronunciation" and "relation" are created as fields in Anki
* The fields are only created and used in Anki, if they were visible in DingsBums?!, i.e. < 3:
VISIBILITY_ALWAYS = 0;
VISIBILITY_QUERY = 1;
VISIBILITY_SOLUTION = 2;
VISIBILITY_NEVER = 3;
* The name of the fields in Anki is taken from the labels defined in the stack properties
* The description field of Anki is not used/displayed. Therefore there is not much sense to transfer the contents of title, author, notes, copyright and license.
* The visibility options in DingsBums?! are used as hints to make cards in Anki:
+ Two card templates are made for each model and then applied to each fact when importing.
+ "Forward": Base -> "Question", target -> "Answer"; if "always", then part of question; if "solution" or "part of query" then part of answer
+ "Reverse": Target -> "Answer", base -> "Question"
+ Unit and category are not shown, as they are tags and there is no possibility to distinguish between visibility settings in this case.
CHANGES MADE TO LIBANKI:
* Added libanki/anki/import/dingsbums.py
* Added DingsBumsImporter to importers at end of file libanki/anki/import/__init__.py
* Added libanki/tests/importing/dingsbums.xml
* Added method test_dingsbums() to libanki/anki/tests/test_importing.py
"""
from anki.importing import Importer
from anki import Deck
from anki.facts import Fact
from anki.models import Field, Template, Model
from anki.lang import _
from xml.sax import make_parser
from xml.sax.handler import ContentHandler
import sys
class DingsBumsHandler(ContentHandler):
    """SAX content handler that builds Anki models and facts from a DingsBums?! stack.

    Element attributes are processed in startElement(), element text is
    collected by characters() and consumed in endElement().  Models are
    registered with the deck as soon as an entry type is opened; facts are
    added to the deck when their <e> element is closed.
    """

    # Optional attribute slots that an entry type / entry may carry.
    _ATTRIBUTE_SLOTS = ("a1", "a2", "a3", "a4")
    # Label keys of the optional standard fields (subject to visibility).
    _OPTIONAL_FIELDS = ("exp", "ex", "pro", "rel")
    # Closing any of these container elements advances the progress bar once.
    _PROGRESS_ELEMENTS = ("vocabulary", "etattributes", "entrytypes",
                          "units", "categories", "entries")
    # Entry child element -> label key of the field it fills.  The mapping
    # between the XML tags and the field names is not a logical one; see
    # net.vanosten.dings.consts.Constants.XML_*
    _FIELD_ELEMENTS = {"o": "b", "d": "t", "ep": "exp", "ea": "ex",
                       "p": "pro", "r": "rel"}

    def __init__(self, deck):
        """Create a handler that adds the models and facts it parses to `deck`."""
        ContentHandler.__init__(self)
        self.eid = "0"
        self.attributeItems = {}
        self.unitCategories = {}
        self.attributes = {}
        self.currentContent = ""
        # raw text chunks of the element currently being read
        self._chunks = []
        self.labels = {}
        # the user cannot change these labels, so they are not in the xml file
        self.labels["pro"] = u"Pronunciation"
        self.labels["rel"] = u"Relation"
        self.visibility = {}
        self.models = {}
        # mapping of entry type and attribute name (e.g. "ET8_A1", "ET8_A2", ...)
        self.typeAttributes = {}
        self.deck = deck
        self.f = None # the current fact
        self.countFacts = 0

    def _resetContent(self):
        """Forget the text collected so far."""
        self._chunks = []
        self.currentContent = ""

    def startElement(self, name, attrs):
        """Implements SAX interface"""
        # text seen before this tag belongs to another element
        self._resetContent()
        if name in ("etai", "unit", "category"):
            self.eid = attrs["eid"]
        elif name == "eta":
            self.attributes[attrs["eid"]] = attrs["n"]
        elif name == "entrytype":
            self.createModel(attrs)
        elif name == "e":
            self.createFact(attrs)

    def endElement(self, name):
        """Implements SAX interface"""
        if name in self._PROGRESS_ELEMENTS:
            self.deck.updateProgress()
        elif name.endswith("label"):
            self.labels[name.replace("label", "")] = self.currentContent
        elif name.startswith("vis"):
            self.visibility[name.replace("vis", "")] = self.currentContent
        elif name == "etai":
            self.attributeItems[self.eid] = self.currentContent
        elif name == "name":
            self.unitCategories[self.eid] = self.prepareTag(self.currentContent)
        elif name == "e":
            self.deck.addFact(self.f)
            self.countFacts += 1
        elif name in self._FIELD_ELEMENTS:
            labelKey = self._FIELD_ELEMENTS[name]
            self.f[self.labels[labelKey]] = self.currentContent
        # the text of this element has been consumed
        self._resetContent()

    def characters(self, content):
        """Implements SAX interface"""
        # A parser may deliver the text of one element in several chunks, so
        # accumulate them instead of keeping only the most recent one.
        self._chunks.append(content)
        self.currentContent = u"".join(self._chunks).strip()

    def createModel(self, attrs):
        """Makes a new Anki (fact) model from an entry type.
        The card models are made each time from scratch in order that evt. model specific fields (attributes) can make part."""
        # NOTE(review): FieldModel and CardModel are not among the names this
        # file imports from anki.models (Field, Template, Model) - confirm
        # where they are expected to come from.
        m = Model(attrs["n"])
        # field model for standard fields
        m.addFieldModel(FieldModel(self.labels["b"], True, False)) #there is no uniqueness check in DingsBums?!
        m.addFieldModel(FieldModel(self.labels["t"], True, False))
        for aField in self._OPTIONAL_FIELDS:
            if self.visibility[aField] in "012":
                m.addFieldModel(FieldModel(self.labels[aField], False, False))
        # field models for attributes
        for attr in self._ATTRIBUTE_SLOTS:
            if attr in attrs.keys():
                m.addFieldModel(FieldModel(self.attributes[attrs[attr]], False, False))
                self.typeAttributes[attrs["eid"] + "_" + attr] = self.attributes[attrs[attr]]
        # card model for front
        frontStrings = ["%(" + self.labels["b"] + ")s"]
        backStrings = ["%(" + self.labels["t"] + ")s"]
        for aField in self._OPTIONAL_FIELDS:
            if self.visibility[aField] in "01":
                frontStrings.append("%(" + self.labels[aField] + ")s")
            if self.visibility[aField] in "02":
                backStrings.append("%(" + self.labels[aField] + ")s")
        m.addCardModel(CardModel(u'Forward', "<br>".join(frontStrings), "<br>".join(backStrings)))
        # card model for back
        m.addCardModel(CardModel(u'Reverse', unicode("%(" + self.labels["t"] + ")s"), unicode("%(" + self.labels["b"] + ")s")))
        # tags is just the name without spaces
        m.tags = self.prepareTag(m.name)
        # link
        self.models[attrs["eid"]] = m
        self.deck.addModel(m)

    def createFact(self, attrs):
        """Makes a new Anki fact from an entry."""
        model = self.models[attrs["et"]]
        self.f = Fact(model)
        # process attributes
        for attr in self._ATTRIBUTE_SLOTS:
            if attr in attrs.keys():
                self.f[self.typeAttributes[attrs["et"] + "_" + attr]] = self.attributeItems[attrs[attr]]
        # process tags. Unit, Category plus entry type name
        tagString = unicode(self.unitCategories[attrs["u"]] + " " + self.unitCategories[attrs["c"]] + " " + model.tags)
        self.f.tags = tagString

    def prepareTag(self, stringWithSpace):
        """Return `stringWithSpace` with each run of whitespace replaced by "_"."""
        parts = stringWithSpace.split()
        return "_".join(parts)
class DingsBumsImporter(Importer):
    """Importer that feeds a DingsBums?! XML stack through DingsBumsHandler."""

    # Overrides the default in Importer - otherwise the mapping dialog is
    # shown in the GUI.
    needMapper = False

    def __init__(self, deck, file):
        Importer.__init__(self, deck, file)
        self.deck = deck
        self.file = file
        # number of facts added by the last doImport() run
        self.total = 0

    def doImport(self):
        """Totally overrides the method in Importer"""
        # Progress bar updates: the one issued below plus the six container
        # elements that DingsBumsHandler.endElement reports on.
        progressSteps = 7
        deck = self.deck
        deck.startProgress(progressSteps)
        deck.updateProgress(_("Importing..."))
        # parse the DingsBums?! xml file
        contentHandler = DingsBumsHandler(deck)
        parser = make_parser()
        parser.setContentHandler(contentHandler)
        parser.parse(self.file)
        self.total = contentHandler.countFacts
        deck.finishProgress()
        deck.setModified()
if __name__ == '__main__':
    # Standalone smoke test: pass the file to import as the first
    # command-line argument.
    print("Starting ...")
    filename = str(sys.argv[1])
    # NOTE(review): DeckStorage is not among the names this file imports
    # (only `from anki import Deck`) - confirm how the deck should be created.
    mydeck = DeckStorage.Deck()
    i = DingsBumsImporter(mydeck, filename)
    i.doImport()
    # presumably the sample stack used for testing holds 7 entries - confirm
    assert 7 == i.total
    mydeck.db.close()
    print("... Finished")
    # Reaching this point means the import worked, so report success (the
    # previous exit status of 1 signalled failure to the calling shell).
    sys.exit(0)

View file

@ -1,51 +0,0 @@
# -*- coding: utf-8 -*-
# Author Chris Aakre <caaakre@gmail.com>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import codecs
from anki.importing import Importer, ForeignCard
from anki.lang import _
from anki.errors import *
class WCUImporter(Importer):
    """Importer that reads question/answer cards from a CueCard (*.wcu) XML file."""

    def __init__(self, *args):
        Importer.__init__(self, *args)
        self.lines = None
        self.numFields = 2

    def foreignCards(self):
        """Return one two-field ForeignCard per element below the document root."""
        from xml.dom import minidom, Node

        collected = []
        # Only opening is attempted here; any failure to open the file is
        # reported as an encoding error.
        try:
            handle = codecs.open(self.file, encoding="utf-8")
        except:
            raise ImportFormatError(type="encodingError", info=_("The file was not in UTF8 format."))
        handle.close()

        def sideText(attributes, textKey, pictureKey):
            # prefix the text with an image tag when a picture is attached
            if attributes.has_key(pictureKey):
                return unicode('<img src="' + attributes.get(pictureKey).nodeValue
                               + '"><br/>' + attributes.get(textKey).nodeValue)
            return unicode(attributes.get(textKey).nodeValue)

        def wcuwalk(parent, cards, level=0):
            # depth-first walk; every element node becomes a card
            for node in parent.childNodes:
                if node.nodeType != Node.ELEMENT_NODE:
                    continue
                myCard = ForeignCard()
                question = sideText(node.attributes, "Question", "QuestionPicture")
                answer = sideText(node.attributes, "Answer", "AnswerPicture")
                myCard.fields.extend([question])
                myCard.fields.extend([answer])
                cards.append(myCard)
                wcuwalk(node, cards, level + 1)

        wcuwalk(minidom.parse(self.file).documentElement, collected)
        return collected

    def fields(self):
        """Return the number of fields per card."""
        return self.numFields

    def setNumFields(self):
        """Reset the field count to two."""
        self.numFields = 2

View file

@ -93,6 +93,12 @@ create table if not exists facts (
data text not null
);
create table if not exists fsums (
fid integer not null,
mid integer not null,
csum integer not null
);
create table if not exists models (
id integer primary key,
mod integer not null,

View file

@ -256,6 +256,15 @@ def deleteTags(deltags, tags):
currentTags.remove(r)
return joinTags(currentTags)
# Fields
##############################################################################
def joinFields(list):
    "Combine a sequence of field strings into one string, separated by 0x1f."
    separator = "\x1f"
    return separator.join(list)
def splitFields(string):
    "Break a 0x1f-separated string into the list of field strings."
    separator = "\x1f"
    return string.split(separator)
# Misc
##############################################################################

View file

@ -88,34 +88,30 @@ def test_fieldChecksum():
f['Front'] = u"new"; f['Back'] = u"new2"
deck.addFact(f)
assert deck.db.scalar(
"select csum from fdata where ord = 0") == "22af645d"
"select csum from fsums") == "22af645d"
# empty field should have no checksum
f['Front'] = u""
f.flush()
assert deck.db.scalar(
"select csum from fdata where ord = 0") == ""
"select count() from fsums") == 0
# changing the val should change the checksum
f['Front'] = u"newx"
f.flush()
assert deck.db.scalar(
"select csum from fdata where ord = 0") == "4b0e5a4c"
# back should have no checksum, because it's not set to be unique
assert deck.db.scalar(
"select csum from fdata where ord = 1") == ""
# if we turn on unique, it should get a checksum
f.model.fields[1].conf['unique'] = True
"select csum from fsums") == "4b0e5a4c"
# turning off unique and modifying the fact should delete the sum
f.model.fields[0]['uniq'] = False
f.model.flush()
f.model.updateCache()
print deck.db.scalar(
"select csum from fdata where ord = 1")
f.flush()
assert deck.db.scalar(
"select csum from fdata where ord = 1") == "82f2ec5f"
# turning it off doesn't currently zero the checksum for efficiency reasons
# f.model.fields[1].conf['unique'] = False
# f.model.flush()
# f.model.updateCache()
# assert deck.db.scalar(
# "select csum from fdata where ord = 1") == ""
"select count() from fsums") == 0
# and turning on both should ensure two checksums generated
f.model.fields[0]['uniq'] = True
f.model.fields[1]['uniq'] = True
f.model.flush()
f.flush()
assert deck.db.scalar(
"select count() from fsums") == 2
def test_upgrade():
import tempfile, shutil