Anki/anki/facts.py
Damien Elmes 9c247f45bd remove q/a cache, tags in fields, rewrite remaining ids, more
Anki used random 64bit IDs for cards, facts and fields. This had some nice
properties:
- merging data in syncs and imports was simply a matter of copying each way,
  as conflicts were astronomically unlikely
- it made it easy to identify identical cards and prevent them from being
  reimported
But there were some negatives too:
- they're more expensive to store
- javascript can't handle numbers > 2**53, which means AnkiMobile, iAnki and
  so on have to treat the ids as strings, which is slow
- simply copying data in a sync or import can lead to corruption, as while a
  duplicate id indicates the data was originally the same, it may have
  diverged. A more intelligent approach is necessary.
- sqlite was sorting the fields table based on the id, which meant the fields
  were spread across the table, and costly to fetch

So instead, we'll move to incremental ids. In the case of model changes we'll
declare that a schema change and force a full sync to avoid having to deal
with conflicts, and in the case of cards and facts, we'll need to update the
ids on one end to merge. Identical cards can be detected by checking to see if
their id is the same and their creation time is the same.

Creation time has been added back to cards and facts because it's necessary
for sync conflict merging. That means facts.pos is not required.

The graves table has been removed. It's not necessary for schema related
changes, and dead cards/facts can be represented as a card with queue=-4 and
created=0. Because we will record schema modification time and can ensure a
full sync propagates to all endpoints, it means we can remove the dead
cards/facts on schema change.

Tags have been removed from the facts table and are represented as a field
with ord=-1 and fmid=0. Combined with the locality improvement for fields, it
means that fetching fields is not much more expensive than using the q/a
cache.

Because of the above, removing the q/a cache is a possibility now. The q and a
columns on cards have been dropped. It will still be necessary to render the
q/a on fact add/edit, since we need to record media references. It would be
nice to avoid this in the future. Perhaps one way would be the ability to
assign a type to fields, like "image", "audio", or "latex". LaTeX needs
special consideration anyway, as it was being rendered into the q/a cache.
2011-04-28 09:23:53 +09:00

137 lines
4.2 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import time
from anki.errors import AnkiError
from anki.utils import stripHTMLMedia, fieldChecksum, intTime, \
addTags, deleteTags, parseTags
class Fact(object):
    """A fact: one row of the facts table plus its field values and tags.

    Field values live in ``self._fields`` (a list indexed by field ord) and
    are also reachable by field name through the dict-style interface below.
    ``self._fmap`` maps each field name to a ``(fmid, ord, conf)`` tuple, as
    returned by ``model.fieldMap()``.
    """

    def __init__(self, deck, model=None, id=None):
        """Create a new, unsaved fact for `model`, or load fact `id`.

        Pass either `model` (new fact) or `id` (existing fact); passing both
        trips the assertion.
        """
        assert not (model and id)
        self.deck = deck
        if id:
            self.id = id
            self.load()
        else:
            self.id = None
            self.model = model
            self.mid = model.id
            self.crt = intTime()
            self.mod = self.crt
            self.tags = ""
            self.cache = ""
            self._fields = [""] * len(self.model.fields)
        # load() sets self.model as well, so the name map is built on both
        # the "new" and the "existing" path.
        self._fmap = self.model.fieldMap()

    def load(self):
        """Populate this fact from the database row identified by self.id."""
        (self.mid,
         self.crt,
         self.mod) = self.deck.db.first("""
select mid, crt, mod from facts where id = ?""", self.id)
        # "and fmid" keeps only rows with a non-zero fmid; flush() stores the
        # tags row with fmid=0, so tags are excluded from the field list.
        self._fields = self.deck.db.list("""
select val from fdata where fid = ? and fmid order by ord""", self.id)
        # tags are stored as the pseudo-field with ord = -1
        self.tags = self.deck.db.scalar("""
select val from fdata where fid = ? and ord = -1""", self.id)
        self.model = self.deck.getModel(self.mid)

    def flush(self, cache=True):
        """Write the fact, its fields and its tags to the database.

        Updates ``self.mod`` to the current time, and sets ``self.id`` to the
        row id reported by the database after the insert. The `cache`
        argument is accepted for compatibility but is not used here.
        """
        self.mod = intTime()
        # facts table
        self.cache = stripHTMLMedia(u" ".join(self._fields))
        res = self.deck.db.execute("""
insert or replace into facts values (?, ?, ?, ?, ?)""",
                                   self.id, self.mid, self.crt,
                                   self.mod, self.cache)
        self.id = res.lastrowid
        # fdata table: drop the old rows and rewrite them all
        self.deck.db.execute("delete from fdata where fid = ?", self.id)
        rows = [dict(fid=self.id, fmid=fmid, ord=pos, val=self._fields[pos])
                for (fmid, pos, conf) in self._fmap.values()]
        # tags are stored as a pseudo-field with fmid=0 and ord=-1
        rows.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags))
        # the last column is written empty here; presumably updateCache()
        # below fills it in (fieldUnique() queries it as csum) -- TODO confirm
        self.deck.db.executemany("""
insert into fdata values (:fid, :fmid, :ord, :val, '')""", rows)
        # media and caches
        self.deck.updateCache([self.id], "fact")

    def cards(self):
        """Return the cards belonging to this fact, ordered by card ord."""
        return [self.deck.getCard(id) for id in self.deck.db.list(
            "select id from cards where fid = ? order by ord", self.id)]

    # Dict interface
    ##################################################

    def keys(self):
        """Return the field names."""
        return self._fmap.keys()

    def values(self):
        """Return the list of field values, indexed by field ord."""
        return self._fields

    def items(self):
        """Return (field name, field value) pairs."""
        # each map entry is (fmid, ord, conf); the value list is indexed by
        # ord, not by the whole tuple
        return [(name, self._fields[entry[1]])
                for (name, entry) in self._fmap.items()]

    def _fieldOrd(self, key):
        """Return the ord of the field named `key`; raise KeyError if unknown."""
        try:
            return self._fmap[key][1]
        except (KeyError, TypeError):
            # TypeError covers unhashable keys; report both as a missing key
            raise KeyError(key)

    def __getitem__(self, key):
        return self._fields[self._fieldOrd(key)]

    def __setitem__(self, key, value):
        self._fields[self._fieldOrd(key)] = value

    def fieldsWithIds(self):
        """Return a dict mapping field name to (fmid, field value)."""
        return dict(
            (name, (entry[0], self[name]))
            for (name, entry) in self._fmap.items())

    # Tags
    ##################################################

    def addTags(self, tags):
        """Add `tags` to this fact's tag string."""
        self.tags = addTags(tags, self.tags)

    def deleteTags(self, tags):
        """Remove `tags` from this fact's tag string."""
        self.tags = deleteTags(tags, self.tags)

    # Unique/duplicate checks
    ##################################################

    def fieldUnique(self, name):
        """Return True if field `name` passes its uniqueness constraint.

        Fields whose conf does not set 'unique' always pass. Otherwise the
        fdata table is searched for a row with the same checksum and value;
        when this fact already has an id, its own rows are excluded.
        """
        (fmid, pos, conf) = self._fmap[name]
        if not conf['unique']:
            return True
        val = self[name]
        csum = fieldChecksum(val)
        if self.id:
            lim = "and fid != :fid"
        else:
            lim = ""
        return not self.deck.db.scalar(
            "select 1 from fdata where csum = :c %s and val = :v" % lim,
            c=csum, v=val, fid=self.id)

    def fieldComplete(self, name, text=None):
        """Return a truthy value if field `name` satisfies 'required'.

        Returns True when the field is not required; otherwise returns the
        field's value itself, which is falsy when empty. The `text` argument
        is accepted for compatibility but is not used here.
        """
        (fmid, pos, conf) = self._fmap[name]
        if not conf['required']:
            return True
        return self[name]

    def problems(self):
        """Return one entry per field, ordered by field ord.

        Each entry is "unique" if the field fails the uniqueness check,
        "required" if it fails the required check, or None if it is fine.
        """
        found = []
        for (name, (fmid, pos, conf)) in self._fmap.items():
            if not self.fieldUnique(name):
                issue = "unique"
            elif not self.fieldComplete(name):
                issue = "required"
            else:
                issue = None
            found.append((pos, issue))
        # dict iteration order is not tied to field order, so order the
        # result explicitly by ord to make the list positional
        found.sort(key=lambda pair: pair[0])
        return [issue for (pos, issue) in found]