diff --git a/anki/importing/cardimp.py b/anki/importing/cardimp.py deleted file mode 100644 index 18d980b8d..000000000 --- a/anki/importing/cardimp.py +++ /dev/null @@ -1,321 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright: Damien Elmes -# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html - -import time -from anki.lang import _ -from anki.utils import fieldChecksum, ids2str -from anki.errors import * -from anki.importing.base import Importer -#from anki.deck import NEW_CARDS_RANDOM - -# Stores a list of fields, tags, and optionally properties like 'ivl' -###################################################################### - -class ForeignCard(object): - "An temporary object storing fields and attributes." - def __init__(self): - self.fields = [] - self.tags = u"" - -# Base class for csv/supermemo/etc importers -###################################################################### - -class CardImporter(Importer): - - needMapper = True - tagDuplicates = False - # if set, update instead of regular importing - # (foreignCardFieldIndex, fieldModelId) - updateKey = None - needDelimiter = False - - def __init__(self, col, file): - Importer.__init__(self, col, file) - self._model = col.currentModel - self.tagsToAdd = u"" - self._mapping = None - - def run(self): - "Import." - if self.updateKey is not None: - return self.doUpdate() - random = self.col.newCardOrder == NEW_CARDS_RANDOM - num = 6 - if random: - num += 1 - c = self.foreignCards() - if self.importCards(c): - self.col.updateCardTags(self.cardIds) - if random: - self.col.randomizeNewCards(self.cardIds) - if c: - self.col.setModified() - - def doUpdate(self): - # grab the data from the external file - cards = self.foreignCards() - # grab data from db - fields = self.col.db.all(""" -select noteId, value from fields where fieldModelId = :id -and value != ''""", - id=self.updateKey[1]) - # hash it - vhash = {} - nids = [] - for (nid, val) in fields: - nids.append(nid) - vhash[val] = nid - # prepare tags - tagsIdx = None - try: - tagsIdx = self.mapping.index(0) - for c in cards: - c.tags = canonifyTags(self.tagsToAdd + " " + c.fields[tagsIdx]) - except ValueError: - pass - # look for matches - upcards = [] - newcards = [] - for c in cards: - v = c.fields[self.updateKey[0]] - if v in vhash: - # ignore empty keys - if v: - # nid, card - upcards.append((vhash[v], c)) - else: - newcards.append(c) - # update fields - for fm in self.model.fieldModels: - if fm.id == self.updateKey[1]: - # don't update key - continue - try: - index = self.mapping.index(fm) - except ValueError: - # not mapped - continue - data = [{'nid': nid, - 'fmid': fm.id, - 'v': c.fields[index], - 'chk': self.maybeChecksum(c.fields[index], fm.unique)} - for (nid, c) in upcards] - self.col.db.execute(""" -update fields set value = :v, chksum = :chk where noteId = :nid -and fieldModelId = :fmid""", data) - # update tags - if tagsIdx is not None: - data = [{'nid': nid, - 't': c.fields[tagsIdx]} - for (nid, c) in upcards] - self.col.db.execute( - "update notes set tags = :t where id = :nid", - data) - # rebuild caches - cids = self.col.db.column0( - "select id from cards where noteId in %s" % - ids2str(nids)) - self.col.updateCardTags(cids) - self.col.updateCardsFromNoteIds(nids) - self.total = len(cards) - self.col.setModified() - - def fields(self): - "The number of fields." - return 0 - - def maybeChecksum(self, data, unique): - if not unique: - return "" - return fieldChecksum(data) - - def foreignCards(self): - "Return a list of foreign cards for importing." - assert 0 - - def resetMapping(self): - "Reset mapping to default." - numFields = self.fields() - m = [f for f in self.model.fieldModels] - m.append(0) - rem = max(0, self.fields() - len(m)) - m += [None] * rem - del m[numFields:] - self._mapping = m - - def getMapping(self): - if not self._mapping: - self.resetMapping() - return self._mapping - - def setMapping(self, mapping): - self._mapping = mapping - - mapping = property(getMapping, setMapping) - - def getModel(self): - return self._model - - def setModel(self, model): - self._model = model - # update the mapping for the new model - self._mapping = None - self.getMapping() - - model = property(getModel, setModel) - - def importCards(self, cards): - "Convert each card into a note, apply attributes and add to col." - # ensure all unique and required fields are mapped - for fm in self.model.fieldModels: - if fm.required or fm.unique: - if fm not in self.mapping: - raise ImportFormatError( - type="missingRequiredUnique", - info=_("Missing required/unique field '%(field)s'") % - {'field': fm.name}) - active = 0 - for cm in self.model.cardModels: - if cm.active: active += 1 - # strip invalid cards - cards = self.stripInvalid(cards) - cards = self.stripOrTagDupes(cards) - self.cardIds = [] - if cards: - self.addCards(cards) - return cards - - def addCards(self, cards): - "Add notes in bulk from foreign cards." - # map tags field to attr - try: - idx = self.mapping.index(0) - for c in cards: - c.tags += " " + c.fields[idx] - except ValueError: - pass - # add notes - noteIds = [genID() for n in range(len(cards))] - noteCreated = {} - def fudgeCreated(d, tmp=[]): - if not tmp: - tmp.append(time.time()) - else: - tmp[0] += 0.0001 - d['created'] = tmp[0] - noteCreated[d['id']] = d['created'] - return d - self.col.db.execute(notesTable.insert(), - [fudgeCreated({'modelId': self.model.id, - 'tags': canonifyTags(self.tagsToAdd + " " + cards[n].tags), - 'id': noteIds[n]}) for n in range(len(cards))]) - self.col.db.execute(""" -delete from notesDeleted -where noteId in (%s)""" % ",".join([str(s) for s in noteIds])) - # add all the fields - for fm in self.model.fieldModels: - try: - index = self.mapping.index(fm) - except ValueError: - index = None - data = [{'noteId': noteIds[m], - 'fieldModelId': fm.id, - 'ordinal': fm.ordinal, - 'id': genID(), - 'value': (index is not None and - cards[m].fields[index] or u""), - 'chksum': self.maybeChecksum( - index is not None and - cards[m].fields[index] or u"", fm.unique) - } - for m in range(len(cards))] - self.col.db.execute(fieldsTable.insert(), - data) - # and cards - active = 0 - for cm in self.model.cardModels: - if cm.active: - active += 1 - data = [self.addMeta({ - 'id': genID(), - 'noteId': noteIds[m], - 'noteCreated': noteCreated[noteIds[m]], - 'cardModelId': cm.id, - 'ordinal': cm.ordinal, - 'question': u"", - 'answer': u"" - },cards[m]) for m in range(len(cards))] - self.col.db.execute(cardsTable.insert(), - data) - self.col.updateCardsFromNoteIds(noteIds) - self.total = len(noteIds) - - def addMeta(self, data, card): - "Add any scheduling metadata to cards" - if 'fields' in card.__dict__: - del card.fields - t = data['noteCreated'] + data['ordinal'] * 0.00001 - data['created'] = t - data['modified'] = t - data['due'] = t - data.update(card.__dict__) - data['tags'] = u"" - self.cardIds.append(data['id']) - data['combinedDue'] = data['due'] - if data.get('successive', 0): - t = 1 - elif data.get('reps', 0): - t = 0 - else: - t = 2 - data['type'] = t - data['queue'] = t - return data - - def stripInvalid(self, cards): - return [c for c in cards if self.cardIsValid(c)] - - def cardIsValid(self, card): - fieldNum = len(card.fields) - for n in range(len(self.mapping)): - if self.mapping[n] and self.mapping[n].required: - if fieldNum <= n or not card.fields[n].strip(): - self.log.append("Note is missing field '%s': %s" % - (self.mapping[n].name, - ", ".join(card.fields))) - return False - return True - - def stripOrTagDupes(self, cards): - # build a cache of items - self.uniqueCache = {} - for field in self.mapping: - if field and field.unique: - self.uniqueCache[field.id] = self.getUniqueCache(field) - return [c for c in cards if self.cardIsUnique(c)] - - def getUniqueCache(self, field): - "Return a dict with all fields, to test for uniqueness." - return dict(self.col.db.all( - "select value, 1 from fields where fieldModelId = :fmid", - fmid=field.id)) - - def cardIsUnique(self, card): - fieldsAsTags = [] - for n in range(len(self.mapping)): - if self.mapping[n] and self.mapping[n].unique: - if card.fields[n] in self.uniqueCache[self.mapping[n].id]: - if not self.tagDuplicates: - self.log.append("Note has duplicate '%s': %s" % - (self.mapping[n].name, - ", ".join(card.fields))) - return False - fieldsAsTags.append(self.mapping[n].name.replace(" ", "-")) - else: - self.uniqueCache[self.mapping[n].id][card.fields[n]] = 1 - if fieldsAsTags: - card.tags += u" Duplicate:" + ( - "+".join(fieldsAsTags)) - card.tags = canonifyTags(card.tags) - return True diff --git a/anki/importing/csvfile.py b/anki/importing/csvfile.py index 07f5fea1d..4ea330207 100644 --- a/anki/importing/csvfile.py +++ b/anki/importing/csvfile.py @@ -3,26 +3,26 @@ # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html import codecs, csv, re -from anki.importing.cardimp import CardImporter, ForeignCard +from anki.importing.noteimp import NoteImporter, ForeignNote from anki.lang import _ from anki.errors import * -class TextImporter(CardImporter): +class TextImporter(NoteImporter): needDelimiter = True patterns = ("\t", ";") def __init__(self, *args): - Importer.__init__(self, *args) + NoteImporter.__init__(self, *args) self.lines = None self.fileobj = None self.delimiter = None - def foreignCards(self): + def foreignNotes(self): self.sniff() # process all lines log = [] - cards = [] + notes = [] lineNum = 0 ignored = 0 if self.delimiter: @@ -46,12 +46,12 @@ class TextImporter(CardImporter): }) ignored += 1 continue - card = self.cardFromFields(row) - cards.append(card) + note = self.noteFromFields(row) + notes.append(note) self.log = log self.ignored = ignored self.fileobj.close() - return cards + return notes def sniff(self): "Parse the top line and determine the pattern and number of fields." @@ -77,7 +77,7 @@ class TextImporter(CardImporter): self.data = [sub(x) for x in self.data.split("\n") if sub(x)] if self.data: if self.data[0].startswith("tags:"): - self.tagsToAdd = self.data[0][5:] + self.tagsToAdd = self.data[0][5:].split(" ") del self.data[0] self.updateDelimiter() if not self.dialect and not self.delimiter: @@ -128,7 +128,8 @@ class TextImporter(CardImporter): self.sniff() return self.numFields - def cardFromFields(self, fields): - card = ForeignCard() - card.fields.extend([x.strip() for x in fields]) - return card + def noteFromFields(self, fields): + note = ForeignNote() + note.fields.extend([x.strip() for x in fields]) + print "fixme - add tagsToAdd to note tags" + return note diff --git a/anki/importing/noteimp.py b/anki/importing/noteimp.py new file mode 100644 index 000000000..76d5699a8 --- /dev/null +++ b/anki/importing/noteimp.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +# Copyright: Damien Elmes +# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +import time +from anki.lang import _ +from anki.utils import fieldChecksum, ids2str, guid64, timestampID, \ + joinFields, intTime +from anki.errors import * +from anki.importing.base import Importer +#from anki.deck import NEW_CARDS_RANDOM + +# Stores a list of fields, tags and deck +###################################################################### + +class ForeignNote(object): + "An temporary object storing fields and attributes." + def __init__(self): + self.fields = [] + self.tags = [] + self.deck = None + +# Base class for csv/supermemo/etc importers +###################################################################### + +# - instead of specifying an update key, do it by default using first field + + +# The mapping is list of input fields, like: +# ['Expression', 'Reading', '_tags', None] +# - None means that the input should be discarded +# - _tags maps to note tags +# - _deck maps to card deck +# If the first field of the model is not in the map, the map is invalid. + +class NoteImporter(Importer): + + needMapper = True + needDelimiter = False + update = True + + def __init__(self, col, file): + Importer.__init__(self, col, file) + self.model = col.models.current() + self.mapping = None + self._deckMap = {} + + def run(self): + "Import." + print "fixme: randomize" + assert self.mapping + c = self.foreignNotes() + self.importNotes(c) + + def fields(self): + "The number of fields." + return 0 + + def maybeChecksum(self, data, unique): + if not unique: + return "" + return fieldChecksum(data) + + def foreignNotes(self): + "Return a list of foreign notes for importing." + assert 0 + + def importNotes(self, notes): + "Convert each card into a note, apply attributes and add to col." + # gather checks for duplicate comparison + csums = {} + for csum, id in self.col.db.execute( + "select csum, id from notes where mid = ?", self.model['id']): + if csum in csums: + csums[csum].append(id) + else: + csums[csum] = [id] + fld0idx = self.mapping.index(self.model['flds'][0]['name']) + self._fmap = self.col.models.fieldMap(self.model) + self._nextID = timestampID(self.col.db, "notes") + # loop through the notes + updates = [] + new = [] + self._ids = [] + for n in notes: + fld0 = n.fields[fld0idx] + csum = fieldChecksum(fld0) + # first field must exist + if not fld0: + self.log.append(_("Empty first field: %s") % + " ".join(n.fields)) + continue + # already exists? + if csum in csums: + if csums[csum] == -1: + # duplicates in source file; log and ignore + self.log.append(_("Appeared twice in file: %s") % + fld0) + continue + # csum is not a guarantee; have to check + for id in csums[csum]: + flds = self.col.db.scalar( + "select flds from notes where id = ?", id) + if fld0 == splitFields(flds)[0]: + # duplicate + data = self.updateData(n, id) + if data: + updates.append(data) + break + # newly add + else: + data = self.newData(n) + if data: + new.append(data) + # note that we've seen this note once already + csums[fieldChecksum(n.fields[0])] = -1 + self.addNew(new) + self.addUpdates(updates) + self.col.updateFieldCache(self._ids) + assert not self.col.genCards(self._ids) + # make sure to update sflds, etc + self.total = len(self._ids) + + def newData(self, n): + id = self._nextID + self._nextID += 1 + self._ids.append(id) + if not self.processFields(n): + print "no cards generated" + return + return [id, guid64(), self.model['id'], self.didForNote(n), + intTime(), self.col.usn(), self.col.tags.join(n.tags), + n.fieldsStr, "", "", 0, ""] + + def addNew(self, rows): + self.col.db.executemany( + "insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?,?)", + rows) + + # need to document that deck is ignored in this case + def updateData(self, n, id): + self._ids.append(id) + if not self.processFields(n): + print "no cards generated" + return + tags = self.col.tags.join(n.tags) + return [intTime(), self.col.usn(), n.fieldsStr, tags, + id, n.fieldsStr, tags] + + def addUpdates(self, rows): + self.col.db.executemany(""" +update notes set mod = ?, usn = ?, flds = ?, tags = ? +where id = ? and (flds != ? or tags != ?)""", rows) + + def didForNote(self, n): + if not n.deck: + n.deck = _("Imported") + if n.deck not in self._deckMap: + self._deckMap[n.deck] = self.col.decks.id(n.deck) + return self._deckMap[n.deck] + + def processFields(self, note): + fields = [""]*len(self.model['flds']) + for c, f in enumerate(self.mapping): + if not f: + continue + elif f == "_tags": + note.tags.extend(self.col.tags.split(note.fields[c])) + elif f == "_deck": + note.deck = note.fields[c] + else: + sidx = self._fmap[f][0] + fields[sidx] = note.fields[c] + note.fieldsStr = joinFields(fields) + return self.col.models.availOrds(self.model, note.fieldsStr) diff --git a/anki/importing/supermemo_xml.py b/anki/importing/supermemo_xml.py index c3ff04ba7..f3c148fa6 100644 --- a/anki/importing/supermemo_xml.py +++ b/anki/importing/supermemo_xml.py @@ -4,7 +4,7 @@ import sys -from anki.importing.cardimp import CardImporter, ForeignCard +from anki.importing.noteimp import NoteImporter, ForeignNote from anki.lang import _ from anki.errors import * @@ -63,7 +63,7 @@ class SuperMemoElement(SmartDict): # This is an AnkiImporter -class SupermemoXmlImporter(CardImporter): +class SupermemoXmlImporter(NoteImporter): """ Supermemo XML export's to Anki parser. Goes through a SM collection and fetch all elements. diff --git a/anki/utils.py b/anki/utils.py index d715b3676..2ef5d7075 100644 --- a/anki/utils.py +++ b/anki/utils.py @@ -225,7 +225,7 @@ def checksum(data): return sha1(data).hexdigest() def fieldChecksum(data): - # 32 bit unsigned number from first 8 digits of md5 hash + # 32 bit unsigned number from first 8 digits of sha1 hash return int(checksum(data.encode("utf-8"))[:8], 16) # Temp files diff --git a/tests/test_importing.py b/tests/test_importing.py index 38aae7ce5..7f9143087 100644 --- a/tests/test_importing.py +++ b/tests/test_importing.py @@ -77,23 +77,24 @@ def test_anki1(): check() def test_csv(): - print "disabled"; return - deck = Deck() - deck.addModel(BasicModel()) - file = unicode(os.path.join(testDir, "importing/text-2fields.txt")) - i = csvfile.TextImporter(deck, file) + deck = getEmptyDeck() + file = unicode(os.path.join(testDir, "support/text-2fields.txt")) + i = TextImporter(deck, file) + i.mapping = ['Front', 'Back'] i.run() - # four problems - missing front, dupe front, wrong num of fields + print i.log + # four problems - too many & too few fields, a missing front, and a + # duplicate entry assert len(i.log) == 4 assert i.total == 5 + print deck.db.all("select * from notes") deck.close() def test_csv_tags(): print "disabled"; return - deck = Deck() - deck.addModel(BasicModel()) - file = unicode(os.path.join(testDir, "importing/text-tags.txt")) - i = csvfile.TextImporter(deck, file) + deck = getEmptyDeck() + file = unicode(os.path.join(testDir, "support/text-tags.txt")) + i = TextImporter(deck, file) i.run() notes = deck.db.query(Note).all() assert len(notes) == 2