importing

This commit is contained in:
Damien Elmes 2019-12-20 11:41:24 +10:00
parent c415a70e72
commit 55795822b5
7 changed files with 60 additions and 47 deletions

View file

@ -59,8 +59,11 @@ def timezoneOffset() -> int:
else: else:
return time.timezone//60 return time.timezone//60
from anki.schedv2 import Scheduler
# this is initialized by storage.Collection # this is initialized by storage.Collection
class _Collection: class _Collection:
sched: Scheduler
def __init__(self, db: DB, server: bool = False, log: bool = False) -> None: def __init__(self, db: DB, server: bool = False, log: bool = False) -> None:
self._debugLog = log self._debugLog = log

View file

@ -8,8 +8,10 @@ from anki.storage import Collection
from anki.utils import intTime, splitFields, joinFields from anki.utils import intTime, splitFields, joinFields
from anki.importing.base import Importer from anki.importing.base import Importer
from anki.lang import _ from anki.lang import _
from typing import Any from typing import Any, Optional
from anki.collection import _Collection
from typing import List, Union
GUID = 1 GUID = 1
MID = 2 MID = 2
MOD = 3 MOD = 3
@ -20,7 +22,7 @@ class Anki2Importer(Importer):
deckPrefix = None deckPrefix = None
allowUpdate = True allowUpdate = True
def __init__(self, col, file): def __init__(self, col: _Collection, file: str) -> None:
super().__init__(col, file) super().__init__(col, file)
# set later, defined here for typechecking # set later, defined here for typechecking
@ -28,7 +30,7 @@ class Anki2Importer(Importer):
self._decks = {} self._decks = {}
self.mustResetLearning = False self.mustResetLearning = False
def run(self, media=None) -> None: def run(self, media: None = None) -> None:
self._prepareFiles() self._prepareFiles()
if media is not None: if media is not None:
# Anki1 importer has provided us with a custom media folder # Anki1 importer has provided us with a custom media folder
@ -69,7 +71,7 @@ class Anki2Importer(Importer):
# Notes # Notes
###################################################################### ######################################################################
def _logNoteRow(self, action, noteRow) -> None: def _logNoteRow(self, action: str, noteRow: List[str]) -> None:
self.log.append("[%s] %s" % ( self.log.append("[%s] %s" % (
action, action,
noteRow[6].replace("\x1f", ", ") noteRow[6].replace("\x1f", ", ")
@ -186,7 +188,7 @@ class Anki2Importer(Importer):
# determine if note is a duplicate, and adjust mid and/or guid as required # determine if note is a duplicate, and adjust mid and/or guid as required
# returns true if note should be added # returns true if note should be added
def _uniquifyNote(self, note) -> bool: def _uniquifyNote(self, note: List[Union[int, str]]) -> bool:
origGuid = note[GUID] origGuid = note[GUID]
srcMid = note[MID] srcMid = note[MID]
dstMid = self._mid(srcMid) dstMid = self._mid(srcMid)
@ -212,7 +214,7 @@ class Anki2Importer(Importer):
"Prepare index of schema hashes." "Prepare index of schema hashes."
self._modelMap = {} self._modelMap = {}
def _mid(self, srcMid) -> Any: def _mid(self, srcMid: int) -> Any:
"Return local id for remote MID." "Return local id for remote MID."
# already processed this mid? # already processed this mid?
if srcMid in self._modelMap: if srcMid in self._modelMap:
@ -249,7 +251,7 @@ class Anki2Importer(Importer):
# Decks # Decks
###################################################################### ######################################################################
def _did(self, did) -> Any: def _did(self, did: int) -> Any:
"Given did in src col, return local id." "Given did in src col, return local id."
# already converted? # already converted?
if did in self._decks: if did in self._decks:
@ -393,7 +395,7 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
if fname.startswith("_") and not self.dst.media.have(fname): if fname.startswith("_") and not self.dst.media.have(fname):
self._writeDstMedia(fname, self._srcMediaData(fname)) self._writeDstMedia(fname, self._srcMediaData(fname))
def _mediaData(self, fname, dir=None) -> bytes: def _mediaData(self, fname: str, dir: Optional[str] = None) -> bytes:
if not dir: if not dir:
dir = self.src.media.dir() dir = self.src.media.dir()
path = os.path.join(dir, fname) path = os.path.join(dir, fname)
@ -403,15 +405,15 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
except (IOError, OSError): except (IOError, OSError):
return return
def _srcMediaData(self, fname) -> bytes: def _srcMediaData(self, fname: str) -> bytes:
"Data for FNAME in src collection." "Data for FNAME in src collection."
return self._mediaData(fname, self.src.media.dir()) return self._mediaData(fname, self.src.media.dir())
def _dstMediaData(self, fname) -> bytes: def _dstMediaData(self, fname: str) -> bytes:
"Data for FNAME in dst collection." "Data for FNAME in dst collection."
return self._mediaData(fname, self.dst.media.dir()) return self._mediaData(fname, self.dst.media.dir())
def _writeDstMedia(self, fname, data) -> None: def _writeDstMedia(self, fname: str, data: bytes) -> None:
path = os.path.join(self.dst.media.dir(), path = os.path.join(self.dst.media.dir(),
unicodedata.normalize("NFC", fname)) unicodedata.normalize("NFC", fname))
try: try:
@ -421,7 +423,7 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
# the user likely used subdirectories # the user likely used subdirectories
pass pass
def _mungeMedia(self, mid, fields) -> str: def _mungeMedia(self, mid: int, fields: str) -> str:
fields = splitFields(fields) fields = splitFields(fields)
def repl(match): def repl(match):
fname = match.group("fname") fname = match.group("fname")

View file

@ -9,9 +9,10 @@ from anki.utils import tmpfile
from anki.importing.anki2 import Anki2Importer from anki.importing.anki2 import Anki2Importer
from typing import Any from typing import Any
from anki.collection import _Collection
class AnkiPackageImporter(Anki2Importer): class AnkiPackageImporter(Anki2Importer):
def __init__(self, col, file): def __init__(self, col: _Collection, file: str) -> None:
super().__init__(col, file) super().__init__(col, file)
# set later; set here for typechecking # set later; set here for typechecking
self.nameToNum = {} self.nameToNum = {}
@ -53,7 +54,7 @@ class AnkiPackageImporter(Anki2Importer):
with open(path, "wb") as f: with open(path, "wb") as f:
f.write(z.read(c)) f.write(z.read(c))
def _srcMediaData(self, fname) -> Any: def _srcMediaData(self, fname: str) -> Any:
if fname in self.nameToNum: if fname in self.nameToNum:
return self.zip.read(self.nameToNum[fname]) return self.zip.read(self.nameToNum[fname])
return None return None

View file

@ -8,12 +8,13 @@ from typing import Any
# Base importer # Base importer
########################################################################## ##########################################################################
from anki.collection import _Collection
class Importer: class Importer:
needMapper = False needMapper = False
needDelimiter = False needDelimiter = False
def __init__(self, col, file) -> None: def __init__(self, col: _Collection, file: str) -> None:
self.file = file self.file = file
self.log = [] self.log = []
self.col = col self.col = col

View file

@ -10,12 +10,13 @@ from anki.lang import _
from typing import List from typing import List
from anki.collection import _Collection
class TextImporter(NoteImporter): class TextImporter(NoteImporter):
needDelimiter = True needDelimiter = True
patterns = "\t|,;:" patterns = "\t|,;:"
def __init__(self, col, file): def __init__(self, col: _Collection, file: str) -> None:
NoteImporter.__init__(self, col, file) NoteImporter.__init__(self, col, file)
self.lines = None self.lines = None
self.fileobj = None self.fileobj = None
@ -56,7 +57,7 @@ class TextImporter(NoteImporter):
self.fileobj.close() self.fileobj.close()
return notes return notes
def open(self): def open(self) -> None:
"Parse the top line and determine the pattern and number of fields." "Parse the top line and determine the pattern and number of fields."
# load & look for the right pattern # load & look for the right pattern
self.cacheFile() self.cacheFile()
@ -122,12 +123,12 @@ class TextImporter(NoteImporter):
err() err()
self.initMapping() self.initMapping()
def fields(self): def fields(self) -> int:
"Number of fields." "Number of fields."
self.open() self.open()
return self.numFields return self.numFields
def noteFromFields(self, fields) -> ForeignNote: def noteFromFields(self, fields: List[str]) -> ForeignNote:
note = ForeignNote() note = ForeignNote()
note.fields.extend([x for x in fields]) note.fields.extend([x for x in fields])
note.tags.extend(self.tagsToAdd) note.tags.extend(self.tagsToAdd)

View file

@ -17,6 +17,8 @@ from typing import Any, List, Optional
# Stores a list of fields, tags and deck # Stores a list of fields, tags and deck
###################################################################### ######################################################################
from anki.collection import _Collection
from typing import List, Optional, Union
class ForeignNote: class ForeignNote:
"An temporary object storing fields and attributes." "An temporary object storing fields and attributes."
def __init__(self) -> None: def __init__(self) -> None:
@ -24,6 +26,7 @@ class ForeignNote:
self.tags = [] self.tags = []
self.deck = None self.deck = None
self.cards = {} # map of ord -> card self.cards = {} # map of ord -> card
self.fieldsStr = ""
class ForeignCard: class ForeignCard:
def __init__(self) -> None: def __init__(self) -> None:
@ -54,14 +57,14 @@ class NoteImporter(Importer):
allowHTML = False allowHTML = False
importMode = 0 importMode = 0
def __init__(self, col, file): def __init__(self, col: _Collection, file: str) -> None:
Importer.__init__(self, col, file) Importer.__init__(self, col, file)
self.model = col.models.current() self.model = col.models.current()
self.mapping = None self.mapping = None
self._deckMap = {} self._deckMap = {}
self._tagsMapped = False self._tagsMapped = False
def run(self): def run(self) -> None:
"Import." "Import."
assert self.mapping assert self.mapping
c = self.foreignNotes() c = self.foreignNotes()
@ -93,7 +96,7 @@ class NoteImporter(Importer):
"Open file and ensure it's in the right format." "Open file and ensure it's in the right format."
return return
def importNotes(self, notes) -> None: def importNotes(self, notes: List[ForeignNote]) -> None:
"Convert each card into a note, apply attributes and add to col." "Convert each card into a note, apply attributes and add to col."
assert self.mappingOk() assert self.mappingOk()
# note whether tags are mapped # note whether tags are mapped
@ -220,7 +223,7 @@ This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields.""")) content in the text file to the correct fields."""))
self.total = len(self._ids) self.total = len(self._ids)
def newData(self, n) -> Optional[list]: def newData(self, n: ForeignNote) -> Optional[list]:
id = self._nextID id = self._nextID
self._nextID += 1 self._nextID += 1
self._ids.append(id) self._ids.append(id)
@ -234,12 +237,12 @@ content in the text file to the correct fields."""))
intTime(), self.col.usn(), self.col.tags.join(n.tags), intTime(), self.col.usn(), self.col.tags.join(n.tags),
n.fieldsStr, "", "", 0, ""] n.fieldsStr, "", "", 0, ""]
def addNew(self, rows) -> None: def addNew(self, rows: List[List[Union[int, str]]]) -> None:
self.col.db.executemany( self.col.db.executemany(
"insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)", "insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
rows) rows)
def updateData(self, n, id, sflds) -> Optional[list]: def updateData(self, n: ForeignNote, id: int, sflds: List[str]) -> Optional[list]:
self._ids.append(id) self._ids.append(id)
if not self.processFields(n, sflds): if not self.processFields(n, sflds):
return return
@ -252,7 +255,7 @@ content in the text file to the correct fields."""))
return [intTime(), self.col.usn(), n.fieldsStr, return [intTime(), self.col.usn(), n.fieldsStr,
id, n.fieldsStr] id, n.fieldsStr]
def addUpdates(self, rows) -> None: def addUpdates(self, rows: List[List[Union[int, str]]]) -> None:
old = self.col.db.totalChanges() old = self.col.db.totalChanges()
if self._tagsMapped: if self._tagsMapped:
self.col.db.executemany(""" self.col.db.executemany("""
@ -264,7 +267,7 @@ update notes set mod = ?, usn = ?, flds = ?
where id = ? and flds != ?""", rows) where id = ? and flds != ?""", rows)
self.updateCount = self.col.db.totalChanges() - old self.updateCount = self.col.db.totalChanges() - old
def processFields(self, note, fields=None) -> Any: def processFields(self, note: ForeignNote, fields: Optional[List[str]] = None) -> Any:
if not fields: if not fields:
fields = [""]*len(self.model['flds']) fields = [""]*len(self.model['flds'])
for c, f in enumerate(self.mapping): for c, f in enumerate(self.mapping):

View file

@ -13,8 +13,10 @@ from anki.lang import ngettext
from xml.dom import minidom from xml.dom import minidom
from string import capwords from string import capwords
import re, unicodedata, time import re, unicodedata, time
from typing import Any, List from typing import Any, List, Optional
from anki.collection import _Collection
from xml.dom.minidom import Element, Text
class SmartDict(dict): class SmartDict(dict):
""" """
See http://www.peterbe.com/plog/SmartDict See http://www.peterbe.com/plog/SmartDict
@ -41,7 +43,7 @@ class SmartDict(dict):
class SuperMemoElement(SmartDict): class SuperMemoElement(SmartDict):
"SmartDict wrapper to store SM Element data" "SmartDict wrapper to store SM Element data"
def __init__(self, *a, **kw): def __init__(self, *a, **kw) -> None:
SmartDict.__init__(self, *a, **kw) SmartDict.__init__(self, *a, **kw)
#default content #default content
self.__dict__['lTitle'] = None self.__dict__['lTitle'] = None
@ -80,7 +82,7 @@ class SupermemoXmlImporter(NoteImporter):
Code should be upgrade to support importing of SM2006 exports. Code should be upgrade to support importing of SM2006 exports.
""" """
def __init__(self, col, file): def __init__(self, col: _Collection, file: str) -> None:
"""Initialize internal varables. """Initialize internal varables.
Pameters to be exposed to GUI are stored in self.META""" Pameters to be exposed to GUI are stored in self.META"""
NoteImporter.__init__(self, col, file) NoteImporter.__init__(self, col, file)
@ -120,17 +122,17 @@ class SupermemoXmlImporter(NoteImporter):
## TOOLS ## TOOLS
def _fudgeText(self, text) -> Any: def _fudgeText(self, text: str) -> Any:
"Replace sm syntax to Anki syntax" "Replace sm syntax to Anki syntax"
text = text.replace("\n\r", "<br>") text = text.replace("\n\r", "<br>")
text = text.replace("\n", "<br>") text = text.replace("\n", "<br>")
return text return text
def _unicode2ascii(self,str) -> str: def _unicode2ascii(self,str: str) -> str:
"Remove diacritic punctuation from strings (titles)" "Remove diacritic punctuation from strings (titles)"
return "".join([ c for c in unicodedata.normalize('NFKD', str) if not unicodedata.combining(c)]) return "".join([ c for c in unicodedata.normalize('NFKD', str) if not unicodedata.combining(c)])
def _decode_htmlescapes(self,s) -> str: def _decode_htmlescapes(self,s: str) -> str:
"""Unescape HTML code.""" """Unescape HTML code."""
#In case of bad formated html you can import MinimalSoup etc.. see btflsoup source code #In case of bad formated html you can import MinimalSoup etc.. see btflsoup source code
from bs4 import BeautifulSoup as btflsoup from bs4 import BeautifulSoup as btflsoup
@ -143,7 +145,7 @@ class SupermemoXmlImporter(NoteImporter):
return str(btflsoup(s, "html.parser")) return str(btflsoup(s, "html.parser"))
def _afactor2efactor(self, af) -> Any: def _afactor2efactor(self, af: float) -> Any:
# Adapted from <http://www.supermemo.com/beta/xml/xml-core.htm> # Adapted from <http://www.supermemo.com/beta/xml/xml-core.htm>
# Ranges for A-factors and E-factors # Ranges for A-factors and E-factors
@ -183,12 +185,12 @@ class SupermemoXmlImporter(NoteImporter):
self.log.append(ngettext("%d card imported.", "%d cards imported.", self.total) % self.total) self.log.append(ngettext("%d card imported.", "%d cards imported.", self.total) % self.total)
return self.notes return self.notes
def fields(self): def fields(self) -> int:
return 2 return 2
## PARSER METHODS ## PARSER METHODS
def addItemToCards(self,item) -> None: def addItemToCards(self,item: SuperMemoElement) -> None:
"This method actually do conversion" "This method actually do conversion"
# new anki card # new anki card
@ -248,7 +250,7 @@ class SupermemoXmlImporter(NoteImporter):
self.notes.append(note) self.notes.append(note)
def logger(self,text,level=1) -> None: def logger(self,text: str,level: int = 1) -> None:
"Wrapper for Anki logger" "Wrapper for Anki logger"
dLevels={0:'',1:'Info',2:'Verbose',3:'Debug'} dLevels={0:'',1:'Info',2:'Verbose',3:'Debug'}
@ -283,7 +285,7 @@ class SupermemoXmlImporter(NoteImporter):
import io import io
return io.StringIO(str(source)) return io.StringIO(str(source))
def loadSource(self, source) -> None: def loadSource(self, source: str) -> None:
"""Load source file and parse with xml.dom.minidom""" """Load source file and parse with xml.dom.minidom"""
self.source = source self.source = source
self.logger('Load started...') self.logger('Load started...')
@ -294,7 +296,7 @@ class SupermemoXmlImporter(NoteImporter):
# PARSE # PARSE
def parse(self, node=None) -> None: def parse(self, node: Optional[Any] = None) -> None:
"Parse method - parses document elements" "Parse method - parses document elements"
if node is None and self.xmldoc is not None: if node is None and self.xmldoc is not None:
@ -312,7 +314,7 @@ class SupermemoXmlImporter(NoteImporter):
self.parse(node.documentElement) self.parse(node.documentElement)
def parse_Element(self, node) -> None: def parse_Element(self, node: Element) -> None:
"Parse XML element" "Parse XML element"
_method = "do_%s" % node.tagName _method = "do_%s" % node.tagName
@ -323,7 +325,7 @@ class SupermemoXmlImporter(NoteImporter):
self.logger('No handler for method %s' % _method, level=3) self.logger('No handler for method %s' % _method, level=3)
#print traceback.print_exc() #print traceback.print_exc()
def parse_Text(self, node) -> None: def parse_Text(self, node: Text) -> None:
"Parse text inside elements. Text is stored into local buffer." "Parse text inside elements. Text is stored into local buffer."
text = node.data text = node.data
@ -337,12 +339,12 @@ class SupermemoXmlImporter(NoteImporter):
# DO # DO
def do_SuperMemoCollection(self, node) -> None: def do_SuperMemoCollection(self, node: Element) -> None:
"Process SM Collection" "Process SM Collection"
for child in node.childNodes: self.parse(child) for child in node.childNodes: self.parse(child)
def do_SuperMemoElement(self, node) -> None: def do_SuperMemoElement(self, node: Element) -> None:
"Process SM Element (Type - Title,Topics)" "Process SM Element (Type - Title,Topics)"
self.logger('='*45, level=3) self.logger('='*45, level=3)
@ -392,14 +394,14 @@ class SupermemoXmlImporter(NoteImporter):
t = self.cntMeta['title'].pop() t = self.cntMeta['title'].pop()
self.logger('End of topic \t- %s' % (t), level=2) self.logger('End of topic \t- %s' % (t), level=2)
def do_Content(self, node) -> None: def do_Content(self, node: Element) -> None:
"Process SM element Content" "Process SM element Content"
for child in node.childNodes: for child in node.childNodes:
if hasattr(child,'tagName') and child.firstChild is not None: if hasattr(child,'tagName') and child.firstChild is not None:
self.cntElm[-1][child.tagName]=child.firstChild.data self.cntElm[-1][child.tagName]=child.firstChild.data
def do_LearningData(self, node) -> None: def do_LearningData(self, node: Element) -> None:
"Process SM element LearningData" "Process SM element LearningData"
for child in node.childNodes: for child in node.childNodes:
@ -416,7 +418,7 @@ class SupermemoXmlImporter(NoteImporter):
# for child in node.childNodes: self.parse(child) # for child in node.childNodes: self.parse(child)
# self.cntElm[-1][node.tagName]=self.cntBuf.pop() # self.cntElm[-1][node.tagName]=self.cntBuf.pop()
def do_Title(self, node) -> None: def do_Title(self, node: Element) -> None:
"Process SM element Title" "Process SM element Title"
t = self._decode_htmlescapes(node.firstChild.data) t = self._decode_htmlescapes(node.firstChild.data)
@ -426,7 +428,7 @@ class SupermemoXmlImporter(NoteImporter):
self.logger('Start of topic \t- ' + " / ".join(self.cntMeta['title']), level=2) self.logger('Start of topic \t- ' + " / ".join(self.cntMeta['title']), level=2)
def do_Type(self, node) -> None: def do_Type(self, node: Element) -> None:
"Process SM element Type" "Process SM element Type"
if len(self.cntBuf) >=1 : if len(self.cntBuf) >=1 :