mirror of
https://github.com/ankitects/anki.git
synced 2025-09-24 16:56:36 -04:00
switch to python csv
This commit is contained in:
parent
b6a50db1c2
commit
e62967ecb1
6 changed files with 115 additions and 138 deletions
|
@ -253,7 +253,7 @@ where factId in (%s)""" % ",".join([str(s) for s in factIds]))
|
||||||
# Export modules
|
# Export modules
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
from anki.importing.csv import TextImporter
|
from anki.importing.csvfile import TextImporter
|
||||||
from anki.importing.anki10 import Anki10Importer
|
from anki.importing.anki10 import Anki10Importer
|
||||||
from anki.importing.mnemosyne10 import Mnemosyne10Importer
|
from anki.importing.mnemosyne10 import Mnemosyne10Importer
|
||||||
from anki.importing.wcu import WCUImporter
|
from anki.importing.wcu import WCUImporter
|
||||||
|
|
|
@ -1,130 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Copyright: Damien Elmes <anki@ichi2.net>
|
|
||||||
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
|
|
||||||
|
|
||||||
"""\
|
|
||||||
Importing CSV/TSV files
|
|
||||||
========================
|
|
||||||
"""
|
|
||||||
__docformat__ = 'restructuredtext'
|
|
||||||
|
|
||||||
import codecs
|
|
||||||
from anki.importing import Importer, ForeignCard
|
|
||||||
from anki.lang import _
|
|
||||||
from anki.errors import *
|
|
||||||
from anki.utils import tidyHTML
|
|
||||||
|
|
||||||
class TextImporter(Importer):
    """Importer for text files whose fields are split by a tab or semicolon.

    The separator and the expected field count are taken from the first
    non-blank line; every other line is split on that same separator.
    """

    # separators tried, in order, against the first non-blank line
    patterns = ("\t", ";")

    def __init__(self, *args):
        Importer.__init__(self, *args)
        # cached list of unicode lines, filled in lazily by cacheFile()
        self.lines = None

    def foreignCards(self):
        """Return a ForeignCard for each line with the expected field count.

        Problems are recorded in self.log and counted in self.ignored
        instead of aborting the import.
        """
        self.parseTopLine()
        log = []
        cards = []
        ignored = 0
        lineNum = 0
        for line in self.lines:
            lineNum += 1
            if not line.strip():
                # ignore blank lines
                continue
            try:
                fields = self.parseLine(line)
            except ValueError:
                # was a bare `pattern`, which is not defined in this method
                log.append(_("Line %(line)d doesn't match pattern '%(pat)s'")
                           % {
                    'line': lineNum,
                    'pat': self.pattern,
                    })
                ignored += 1
                continue
            if len(fields) != self.numFields:
                log.append(_(
                    "Line %(line)d had %(num1)d fields,"
                    " expected %(num2)d") % {
                    "line": lineNum,
                    "num1": len(fields),
                    "num2": self.numFields,
                    })
                ignored += 1
                continue
            cards.append(self.cardFromFields(fields))
        self.log = log
        self.ignored = ignored
        return cards

    def parseTopLine(self):
        """Determine the separator and field count from the first real line.

        Sets self.pattern and self.numFields.  Raises ImportFormatError
        with type "emptyFile" when there is no non-blank line, or
        "invalidPattern" when that line contains none of self.patterns.
        """
        self.cacheFile()
        # look for the first non-blank line
        top = None
        for line in self.lines:
            if line.strip():
                top = line
                break
        if not top:
            raise ImportFormatError(type="emptyFile",
                                    info=_("The file had no non-empty lines."))
        for p in self.patterns:
            if p in top:
                self.pattern = p
                break
        else:
            fmtError = _(
                "Couldn't find pattern. The file should be a series "
                "of lines separated by tabs or semicolons.")
            raise ImportFormatError(type="invalidPattern",
                                    info=fmtError)
        # use the line we selected rather than the leaked loop variable
        self.setNumFields(top)

    def cacheFile(self):
        "Read file into self.lines if not already there."
        if not self.lines:
            self.lines = self.readFile()

    def readFile(self):
        """Return the file's lines as unicode, minus any '#' comment lines.

        A leading UTF-8 byte order mark is removed.  Raises
        ImportFormatError with type "encodingError" if the file cannot be
        decoded as UTF-8.
        """
        f = codecs.open(self.file, encoding="utf-8")
        try:
            data = f.readlines()
        except UnicodeDecodeError:
            raise ImportFormatError(type="encodingError",
                                    info=_("The file was not in UTF8 format."))
        finally:
            # the handle was previously left open
            f.close()
        if not data:
            return []
        if data[0].startswith(unicode(codecs.BOM_UTF8, "utf8")):
            # the decoded BOM is a single character
            data[0] = data[0][1:]
        # drop lines whose first non-space character is the comment char
        return [l for l in data if not l.lstrip().startswith("#")]

    def fields(self):
        "Number of fields."
        self.parseTopLine()
        return self.numFields

    def setNumFields(self, line):
        "Record the number of fields found in `line` as self.numFields."
        self.numFields = len(self.parseLine(line))

    def parseLine(self, line):
        "Split `line` on self.pattern, then strip and tidyHTML() each field."
        fields = line.split(self.pattern)
        return [tidyHTML(f.strip()) for f in fields]

    def cardFromFields(self, fields):
        "Wrap a list of field values in a new ForeignCard."
        card = ForeignCard()
        card.fields.extend(fields)
        return card
|
|
104
anki/importing/csvfile.py
Normal file
104
anki/importing/csvfile.py
Normal file
|
@ -0,0 +1,104 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Copyright: Damien Elmes <anki@ichi2.net>
|
||||||
|
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
|
||||||
|
|
||||||
|
"""\
|
||||||
|
Importing CSV/TSV files
|
||||||
|
========================
|
||||||
|
"""
|
||||||
|
__docformat__ = 'restructuredtext'
|
||||||
|
|
||||||
|
import codecs, csv, re
|
||||||
|
from anki.importing import Importer, ForeignCard
|
||||||
|
from anki.lang import _
|
||||||
|
from anki.errors import *
|
||||||
|
from anki.utils import tidyHTML
|
||||||
|
|
||||||
|
class TextImporter(Importer):
    """Importer for delimited text files, parsed with the stdlib csv module.

    The file is read once, comment and blank lines are dropped, the dialect
    is guessed with csv.Sniffer and the expected field count is taken from
    the first remaining row.
    """

    patterns = ("\t", ";")

    def __init__(self, *args):
        Importer.__init__(self, *args)
        self.lines = None
        # set by openFile(); doubles as the "already loaded" marker
        self.fileobj = None

    def foreignCards(self):
        """Return a ForeignCard for each row with the expected field count.

        Rows with a different number of fields are recorded in self.log and
        counted in self.ignored instead of aborting the import.
        """
        self.sniff()
        log = []
        cards = []
        ignored = 0
        reader = csv.reader(self.data, self.dialect)
        for row in reader:
            # the reader is fed byte strings; decode each field as UTF-8
            row = [unicode(x, "utf-8") for x in row]
            if len(row) != self.numFields:
                log.append(_(
                    "'%(row)s' had %(num1)d fields, "
                    "expected %(num2)d") % {
                    "row": u" ".join(row),
                    "num1": len(row),
                    "num2": self.numFields,
                    })
                ignored += 1
                continue
            cards.append(self.cardFromFields(row))
        self.log = log
        self.ignored = ignored
        return cards

    def sniff(self):
        "Make sure the file is loaded and its dialect/field count are known."
        self.cacheFile()

    def cacheFile(self):
        "Load and analyse the file unless that has already been done."
        if not self.fileobj:
            self.openFile()

    def openFile(self):
        """Read self.file, drop comment/blank lines and detect the dialect.

        Sets self.data (list of UTF-8 encoded lines), self.dialect and
        self.numFields.  Raises ImportFormatError when the file is not
        valid UTF-8 or when no dialect can be determined, which includes a
        file with no usable lines.
        """
        self.dialect = None
        self.fileobj = open(self.file, "rb")
        try:
            raw = self.fileobj.read()
        finally:
            # cacheFile() only tests the object for truth, and a closed
            # file object is still truthy, so release the handle now
            self.fileobj.close()
        try:
            # the bytes are only decoded field by field in foreignCards();
            # validate here so a bad encoding is reported as a format error
            raw.decode("utf-8")
        except UnicodeDecodeError:
            raise ImportFormatError(
                type="encodingError",
                info=_("The file was not in UTF8 format."))
        if raw.startswith(codecs.BOM_UTF8):
            # otherwise the byte order mark ends up inside the first field
            raw = raw[len(codecs.BOM_UTF8):]
        # strip out comments and blank lines; (?m) makes ^ match at every
        # line start, without it only a comment on line one was removed
        raw = re.sub("(?m)^ *#.*", "", raw)
        # splitlines() also copes with \r\n endings, so a lone "\r" no
        # longer survives the blank-line filter
        self.data = [x for x in raw.splitlines() if x]
        if self.data:
            sniffer = csv.Sniffer()
            try:
                self.dialect = sniffer.sniff("\n".join(self.data[:10]))
            except csv.Error:
                # retry with just the first line before giving up
                try:
                    self.dialect = sniffer.sniff(self.data[0])
                except csv.Error:
                    self.dialect = None
        if not self.dialect:
            # NOTE(review): type is "encodingError" although this is a
            # format problem; left unchanged in case callers match on it
            raise ImportFormatError(
                type="encodingError",
                info=_("Couldn't determine format of file."))
        reader = csv.reader(self.data, self.dialect)
        self.numFields = len(reader.next())

    def fields(self):
        "Number of fields."
        self.sniff()
        return self.numFields

    def setNumFields(self, line):
        "Record the number of fields found in `line` as self.numFields."
        self.numFields = len(self.parseLine(line))

    def parseLine(self, line):
        "Split `line` on self.pattern, then strip and tidyHTML() each field."
        # NOTE(review): nothing in this class assigns self.pattern, so this
        # looks like a leftover of the pre-csv parser - confirm before use
        fields = line.split(self.pattern)
        return [tidyHTML(f.strip()) for f in fields]

    def cardFromFields(self, fields):
        "Wrap a list of field values in a new ForeignCard."
        card = ForeignCard()
        card.fields.extend(fields)
        return card
|
|
@ -5,5 +5,7 @@
|
||||||
テスト test
|
テスト test
|
||||||
to eat 食べる
|
to eat 食べる
|
||||||
飲む to drink
|
飲む to drink
|
||||||
|
多すぎる too many fields
|
||||||
|
not, enough, fields
|
||||||
遊ぶ
|
遊ぶ
|
||||||
to play
|
to play
|
||||||
|
|
|
@ -1 +1,2 @@
|
||||||
foo bar baz,qux
|
foo bar baz,qux
|
||||||
|
foo2 bar2 baz2
|
||||||
|
|
|
@ -5,7 +5,7 @@ from tests.shared import assertException
|
||||||
|
|
||||||
from anki.errors import *
|
from anki.errors import *
|
||||||
from anki import DeckStorage
|
from anki import DeckStorage
|
||||||
from anki.importing import anki10, csv, mnemosyne10
|
from anki.importing import anki10, csvfile, mnemosyne10
|
||||||
from anki.stdmodels import BasicModel
|
from anki.stdmodels import BasicModel
|
||||||
from anki.facts import Fact
|
from anki.facts import Fact
|
||||||
from anki.sync import SyncClient, SyncServer
|
from anki.sync import SyncClient, SyncServer
|
||||||
|
@ -18,10 +18,10 @@ def test_csv():
|
||||||
deck = DeckStorage.Deck()
|
deck = DeckStorage.Deck()
|
||||||
deck.addModel(BasicModel())
|
deck.addModel(BasicModel())
|
||||||
file = unicode(os.path.join(testDir, "importing/text-2fields.txt"))
|
file = unicode(os.path.join(testDir, "importing/text-2fields.txt"))
|
||||||
i = csv.TextImporter(deck, file)
|
i = csvfile.TextImporter(deck, file)
|
||||||
i.doImport()
|
i.doImport()
|
||||||
# two problems - missing front, dupe front
|
# four problems - missing front, dupe front, wrong num of fields
|
||||||
assert len(i.log) == 2
|
assert len(i.log) == 4
|
||||||
assert i.total == 5
|
assert i.total == 5
|
||||||
deck.s.close()
|
deck.s.close()
|
||||||
|
|
||||||
|
@ -29,11 +29,11 @@ def test_csv_tags():
|
||||||
deck = DeckStorage.Deck()
|
deck = DeckStorage.Deck()
|
||||||
deck.addModel(BasicModel())
|
deck.addModel(BasicModel())
|
||||||
file = unicode(os.path.join(testDir, "importing/text-tags.txt"))
|
file = unicode(os.path.join(testDir, "importing/text-tags.txt"))
|
||||||
i = csv.TextImporter(deck, file)
|
i = csvfile.TextImporter(deck, file)
|
||||||
i.doImport()
|
i.doImport()
|
||||||
facts = deck.s.query(Fact).all()
|
facts = deck.s.query(Fact).all()
|
||||||
assert len(facts) == 1
|
assert len(facts) == 2
|
||||||
assert facts[0].tags == "baz qux"
|
assert facts[0].tags == "baz qux" or facts[1].tags == "baz qux"
|
||||||
deck.s.close()
|
deck.s.close()
|
||||||
|
|
||||||
def test_mnemosyne10():
|
def test_mnemosyne10():
|
||||||
|
|
Loading…
Reference in a new issue