switch to python csv

This commit is contained in:
Damien Elmes 2009-06-18 05:21:47 +09:00
parent b6a50db1c2
commit e62967ecb1
6 changed files with 115 additions and 138 deletions

View file

@ -253,7 +253,7 @@ where factId in (%s)""" % ",".join([str(s) for s in factIds]))
# Export modules # Export modules
########################################################################## ##########################################################################
from anki.importing.csv import TextImporter from anki.importing.csvfile import TextImporter
from anki.importing.anki10 import Anki10Importer from anki.importing.anki10 import Anki10Importer
from anki.importing.mnemosyne10 import Mnemosyne10Importer from anki.importing.mnemosyne10 import Mnemosyne10Importer
from anki.importing.wcu import WCUImporter from anki.importing.wcu import WCUImporter

View file

@ -1,130 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
"""\
Importing CSV/TSV files
========================
"""
__docformat__ = 'restructuredtext'
import codecs
from anki.importing import Importer, ForeignCard
from anki.lang import _
from anki.errors import *
from anki.utils import tidyHTML
class TextImporter(Importer):
    "Import cards from a UTF-8 text file of tab- or semicolon-separated lines."

    # Candidate field separators, tried in order against the first
    # non-blank line of the file.
    patterns = ("\t", ";")

    def __init__(self, *args):
        Importer.__init__(self, *args)
        # Decoded lines of the file; populated lazily by cacheFile().
        self.lines = None

    def foreignCards(self):
        """Return a list of ForeignCard objects, one per well-formed line.

        Lines that cannot be parsed or that have the wrong number of fields
        are skipped; a message for each is stored in self.log and the number
        of skipped lines in self.ignored.
        """
        self.parseTopLine()
        log = []
        cards = []
        ignored = 0
        # Line numbers are counted over self.lines, i.e. after readFile() has
        # dropped comment lines, so they can differ from positions on disk.
        for index, line in enumerate(self.lines):
            lineNum = index + 1
            if not line.strip():
                # ignore blank lines
                continue
            try:
                fields = self.parseLine(line)
            except ValueError:
                # The separator lives on the instance; the bare name
                # 'pattern' is not defined in this scope.
                log.append(_("Line %(line)d doesn't match pattern '%(pat)s'")
                           % {
                    'line': lineNum,
                    'pat': self.pattern,
                    })
                ignored += 1
                continue
            if len(fields) != self.numFields:
                log.append(_(
                    "Line %(line)d had %(num1)d fields,"
                    " expected %(num2)d") % {
                    "line": lineNum,
                    "num1": len(fields),
                    "num2": self.numFields,
                    })
                ignored += 1
                continue
            cards.append(self.cardFromFields(fields))
        self.log = log
        self.ignored = ignored
        return cards

    def parseTopLine(self):
        """Determine the separator and field count from the first real line.

        Sets self.pattern and self.numFields. Raises ImportFormatError if the
        file has no non-blank lines or none of self.patterns occurs in the
        first non-blank line.
        """
        self.cacheFile()
        # look for the first non-blank line
        first = None
        for line in self.lines:
            if line.strip():
                first = line
                break
        if not first:
            raise ImportFormatError(type="emptyFile",
                                    info=_("The file had no non-empty lines."))
        for p in self.patterns:
            if p in first:
                self.pattern = p
                break
        else:
            fmtError = _(
                "Couldn't find pattern. The file should be a series "
                "of lines separated by tabs or semicolons.")
            raise ImportFormatError(type="invalidPattern",
                                    info=fmtError)
        self.setNumFields(first)

    def cacheFile(self):
        "Read file into self.lines if not already there."
        # Compare against None so an empty file is not re-read on every call.
        if self.lines is None:
            self.lines = self.readFile()

    def readFile(self):
        """Return the file's lines as unicode, minus comment lines.

        A leading UTF-8 byte order mark is removed. Raises ImportFormatError
        if the file cannot be decoded as UTF-8.
        """
        f = codecs.open(self.file, encoding="utf-8")
        try:
            try:
                data = f.readlines()
            except UnicodeDecodeError:
                raise ImportFormatError(
                    type="encodingError",
                    info=_("The file was not in UTF8 format."))
        finally:
            # Always release the handle, including on decode failure.
            f.close()
        if not data:
            return []
        if data[0].startswith(unicode(codecs.BOM_UTF8, "utf8")):
            data[0] = data[0][1:]
        # remove comment lines (first non-space character is '#')
        return [l for l in data if not l.lstrip().startswith("#")]

    def fields(self):
        "Number of fields."
        self.parseTopLine()
        return self.numFields

    def setNumFields(self, line):
        "Set self.numFields to the number of fields found in line."
        self.numFields = len(self.parseLine(line))

    def parseLine(self, line):
        "Split line on self.pattern, then strip and tidyHTML() each field."
        return [tidyHTML(f.strip()) for f in line.split(self.pattern)]

    def cardFromFields(self, fields):
        "Wrap a list of field values in a new ForeignCard."
        card = ForeignCard()
        card.fields.extend(fields)
        return card

104
anki/importing/csvfile.py Normal file
View file

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
"""\
Importing CSV/TSV files
========================
"""
__docformat__ = 'restructuredtext'
import codecs, csv, re
from anki.importing import Importer, ForeignCard
from anki.lang import _
from anki.errors import *
from anki.utils import tidyHTML
class TextImporter(Importer):
    "Import cards from a delimited text file, parsed with the csv module."

    patterns = ("\t", ";")

    def __init__(self, *args):
        Importer.__init__(self, *args)
        self.lines = None
        # Set once the file has been read; used by cacheFile() as the
        # "already loaded" marker.
        self.fileobj = None

    def foreignCards(self):
        """Return a list of ForeignCard objects, one per well-formed row.

        Rows whose field count differs from self.numFields are skipped; a
        message for each is stored in self.log and the number of skipped rows
        in self.ignored. Raises ImportFormatError if a field is not valid
        UTF-8.
        """
        self.sniff()
        log = []
        cards = []
        ignored = 0
        reader = csv.reader(self.data, self.dialect)
        for row in reader:
            # The file is read as bytes, so this is where bad encodings
            # actually surface; report them as an import error.
            try:
                row = [unicode(x, "utf-8") for x in row]
            except UnicodeDecodeError:
                raise ImportFormatError(
                    type="encodingError",
                    info=_("The file was not in UTF8 format."))
            if len(row) != self.numFields:
                log.append(_(
                    "'%(row)s' had %(num1)d fields, "
                    "expected %(num2)d") % {
                    "row": u" ".join(row),
                    "num1": len(row),
                    "num2": self.numFields,
                    })
                ignored += 1
                continue
            cards.append(self.cardFromFields(row))
        self.log = log
        self.ignored = ignored
        return cards

    def sniff(self):
        "Make sure the file is loaded and its dialect and field count known."
        self.cacheFile()

    def cacheFile(self):
        "Read and analyse the file if that has not been done yet."
        if not self.fileobj:
            self.openFile()

    def openFile(self):
        """Read the file and detect its csv dialect and field count.

        Sets self.data (non-empty, non-comment lines as byte strings),
        self.dialect and self.numFields. Raises ImportFormatError if the
        file has no usable lines or its format cannot be determined.
        """
        self.dialect = None
        self.fileobj = open(self.file, "rb")
        try:
            raw = self.fileobj.read()
        finally:
            # The closed file object is kept only as the "loaded" marker.
            self.fileobj.close()
        # drop a leading UTF-8 byte order mark so it doesn't end up in the
        # first field
        if raw.startswith(codecs.BOM_UTF8):
            raw = raw[len(codecs.BOM_UTF8):]
        # strip out comments and blank lines; (?m) makes ^ match at the start
        # of every line rather than only at the start of the file
        raw = re.sub("(?m)^ *#.*", "", raw)
        self.data = [x for x in raw.split("\n") if x]
        if self.data:
            sniffer = csv.Sniffer()
            try:
                self.dialect = sniffer.sniff("\n".join(self.data[:10]))
            except csv.Error:
                # fall back to guessing from the first line alone
                try:
                    self.dialect = sniffer.sniff(self.data[0])
                except csv.Error:
                    self.dialect = None
        if not self.dialect:
            # Forget the marker so a later call retries and raises again
            # instead of continuing with numFields unset.
            self.fileobj = None
            raise ImportFormatError(
                type="encodingError",
                info=_("Couldn't determine format of file."))
        reader = csv.reader(self.data, self.dialect)
        self.numFields = len(reader.next())

    def fields(self):
        "Number of fields."
        self.sniff()
        return self.numFields

    def setNumFields(self, line):
        "Set self.numFields to the number of fields found in line."
        self.numFields = len(self.parseLine(line))

    def parseLine(self, line):
        "Split line on self.pattern, then strip and tidyHTML() each field."
        # NOTE(review): nothing visible in this class assigns self.pattern any
        # more; confirm whether this and setNumFields() are still used.
        fields = line.split(self.pattern)
        fields = [tidyHTML(f.strip()) for f in fields]
        return fields

    def cardFromFields(self, fields):
        "Wrap a list of field values in a new ForeignCard."
        card = ForeignCard()
        card.fields.extend(fields)
        return card

View file

@ -5,5 +5,7 @@
テスト test テスト test
to eat 食べる to eat 食べる
飲む to drink 飲む to drink
多すぎる too many fields
not, enough, fields
遊ぶ 遊ぶ
to play to play

View file

@ -1 +1,2 @@
foo bar baz,qux foo bar baz,qux
foo2 bar2 baz2

View file

@ -5,7 +5,7 @@ from tests.shared import assertException
from anki.errors import * from anki.errors import *
from anki import DeckStorage from anki import DeckStorage
from anki.importing import anki10, csv, mnemosyne10 from anki.importing import anki10, csvfile, mnemosyne10
from anki.stdmodels import BasicModel from anki.stdmodels import BasicModel
from anki.facts import Fact from anki.facts import Fact
from anki.sync import SyncClient, SyncServer from anki.sync import SyncClient, SyncServer
@ -18,10 +18,10 @@ def test_csv():
deck = DeckStorage.Deck() deck = DeckStorage.Deck()
deck.addModel(BasicModel()) deck.addModel(BasicModel())
file = unicode(os.path.join(testDir, "importing/text-2fields.txt")) file = unicode(os.path.join(testDir, "importing/text-2fields.txt"))
i = csv.TextImporter(deck, file) i = csvfile.TextImporter(deck, file)
i.doImport() i.doImport()
# two problems - missing front, dupe front # four problems - missing front, dupe front, wrong num of fields
assert len(i.log) == 2 assert len(i.log) == 4
assert i.total == 5 assert i.total == 5
deck.s.close() deck.s.close()
@ -29,11 +29,11 @@ def test_csv_tags():
deck = DeckStorage.Deck() deck = DeckStorage.Deck()
deck.addModel(BasicModel()) deck.addModel(BasicModel())
file = unicode(os.path.join(testDir, "importing/text-tags.txt")) file = unicode(os.path.join(testDir, "importing/text-tags.txt"))
i = csv.TextImporter(deck, file) i = csvfile.TextImporter(deck, file)
i.doImport() i.doImport()
facts = deck.s.query(Fact).all() facts = deck.s.query(Fact).all()
assert len(facts) == 1 assert len(facts) == 2
assert facts[0].tags == "baz qux" assert facts[0].tags == "baz qux" or facts[1].tags == "baz qux"
deck.s.close() deck.s.close()
def test_mnemosyne10(): def test_mnemosyne10():