add ability to customize separator in csv import

This commit is contained in:
Damien Elmes 2009-06-26 07:13:14 +09:00
parent 5598bcbce4
commit 2b86cd6b33
2 changed files with 34 additions and 11 deletions

View file

@ -36,6 +36,7 @@ class Importer(object):
needMapper = True needMapper = True
tagDuplicates = False tagDuplicates = False
multipleCardsAllowed = True multipleCardsAllowed = True
needDelimiter = False
def __init__(self, deck, file): def __init__(self, deck, file):
self.file = file self.file = file

View file

@ -16,12 +16,14 @@ from anki.utils import tidyHTML
class TextImporter(Importer): class TextImporter(Importer):
needDelimiter = True
patterns = ("\t", ";") patterns = ("\t", ";")
def __init__(self, *args): def __init__(self, *args):
Importer.__init__(self, *args) Importer.__init__(self, *args)
self.lines = None self.lines = None
self.fileobj = None self.fileobj = None
self.delimiter = None
def foreignCards(self): def foreignCards(self):
self.sniff() self.sniff()
@ -30,6 +32,9 @@ class TextImporter(Importer):
cards = [] cards = []
lineNum = 0 lineNum = 0
ignored = 0 ignored = 0
if self.delimiter:
reader = csv.reader(self.data, delimiter=self.delimiter)
else:
reader = csv.reader(self.data, self.dialect) reader = csv.reader(self.data, self.dialect)
for row in reader: for row in reader:
try: try:
@ -71,20 +76,37 @@ class TextImporter(Importer):
self.data = re.sub("^ *#.*", "", self.data) self.data = re.sub("^ *#.*", "", self.data)
self.data = [x for x in self.data.split("\n") if x] self.data = [x for x in self.data.split("\n") if x]
if self.data: if self.data:
# strip out comments and blank lines self.updateDelimiter()
try: if not self.dialect and not self.delimiter:
self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
except:
self.dialect = csv.Sniffer().sniff(self.data[0])
reader = csv.reader(self.data, self.dialect)
self.numFields = len(reader.next())
else:
self.dialect = None
if not self.dialect:
raise ImportFormatError( raise ImportFormatError(
type="encodingError", type="encodingError",
info=_("Couldn't determine format of file.")) info=_("Couldn't determine format of file."))
def updateDelimiter(self):
self.dialect = None
if not self.delimiter:
try:
self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
except:
try:
self.dialect = csv.Sniffer().sniff(self.data[0])
except:
pass
if self.dialect:
reader = csv.reader(self.data, self.dialect)
else:
if not self.delimiter:
if "\t" in self.data[0]:
self.delimiter = "\t"
elif ";" in self.data[0]:
self.delimiter = ";"
elif "," in self.data[0]:
self.delimiter = ","
else:
self.delimiter = " "
reader = csv.reader(self.data, delimiter=self.delimiter)
self.numFields = len(reader.next())
def fields(self): def fields(self):
"Number of fields." "Number of fields."
self.sniff() self.sniff()