catch unicode decode errors in csv import

This commit is contained in:
Damien Elmes 2009-06-18 05:37:56 +09:00
parent e62967ecb1
commit 5eb1a69735

View file

@@ -32,7 +32,12 @@ class TextImporter(Importer):
ignored = 0
reader = csv.reader(self.data, self.dialect)
for row in reader:
try:
row = [unicode(x, "utf-8") for x in row]
except UnicodeDecodeError, e:
raise ImportFormatError(
type="encodingError",
info=_("The file was not in UTF8 format."))
if len(row) != self.numFields:
log.append(_(
"'%(row)s' had %(num1)d fields, "
@@ -62,7 +67,6 @@ class TextImporter(Importer):
def openFile(self):
self.dialect = None
self.fileobj = open(self.file, "rb")
try:
self.data = self.fileobj.read()
self.data = re.sub("^ *#.*", "", self.data)
self.data = [x for x in self.data.split("\n") if x]
@@ -76,10 +80,6 @@ class TextImporter(Importer):
self.numFields = len(reader.next())
else:
self.dialect = None
except UnicodeDecodeError, e:
raise ImportFormatError(
type="encodingError",
info=_("The file was not in UTF8 format."))
if not self.dialect:
raise ImportFormatError(
type="encodingError",