catch unicode decode errors in csv import

This commit is contained in:
Damien Elmes 2009-06-18 05:37:56 +09:00
parent e62967ecb1
commit 5eb1a69735

View file

@@ -32,7 +32,12 @@ class TextImporter(Importer):
ignored = 0 ignored = 0
reader = csv.reader(self.data, self.dialect) reader = csv.reader(self.data, self.dialect)
for row in reader: for row in reader:
try:
row = [unicode(x, "utf-8") for x in row] row = [unicode(x, "utf-8") for x in row]
except UnicodeDecodeError, e:
raise ImportFormatError(
type="encodingError",
info=_("The file was not in UTF8 format."))
if len(row) != self.numFields: if len(row) != self.numFields:
log.append(_( log.append(_(
"'%(row)s' had %(num1)d fields, " "'%(row)s' had %(num1)d fields, "
@@ -62,7 +67,6 @@ class TextImporter(Importer):
def openFile(self): def openFile(self):
self.dialect = None self.dialect = None
self.fileobj = open(self.file, "rb") self.fileobj = open(self.file, "rb")
try:
self.data = self.fileobj.read() self.data = self.fileobj.read()
self.data = re.sub("^ *#.*", "", self.data) self.data = re.sub("^ *#.*", "", self.data)
self.data = [x for x in self.data.split("\n") if x] self.data = [x for x in self.data.split("\n") if x]
@@ -76,10 +80,6 @@ class TextImporter(Importer):
self.numFields = len(reader.next()) self.numFields = len(reader.next())
else: else:
self.dialect = None self.dialect = None
except UnicodeDecodeError, e:
raise ImportFormatError(
type="encodingError",
info=_("The file was not in UTF8 format."))
if not self.dialect: if not self.dialect:
raise ImportFormatError( raise ImportFormatError(
type="encodingError", type="encodingError",