Catch Unicode decode errors in CSV import

This commit is contained in:
Damien Elmes 2009-06-18 05:37:56 +09:00
parent e62967ecb1
commit 5eb1a69735

View file

@ -32,7 +32,12 @@ class TextImporter(Importer):
ignored = 0 ignored = 0
reader = csv.reader(self.data, self.dialect) reader = csv.reader(self.data, self.dialect)
for row in reader: for row in reader:
row = [unicode(x, "utf-8") for x in row] try:
row = [unicode(x, "utf-8") for x in row]
except UnicodeDecodeError, e:
raise ImportFormatError(
type="encodingError",
info=_("The file was not in UTF8 format."))
if len(row) != self.numFields: if len(row) != self.numFields:
log.append(_( log.append(_(
"'%(row)s' had %(num1)d fields, " "'%(row)s' had %(num1)d fields, "
@ -62,24 +67,19 @@ class TextImporter(Importer):
def openFile(self): def openFile(self):
self.dialect = None self.dialect = None
self.fileobj = open(self.file, "rb") self.fileobj = open(self.file, "rb")
try: self.data = self.fileobj.read()
self.data = self.fileobj.read() self.data = re.sub("^ *#.*", "", self.data)
self.data = re.sub("^ *#.*", "", self.data) self.data = [x for x in self.data.split("\n") if x]
self.data = [x for x in self.data.split("\n") if x] if self.data:
if self.data: # strip out comments and blank lines
# strip out comments and blank lines try:
try: self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10])) except:
except: self.dialect = csv.Sniffer().sniff(self.data[0])
self.dialect = csv.Sniffer().sniff(self.data[0]) reader = csv.reader(self.data, self.dialect)
reader = csv.reader(self.data, self.dialect) self.numFields = len(reader.next())
self.numFields = len(reader.next()) else:
else: self.dialect = None
self.dialect = None
except UnicodeDecodeError, e:
raise ImportFormatError(
type="encodingError",
info=_("The file was not in UTF8 format."))
if not self.dialect: if not self.dialect:
raise ImportFormatError( raise ImportFormatError(
type="encodingError", type="encodingError",