mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 01:06:35 -04:00
catch unicode decode errors in csv import
This commit is contained in:
parent
e62967ecb1
commit
5eb1a69735
1 changed file with 19 additions and 19 deletions
|
@@ -32,7 +32,12 @@ class TextImporter(Importer):
|
||||||
ignored = 0
|
ignored = 0
|
||||||
reader = csv.reader(self.data, self.dialect)
|
reader = csv.reader(self.data, self.dialect)
|
||||||
for row in reader:
|
for row in reader:
|
||||||
row = [unicode(x, "utf-8") for x in row]
|
try:
|
||||||
|
row = [unicode(x, "utf-8") for x in row]
|
||||||
|
except UnicodeDecodeError, e:
|
||||||
|
raise ImportFormatError(
|
||||||
|
type="encodingError",
|
||||||
|
info=_("The file was not in UTF8 format."))
|
||||||
if len(row) != self.numFields:
|
if len(row) != self.numFields:
|
||||||
log.append(_(
|
log.append(_(
|
||||||
"'%(row)s' had %(num1)d fields, "
|
"'%(row)s' had %(num1)d fields, "
|
||||||
|
@@ -62,24 +67,19 @@ class TextImporter(Importer):
|
||||||
def openFile(self):
|
def openFile(self):
|
||||||
self.dialect = None
|
self.dialect = None
|
||||||
self.fileobj = open(self.file, "rb")
|
self.fileobj = open(self.file, "rb")
|
||||||
try:
|
self.data = self.fileobj.read()
|
||||||
self.data = self.fileobj.read()
|
self.data = re.sub("^ *#.*", "", self.data)
|
||||||
self.data = re.sub("^ *#.*", "", self.data)
|
self.data = [x for x in self.data.split("\n") if x]
|
||||||
self.data = [x for x in self.data.split("\n") if x]
|
if self.data:
|
||||||
if self.data:
|
# strip out comments and blank lines
|
||||||
# strip out comments and blank lines
|
try:
|
||||||
try:
|
self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
|
||||||
self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
|
except:
|
||||||
except:
|
self.dialect = csv.Sniffer().sniff(self.data[0])
|
||||||
self.dialect = csv.Sniffer().sniff(self.data[0])
|
reader = csv.reader(self.data, self.dialect)
|
||||||
reader = csv.reader(self.data, self.dialect)
|
self.numFields = len(reader.next())
|
||||||
self.numFields = len(reader.next())
|
else:
|
||||||
else:
|
self.dialect = None
|
||||||
self.dialect = None
|
|
||||||
except UnicodeDecodeError, e:
|
|
||||||
raise ImportFormatError(
|
|
||||||
type="encodingError",
|
|
||||||
info=_("The file was not in UTF8 format."))
|
|
||||||
if not self.dialect:
|
if not self.dialect:
|
||||||
raise ImportFormatError(
|
raise ImportFormatError(
|
||||||
type="encodingError",
|
type="encodingError",
|
||||||
|
|
Loading…
Reference in a new issue