catch unicode decode errors in csv import

2026-01-01 00:02:57 -05:00 · 2009-06-18 05:37:56 +09:00 · 2009-06-18 05:37:56 +09:00 · 5eb1a69735
commit 5eb1a69735
parent e62967ecb1
1 changed file with 19 additions and 19 deletions
--- a/anki/importing/csvfile.py
+++ b/anki/importing/csvfile.py
@@ -32,7 +32,12 @@ class TextImporter(Importer):
        ignored = 0
        reader = csv.reader(self.data, self.dialect)
        for row in reader:
-            row = [unicode(x, "utf-8") for x in row]
+            try:
                row = [unicode(x, "utf-8") for x in row]
            except UnicodeDecodeError, e:
                raise ImportFormatError(
                    type="encodingError",
                    info=_("The file was not in UTF8 format."))
            if len(row) != self.numFields:
                log.append(_(
                    "'%(row)s' had %(num1)d fields, "
@@ -62,24 +67,19 @@ class TextImporter(Importer):
    def openFile(self):
        self.dialect = None
        self.fileobj = open(self.file, "rb")
-        try:
+        self.data = self.fileobj.read()
-            self.data = self.fileobj.read()
+        self.data = re.sub("^ *#.*", "", self.data)
-            self.data = re.sub("^ *#.*", "", self.data)
+        self.data = [x for x in self.data.split("\n") if x]
-            self.data = [x for x in self.data.split("\n") if x]
+        if self.data:
-            if self.data:
+            # strip out comments and blank lines
-                # strip out comments and blank lines
+            try:
-                try:
+                self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
-                    self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
+            except:
-                except:
+                self.dialect = csv.Sniffer().sniff(self.data[0])
-                    self.dialect = csv.Sniffer().sniff(self.data[0])
+            reader = csv.reader(self.data, self.dialect)
-                reader = csv.reader(self.data, self.dialect)
+            self.numFields = len(reader.next())
-                self.numFields = len(reader.next())
+        else:
-            else:
+            self.dialect = None
                self.dialect = None
        except UnicodeDecodeError, e:
            raise ImportFormatError(
                type="encodingError",
                info=_("The file was not in UTF8 format."))
        if not self.dialect:
            raise ImportFormatError(
                type="encodingError",