CSV import/export fixes and features (#1898)

* Fix footer moving upwards

* Fix column detection

Was broken because escaped line breaks were not considered.
Also removes delimiter detection on `#columns:` line. User must use tabs
or set delimiter beforehand.

* Add CSV preview

* Parse `#tags column:`

* Optionally export deck and notetype with CSV

* Avoid clones in CSV export

* Prevent bottom of page appearing under footer (dae)

* Increase padding to 1em (dae)

With 0.5em, when a vertical scrollbar is shown, it sits right next to
the right edge of the content, making it look like there's no right
margin.

* Experimental changes to make table fit+scroll (dae)

- limit individual cells to 15em, and show ellipses when truncated
- limit total table width to body width, so that inner table is shown
with scrollbar
- use class rather than id - ids are bad practice in Svelte components,
as more than one may be displayed on a single page

* Skip importing foreign notes with filtered decks

Previously, such notes were implicitly imported into the default deck.
Also some refactoring to fetch deck ids and names beforehand.

* Hide spacer below hidden field mapping

* Fix guid being replaced when updating note

* Fix dupe identity check

Canonify tags before checking whether a dupe is identical, but only add the
update tags later, if appropriate.

* Fix deck export for notes with missing card 1

* Fix note lines starting with `#`

csv crate doesn't support escaping a leading comment char. :(

* Support import/export of guids

* Strip HTML from preview rows

* Fix initially set deck if current is filtered

* Make isHtml toggle reactive

* Fix `html_to_text_line()` stripping sound names

* Tweak export option labels

* Switch to patched rust-csv fork

Fixes writing lines starting with `#`, so revert 5ece10ad05.

* List column options with first column field

* Fix flag for exports with HTML stripped
This commit is contained in:
RumovZ 2022-06-09 02:28:01 +02:00 committed by GitHub
parent d6b8520d03
commit 6da5e5b042
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 798 additions and 227 deletions

26
Cargo.lock generated
View file

@ -54,7 +54,7 @@ dependencies = [
"chrono", "chrono",
"coarsetime", "coarsetime",
"criterion", "criterion",
"csv", "csv 1.1.6 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)",
"env_logger", "env_logger",
"flate2", "flate2",
"fluent", "fluent",
@ -392,7 +392,7 @@ dependencies = [
"cast", "cast",
"clap", "clap",
"criterion-plot", "criterion-plot",
"csv", "csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools", "itertools",
"lazy_static", "lazy_static",
"num-traits", "num-traits",
@ -506,12 +506,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [ dependencies = [
"bstr", "bstr",
"csv-core", "csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.8", "itoa 0.4.8",
"ryu", "ryu",
"serde", "serde",
] ]
[[package]]
name = "csv"
version = "1.1.6"
source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90"
dependencies = [
"bstr",
"csv-core 0.1.10 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)",
"itoa 1.0.1",
"ryu",
"serde",
]
[[package]] [[package]]
name = "csv-core" name = "csv-core"
version = "0.1.10" version = "0.1.10"
@ -521,6 +533,14 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "derive_more" name = "derive_more"
version = "0.99.17" version = "0.99.17"

View file

@ -372,23 +372,23 @@ def raze_fetch_remote_crates():
) )
maybe( maybe(
http_archive, new_git_repository,
name = "raze__csv__1_1_6", name = "raze__csv__1_1_6",
url = "https://crates.io/api/v1/crates/csv/1.1.6/download", remote = "https://github.com/ankitects/rust-csv.git",
type = "tar.gz", shallow_since = "1654675287 +1000",
sha256 = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1", commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90",
strip_prefix = "csv-1.1.6",
build_file = Label("//cargo/remote:BUILD.csv-1.1.6.bazel"), build_file = Label("//cargo/remote:BUILD.csv-1.1.6.bazel"),
init_submodules = True,
) )
maybe( maybe(
http_archive, new_git_repository,
name = "raze__csv_core__0_1_10", name = "raze__csv_core__0_1_10",
url = "https://crates.io/api/v1/crates/csv-core/0.1.10/download", remote = "https://github.com/ankitects/rust-csv.git",
type = "tar.gz", shallow_since = "1654675287 +1000",
sha256 = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90", commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90",
strip_prefix = "csv-core-0.1.10",
build_file = Label("//cargo/remote:BUILD.csv-core-0.1.10.bazel"), build_file = Label("//cargo/remote:BUILD.csv-core-0.1.10.bazel"),
init_submodules = True,
) )
maybe( maybe(

View file

@ -126,7 +126,7 @@ rust_library(
deps = [ deps = [
"@raze__bstr__0_2_17//:bstr", "@raze__bstr__0_2_17//:bstr",
"@raze__csv_core__0_1_10//:csv_core", "@raze__csv_core__0_1_10//:csv_core",
"@raze__itoa__0_4_8//:itoa", "@raze__itoa__1_0_1//:itoa",
"@raze__ryu__1_0_9//:ryu", "@raze__ryu__1_0_9//:ryu",
"@raze__serde__1_0_136//:serde", "@raze__serde__1_0_136//:serde",
], ],

View file

@ -39,7 +39,7 @@ rust_library(
crate_features = [ crate_features = [
"default", "default",
], ],
crate_root = "src/lib.rs", crate_root = "csv-core/src/lib.rs",
data = [], data = [],
edition = "2018", edition = "2018",
rustc_flags = [ rustc_flags = [

View file

@ -21,7 +21,10 @@ COMMITS_SHALLOW_SINCE = {
"1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000", "1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000",
# pct-str # pct-str
"4adccd8d4a222ab2672350a102f06ae832a0572d": "1605376517 +0100", "4adccd8d4a222ab2672350a102f06ae832a0572d": "1605376517 +0100",
# linkcheck
"2f20798ce521cc594d510d4e417e76d5eac04d4b": "1626729019 +0200", "2f20798ce521cc594d510d4e417e76d5eac04d4b": "1626729019 +0200",
# rust-csv
"1c9d3aab6f79a7d815c69f925a46a4590c115f90": "1654675287 +1000",
} }
import glob import glob

View file

@ -38,3 +38,6 @@ exporting-processed-media-files =
[one] Processed { $count } media file... [one] Processed { $count } media file...
*[other] Processed { $count } media files... *[other] Processed { $count } media files...
} }
exporting-include-deck = Include deck name
exporting-include-notetype = Include notetype name
exporting-include-guid = Include unique note identifier

View file

@ -9,7 +9,6 @@ importing-appeared-twice-in-file = Appeared twice in file: { $val }
importing-by-default-anki-will-detect-the = By default, Anki will detect the character between fields, such as a tab, comma, and so on. If Anki is detecting the character incorrectly, you can enter it here. Use \t to represent tab. importing-by-default-anki-will-detect-the = By default, Anki will detect the character between fields, such as a tab, comma, and so on. If Anki is detecting the character incorrectly, you can enter it here. Use \t to represent tab.
importing-change = Change importing-change = Change
importing-colon = Colon importing-colon = Colon
importing-column = Column { $val }
importing-comma = Comma importing-comma = Comma
importing-empty-first-field = Empty first field: { $val } importing-empty-first-field = Empty first field: { $val }
importing-field-separator = Field separator importing-field-separator = Field separator
@ -108,3 +107,4 @@ importing-preserve = Preserve
importing-update = Update importing-update = Update
importing-tag-all-notes = Tag all notes importing-tag-all-notes = Tag all notes
importing-tag-updated-notes = Tag updated notes importing-tag-updated-notes = Tag updated notes
importing-file = File

View file

@ -119,6 +119,7 @@ message CsvMetadataRequest {
string path = 1; string path = 1;
optional CsvMetadata.Delimiter delimiter = 2; optional CsvMetadata.Delimiter delimiter = 2;
optional int64 notetype_id = 3; optional int64 notetype_id = 3;
optional bool is_html = 4;
} }
// Column indices are 1-based to make working with them in TS easier, where // Column indices are 1-based to make working with them in TS easier, where
@ -163,6 +164,8 @@ message CsvMetadata {
uint32 tags_column = 10; uint32 tags_column = 10;
bool force_delimiter = 11; bool force_delimiter = 11;
bool force_is_html = 12; bool force_is_html = 12;
repeated generic.StringList preview = 13;
uint32 guid_column = 14;
} }
message ExportCardCsvRequest { message ExportCardCsvRequest {
@ -175,7 +178,10 @@ message ExportNoteCsvRequest {
string out_path = 1; string out_path = 1;
bool with_html = 2; bool with_html = 2;
bool with_tags = 3; bool with_tags = 3;
ExportLimit limit = 4; bool with_deck = 4;
bool with_notetype = 5;
bool with_guid = 6;
ExportLimit limit = 7;
} }
message ExportLimit { message ExportLimit {

View file

@ -423,11 +423,17 @@ class Collection(DeprecatedNamesMixin):
limit: ExportLimit, limit: ExportLimit,
with_html: bool, with_html: bool,
with_tags: bool, with_tags: bool,
with_deck: bool,
with_notetype: bool,
with_guid: bool,
) -> int: ) -> int:
return self._backend.export_note_csv( return self._backend.export_note_csv(
out_path=out_path, out_path=out_path,
with_html=with_html, with_html=with_html,
with_tags=with_tags, with_tags=with_tags,
with_deck=with_deck,
with_notetype=with_notetype,
with_guid=with_guid,
limit=pb_export_limit(limit), limit=pb_export_limit(limit),
) )

View file

@ -98,6 +98,10 @@ class ExportDialog(QDialog):
self.frm.includeHTML.setVisible(False) self.frm.includeHTML.setVisible(False)
# show deck list? # show deck list?
self.frm.deck.setVisible(not self.isVerbatim) self.frm.deck.setVisible(not self.isVerbatim)
# used by the new export screen
self.frm.includeDeck.setVisible(False)
self.frm.includeNotetype.setVisible(False)
self.frm.includeGuid.setVisible(False)
def accept(self) -> None: def accept(self) -> None:
self.exporter.includeSched = self.frm.includeSched.isChecked() self.exporter.includeSched = self.frm.includeSched.isChecked()

View file

@ -6,8 +6,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>563</width> <width>610</width>
<height>245</height> <height>348</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -77,6 +77,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="includeHTML">
<property name="text">
<string>exporting_include_html_and_media_references</string>
</property>
</widget>
</item>
<item> <item>
<widget class="QCheckBox" name="includeTags"> <widget class="QCheckBox" name="includeTags">
<property name="text"> <property name="text">
@ -88,9 +95,29 @@
</widget> </widget>
</item> </item>
<item> <item>
<widget class="QCheckBox" name="includeHTML"> <widget class="QCheckBox" name="includeDeck">
<property name="enabled">
<bool>true</bool>
</property>
<property name="text"> <property name="text">
<string>exporting_include_html_and_media_references</string> <string>exporting_include_deck</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeNotetype">
<property name="enabled">
<bool>true</bool>
</property>
<property name="text">
<string>exporting_include_notetype</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeGuid">
<property name="text">
<string>exporting_include_guid</string>
</property> </property>
</widget> </widget>
</item> </item>

View file

@ -91,6 +91,9 @@ class ExportDialog(QDialog):
self.frm.includeMedia.setVisible(self.exporter.show_include_media) self.frm.includeMedia.setVisible(self.exporter.show_include_media)
self.frm.includeTags.setVisible(self.exporter.show_include_tags) self.frm.includeTags.setVisible(self.exporter.show_include_tags)
self.frm.includeHTML.setVisible(self.exporter.show_include_html) self.frm.includeHTML.setVisible(self.exporter.show_include_html)
self.frm.includeDeck.setVisible(self.exporter.show_include_deck)
self.frm.includeNotetype.setVisible(self.exporter.show_include_notetype)
self.frm.includeGuid.setVisible(self.exporter.show_include_guid)
self.frm.legacy_support.setVisible(self.exporter.show_legacy_support) self.frm.legacy_support.setVisible(self.exporter.show_legacy_support)
self.frm.deck.setVisible(self.exporter.show_deck_list) self.frm.deck.setVisible(self.exporter.show_deck_list)
@ -135,6 +138,9 @@ class ExportDialog(QDialog):
include_media=self.frm.includeMedia.isChecked(), include_media=self.frm.includeMedia.isChecked(),
include_tags=self.frm.includeTags.isChecked(), include_tags=self.frm.includeTags.isChecked(),
include_html=self.frm.includeHTML.isChecked(), include_html=self.frm.includeHTML.isChecked(),
include_deck=self.frm.includeDeck.isChecked(),
include_notetype=self.frm.includeNotetype.isChecked(),
include_guid=self.frm.includeGuid.isChecked(),
legacy_support=self.frm.legacy_support.isChecked(), legacy_support=self.frm.legacy_support.isChecked(),
limit=limit, limit=limit,
) )
@ -165,6 +171,9 @@ class Options:
include_media: bool include_media: bool
include_tags: bool include_tags: bool
include_html: bool include_html: bool
include_deck: bool
include_notetype: bool
include_guid: bool
legacy_support: bool legacy_support: bool
limit: ExportLimit limit: ExportLimit
@ -177,6 +186,9 @@ class Exporter(ABC):
show_include_tags = False show_include_tags = False
show_include_html = False show_include_html = False
show_legacy_support = False show_legacy_support = False
show_include_deck = False
show_include_notetype = False
show_include_guid = False
@staticmethod @staticmethod
@abstractmethod @abstractmethod
@ -255,6 +267,9 @@ class NoteCsvExporter(Exporter):
show_deck_list = True show_deck_list = True
show_include_html = True show_include_html = True
show_include_tags = True show_include_tags = True
show_include_deck = True
show_include_notetype = True
show_include_guid = True
@staticmethod @staticmethod
def name() -> str: def name() -> str:
@ -269,6 +284,9 @@ class NoteCsvExporter(Exporter):
limit=options.limit, limit=options.limit,
with_html=options.include_html, with_html=options.include_html,
with_tags=options.include_tags, with_tags=options.include_tags,
with_deck=options.include_deck,
with_notetype=options.include_notetype,
with_guid=options.include_guid,
), ),
success=lambda count: tooltip( success=lambda count: tooltip(
tr.exporting_note_exported(count=count), parent=mw tr.exporting_note_exported(count=count), parent=mw

View file

@ -100,4 +100,4 @@ unic-ucd-category = "0.9.0"
id_tree = "1.8.0" id_tree = "1.8.0"
zstd = { version="0.10.0", features=["zstdmt"] } zstd = { version="0.10.0", features=["zstdmt"] }
num_cpus = "1.13.1" num_cpus = "1.13.1"
csv = "1.1.6" csv = { git="https://github.com/ankitects/rust-csv.git", rev="1c9d3aab6f79a7d815c69f925a46a4590c115f90" }

View file

@ -75,7 +75,12 @@ impl ImportExportService for Backend {
fn get_csv_metadata(&self, input: pb::CsvMetadataRequest) -> Result<pb::CsvMetadata> { fn get_csv_metadata(&self, input: pb::CsvMetadataRequest) -> Result<pb::CsvMetadata> {
let delimiter = input.delimiter.is_some().then(|| input.delimiter()); let delimiter = input.delimiter.is_some().then(|| input.delimiter());
self.with_col(|col| { self.with_col(|col| {
col.get_csv_metadata(&input.path, delimiter, input.notetype_id.map(Into::into)) col.get_csv_metadata(
&input.path,
delimiter,
input.notetype_id.map(Into::into),
input.is_html,
)
}) })
} }
@ -93,15 +98,7 @@ impl ImportExportService for Backend {
} }
fn export_note_csv(&self, input: pb::ExportNoteCsvRequest) -> Result<pb::UInt32> { fn export_note_csv(&self, input: pb::ExportNoteCsvRequest) -> Result<pb::UInt32> {
self.with_col(|col| { self.with_col(|col| col.export_note_csv(input, self.export_progress_fn()))
col.export_note_csv(
&input.out_path,
SearchNode::from(input.limit.unwrap_or_default()),
input.with_html,
input.with_tags,
self.export_progress_fn(),
)
})
.map(Into::into) .map(Into::into)
} }

View file

@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, fs::File, io::Write}; use std::{borrow::Cow, collections::HashMap, fs::File, io::Write, sync::Arc};
use itertools::Itertools; use itertools::Itertools;
use lazy_static::lazy_static; use lazy_static::lazy_static;
@ -9,10 +9,11 @@ use regex::Regex;
use super::metadata::Delimiter; use super::metadata::Delimiter;
use crate::{ use crate::{
backend_proto::ExportNoteCsvRequest,
import_export::{ExportProgress, IncrementableProgress}, import_export::{ExportProgress, IncrementableProgress},
notetype::RenderCardOutput, notetype::RenderCardOutput,
prelude::*, prelude::*,
search::SortMode, search::{SearchNode, SortMode},
template::RenderedNode, template::RenderedNode,
text::{html_to_text_line, CowMapping}, text::{html_to_text_line, CowMapping},
}; };
@ -31,7 +32,7 @@ impl Collection {
progress.call(ExportProgress::File)?; progress.call(ExportProgress::File)?;
let mut incrementor = progress.incrementor(ExportProgress::Cards); let mut incrementor = progress.incrementor(ExportProgress::Cards);
let mut writer = file_writer_with_header(path)?; let mut writer = file_writer_with_header(path, with_html)?;
let mut cards = self.search_cards(search, SortMode::NoOrder)?; let mut cards = self.search_cards(search, SortMode::NoOrder)?;
cards.sort_unstable(); cards.sort_unstable();
for &card in &cards { for &card in &cards {
@ -45,21 +46,19 @@ impl Collection {
pub fn export_note_csv( pub fn export_note_csv(
&mut self, &mut self,
path: &str, mut request: ExportNoteCsvRequest,
search: impl TryIntoSearch,
with_html: bool,
with_tags: bool,
progress_fn: impl 'static + FnMut(ExportProgress, bool) -> bool, progress_fn: impl 'static + FnMut(ExportProgress, bool) -> bool,
) -> Result<usize> { ) -> Result<usize> {
let mut progress = IncrementableProgress::new(progress_fn); let mut progress = IncrementableProgress::new(progress_fn);
progress.call(ExportProgress::File)?; progress.call(ExportProgress::File)?;
let mut incrementor = progress.incrementor(ExportProgress::Notes); let mut incrementor = progress.incrementor(ExportProgress::Notes);
let mut writer = file_writer_with_header(path)?; self.search_notes_into_table(request.search_node())?;
self.search_notes_into_table(search)?; let ctx = NoteContext::new(&request, self)?;
let mut writer = note_file_writer_with_header(&request.out_path, &ctx)?;
self.storage.for_each_note_in_search(|note| { self.storage.for_each_note_in_search(|note| {
incrementor.increment()?; incrementor.increment()?;
writer.write_record(note_record(&note, with_html, with_tags))?; writer.write_record(ctx.record(&note))?;
Ok(()) Ok(())
})?; })?;
writer.flush()?; writer.flush()?;
@ -77,17 +76,46 @@ impl Collection {
} }
} }
fn file_writer_with_header(path: &str) -> Result<csv::Writer<File>> { fn file_writer_with_header(path: &str, with_html: bool) -> Result<csv::Writer<File>> {
let mut file = File::create(path)?; let mut file = File::create(path)?;
write_header(&mut file)?; write_file_header(&mut file, with_html)?;
Ok(csv::WriterBuilder::new() Ok(csv::WriterBuilder::new()
.delimiter(DELIMITER.byte()) .delimiter(DELIMITER.byte())
.flexible(true) .comment(Some(b'#'))
.from_writer(file)) .from_writer(file))
} }
fn write_header(writer: &mut impl Write) -> Result<()> { fn write_file_header(writer: &mut impl Write, with_html: bool) -> Result<()> {
write!(writer, "#separator:{}\n#html:true\n", DELIMITER.name())?; writeln!(writer, "#separator:{}", DELIMITER.name())?;
writeln!(writer, "#html:{with_html}")?;
Ok(())
}
fn note_file_writer_with_header(path: &str, ctx: &NoteContext) -> Result<csv::Writer<File>> {
let mut file = File::create(path)?;
write_note_file_header(&mut file, ctx)?;
Ok(csv::WriterBuilder::new()
.delimiter(DELIMITER.byte())
.comment(Some(b'#'))
.from_writer(file))
}
fn write_note_file_header(writer: &mut impl Write, ctx: &NoteContext) -> Result<()> {
write_file_header(writer, ctx.with_html)?;
write_column_header(ctx, writer)
}
fn write_column_header(ctx: &NoteContext, writer: &mut impl Write) -> Result<()> {
for (name, column) in [
("guid", ctx.guid_column()),
("notetype", ctx.notetype_column()),
("deck", ctx.deck_column()),
("tags", ctx.tags_column()),
] {
if let Some(index) = column {
writeln!(writer, "#{name} column:{index}")?;
}
}
Ok(()) Ok(())
} }
@ -117,24 +145,12 @@ fn rendered_nodes_to_str(nodes: &[RenderedNode]) -> String {
.join("") .join("")
} }
fn note_record(note: &Note, with_html: bool, with_tags: bool) -> Vec<String> { fn field_to_record_field(field: &str, with_html: bool) -> Cow<str> {
let mut fields: Vec<_> = note
.fields()
.iter()
.map(|f| field_to_record_field(f, with_html))
.collect();
if with_tags {
fields.push(note.tags.join(" "));
}
fields
}
fn field_to_record_field(field: &str, with_html: bool) -> String {
let mut text = strip_redundant_sections(field); let mut text = strip_redundant_sections(field);
if !with_html { if !with_html {
text = text.map_cow(|t| html_to_text_line(t, false)); text = text.map_cow(|t| html_to_text_line(t, false));
} }
text.into() text
} }
fn strip_redundant_sections(text: &str) -> Cow<str> { fn strip_redundant_sections(text: &str) -> Cow<str> {
@ -157,3 +173,110 @@ fn strip_answer_side_question(text: &str) -> Cow<str> {
} }
RE.replace_all(text.as_ref(), "") RE.replace_all(text.as_ref(), "")
} }
struct NoteContext {
with_html: bool,
with_tags: bool,
with_deck: bool,
with_notetype: bool,
with_guid: bool,
notetypes: HashMap<NotetypeId, Arc<Notetype>>,
deck_ids: HashMap<NoteId, DeckId>,
deck_names: HashMap<DeckId, String>,
field_columns: usize,
}
impl NoteContext {
/// Caller must have searched notes into table.
fn new(request: &ExportNoteCsvRequest, col: &mut Collection) -> Result<Self> {
let notetypes = col.get_all_notetypes_of_search_notes()?;
let field_columns = notetypes
.values()
.map(|nt| nt.fields.len())
.max()
.unwrap_or_default();
let deck_ids = col.storage.all_decks_of_search_notes()?;
let deck_names = HashMap::from_iter(col.storage.get_all_deck_names()?.into_iter());
Ok(Self {
with_html: request.with_html,
with_tags: request.with_tags,
with_deck: request.with_deck,
with_notetype: request.with_notetype,
with_guid: request.with_guid,
notetypes,
field_columns,
deck_ids,
deck_names,
})
}
fn guid_column(&self) -> Option<usize> {
self.with_guid.then(|| 1)
}
fn notetype_column(&self) -> Option<usize> {
self.with_notetype
.then(|| 1 + self.guid_column().unwrap_or_default())
}
fn deck_column(&self) -> Option<usize> {
self.with_deck
.then(|| 1 + self.notetype_column().unwrap_or_default())
}
fn tags_column(&self) -> Option<usize> {
self.with_tags
.then(|| 1 + self.deck_column().unwrap_or_default() + self.field_columns)
}
fn record<'c, 's: 'c, 'n: 'c>(&'s self, note: &'n Note) -> impl Iterator<Item = Cow<'c, [u8]>> {
self.with_guid
.then(|| Cow::from(note.guid.as_bytes()))
.into_iter()
.chain(self.notetype_name(note).into_iter())
.chain(self.deck_name(note).into_iter())
.chain(self.note_fields(note))
.chain(self.tags(note).into_iter())
}
fn notetype_name(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_notetype.then(|| {
self.notetypes
.get(&note.notetype_id)
.map_or(Cow::from(vec![]), |nt| Cow::from(nt.name.as_bytes()))
})
}
fn deck_name(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_deck.then(|| {
self.deck_ids
.get(&note.id)
.and_then(|did| self.deck_names.get(did))
.map_or(Cow::from(vec![]), |name| Cow::from(name.as_bytes()))
})
}
fn tags(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_tags
.then(|| Cow::from(note.tags.join(" ").into_bytes()))
}
fn note_fields<'n>(&self, note: &'n Note) -> impl Iterator<Item = Cow<'n, [u8]>> {
let with_html = self.with_html;
note.fields()
.iter()
.map(move |f| field_to_record_field(f, with_html))
.pad_using(self.field_columns, |_| Cow::from(""))
.map(|cow| match cow {
Cow::Borrowed(s) => Cow::from(s.as_bytes()),
Cow::Owned(s) => Cow::from(s.into_bytes()),
})
}
}
impl ExportNoteCsvRequest {
fn search_node(&mut self) -> SearchNode {
SearchNode::from(self.limit.take().unwrap_or_default())
}
}

View file

@ -113,6 +113,7 @@ type FieldSourceColumns = Vec<Option<usize>>;
// Column indices are 1-based. // Column indices are 1-based.
struct ColumnContext { struct ColumnContext {
tags_column: Option<usize>, tags_column: Option<usize>,
guid_column: Option<usize>,
deck_column: Option<usize>, deck_column: Option<usize>,
notetype_column: Option<usize>, notetype_column: Option<usize>,
/// Source column indices for the fields of a notetype, identified by its /// Source column indices for the fields of a notetype, identified by its
@ -126,6 +127,7 @@ impl ColumnContext {
fn new(metadata: &CsvMetadata) -> Result<Self> { fn new(metadata: &CsvMetadata) -> Result<Self> {
Ok(Self { Ok(Self {
tags_column: (metadata.tags_column > 0).then(|| metadata.tags_column as usize), tags_column: (metadata.tags_column > 0).then(|| metadata.tags_column as usize),
guid_column: (metadata.guid_column > 0).then(|| metadata.guid_column as usize),
deck_column: metadata.deck()?.column(), deck_column: metadata.deck()?.column(),
notetype_column: metadata.notetype()?.column(), notetype_column: metadata.notetype()?.column(),
field_source_columns: metadata.field_source_columns()?, field_source_columns: metadata.field_source_columns()?,
@ -135,16 +137,10 @@ impl ColumnContext {
fn deserialize_csv( fn deserialize_csv(
&mut self, &mut self,
mut reader: impl Read + Seek, reader: impl Read + Seek,
delimiter: Delimiter, delimiter: Delimiter,
) -> Result<Vec<ForeignNote>> { ) -> Result<Vec<ForeignNote>> {
remove_tags_line_from_reader(&mut reader)?; let mut csv_reader = build_csv_reader(reader, delimiter)?;
let mut csv_reader = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.comment(Some(b'#'))
.delimiter(delimiter.byte())
.from_reader(reader);
self.deserialize_csv_reader(&mut csv_reader) self.deserialize_csv_reader(&mut csv_reader)
} }
@ -162,34 +158,17 @@ impl ColumnContext {
.collect() .collect()
} }
fn foreign_note_from_record(&mut self, record: &csv::StringRecord) -> ForeignNote { fn foreign_note_from_record(&self, record: &csv::StringRecord) -> ForeignNote {
let notetype = self.gather_notetype(record).into();
let deck = self.gather_deck(record).into();
let tags = self.gather_tags(record);
let fields = self.gather_note_fields(record);
ForeignNote { ForeignNote {
notetype, notetype: str_from_record_column(self.notetype_column, record).into(),
fields, fields: self.gather_note_fields(record),
tags, tags: self.gather_tags(record),
deck, deck: str_from_record_column(self.deck_column, record).into(),
guid: str_from_record_column(self.guid_column, record),
..Default::default() ..Default::default()
} }
} }
fn gather_notetype(&self, record: &csv::StringRecord) -> String {
self.notetype_column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
fn gather_deck(&self, record: &csv::StringRecord) -> String {
self.deck_column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
fn gather_tags(&self, record: &csv::StringRecord) -> Vec<String> { fn gather_tags(&self, record: &csv::StringRecord) -> Vec<String> {
self.tags_column self.tags_column
.and_then(|i| record.get(i - 1)) .and_then(|i| record.get(i - 1))
@ -200,7 +179,7 @@ impl ColumnContext {
.collect() .collect()
} }
fn gather_note_fields(&mut self, record: &csv::StringRecord) -> Vec<String> { fn gather_note_fields(&self, record: &csv::StringRecord) -> Vec<String> {
let stringify = self.stringify; let stringify = self.stringify;
self.field_source_columns self.field_source_columns
.iter() .iter()
@ -210,6 +189,26 @@ impl ColumnContext {
} }
} }
fn str_from_record_column(column: Option<usize>, record: &csv::StringRecord) -> String {
column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
pub(super) fn build_csv_reader(
mut reader: impl Read + Seek,
delimiter: Delimiter,
) -> Result<csv::Reader<impl Read + Seek>> {
remove_tags_line_from_reader(&mut reader)?;
Ok(csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.comment(Some(b'#'))
.delimiter(delimiter.byte())
.from_reader(reader))
}
fn stringify_fn(is_html: bool) -> fn(&str) -> String { fn stringify_fn(is_html: bool) -> fn(&str) -> String {
if is_html { if is_html {
ToString::to_string ToString::to_string
@ -267,6 +266,7 @@ mod test {
is_html: false, is_html: false,
force_is_html: false, force_is_html: false,
tags_column: 0, tags_column: 0,
guid_column: 0,
global_tags: Vec::new(), global_tags: Vec::new(),
updated_tags: Vec::new(), updated_tags: Vec::new(),
column_labels: vec!["".to_string(); 2], column_labels: vec!["".to_string(); 2],
@ -275,6 +275,7 @@ mod test {
id: 1, id: 1,
field_columns: vec![1, 2], field_columns: vec![1, 2],
})), })),
preview: Vec::new(),
} }
} }
} }

View file

@ -4,65 +4,81 @@
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
fs::File, fs::File,
io::{BufRead, BufReader}, io::{BufRead, BufReader, Read, Seek, SeekFrom},
}; };
use itertools::Itertools;
use strum::IntoEnumIterator; use strum::IntoEnumIterator;
use super::import::build_csv_reader;
pub use crate::backend_proto::import_export::{ pub use crate::backend_proto::import_export::{
csv_metadata::{Deck as CsvDeck, Delimiter, MappedNotetype, Notetype as CsvNotetype}, csv_metadata::{Deck as CsvDeck, Delimiter, MappedNotetype, Notetype as CsvNotetype},
CsvMetadata, CsvMetadata,
}; };
use crate::{ use crate::{
error::ImportError, import_export::text::NameOrId, notetype::NoteField, prelude::*, backend_proto::StringList,
text::is_html, error::ImportError,
import_export::text::NameOrId,
notetype::NoteField,
prelude::*,
text::{html_to_text_line, is_html},
}; };
/// The maximum number of preview rows.
const PREVIEW_LENGTH: usize = 5;
/// The maximum number of characters per preview field.
const PREVIEW_FIELD_LENGTH: usize = 80;
impl Collection { impl Collection {
pub fn get_csv_metadata( pub fn get_csv_metadata(
&mut self, &mut self,
path: &str, path: &str,
delimiter: Option<Delimiter>, delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>, notetype_id: Option<NotetypeId>,
is_html: Option<bool>,
) -> Result<CsvMetadata> { ) -> Result<CsvMetadata> {
let reader = BufReader::new(File::open(path)?); let mut reader = File::open(path)?;
self.get_reader_metadata(reader, delimiter, notetype_id) self.get_reader_metadata(&mut reader, delimiter, notetype_id, is_html)
} }
fn get_reader_metadata( fn get_reader_metadata(
&mut self, &mut self,
reader: impl BufRead, mut reader: impl Read + Seek,
delimiter: Option<Delimiter>, delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>, notetype_id: Option<NotetypeId>,
is_html: Option<bool>,
) -> Result<CsvMetadata> { ) -> Result<CsvMetadata> {
let mut metadata = CsvMetadata::default(); let mut metadata = CsvMetadata::default();
let line = self.parse_meta_lines(reader, &mut metadata)?; let meta_len = self.parse_meta_lines(&mut reader, &mut metadata)? as u64;
maybe_set_fallback_delimiter(delimiter, &mut metadata, &line); maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader, meta_len)?;
maybe_set_fallback_columns(&mut metadata, &line)?; let records = collect_preview_records(&mut metadata, reader)?;
maybe_set_fallback_is_html(&mut metadata, &line)?; maybe_set_fallback_is_html(&mut metadata, &records, is_html)?;
set_preview(&mut metadata, &records)?;
maybe_set_fallback_columns(&mut metadata)?;
self.maybe_set_fallback_notetype(&mut metadata, notetype_id)?; self.maybe_set_fallback_notetype(&mut metadata, notetype_id)?;
self.maybe_init_notetype_map(&mut metadata)?; self.maybe_init_notetype_map(&mut metadata)?;
self.maybe_set_fallback_deck(&mut metadata)?; self.maybe_set_fallback_deck(&mut metadata)?;
Ok(metadata) Ok(metadata)
} }
/// Parses the meta head of the file, and returns the first content line. /// Parses the meta head of the file and returns the total of meta bytes.
fn parse_meta_lines( fn parse_meta_lines(&mut self, reader: impl Read, metadata: &mut CsvMetadata) -> Result<usize> {
&mut self, let mut meta_len = 0;
mut reader: impl BufRead, let mut reader = BufReader::new(reader);
metadata: &mut CsvMetadata,
) -> Result<String> {
let mut line = String::new(); let mut line = String::new();
reader.read_line(&mut line)?; let mut line_len = reader.read_line(&mut line)?;
if self.parse_first_line(&line, metadata) { if self.parse_first_line(&line, metadata) {
meta_len += line_len;
line.clear(); line.clear();
reader.read_line(&mut line)?; line_len = reader.read_line(&mut line)?;
while self.parse_line(&line, metadata) { while self.parse_line(&line, metadata) {
meta_len += line_len;
line.clear(); line.clear();
reader.read_line(&mut line)?; line_len = reader.read_line(&mut line)?;
} }
} }
Ok(line) Ok(meta_len)
} }
/// True if the line is a meta line, i.e. a comment, or starting with 'tags:'. /// True if the line is a meta line, i.e. a comment, or starting with 'tags:'.
@ -103,7 +119,7 @@ impl Collection {
} }
"tags" => metadata.global_tags = collect_tags(value), "tags" => metadata.global_tags = collect_tags(value),
"columns" => { "columns" => {
if let Ok(columns) = self.parse_columns(value, metadata) { if let Ok(columns) = parse_columns(value, metadata.delimiter()) {
metadata.column_labels = columns; metadata.column_labels = columns;
} }
} }
@ -127,21 +143,20 @@ impl Collection {
metadata.deck = Some(CsvDeck::DeckColumn(n)); metadata.deck = Some(CsvDeck::DeckColumn(n));
} }
} }
"tags column" => {
if let Ok(n) = value.trim().parse() {
metadata.tags_column = n;
}
}
"guid column" => {
if let Ok(n) = value.trim().parse() {
metadata.guid_column = n;
}
}
_ => (), _ => (),
} }
} }
fn parse_columns(&mut self, line: &str, metadata: &mut CsvMetadata) -> Result<Vec<String>> {
let delimiter = if metadata.force_delimiter {
metadata.delimiter()
} else {
delimiter_from_line(line)
};
map_single_record(line, delimiter, |record| {
record.iter().map(ToString::to_string).collect()
})
}
fn maybe_set_fallback_notetype( fn maybe_set_fallback_notetype(
&mut self, &mut self,
metadata: &mut CsvMetadata, metadata: &mut CsvMetadata,
@ -161,7 +176,15 @@ impl Collection {
metadata metadata
.notetype_id() .notetype_id()
.and_then(|ntid| self.default_deck_for_notetype(ntid).transpose()) .and_then(|ntid| self.default_deck_for_notetype(ntid).transpose())
.unwrap_or_else(|| self.get_current_deck().map(|d| d.id))? .unwrap_or_else(|| {
self.get_current_deck().map(|deck| {
if deck.is_filtered() {
DeckId(1)
} else {
deck.id
}
})
})?
.0, .0,
)); ));
} }
@ -205,6 +228,61 @@ impl Collection {
} }
} }
fn parse_columns(line: &str, delimiter: Delimiter) -> Result<Vec<String>> {
map_single_record(line, delimiter, |record| {
record.iter().map(ToString::to_string).collect()
})
}
fn collect_preview_records(
metadata: &mut CsvMetadata,
mut reader: impl Read + Seek,
) -> Result<Vec<csv::StringRecord>> {
reader.rewind()?;
let mut csv_reader = build_csv_reader(reader, metadata.delimiter())?;
csv_reader
.records()
.into_iter()
.take(PREVIEW_LENGTH)
.collect::<csv::Result<_>>()
.map_err(Into::into)
}
fn set_preview(metadata: &mut CsvMetadata, records: &[csv::StringRecord]) -> Result<()> {
let mut min_len = 1;
metadata.preview = records
.iter()
.enumerate()
.map(|(idx, record)| {
let row = build_preview_row(min_len, record, metadata.is_html);
if idx == 0 {
min_len = row.vals.len();
}
row
})
.collect();
Ok(())
}
fn build_preview_row(min_len: usize, record: &csv::StringRecord, strip_html: bool) -> StringList {
StringList {
vals: record
.iter()
.pad_using(min_len, |_| "")
.map(|field| {
if strip_html {
html_to_text_line(field, true)
.chars()
.take(PREVIEW_FIELD_LENGTH)
.collect()
} else {
field.chars().take(PREVIEW_FIELD_LENGTH).collect()
}
})
.collect(),
}
}
pub(super) fn collect_tags(txt: &str) -> Vec<String> { pub(super) fn collect_tags(txt: &str) -> Vec<String> {
txt.split_whitespace() txt.split_whitespace()
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
@ -263,20 +341,23 @@ fn ensure_first_field_is_mapped(
Ok(()) Ok(())
} }
fn maybe_set_fallback_columns(metadata: &mut CsvMetadata, line: &str) -> Result<()> { fn maybe_set_fallback_columns(metadata: &mut CsvMetadata) -> Result<()> {
if metadata.column_labels.is_empty() { if metadata.column_labels.is_empty() {
let columns = map_single_record(line, metadata.delimiter(), |r| r.len())?; metadata.column_labels =
metadata.column_labels = vec![String::new(); columns]; vec![String::new(); metadata.preview.get(0).map_or(0, |row| row.vals.len())];
} }
Ok(()) Ok(())
} }
fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result<()> { fn maybe_set_fallback_is_html(
// TODO: should probably check more than one line; can reuse preview lines metadata: &mut CsvMetadata,
// when it's implemented records: &[csv::StringRecord],
if !metadata.force_is_html { is_html_option: Option<bool>,
metadata.is_html = ) -> Result<()> {
map_single_record(line, metadata.delimiter(), |r| r.iter().any(is_html))?; if let Some(is_html) = is_html_option {
metadata.is_html = is_html;
} else if !metadata.force_is_html {
metadata.is_html = records.iter().flat_map(|record| record.iter()).any(is_html);
} }
Ok(()) Ok(())
} }
@ -284,13 +365,16 @@ fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result<
fn maybe_set_fallback_delimiter( fn maybe_set_fallback_delimiter(
delimiter: Option<Delimiter>, delimiter: Option<Delimiter>,
metadata: &mut CsvMetadata, metadata: &mut CsvMetadata,
line: &str, mut reader: impl Read + Seek,
) { meta_len: u64,
) -> Result<()> {
if let Some(delim) = delimiter { if let Some(delim) = delimiter {
metadata.set_delimiter(delim); metadata.set_delimiter(delim);
} else if !metadata.force_delimiter { } else if !metadata.force_delimiter {
metadata.set_delimiter(delimiter_from_line(line)); reader.seek(SeekFrom::Start(meta_len))?;
metadata.set_delimiter(delimiter_from_reader(reader)?);
} }
Ok(())
} }
fn delimiter_from_value(value: &str) -> Option<Delimiter> { fn delimiter_from_value(value: &str) -> Option<Delimiter> {
@ -303,14 +387,16 @@ fn delimiter_from_value(value: &str) -> Option<Delimiter> {
None None
} }
fn delimiter_from_line(line: &str) -> Delimiter { fn delimiter_from_reader(mut reader: impl Read) -> Result<Delimiter> {
let mut buf = [0; 8 * 1024];
let _ = reader.read(&mut buf)?;
// TODO: use smarter heuristic // TODO: use smarter heuristic
for delimiter in Delimiter::iter() { for delimiter in Delimiter::iter() {
if line.contains(delimiter.byte() as char) { if buf.contains(&delimiter.byte()) {
return delimiter; return Ok(delimiter);
} }
} }
Delimiter::Space Ok(Delimiter::Space)
} }
fn map_single_record<T>( fn map_single_record<T>(
@ -384,6 +470,9 @@ impl CsvMetadata {
if self.tags_column > 0 { if self.tags_column > 0 {
columns.insert(self.tags_column as usize); columns.insert(self.tags_column as usize);
} }
if self.guid_column > 0 {
columns.insert(self.guid_column as usize);
}
columns columns
} }
} }
@ -398,8 +487,18 @@ impl NameOrId {
} }
} }
impl From<csv::StringRecord> for StringList {
fn from(record: csv::StringRecord) -> Self {
Self {
vals: record.iter().map(ToString::to_string).collect(),
}
}
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::io::Cursor;
use super::*; use super::*;
use crate::collection::open_test_collection; use crate::collection::open_test_collection;
@ -408,7 +507,7 @@ mod test {
metadata!($col, $csv, None) metadata!($col, $csv, None)
}; };
($col:expr,$csv:expr, $delim:expr) => { ($col:expr,$csv:expr, $delim:expr) => {
$col.get_reader_metadata(BufReader::new($csv.as_bytes()), $delim, None) $col.get_reader_metadata(Cursor::new($csv.as_bytes()), $delim, None, None)
.unwrap() .unwrap()
}; };
} }
@ -561,7 +660,7 @@ mod test {
// custom names // custom names
assert_eq!( assert_eq!(
metadata!(col, "#columns:one,two\n").column_labels, metadata!(col, "#columns:one\ttwo\n").column_labels,
["one", "two"] ["one", "two"]
); );
assert_eq!( assert_eq!(
@ -570,6 +669,17 @@ mod test {
); );
} }
#[test]
fn should_detect_column_number_despite_escaped_line_breaks() {
let mut col = open_test_collection();
assert_eq!(
metadata!(col, "\"foo|\nbar\"\tfoo\tbar\n")
.column_labels
.len(),
3
);
}
impl CsvMetadata { impl CsvMetadata {
fn unwrap_notetype_map(&self) -> &[u32] { fn unwrap_notetype_map(&self) -> &[u32] {
match &self.notetype { match &self.notetype {
@ -589,7 +699,16 @@ mod test {
#[test] #[test]
fn should_map_default_notetype_fields_by_given_column_names() { fn should_map_default_notetype_fields_by_given_column_names() {
let mut col = open_test_collection(); let mut col = open_test_collection();
let meta = metadata!(col, "#columns:Back,Front\nfoo,bar,baz\n"); let meta = metadata!(col, "#columns:Back\tFront\nfoo,bar,baz\n");
assert_eq!(meta.unwrap_notetype_map(), &[2, 1]); assert_eq!(meta.unwrap_notetype_map(), &[2, 1]);
} }
#[test]
fn should_gather_first_lines_into_preview() {
let mut col = open_test_collection();
let meta = metadata!(col, "#separator: \nfoo bar\nbaz<br>\n");
assert_eq!(meta.preview[0].vals, ["foo", "bar"]);
// html is stripped
assert_eq!(meta.preview[1].vals, ["baz", ""]);
}
} }

View file

@ -1,7 +1,12 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, collections::HashMap, mem, sync::Arc}; use std::{
borrow::Cow,
collections::{HashMap, HashSet},
mem,
sync::Arc,
};
use super::NameOrId; use super::NameOrId;
use crate::{ use crate::{
@ -52,24 +57,75 @@ struct Context<'a> {
col: &'a mut Collection, col: &'a mut Collection,
/// Contains the optional default notetype with the default key. /// Contains the optional default notetype with the default key.
notetypes: HashMap<NameOrId, Option<Arc<Notetype>>>, notetypes: HashMap<NameOrId, Option<Arc<Notetype>>>,
/// Contains the optional default deck id with the default key. deck_ids: DeckIdsByNameOrId,
deck_ids: HashMap<NameOrId, Option<DeckId>>,
usn: Usn, usn: Usn,
normalize_notes: bool, normalize_notes: bool,
today: u32, today: u32,
dupe_resolution: DupeResolution, dupe_resolution: DupeResolution,
card_gen_ctxs: HashMap<(NotetypeId, DeckId), CardGenContext<Arc<Notetype>>>, card_gen_ctxs: HashMap<(NotetypeId, DeckId), CardGenContext<Arc<Notetype>>>,
existing_notes: HashMap<(NotetypeId, u32), Vec<NoteId>>, existing_checksums: HashMap<(NotetypeId, u32), Vec<NoteId>>,
existing_guids: HashMap<String, NoteId>,
}
struct DeckIdsByNameOrId {
ids: HashSet<DeckId>,
names: HashMap<String, DeckId>,
default: Option<DeckId>,
} }
struct NoteContext { struct NoteContext {
/// Prepared and with canonified tags.
note: Note, note: Note,
dupes: Vec<Note>, dupes: Vec<Duplicate>,
cards: Vec<Card>, cards: Vec<Card>,
notetype: Arc<Notetype>, notetype: Arc<Notetype>,
deck_id: DeckId, deck_id: DeckId,
} }
struct Duplicate {
note: Note,
identical: bool,
first_field_match: bool,
}
impl Duplicate {
fn new(dupe: Note, original: &Note, first_field_match: bool) -> Self {
let identical = dupe.equal_fields_and_tags(original);
Self {
note: dupe,
identical,
first_field_match,
}
}
}
impl DeckIdsByNameOrId {
fn new(col: &mut Collection, default: &NameOrId) -> Result<Self> {
let names: HashMap<String, DeckId> = col
.get_all_normal_deck_names()?
.into_iter()
.map(|(id, name)| (name, id))
.collect();
let ids = names.values().copied().collect();
let mut new = Self {
ids,
names,
default: None,
};
new.default = new.get(default);
Ok(new)
}
fn get(&self, name_or_id: &NameOrId) -> Option<DeckId> {
match name_or_id {
_ if *name_or_id == NameOrId::default() => self.default,
NameOrId::Id(id) => self.ids.get(&DeckId(*id)).copied(),
NameOrId::Name(name) => self.names.get(name).copied(),
}
}
}
impl<'a> Context<'a> { impl<'a> Context<'a> {
fn new(data: &ForeignData, col: &'a mut Collection) -> Result<Self> { fn new(data: &ForeignData, col: &'a mut Collection) -> Result<Self> {
let usn = col.usn()?; let usn = col.usn()?;
@ -80,12 +136,10 @@ impl<'a> Context<'a> {
NameOrId::default(), NameOrId::default(),
col.notetype_by_name_or_id(&data.default_notetype)?, col.notetype_by_name_or_id(&data.default_notetype)?,
); );
let mut deck_ids = HashMap::new(); let deck_ids = DeckIdsByNameOrId::new(col, &data.default_deck)?;
deck_ids.insert( let existing_checksums = col.storage.all_notes_by_type_and_checksum()?;
NameOrId::default(), let existing_guids = col.storage.all_notes_by_guid()?;
col.deck_id_by_name_or_id(&data.default_deck)?,
);
let existing_notes = col.storage.all_notes_by_type_and_checksum()?;
Ok(Self { Ok(Self {
col, col,
usn, usn,
@ -95,7 +149,8 @@ impl<'a> Context<'a> {
notetypes, notetypes,
deck_ids, deck_ids,
card_gen_ctxs: HashMap::new(), card_gen_ctxs: HashMap::new(),
existing_notes, existing_checksums,
existing_guids,
}) })
} }
@ -119,16 +174,6 @@ impl<'a> Context<'a> {
}) })
} }
fn deck_id_for_note(&mut self, note: &ForeignNote) -> Result<Option<DeckId>> {
Ok(if let Some(did) = self.deck_ids.get(&note.deck) {
*did
} else {
let did = self.col.deck_id_by_name_or_id(&note.deck)?;
self.deck_ids.insert(note.deck.clone(), did);
did
})
}
fn import_foreign_notes( fn import_foreign_notes(
&mut self, &mut self,
notes: Vec<ForeignNote>, notes: Vec<ForeignNote>,
@ -145,7 +190,7 @@ impl<'a> Context<'a> {
continue; continue;
} }
if let Some(notetype) = self.notetype_for_note(&foreign)? { if let Some(notetype) = self.notetype_for_note(&foreign)? {
if let Some(deck_id) = self.deck_id_for_note(&foreign)? { if let Some(deck_id) = self.deck_ids.get(&foreign.deck) {
let ctx = self.build_note_context(foreign, notetype, deck_id, global_tags)?; let ctx = self.build_note_context(foreign, notetype, deck_id, global_tags)?;
self.import_note(ctx, updated_tags, &mut log)?; self.import_note(ctx, updated_tags, &mut log)?;
} else { } else {
@ -167,6 +212,7 @@ impl<'a> Context<'a> {
) -> Result<NoteContext> { ) -> Result<NoteContext> {
let (mut note, cards) = foreign.into_native(&notetype, deck_id, self.today, global_tags); let (mut note, cards) = foreign.into_native(&notetype, deck_id, self.today, global_tags);
note.prepare_for_update(&notetype, self.normalize_notes)?; note.prepare_for_update(&notetype, self.normalize_notes)?;
self.col.canonify_note_tags(&mut note, self.usn)?;
let dupes = self.find_duplicates(&notetype, &note)?; let dupes = self.find_duplicates(&notetype, &note)?;
Ok(NoteContext { Ok(NoteContext {
@ -178,14 +224,34 @@ impl<'a> Context<'a> {
}) })
} }
fn find_duplicates(&mut self, notetype: &Notetype, note: &Note) -> Result<Vec<Note>> { fn find_duplicates(&self, notetype: &Notetype, note: &Note) -> Result<Vec<Duplicate>> {
let checksum = note let checksum = note
.checksum .checksum
.ok_or_else(|| AnkiError::invalid_input("note unprepared"))?; .ok_or_else(|| AnkiError::invalid_input("note unprepared"))?;
self.existing_notes if let Some(nid) = self.existing_guids.get(&note.guid) {
.get(&(notetype.id, checksum)) self.get_guid_dupe(*nid, note).map(|dupe| vec![dupe])
.map(|dupe_ids| self.col.get_full_duplicates(note, dupe_ids)) } else if let Some(nids) = self.existing_checksums.get(&(notetype.id, checksum)) {
.unwrap_or_else(|| Ok(vec![])) self.get_first_field_dupes(note, nids)
} else {
Ok(Vec::new())
}
}
fn get_guid_dupe(&self, nid: NoteId, original: &Note) -> Result<Duplicate> {
self.col
.storage
.get_note(nid)?
.ok_or(AnkiError::NotFound)
.map(|dupe| Duplicate::new(dupe, original, false))
}
fn get_first_field_dupes(&self, note: &Note, nids: &[NoteId]) -> Result<Vec<Duplicate>> {
Ok(self
.col
.get_full_duplicates(note, nids)?
.into_iter()
.map(|dupe| Duplicate::new(dupe, note, true))
.collect())
} }
fn import_note( fn import_note(
@ -204,7 +270,6 @@ impl<'a> Context<'a> {
} }
fn add_note(&mut self, mut ctx: NoteContext, log_queue: &mut Vec<LogNote>) -> Result<()> { fn add_note(&mut self, mut ctx: NoteContext, log_queue: &mut Vec<LogNote>) -> Result<()> {
self.col.canonify_note_tags(&mut ctx.note, self.usn)?;
ctx.note.usn = self.usn; ctx.note.usn = self.usn;
self.col.add_note_only_undoable(&mut ctx.note)?; self.col.add_note_only_undoable(&mut ctx.note)?;
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype)?; self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype)?;
@ -237,28 +302,49 @@ impl<'a> Context<'a> {
} }
fn prepare_note_for_update(&mut self, note: &mut Note, updated_tags: &[String]) -> Result<()> { fn prepare_note_for_update(&mut self, note: &mut Note, updated_tags: &[String]) -> Result<()> {
if !updated_tags.is_empty() {
note.tags.extend(updated_tags.iter().cloned()); note.tags.extend(updated_tags.iter().cloned());
self.col.canonify_note_tags(note, self.usn)?; self.col.canonify_note_tags(note, self.usn)?;
}
note.set_modified(self.usn); note.set_modified(self.usn);
Ok(()) Ok(())
} }
fn maybe_update_dupe( fn maybe_update_dupe(
&mut self, &mut self,
dupe: Note, dupe: Duplicate,
ctx: &mut NoteContext, ctx: &mut NoteContext,
log: &mut NoteLog, log: &mut NoteLog,
) -> Result<()> { ) -> Result<()> {
ctx.note.id = dupe.id; if dupe.note.notetype_id != ctx.notetype.id {
if dupe.equal_fields_and_tags(&ctx.note) { log.conflicting.push(dupe.note.into_log_note());
log.duplicate.push(dupe.into_log_note()); return Ok(());
}
if dupe.identical {
log.duplicate.push(dupe.note.into_log_note());
} else { } else {
self.col.update_note_undoable(&ctx.note, &dupe)?; self.update_dupe(dupe, ctx, log)?;
log.first_field_match.push(dupe.into_log_note());
} }
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype.clone()) self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype.clone())
} }
fn update_dupe(
&mut self,
dupe: Duplicate,
ctx: &mut NoteContext,
log: &mut NoteLog,
) -> Result<()> {
ctx.note.id = dupe.note.id;
ctx.note.guid = dupe.note.guid.clone();
self.col.update_note_undoable(&ctx.note, &dupe.note)?;
if dupe.first_field_match {
log.first_field_match.push(dupe.note.into_log_note());
} else {
log.updated.push(dupe.note.into_log_note());
}
Ok(())
}
fn import_cards(&mut self, cards: &mut [Card], note_id: NoteId) -> Result<()> { fn import_cards(&mut self, cards: &mut [Card], note_id: NoteId) -> Result<()> {
for card in cards { for card in cards {
card.note_id = note_id; card.note_id = note_id;
@ -306,7 +392,7 @@ impl Collection {
} }
} }
fn get_full_duplicates(&mut self, note: &Note, dupe_ids: &[NoteId]) -> Result<Vec<Note>> { fn get_full_duplicates(&self, note: &Note, dupe_ids: &[NoteId]) -> Result<Vec<Note>> {
let first_field = note.first_field_stripped(); let first_field = note.first_field_stripped();
dupe_ids dupe_ids
.iter() .iter()
@ -329,6 +415,9 @@ impl ForeignNote {
) -> (Note, Vec<Card>) { ) -> (Note, Vec<Card>) {
// TODO: Handle new and learning cards // TODO: Handle new and learning cards
let mut note = Note::new(notetype); let mut note = Note::new(notetype);
if !self.guid.is_empty() {
note.guid = self.guid;
}
note.tags = self.tags; note.tags = self.tags;
note.tags.extend(extra_tags.iter().cloned()); note.tags.extend(extra_tags.iter().cloned());
note.fields_mut() note.fields_mut()
@ -501,4 +590,16 @@ mod test {
data.import(&mut col, |_, _| true).unwrap(); data.import(&mut col, |_, _| true).unwrap();
assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]); assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]);
} }
#[test]
fn should_match_note_with_same_guid() {
let mut col = open_test_collection();
let mut data = ForeignData::with_defaults();
data.add_note(&["foo"]);
data.notes[0].tags = vec![String::from("bar")];
data.global_tags = vec![String::from("baz")];
data.import(&mut col, |_, _| true).unwrap();
assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]);
}
} }

View file

@ -25,6 +25,7 @@ pub struct ForeignData {
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(default)] #[serde(default)]
pub struct ForeignNote { pub struct ForeignNote {
guid: String,
fields: Vec<String>, fields: Vec<String>,
tags: Vec<String>, tags: Vec<String>,
notetype: NameOrId, notetype: NameOrId,

View file

@ -218,6 +218,21 @@ impl Collection {
.collect() .collect()
} }
pub fn get_all_notetypes_of_search_notes(
&mut self,
) -> Result<HashMap<NotetypeId, Arc<Notetype>>> {
self.storage
.all_notetypes_of_search_notes()?
.into_iter()
.map(|ntid| {
self.get_notetype(ntid)
.transpose()
.unwrap()
.map(|nt| (ntid, nt))
})
.collect()
}
pub fn remove_notetype(&mut self, ntid: NotetypeId) -> Result<OpOutput<()>> { pub fn remove_notetype(&mut self, ntid: NotetypeId) -> Result<OpOutput<()>> {
self.transact(Op::RemoveNotetype, |col| col.remove_notetype_inner(ntid)) self.transact(Op::RemoveNotetype, |col| col.remove_notetype_inner(ntid))
} }

View file

@ -0,0 +1,9 @@
SELECT nid,
did
FROM cards
WHERE nid IN (
SELECT nid
FROM search_nids
)
GROUP BY nid
HAVING ord = MIN(ord)

View file

@ -131,6 +131,14 @@ impl SqliteStorage {
.collect() .collect()
} }
/// Returns the deck id of the first existing card of every searched note.
pub(crate) fn all_decks_of_search_notes(&self) -> Result<HashMap<NoteId, DeckId>> {
self.db
.prepare_cached(include_str!("all_decks_of_search_notes.sql"))?
.query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))?
.collect()
}
// caller should ensure name unique // caller should ensure name unique
pub(crate) fn add_deck(&self, deck: &mut Deck) -> Result<()> { pub(crate) fn add_deck(&self, deck: &mut Deck) -> Result<()> {
assert!(deck.id.0 == 0); assert!(deck.id.0 == 0);

View file

@ -338,6 +338,13 @@ impl super::SqliteStorage {
.collect() .collect()
} }
pub(crate) fn all_notes_by_guid(&mut self) -> Result<HashMap<String, NoteId>> {
self.db
.prepare("SELECT guid, id FROM notes")?
.query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))?
.collect()
}
#[cfg(test)] #[cfg(test)]
pub(crate) fn get_all_notes(&mut self) -> Vec<Note> { pub(crate) fn get_all_notes(&mut self) -> Vec<Note> {
self.db self.db

View file

@ -116,6 +116,15 @@ impl SqliteStorage {
.collect() .collect()
} }
pub(crate) fn all_notetypes_of_search_notes(&self) -> Result<Vec<NotetypeId>> {
self.db
.prepare_cached(
"SELECT DISTINCT mid FROM notes WHERE id IN (SELECT nid FROM search_nids)",
)?
.query_and_then([], |r| Ok(r.get(0)?))?
.collect()
}
pub fn get_all_notetype_names(&self) -> Result<Vec<(NotetypeId, String)>> { pub fn get_all_notetype_names(&self) -> Result<Vec<(NotetypeId, String)>> {
self.db self.db
.prepare_cached(include_str!("get_notetype_names.sql"))? .prepare_cached(include_str!("get_notetype_names.sql"))?

View file

@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, ptr}; use std::borrow::Cow;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use pct_str::{IriReserved, PctStr, PctString}; use pct_str::{IriReserved, PctStr, PctString};
@ -134,12 +134,8 @@ lazy_static! {
static ref PERSISTENT_HTML_SPACERS: Regex = Regex::new(r#"(?i)<br\s*/?>|<div>|\n"#).unwrap(); static ref PERSISTENT_HTML_SPACERS: Regex = Regex::new(r#"(?i)<br\s*/?>|<div>|\n"#).unwrap();
static ref UNPRINTABLE_TAGS: Regex = Regex::new( static ref TYPE_TAG: Regex = Regex::new(r"\[\[type:[^]]+\]\]").unwrap();
r"(?xs) static ref SOUND_TAG: Regex = Regex::new(r"\[sound:([^]]+)\]").unwrap();
\[sound:[^]]+\]
|
\[\[type:[^]]+\]\]
").unwrap();
/// Files included in CSS with a leading underscore. /// Files included in CSS with a leading underscore.
static ref UNDERSCORED_CSS_IMPORTS: Regex = Regex::new( static ref UNDERSCORED_CSS_IMPORTS: Regex = Regex::new(
@ -172,19 +168,21 @@ lazy_static! {
"#).unwrap(); "#).unwrap();
} }
pub fn is_html(text: &str) -> bool { pub fn is_html(text: impl AsRef<str>) -> bool {
HTML.is_match(text) HTML.is_match(text.as_ref())
} }
pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow<str> { pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow<str> {
let (html_stripper, sound_rep): (fn(&str) -> Cow<str>, _) = if preserve_media_filenames {
(strip_html_preserving_media_filenames, "$1")
} else {
(strip_html, "")
};
PERSISTENT_HTML_SPACERS PERSISTENT_HTML_SPACERS
.replace_all(html, " ") .replace_all(html, " ")
.map_cow(|s| UNPRINTABLE_TAGS.replace_all(s, "")) .map_cow(|s| TYPE_TAG.replace_all(s, ""))
.map_cow(if preserve_media_filenames { .map_cow(|s| SOUND_TAG.replace_all(s, sound_rep))
strip_html_preserving_media_filenames .map_cow(html_stripper)
} else {
strip_html
})
.trim() .trim()
} }
@ -330,16 +328,9 @@ pub(crate) fn extract_underscored_references(text: &str) -> Vec<&str> {
} }
pub fn strip_html_preserving_media_filenames(html: &str) -> Cow<str> { pub fn strip_html_preserving_media_filenames(html: &str) -> Cow<str> {
let without_fnames = HTML_MEDIA_TAGS.replace_all(html, r" ${1}${2}${3} "); HTML_MEDIA_TAGS
let without_html = strip_html(&without_fnames); .replace_all(html, r" ${1}${2}${3} ")
// no changes? .map_cow(strip_html)
if let Cow::Borrowed(b) = without_html {
if ptr::eq(b, html) {
return Cow::Borrowed(html);
}
}
// make borrow checker happy
without_html.into_owned().into()
} }
#[allow(dead_code)] #[allow(dead_code)]

View file

@ -26,6 +26,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
/> />
{/each} {/each}
{/await} {/await}
{/if}
<Spacer --height="1.5rem" /> <Spacer --height="1.5rem" />
{/if}
<MapperRow label={tr.editingTags()} {columnOptions} bind:value={tagsColumn} /> <MapperRow label={tr.editingTags()} {columnOptions} bind:value={tagsColumn} />

View file

@ -8,7 +8,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import Row from "../components/Row.svelte"; import Row from "../components/Row.svelte";
import Spacer from "../components/Spacer.svelte"; import Spacer from "../components/Spacer.svelte";
import * as tr from "../lib/ftl"; import * as tr from "../lib/ftl";
import { Decks, ImportExport, importExport, Notetypes } from "../lib/proto"; import {
Decks,
Generic,
ImportExport,
importExport,
Notetypes,
} from "../lib/proto";
import DeckSelector from "./DeckSelector.svelte"; import DeckSelector from "./DeckSelector.svelte";
import DelimiterSelector from "./DelimiterSelector.svelte"; import DelimiterSelector from "./DelimiterSelector.svelte";
import DupeResolutionSelector from "./DupeResolutionSelector.svelte"; import DupeResolutionSelector from "./DupeResolutionSelector.svelte";
@ -17,6 +23,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import HtmlSwitch from "./HtmlSwitch.svelte"; import HtmlSwitch from "./HtmlSwitch.svelte";
import { getColumnOptions, getCsvMetadata } from "./lib"; import { getColumnOptions, getCsvMetadata } from "./lib";
import NotetypeSelector from "./NotetypeSelector.svelte"; import NotetypeSelector from "./NotetypeSelector.svelte";
import Preview from "./Preview.svelte";
import StickyFooter from "./StickyFooter.svelte"; import StickyFooter from "./StickyFooter.svelte";
import Tags from "./Tags.svelte"; import Tags from "./Tags.svelte";
@ -32,6 +39,8 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
export let updatedTags: string[]; export let updatedTags: string[];
export let columnLabels: string[]; export let columnLabels: string[];
export let tagsColumn: number; export let tagsColumn: number;
export let guidColumn: number;
export let preview: Generic.StringList[];
// Protobuf oneofs. Exactly one of these pairs is expected to be set. // Protobuf oneofs. Exactly one of these pairs is expected to be set.
export let notetypeColumn: number | null; export let notetypeColumn: number | null;
export let globalNotetype: ImportExport.CsvMetadata.MappedNotetype | null; export let globalNotetype: ImportExport.CsvMetadata.MappedNotetype | null;
@ -41,9 +50,17 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
let dupeResolution: ImportExport.ImportCsvRequest.DupeResolution; let dupeResolution: ImportExport.ImportCsvRequest.DupeResolution;
let lastNotetypeId = globalNotetype?.id; let lastNotetypeId = globalNotetype?.id;
$: columnOptions = getColumnOptions(columnLabels, notetypeColumn, deckColumn); $: columnOptions = getColumnOptions(
$: getCsvMetadata(path, delimiter).then((meta) => { columnLabels,
preview[0].vals,
notetypeColumn,
deckColumn,
tagsColumn,
guidColumn,
);
$: getCsvMetadata(path, delimiter, undefined, isHtml).then((meta) => {
columnLabels = meta.columnLabels; columnLabels = meta.columnLabels;
preview = meta.preview;
}); });
$: if (globalNotetype?.id !== lastNotetypeId) { $: if (globalNotetype?.id !== lastNotetypeId) {
lastNotetypeId = globalNotetype?.id; lastNotetypeId = globalNotetype?.id;
@ -66,6 +83,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
updatedTags, updatedTags,
columnLabels, columnLabels,
tagsColumn, tagsColumn,
guidColumn,
notetypeColumn, notetypeColumn,
globalNotetype, globalNotetype,
deckColumn, deckColumn,
@ -78,6 +96,15 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<Container class="csv-page"> <Container class="csv-page">
<Row --cols={2}> <Row --cols={2}>
<Col --col-size={1} breakpoint="md">
<Container>
<Header heading={tr.importingFile()} />
<Spacer --height="1.5rem" />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Preview {columnOptions} {preview} />
</Container>
</Col>
<Col --col-size={1} breakpoint="md"> <Col --col-size={1} breakpoint="md">
<Container> <Container>
<Header heading={tr.importingImportOptions()} /> <Header heading={tr.importingImportOptions()} />
@ -92,8 +119,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<DeckSelector {deckNameIds} bind:deckId /> <DeckSelector {deckNameIds} bind:deckId />
{/if} {/if}
<DupeResolutionSelector bind:dupeResolution /> <DupeResolutionSelector bind:dupeResolution />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Tags bind:globalTags bind:updatedTags /> <Tags bind:globalTags bind:updatedTags />
</Container> </Container>
</Col> </Col>

View file

@ -0,0 +1,65 @@
<!--
Copyright: Ankitects Pty Ltd and contributors
License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
-->
<script lang="ts">
import type { Generic } from "../lib/proto";
import type { ColumnOption } from "./lib";
export let columnOptions: ColumnOption[];
export let preview: Generic.StringList[];
</script>
<div class="outer">
<table class="preview">
{#each columnOptions.slice(1) as { label, shortLabel }}
<th>
{shortLabel || label}
</th>
{/each}
{#each preview as row}
<tr>
{#each row.vals as cell}
<td>{cell}</td>
{/each}
</tr>
{/each}
</table>
</div>
<style lang="scss">
.outer {
// approximate size based on body max width + margins
width: min(90vw, 65em);
overflow: auto;
}
.preview {
border-collapse: collapse;
white-space: nowrap;
th,
td {
text-overflow: ellipsis;
overflow: hidden;
border: 1px solid var(--faint-border);
padding: 0.25rem 0.5rem;
max-width: 15em;
}
th {
background: var(--medium-border);
text-align: center;
}
tr {
&:nth-child(even) {
background: var(--frame-bg);
}
}
td {
text-align: start;
}
}
</style>

View file

@ -37,11 +37,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<style lang="scss"> <style lang="scss">
.sticky-footer { .sticky-footer {
position: sticky; position: fixed;
bottom: 0; bottom: 0;
left: 0;
right: 0;
z-index: 10; z-index: 10;
margin: 0.75rem; margin: 0;
padding: 0.25rem; padding: 0.25rem;
background: var(--window-bg); background: var(--window-bg);

View file

@ -17,16 +17,13 @@
body { body {
width: min(100vw, 70em); width: min(100vw, 70em);
margin: 0 auto; margin: 0 auto;
height: 100%; padding: 1em;
// pad out the underside of the footer
padding-bottom: 5em;
} }
html { html {
overflow-x: hidden; overflow-x: hidden;
height: 100%;
}
#main {
padding: 0.5em 0.5em 1em 0.5em;
} }
// override the default down arrow colour in <select> elements // override the default down arrow colour in <select> elements

View file

@ -59,7 +59,9 @@ export async function setupImportCsvPage(path: string): Promise<ImportCsvPage> {
updatedTags: metadata.updatedTags, updatedTags: metadata.updatedTags,
columnLabels: metadata.columnLabels, columnLabels: metadata.columnLabels,
tagsColumn: metadata.tagsColumn, tagsColumn: metadata.tagsColumn,
guidColumn: metadata.guidColumn,
globalNotetype: metadata.globalNotetype ?? null, globalNotetype: metadata.globalNotetype ?? null,
preview: metadata.preview,
// Unset oneof numbers default to 0, which also means n/a here, // Unset oneof numbers default to 0, which also means n/a here,
// but it's vital to differentiate between unset and 0 when reserializing. // but it's vital to differentiate between unset and 0 when reserializing.
notetypeColumn: metadata.notetypeColumn ? metadata.notetypeColumn : null, notetypeColumn: metadata.notetypeColumn ? metadata.notetypeColumn : null,

View file

@ -11,14 +11,18 @@ import {
export interface ColumnOption { export interface ColumnOption {
label: string; label: string;
shortLabel?: string;
value: number; value: number;
disabled: boolean; disabled: boolean;
} }
export function getColumnOptions( export function getColumnOptions(
columnLabels: string[], columnLabels: string[],
firstRow: string[],
notetypeColumn: number | null, notetypeColumn: number | null,
deckColumn: number | null, deckColumn: number | null,
tagsColumn: number,
guidColumn: number,
): ColumnOption[] { ): ColumnOption[] {
return [{ label: tr.changeNotetypeNothing(), value: 0, disabled: false }].concat( return [{ label: tr.changeNotetypeNothing(), value: 0, disabled: false }].concat(
columnLabels.map((label, index) => { columnLabels.map((label, index) => {
@ -27,22 +31,28 @@ export function getColumnOptions(
return columnOption(tr.notetypesNotetype(), true, index); return columnOption(tr.notetypesNotetype(), true, index);
} else if (index === deckColumn) { } else if (index === deckColumn) {
return columnOption(tr.decksDeck(), true, index); return columnOption(tr.decksDeck(), true, index);
} else if (index === guidColumn) {
return columnOption("GUID", true, index);
} else if (index === tagsColumn) {
return columnOption(tr.editingTags(), false, index);
} else if (label === "") { } else if (label === "") {
return columnOption(index, false, index); return columnOption(firstRow[index - 1], false, index, true);
} else { } else {
return columnOption(`"${label}"`, false, index); return columnOption(label, false, index);
} }
}), }),
); );
} }
function columnOption( function columnOption(
label: string | number, label: string,
disabled: boolean, disabled: boolean,
index: number, index: number,
shortLabel?: boolean,
): ColumnOption { ): ColumnOption {
return { return {
label: tr.importingColumn({ val: label }), label: label ? `${index}: ${label}` : index.toString(),
shortLabel: shortLabel ? index.toString() : undefined,
value: index, value: index,
disabled, disabled,
}; };
@ -58,12 +68,14 @@ export async function getCsvMetadata(
path: string, path: string,
delimiter?: ImportExport.CsvMetadata.Delimiter, delimiter?: ImportExport.CsvMetadata.Delimiter,
notetypeId?: number, notetypeId?: number,
isHtml?: boolean,
): Promise<ImportExport.CsvMetadata> { ): Promise<ImportExport.CsvMetadata> {
return importExport.getCsvMetadata( return importExport.getCsvMetadata(
ImportExport.CsvMetadataRequest.create({ ImportExport.CsvMetadataRequest.create({
path, path,
delimiter, delimiter,
notetypeId, notetypeId,
isHtml,
}), }),
); );
} }