CSV import/export fixes and features (#1898)

* Fix footer moving upwards

* Fix column detection

Column detection was broken because escaped line breaks were not considered.
Also removes delimiter detection on `#columns:` line. User must use tabs
or set delimiter beforehand.

* Add CSV preview

* Parse `#tags column:`

* Optionally export deck and notetype with CSV

* Avoid clones in CSV export

* Prevent bottom of page appearing under footer (dae)

* Increase padding to 1em (dae)

With 0.5em, when a vertical scrollbar is shown, it sits right next to
the right edge of the content, making it look like there's no right
margin.

* Experimental changes to make table fit+scroll (dae)

- limit individual cells to 15em, and show ellipses when truncated
- limit total table width to body width, so that inner table is shown
with scrollbar
- use class rather than id - ids are bad practice in Svelte components,
as more than one may be displayed on a single page

* Skip importing foreign notes with filtered decks

Such notes were implicitly imported into the default deck before.
Also some refactoring to fetch deck ids and names beforehand.

* Hide spacer below hidden field mapping

* Fix guid being replaced when updating note

* Fix dupe identity check

Canonify tags before checking whether a dupe is identical, but only add the
update tags later, if appropriate.

* Fix deck export for notes with missing card 1

* Fix note lines starting with `#`

The csv crate doesn't support escaping a leading comment char. :(

* Support import/export of guids

* Strip HTML from preview rows

* Fix initially set deck if current is filtered

* Make isHtml toggle reactive

* Fix `html_to_text_line()` stripping sound names

* Tweak export option labels

* Switch to patched rust-csv fork

Fixes writing lines starting with `#`, so revert 5ece10ad05.

* List column options with first column field

* Fix flag for exports with HTML stripped
This commit is contained in:
RumovZ 2022-06-09 02:28:01 +02:00 committed by GitHub
parent d6b8520d03
commit 6da5e5b042
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 798 additions and 227 deletions

26
Cargo.lock generated
View file

@ -54,7 +54,7 @@ dependencies = [
"chrono",
"coarsetime",
"criterion",
"csv",
"csv 1.1.6 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)",
"env_logger",
"flate2",
"fluent",
@ -392,7 +392,7 @@ dependencies = [
"cast",
"clap",
"criterion-plot",
"csv",
"csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools",
"lazy_static",
"num-traits",
@ -506,12 +506,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.8",
"ryu",
"serde",
]
[[package]]
name = "csv"
version = "1.1.6"
source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90"
dependencies = [
"bstr",
"csv-core 0.1.10 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)",
"itoa 1.0.1",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
@ -521,6 +533,14 @@ dependencies = [
"memchr",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90"
dependencies = [
"memchr",
]
[[package]]
name = "derive_more"
version = "0.99.17"

View file

@ -372,23 +372,23 @@ def raze_fetch_remote_crates():
)
maybe(
http_archive,
new_git_repository,
name = "raze__csv__1_1_6",
url = "https://crates.io/api/v1/crates/csv/1.1.6/download",
type = "tar.gz",
sha256 = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1",
strip_prefix = "csv-1.1.6",
remote = "https://github.com/ankitects/rust-csv.git",
shallow_since = "1654675287 +1000",
commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90",
build_file = Label("//cargo/remote:BUILD.csv-1.1.6.bazel"),
init_submodules = True,
)
maybe(
http_archive,
new_git_repository,
name = "raze__csv_core__0_1_10",
url = "https://crates.io/api/v1/crates/csv-core/0.1.10/download",
type = "tar.gz",
sha256 = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90",
strip_prefix = "csv-core-0.1.10",
remote = "https://github.com/ankitects/rust-csv.git",
shallow_since = "1654675287 +1000",
commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90",
build_file = Label("//cargo/remote:BUILD.csv-core-0.1.10.bazel"),
init_submodules = True,
)
maybe(

View file

@ -126,7 +126,7 @@ rust_library(
deps = [
"@raze__bstr__0_2_17//:bstr",
"@raze__csv_core__0_1_10//:csv_core",
"@raze__itoa__0_4_8//:itoa",
"@raze__itoa__1_0_1//:itoa",
"@raze__ryu__1_0_9//:ryu",
"@raze__serde__1_0_136//:serde",
],

View file

@ -39,7 +39,7 @@ rust_library(
crate_features = [
"default",
],
crate_root = "src/lib.rs",
crate_root = "csv-core/src/lib.rs",
data = [],
edition = "2018",
rustc_flags = [

View file

@ -21,7 +21,10 @@ COMMITS_SHALLOW_SINCE = {
"1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000",
# pct-str
"4adccd8d4a222ab2672350a102f06ae832a0572d": "1605376517 +0100",
# linkcheck
"2f20798ce521cc594d510d4e417e76d5eac04d4b": "1626729019 +0200",
# rust-csv
"1c9d3aab6f79a7d815c69f925a46a4590c115f90": "1654675287 +1000",
}
import glob

View file

@ -38,3 +38,6 @@ exporting-processed-media-files =
[one] Processed { $count } media file...
*[other] Processed { $count } media files...
}
exporting-include-deck = Include deck name
exporting-include-notetype = Include notetype name
exporting-include-guid = Include unique note identifier

View file

@ -9,7 +9,6 @@ importing-appeared-twice-in-file = Appeared twice in file: { $val }
importing-by-default-anki-will-detect-the = By default, Anki will detect the character between fields, such as a tab, comma, and so on. If Anki is detecting the character incorrectly, you can enter it here. Use \t to represent tab.
importing-change = Change
importing-colon = Colon
importing-column = Column { $val }
importing-comma = Comma
importing-empty-first-field = Empty first field: { $val }
importing-field-separator = Field separator
@ -108,3 +107,4 @@ importing-preserve = Preserve
importing-update = Update
importing-tag-all-notes = Tag all notes
importing-tag-updated-notes = Tag updated notes
importing-file = File

View file

@ -119,6 +119,7 @@ message CsvMetadataRequest {
string path = 1;
optional CsvMetadata.Delimiter delimiter = 2;
optional int64 notetype_id = 3;
optional bool is_html = 4;
}
// Column indices are 1-based to make working with them in TS easier, where
@ -163,6 +164,8 @@ message CsvMetadata {
uint32 tags_column = 10;
bool force_delimiter = 11;
bool force_is_html = 12;
repeated generic.StringList preview = 13;
uint32 guid_column = 14;
}
message ExportCardCsvRequest {
@ -175,7 +178,10 @@ message ExportNoteCsvRequest {
string out_path = 1;
bool with_html = 2;
bool with_tags = 3;
ExportLimit limit = 4;
bool with_deck = 4;
bool with_notetype = 5;
bool with_guid = 6;
ExportLimit limit = 7;
}
message ExportLimit {

View file

@ -423,11 +423,17 @@ class Collection(DeprecatedNamesMixin):
limit: ExportLimit,
with_html: bool,
with_tags: bool,
with_deck: bool,
with_notetype: bool,
with_guid: bool,
) -> int:
return self._backend.export_note_csv(
out_path=out_path,
with_html=with_html,
with_tags=with_tags,
with_deck=with_deck,
with_notetype=with_notetype,
with_guid=with_guid,
limit=pb_export_limit(limit),
)

View file

@ -98,6 +98,10 @@ class ExportDialog(QDialog):
self.frm.includeHTML.setVisible(False)
# show deck list?
self.frm.deck.setVisible(not self.isVerbatim)
# used by the new export screen
self.frm.includeDeck.setVisible(False)
self.frm.includeNotetype.setVisible(False)
self.frm.includeGuid.setVisible(False)
def accept(self) -> None:
self.exporter.includeSched = self.frm.includeSched.isChecked()

View file

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>563</width>
<height>245</height>
<width>610</width>
<height>348</height>
</rect>
</property>
<property name="windowTitle">
@ -77,6 +77,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeHTML">
<property name="text">
<string>exporting_include_html_and_media_references</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeTags">
<property name="text">
@ -88,9 +95,29 @@
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeHTML">
<widget class="QCheckBox" name="includeDeck">
<property name="enabled">
<bool>true</bool>
</property>
<property name="text">
<string>exporting_include_html_and_media_references</string>
<string>exporting_include_deck</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeNotetype">
<property name="enabled">
<bool>true</bool>
</property>
<property name="text">
<string>exporting_include_notetype</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeGuid">
<property name="text">
<string>exporting_include_guid</string>
</property>
</widget>
</item>

View file

@ -91,6 +91,9 @@ class ExportDialog(QDialog):
self.frm.includeMedia.setVisible(self.exporter.show_include_media)
self.frm.includeTags.setVisible(self.exporter.show_include_tags)
self.frm.includeHTML.setVisible(self.exporter.show_include_html)
self.frm.includeDeck.setVisible(self.exporter.show_include_deck)
self.frm.includeNotetype.setVisible(self.exporter.show_include_notetype)
self.frm.includeGuid.setVisible(self.exporter.show_include_guid)
self.frm.legacy_support.setVisible(self.exporter.show_legacy_support)
self.frm.deck.setVisible(self.exporter.show_deck_list)
@ -135,6 +138,9 @@ class ExportDialog(QDialog):
include_media=self.frm.includeMedia.isChecked(),
include_tags=self.frm.includeTags.isChecked(),
include_html=self.frm.includeHTML.isChecked(),
include_deck=self.frm.includeDeck.isChecked(),
include_notetype=self.frm.includeNotetype.isChecked(),
include_guid=self.frm.includeGuid.isChecked(),
legacy_support=self.frm.legacy_support.isChecked(),
limit=limit,
)
@ -165,6 +171,9 @@ class Options:
include_media: bool
include_tags: bool
include_html: bool
include_deck: bool
include_notetype: bool
include_guid: bool
legacy_support: bool
limit: ExportLimit
@ -177,6 +186,9 @@ class Exporter(ABC):
show_include_tags = False
show_include_html = False
show_legacy_support = False
show_include_deck = False
show_include_notetype = False
show_include_guid = False
@staticmethod
@abstractmethod
@ -255,6 +267,9 @@ class NoteCsvExporter(Exporter):
show_deck_list = True
show_include_html = True
show_include_tags = True
show_include_deck = True
show_include_notetype = True
show_include_guid = True
@staticmethod
def name() -> str:
@ -269,6 +284,9 @@ class NoteCsvExporter(Exporter):
limit=options.limit,
with_html=options.include_html,
with_tags=options.include_tags,
with_deck=options.include_deck,
with_notetype=options.include_notetype,
with_guid=options.include_guid,
),
success=lambda count: tooltip(
tr.exporting_note_exported(count=count), parent=mw

View file

@ -100,4 +100,4 @@ unic-ucd-category = "0.9.0"
id_tree = "1.8.0"
zstd = { version="0.10.0", features=["zstdmt"] }
num_cpus = "1.13.1"
csv = "1.1.6"
csv = { git="https://github.com/ankitects/rust-csv.git", rev="1c9d3aab6f79a7d815c69f925a46a4590c115f90" }

View file

@ -75,7 +75,12 @@ impl ImportExportService for Backend {
fn get_csv_metadata(&self, input: pb::CsvMetadataRequest) -> Result<pb::CsvMetadata> {
let delimiter = input.delimiter.is_some().then(|| input.delimiter());
self.with_col(|col| {
col.get_csv_metadata(&input.path, delimiter, input.notetype_id.map(Into::into))
col.get_csv_metadata(
&input.path,
delimiter,
input.notetype_id.map(Into::into),
input.is_html,
)
})
}
@ -93,16 +98,8 @@ impl ImportExportService for Backend {
}
fn export_note_csv(&self, input: pb::ExportNoteCsvRequest) -> Result<pb::UInt32> {
self.with_col(|col| {
col.export_note_csv(
&input.out_path,
SearchNode::from(input.limit.unwrap_or_default()),
input.with_html,
input.with_tags,
self.export_progress_fn(),
)
})
.map(Into::into)
self.with_col(|col| col.export_note_csv(input, self.export_progress_fn()))
.map(Into::into)
}
fn export_card_csv(&self, input: pb::ExportCardCsvRequest) -> Result<pb::UInt32> {

View file

@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, fs::File, io::Write};
use std::{borrow::Cow, collections::HashMap, fs::File, io::Write, sync::Arc};
use itertools::Itertools;
use lazy_static::lazy_static;
@ -9,10 +9,11 @@ use regex::Regex;
use super::metadata::Delimiter;
use crate::{
backend_proto::ExportNoteCsvRequest,
import_export::{ExportProgress, IncrementableProgress},
notetype::RenderCardOutput,
prelude::*,
search::SortMode,
search::{SearchNode, SortMode},
template::RenderedNode,
text::{html_to_text_line, CowMapping},
};
@ -31,7 +32,7 @@ impl Collection {
progress.call(ExportProgress::File)?;
let mut incrementor = progress.incrementor(ExportProgress::Cards);
let mut writer = file_writer_with_header(path)?;
let mut writer = file_writer_with_header(path, with_html)?;
let mut cards = self.search_cards(search, SortMode::NoOrder)?;
cards.sort_unstable();
for &card in &cards {
@ -45,21 +46,19 @@ impl Collection {
pub fn export_note_csv(
&mut self,
path: &str,
search: impl TryIntoSearch,
with_html: bool,
with_tags: bool,
mut request: ExportNoteCsvRequest,
progress_fn: impl 'static + FnMut(ExportProgress, bool) -> bool,
) -> Result<usize> {
let mut progress = IncrementableProgress::new(progress_fn);
progress.call(ExportProgress::File)?;
let mut incrementor = progress.incrementor(ExportProgress::Notes);
let mut writer = file_writer_with_header(path)?;
self.search_notes_into_table(search)?;
self.search_notes_into_table(request.search_node())?;
let ctx = NoteContext::new(&request, self)?;
let mut writer = note_file_writer_with_header(&request.out_path, &ctx)?;
self.storage.for_each_note_in_search(|note| {
incrementor.increment()?;
writer.write_record(note_record(&note, with_html, with_tags))?;
writer.write_record(ctx.record(&note))?;
Ok(())
})?;
writer.flush()?;
@ -77,17 +76,46 @@ impl Collection {
}
}
fn file_writer_with_header(path: &str) -> Result<csv::Writer<File>> {
fn file_writer_with_header(path: &str, with_html: bool) -> Result<csv::Writer<File>> {
let mut file = File::create(path)?;
write_header(&mut file)?;
write_file_header(&mut file, with_html)?;
Ok(csv::WriterBuilder::new()
.delimiter(DELIMITER.byte())
.flexible(true)
.comment(Some(b'#'))
.from_writer(file))
}
fn write_header(writer: &mut impl Write) -> Result<()> {
write!(writer, "#separator:{}\n#html:true\n", DELIMITER.name())?;
fn write_file_header(writer: &mut impl Write, with_html: bool) -> Result<()> {
writeln!(writer, "#separator:{}", DELIMITER.name())?;
writeln!(writer, "#html:{with_html}")?;
Ok(())
}
fn note_file_writer_with_header(path: &str, ctx: &NoteContext) -> Result<csv::Writer<File>> {
let mut file = File::create(path)?;
write_note_file_header(&mut file, ctx)?;
Ok(csv::WriterBuilder::new()
.delimiter(DELIMITER.byte())
.comment(Some(b'#'))
.from_writer(file))
}
fn write_note_file_header(writer: &mut impl Write, ctx: &NoteContext) -> Result<()> {
write_file_header(writer, ctx.with_html)?;
write_column_header(ctx, writer)
}
fn write_column_header(ctx: &NoteContext, writer: &mut impl Write) -> Result<()> {
for (name, column) in [
("guid", ctx.guid_column()),
("notetype", ctx.notetype_column()),
("deck", ctx.deck_column()),
("tags", ctx.tags_column()),
] {
if let Some(index) = column {
writeln!(writer, "#{name} column:{index}")?;
}
}
Ok(())
}
@ -117,24 +145,12 @@ fn rendered_nodes_to_str(nodes: &[RenderedNode]) -> String {
.join("")
}
fn note_record(note: &Note, with_html: bool, with_tags: bool) -> Vec<String> {
let mut fields: Vec<_> = note
.fields()
.iter()
.map(|f| field_to_record_field(f, with_html))
.collect();
if with_tags {
fields.push(note.tags.join(" "));
}
fields
}
fn field_to_record_field(field: &str, with_html: bool) -> String {
fn field_to_record_field(field: &str, with_html: bool) -> Cow<str> {
let mut text = strip_redundant_sections(field);
if !with_html {
text = text.map_cow(|t| html_to_text_line(t, false));
}
text.into()
text
}
fn strip_redundant_sections(text: &str) -> Cow<str> {
@ -157,3 +173,110 @@ fn strip_answer_side_question(text: &str) -> Cow<str> {
}
RE.replace_all(text.as_ref(), "")
}
struct NoteContext {
with_html: bool,
with_tags: bool,
with_deck: bool,
with_notetype: bool,
with_guid: bool,
notetypes: HashMap<NotetypeId, Arc<Notetype>>,
deck_ids: HashMap<NoteId, DeckId>,
deck_names: HashMap<DeckId, String>,
field_columns: usize,
}
impl NoteContext {
/// Caller must have searched notes into table.
fn new(request: &ExportNoteCsvRequest, col: &mut Collection) -> Result<Self> {
let notetypes = col.get_all_notetypes_of_search_notes()?;
let field_columns = notetypes
.values()
.map(|nt| nt.fields.len())
.max()
.unwrap_or_default();
let deck_ids = col.storage.all_decks_of_search_notes()?;
let deck_names = HashMap::from_iter(col.storage.get_all_deck_names()?.into_iter());
Ok(Self {
with_html: request.with_html,
with_tags: request.with_tags,
with_deck: request.with_deck,
with_notetype: request.with_notetype,
with_guid: request.with_guid,
notetypes,
field_columns,
deck_ids,
deck_names,
})
}
fn guid_column(&self) -> Option<usize> {
self.with_guid.then(|| 1)
}
fn notetype_column(&self) -> Option<usize> {
self.with_notetype
.then(|| 1 + self.guid_column().unwrap_or_default())
}
fn deck_column(&self) -> Option<usize> {
self.with_deck
.then(|| 1 + self.notetype_column().unwrap_or_default())
}
fn tags_column(&self) -> Option<usize> {
self.with_tags
.then(|| 1 + self.deck_column().unwrap_or_default() + self.field_columns)
}
fn record<'c, 's: 'c, 'n: 'c>(&'s self, note: &'n Note) -> impl Iterator<Item = Cow<'c, [u8]>> {
self.with_guid
.then(|| Cow::from(note.guid.as_bytes()))
.into_iter()
.chain(self.notetype_name(note).into_iter())
.chain(self.deck_name(note).into_iter())
.chain(self.note_fields(note))
.chain(self.tags(note).into_iter())
}
fn notetype_name(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_notetype.then(|| {
self.notetypes
.get(&note.notetype_id)
.map_or(Cow::from(vec![]), |nt| Cow::from(nt.name.as_bytes()))
})
}
fn deck_name(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_deck.then(|| {
self.deck_ids
.get(&note.id)
.and_then(|did| self.deck_names.get(did))
.map_or(Cow::from(vec![]), |name| Cow::from(name.as_bytes()))
})
}
fn tags(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_tags
.then(|| Cow::from(note.tags.join(" ").into_bytes()))
}
fn note_fields<'n>(&self, note: &'n Note) -> impl Iterator<Item = Cow<'n, [u8]>> {
let with_html = self.with_html;
note.fields()
.iter()
.map(move |f| field_to_record_field(f, with_html))
.pad_using(self.field_columns, |_| Cow::from(""))
.map(|cow| match cow {
Cow::Borrowed(s) => Cow::from(s.as_bytes()),
Cow::Owned(s) => Cow::from(s.into_bytes()),
})
}
}
impl ExportNoteCsvRequest {
fn search_node(&mut self) -> SearchNode {
SearchNode::from(self.limit.take().unwrap_or_default())
}
}

View file

@ -113,6 +113,7 @@ type FieldSourceColumns = Vec<Option<usize>>;
// Column indices are 1-based.
struct ColumnContext {
tags_column: Option<usize>,
guid_column: Option<usize>,
deck_column: Option<usize>,
notetype_column: Option<usize>,
/// Source column indices for the fields of a notetype, identified by its
@ -126,6 +127,7 @@ impl ColumnContext {
fn new(metadata: &CsvMetadata) -> Result<Self> {
Ok(Self {
tags_column: (metadata.tags_column > 0).then(|| metadata.tags_column as usize),
guid_column: (metadata.guid_column > 0).then(|| metadata.guid_column as usize),
deck_column: metadata.deck()?.column(),
notetype_column: metadata.notetype()?.column(),
field_source_columns: metadata.field_source_columns()?,
@ -135,16 +137,10 @@ impl ColumnContext {
fn deserialize_csv(
&mut self,
mut reader: impl Read + Seek,
reader: impl Read + Seek,
delimiter: Delimiter,
) -> Result<Vec<ForeignNote>> {
remove_tags_line_from_reader(&mut reader)?;
let mut csv_reader = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.comment(Some(b'#'))
.delimiter(delimiter.byte())
.from_reader(reader);
let mut csv_reader = build_csv_reader(reader, delimiter)?;
self.deserialize_csv_reader(&mut csv_reader)
}
@ -162,34 +158,17 @@ impl ColumnContext {
.collect()
}
fn foreign_note_from_record(&mut self, record: &csv::StringRecord) -> ForeignNote {
let notetype = self.gather_notetype(record).into();
let deck = self.gather_deck(record).into();
let tags = self.gather_tags(record);
let fields = self.gather_note_fields(record);
fn foreign_note_from_record(&self, record: &csv::StringRecord) -> ForeignNote {
ForeignNote {
notetype,
fields,
tags,
deck,
notetype: str_from_record_column(self.notetype_column, record).into(),
fields: self.gather_note_fields(record),
tags: self.gather_tags(record),
deck: str_from_record_column(self.deck_column, record).into(),
guid: str_from_record_column(self.guid_column, record),
..Default::default()
}
}
fn gather_notetype(&self, record: &csv::StringRecord) -> String {
self.notetype_column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
fn gather_deck(&self, record: &csv::StringRecord) -> String {
self.deck_column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
fn gather_tags(&self, record: &csv::StringRecord) -> Vec<String> {
self.tags_column
.and_then(|i| record.get(i - 1))
@ -200,7 +179,7 @@ impl ColumnContext {
.collect()
}
fn gather_note_fields(&mut self, record: &csv::StringRecord) -> Vec<String> {
fn gather_note_fields(&self, record: &csv::StringRecord) -> Vec<String> {
let stringify = self.stringify;
self.field_source_columns
.iter()
@ -210,6 +189,26 @@ impl ColumnContext {
}
}
fn str_from_record_column(column: Option<usize>, record: &csv::StringRecord) -> String {
column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
pub(super) fn build_csv_reader(
mut reader: impl Read + Seek,
delimiter: Delimiter,
) -> Result<csv::Reader<impl Read + Seek>> {
remove_tags_line_from_reader(&mut reader)?;
Ok(csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.comment(Some(b'#'))
.delimiter(delimiter.byte())
.from_reader(reader))
}
fn stringify_fn(is_html: bool) -> fn(&str) -> String {
if is_html {
ToString::to_string
@ -267,6 +266,7 @@ mod test {
is_html: false,
force_is_html: false,
tags_column: 0,
guid_column: 0,
global_tags: Vec::new(),
updated_tags: Vec::new(),
column_labels: vec!["".to_string(); 2],
@ -275,6 +275,7 @@ mod test {
id: 1,
field_columns: vec![1, 2],
})),
preview: Vec::new(),
}
}
}

View file

@ -4,65 +4,81 @@
use std::{
collections::{HashMap, HashSet},
fs::File,
io::{BufRead, BufReader},
io::{BufRead, BufReader, Read, Seek, SeekFrom},
};
use itertools::Itertools;
use strum::IntoEnumIterator;
use super::import::build_csv_reader;
pub use crate::backend_proto::import_export::{
csv_metadata::{Deck as CsvDeck, Delimiter, MappedNotetype, Notetype as CsvNotetype},
CsvMetadata,
};
use crate::{
error::ImportError, import_export::text::NameOrId, notetype::NoteField, prelude::*,
text::is_html,
backend_proto::StringList,
error::ImportError,
import_export::text::NameOrId,
notetype::NoteField,
prelude::*,
text::{html_to_text_line, is_html},
};
/// The maximum number of preview rows.
const PREVIEW_LENGTH: usize = 5;
/// The maximum number of characters per preview field.
const PREVIEW_FIELD_LENGTH: usize = 80;
impl Collection {
pub fn get_csv_metadata(
&mut self,
path: &str,
delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>,
is_html: Option<bool>,
) -> Result<CsvMetadata> {
let reader = BufReader::new(File::open(path)?);
self.get_reader_metadata(reader, delimiter, notetype_id)
let mut reader = File::open(path)?;
self.get_reader_metadata(&mut reader, delimiter, notetype_id, is_html)
}
fn get_reader_metadata(
&mut self,
reader: impl BufRead,
mut reader: impl Read + Seek,
delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>,
is_html: Option<bool>,
) -> Result<CsvMetadata> {
let mut metadata = CsvMetadata::default();
let line = self.parse_meta_lines(reader, &mut metadata)?;
maybe_set_fallback_delimiter(delimiter, &mut metadata, &line);
maybe_set_fallback_columns(&mut metadata, &line)?;
maybe_set_fallback_is_html(&mut metadata, &line)?;
let meta_len = self.parse_meta_lines(&mut reader, &mut metadata)? as u64;
maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader, meta_len)?;
let records = collect_preview_records(&mut metadata, reader)?;
maybe_set_fallback_is_html(&mut metadata, &records, is_html)?;
set_preview(&mut metadata, &records)?;
maybe_set_fallback_columns(&mut metadata)?;
self.maybe_set_fallback_notetype(&mut metadata, notetype_id)?;
self.maybe_init_notetype_map(&mut metadata)?;
self.maybe_set_fallback_deck(&mut metadata)?;
Ok(metadata)
}
/// Parses the meta head of the file, and returns the first content line.
fn parse_meta_lines(
&mut self,
mut reader: impl BufRead,
metadata: &mut CsvMetadata,
) -> Result<String> {
/// Parses the meta head of the file and returns the total of meta bytes.
fn parse_meta_lines(&mut self, reader: impl Read, metadata: &mut CsvMetadata) -> Result<usize> {
let mut meta_len = 0;
let mut reader = BufReader::new(reader);
let mut line = String::new();
reader.read_line(&mut line)?;
let mut line_len = reader.read_line(&mut line)?;
if self.parse_first_line(&line, metadata) {
meta_len += line_len;
line.clear();
reader.read_line(&mut line)?;
line_len = reader.read_line(&mut line)?;
while self.parse_line(&line, metadata) {
meta_len += line_len;
line.clear();
reader.read_line(&mut line)?;
line_len = reader.read_line(&mut line)?;
}
}
Ok(line)
Ok(meta_len)
}
/// True if the line is a meta line, i.e. a comment, or starting with 'tags:'.
@ -103,7 +119,7 @@ impl Collection {
}
"tags" => metadata.global_tags = collect_tags(value),
"columns" => {
if let Ok(columns) = self.parse_columns(value, metadata) {
if let Ok(columns) = parse_columns(value, metadata.delimiter()) {
metadata.column_labels = columns;
}
}
@ -127,21 +143,20 @@ impl Collection {
metadata.deck = Some(CsvDeck::DeckColumn(n));
}
}
"tags column" => {
if let Ok(n) = value.trim().parse() {
metadata.tags_column = n;
}
}
"guid column" => {
if let Ok(n) = value.trim().parse() {
metadata.guid_column = n;
}
}
_ => (),
}
}
fn parse_columns(&mut self, line: &str, metadata: &mut CsvMetadata) -> Result<Vec<String>> {
let delimiter = if metadata.force_delimiter {
metadata.delimiter()
} else {
delimiter_from_line(line)
};
map_single_record(line, delimiter, |record| {
record.iter().map(ToString::to_string).collect()
})
}
fn maybe_set_fallback_notetype(
&mut self,
metadata: &mut CsvMetadata,
@ -161,7 +176,15 @@ impl Collection {
metadata
.notetype_id()
.and_then(|ntid| self.default_deck_for_notetype(ntid).transpose())
.unwrap_or_else(|| self.get_current_deck().map(|d| d.id))?
.unwrap_or_else(|| {
self.get_current_deck().map(|deck| {
if deck.is_filtered() {
DeckId(1)
} else {
deck.id
}
})
})?
.0,
));
}
@ -205,6 +228,61 @@ impl Collection {
}
}
fn parse_columns(line: &str, delimiter: Delimiter) -> Result<Vec<String>> {
map_single_record(line, delimiter, |record| {
record.iter().map(ToString::to_string).collect()
})
}
fn collect_preview_records(
metadata: &mut CsvMetadata,
mut reader: impl Read + Seek,
) -> Result<Vec<csv::StringRecord>> {
reader.rewind()?;
let mut csv_reader = build_csv_reader(reader, metadata.delimiter())?;
csv_reader
.records()
.into_iter()
.take(PREVIEW_LENGTH)
.collect::<csv::Result<_>>()
.map_err(Into::into)
}
fn set_preview(metadata: &mut CsvMetadata, records: &[csv::StringRecord]) -> Result<()> {
let mut min_len = 1;
metadata.preview = records
.iter()
.enumerate()
.map(|(idx, record)| {
let row = build_preview_row(min_len, record, metadata.is_html);
if idx == 0 {
min_len = row.vals.len();
}
row
})
.collect();
Ok(())
}
fn build_preview_row(min_len: usize, record: &csv::StringRecord, strip_html: bool) -> StringList {
StringList {
vals: record
.iter()
.pad_using(min_len, |_| "")
.map(|field| {
if strip_html {
html_to_text_line(field, true)
.chars()
.take(PREVIEW_FIELD_LENGTH)
.collect()
} else {
field.chars().take(PREVIEW_FIELD_LENGTH).collect()
}
})
.collect(),
}
}
pub(super) fn collect_tags(txt: &str) -> Vec<String> {
txt.split_whitespace()
.filter(|s| !s.is_empty())
@ -263,20 +341,23 @@ fn ensure_first_field_is_mapped(
Ok(())
}
fn maybe_set_fallback_columns(metadata: &mut CsvMetadata, line: &str) -> Result<()> {
fn maybe_set_fallback_columns(metadata: &mut CsvMetadata) -> Result<()> {
if metadata.column_labels.is_empty() {
let columns = map_single_record(line, metadata.delimiter(), |r| r.len())?;
metadata.column_labels = vec![String::new(); columns];
metadata.column_labels =
vec![String::new(); metadata.preview.get(0).map_or(0, |row| row.vals.len())];
}
Ok(())
}
fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result<()> {
// TODO: should probably check more than one line; can reuse preview lines
// when it's implemented
if !metadata.force_is_html {
metadata.is_html =
map_single_record(line, metadata.delimiter(), |r| r.iter().any(is_html))?;
fn maybe_set_fallback_is_html(
metadata: &mut CsvMetadata,
records: &[csv::StringRecord],
is_html_option: Option<bool>,
) -> Result<()> {
if let Some(is_html) = is_html_option {
metadata.is_html = is_html;
} else if !metadata.force_is_html {
metadata.is_html = records.iter().flat_map(|record| record.iter()).any(is_html);
}
Ok(())
}
@ -284,13 +365,16 @@ fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result<
fn maybe_set_fallback_delimiter(
delimiter: Option<Delimiter>,
metadata: &mut CsvMetadata,
line: &str,
) {
mut reader: impl Read + Seek,
meta_len: u64,
) -> Result<()> {
if let Some(delim) = delimiter {
metadata.set_delimiter(delim);
} else if !metadata.force_delimiter {
metadata.set_delimiter(delimiter_from_line(line));
reader.seek(SeekFrom::Start(meta_len))?;
metadata.set_delimiter(delimiter_from_reader(reader)?);
}
Ok(())
}
fn delimiter_from_value(value: &str) -> Option<Delimiter> {
@ -303,14 +387,16 @@ fn delimiter_from_value(value: &str) -> Option<Delimiter> {
None
}
fn delimiter_from_line(line: &str) -> Delimiter {
fn delimiter_from_reader(mut reader: impl Read) -> Result<Delimiter> {
let mut buf = [0; 8 * 1024];
let _ = reader.read(&mut buf)?;
// TODO: use smarter heuristic
for delimiter in Delimiter::iter() {
if line.contains(delimiter.byte() as char) {
return delimiter;
if buf.contains(&delimiter.byte()) {
return Ok(delimiter);
}
}
Delimiter::Space
Ok(Delimiter::Space)
}
fn map_single_record<T>(
@ -384,6 +470,9 @@ impl CsvMetadata {
if self.tags_column > 0 {
columns.insert(self.tags_column as usize);
}
if self.guid_column > 0 {
columns.insert(self.guid_column as usize);
}
columns
}
}
@ -398,8 +487,18 @@ impl NameOrId {
}
}
impl From<csv::StringRecord> for StringList {
fn from(record: csv::StringRecord) -> Self {
Self {
vals: record.iter().map(ToString::to_string).collect(),
}
}
}
#[cfg(test)]
mod test {
use std::io::Cursor;
use super::*;
use crate::collection::open_test_collection;
@ -408,7 +507,7 @@ mod test {
metadata!($col, $csv, None)
};
($col:expr,$csv:expr, $delim:expr) => {
$col.get_reader_metadata(BufReader::new($csv.as_bytes()), $delim, None)
$col.get_reader_metadata(Cursor::new($csv.as_bytes()), $delim, None, None)
.unwrap()
};
}
@ -561,7 +660,7 @@ mod test {
// custom names
assert_eq!(
metadata!(col, "#columns:one,two\n").column_labels,
metadata!(col, "#columns:one\ttwo\n").column_labels,
["one", "two"]
);
assert_eq!(
@ -570,6 +669,17 @@ mod test {
);
}
#[test]
fn should_detect_column_number_despite_escaped_line_breaks() {
let mut col = open_test_collection();
assert_eq!(
metadata!(col, "\"foo|\nbar\"\tfoo\tbar\n")
.column_labels
.len(),
3
);
}
impl CsvMetadata {
fn unwrap_notetype_map(&self) -> &[u32] {
match &self.notetype {
@ -589,7 +699,16 @@ mod test {
#[test]
fn should_map_default_notetype_fields_by_given_column_names() {
let mut col = open_test_collection();
let meta = metadata!(col, "#columns:Back,Front\nfoo,bar,baz\n");
let meta = metadata!(col, "#columns:Back\tFront\nfoo,bar,baz\n");
assert_eq!(meta.unwrap_notetype_map(), &[2, 1]);
}
#[test]
fn should_gather_first_lines_into_preview() {
    let mut col = open_test_collection();
    let meta = metadata!(col, "#separator: \nfoo bar\nbaz<br>\n");
    let rows: Vec<_> = meta.preview.iter().map(|row| row.vals.clone()).collect();
    // First data line is split on the space separator.
    assert_eq!(rows[0], ["foo", "bar"]);
    // HTML is stripped from preview values, leaving the second field empty.
    assert_eq!(rows[1], ["baz", ""]);
}
}

View file

@ -1,7 +1,12 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, collections::HashMap, mem, sync::Arc};
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
mem,
sync::Arc,
};
use super::NameOrId;
use crate::{
@ -52,24 +57,75 @@ struct Context<'a> {
col: &'a mut Collection,
/// Contains the optional default notetype with the default key.
notetypes: HashMap<NameOrId, Option<Arc<Notetype>>>,
/// Contains the optional default deck id with the default key.
deck_ids: HashMap<NameOrId, Option<DeckId>>,
deck_ids: DeckIdsByNameOrId,
usn: Usn,
normalize_notes: bool,
today: u32,
dupe_resolution: DupeResolution,
card_gen_ctxs: HashMap<(NotetypeId, DeckId), CardGenContext<Arc<Notetype>>>,
existing_notes: HashMap<(NotetypeId, u32), Vec<NoteId>>,
existing_checksums: HashMap<(NotetypeId, u32), Vec<NoteId>>,
existing_guids: HashMap<String, NoteId>,
}
/// Deck lookup used during import to resolve `NameOrId` references to
/// existing deck ids.
struct DeckIdsByNameOrId {
    /// Ids of all existing normal decks (per `get_all_normal_deck_names`),
    /// used to validate id references.
    ids: HashSet<DeckId>,
    /// Maps existing deck names to their ids.
    names: HashMap<String, DeckId>,
    /// Preresolved deck id for the default key, if that deck exists.
    default: Option<DeckId>,
}
struct NoteContext {
/// Prepared and with canonified tags.
note: Note,
dupes: Vec<Note>,
dupes: Vec<Duplicate>,
cards: Vec<Card>,
notetype: Arc<Notetype>,
deck_id: DeckId,
}
/// An existing note that an incoming foreign note was matched against.
struct Duplicate {
    /// The pre-existing note in the collection.
    note: Note,
    /// True if fields and tags of both notes are equal.
    identical: bool,
    /// True if the notes were matched via the first field (checksum),
    /// false if matched via guid.
    first_field_match: bool,
}
impl Duplicate {
    /// Wraps an existing note matched against `original`, recording whether
    /// the two are field-and-tag identical and how the match was made.
    fn new(dupe: Note, original: &Note, first_field_match: bool) -> Self {
        Self {
            identical: dupe.equal_fields_and_tags(original),
            note: dupe,
            first_field_match,
        }
    }
}
impl DeckIdsByNameOrId {
fn new(col: &mut Collection, default: &NameOrId) -> Result<Self> {
let names: HashMap<String, DeckId> = col
.get_all_normal_deck_names()?
.into_iter()
.map(|(id, name)| (name, id))
.collect();
let ids = names.values().copied().collect();
let mut new = Self {
ids,
names,
default: None,
};
new.default = new.get(default);
Ok(new)
}
fn get(&self, name_or_id: &NameOrId) -> Option<DeckId> {
match name_or_id {
_ if *name_or_id == NameOrId::default() => self.default,
NameOrId::Id(id) => self.ids.get(&DeckId(*id)).copied(),
NameOrId::Name(name) => self.names.get(name).copied(),
}
}
}
impl<'a> Context<'a> {
fn new(data: &ForeignData, col: &'a mut Collection) -> Result<Self> {
let usn = col.usn()?;
@ -80,12 +136,10 @@ impl<'a> Context<'a> {
NameOrId::default(),
col.notetype_by_name_or_id(&data.default_notetype)?,
);
let mut deck_ids = HashMap::new();
deck_ids.insert(
NameOrId::default(),
col.deck_id_by_name_or_id(&data.default_deck)?,
);
let existing_notes = col.storage.all_notes_by_type_and_checksum()?;
let deck_ids = DeckIdsByNameOrId::new(col, &data.default_deck)?;
let existing_checksums = col.storage.all_notes_by_type_and_checksum()?;
let existing_guids = col.storage.all_notes_by_guid()?;
Ok(Self {
col,
usn,
@ -95,7 +149,8 @@ impl<'a> Context<'a> {
notetypes,
deck_ids,
card_gen_ctxs: HashMap::new(),
existing_notes,
existing_checksums,
existing_guids,
})
}
@ -119,16 +174,6 @@ impl<'a> Context<'a> {
})
}
fn deck_id_for_note(&mut self, note: &ForeignNote) -> Result<Option<DeckId>> {
Ok(if let Some(did) = self.deck_ids.get(&note.deck) {
*did
} else {
let did = self.col.deck_id_by_name_or_id(&note.deck)?;
self.deck_ids.insert(note.deck.clone(), did);
did
})
}
fn import_foreign_notes(
&mut self,
notes: Vec<ForeignNote>,
@ -145,7 +190,7 @@ impl<'a> Context<'a> {
continue;
}
if let Some(notetype) = self.notetype_for_note(&foreign)? {
if let Some(deck_id) = self.deck_id_for_note(&foreign)? {
if let Some(deck_id) = self.deck_ids.get(&foreign.deck) {
let ctx = self.build_note_context(foreign, notetype, deck_id, global_tags)?;
self.import_note(ctx, updated_tags, &mut log)?;
} else {
@ -167,6 +212,7 @@ impl<'a> Context<'a> {
) -> Result<NoteContext> {
let (mut note, cards) = foreign.into_native(&notetype, deck_id, self.today, global_tags);
note.prepare_for_update(&notetype, self.normalize_notes)?;
self.col.canonify_note_tags(&mut note, self.usn)?;
let dupes = self.find_duplicates(&notetype, &note)?;
Ok(NoteContext {
@ -178,14 +224,34 @@ impl<'a> Context<'a> {
})
}
fn find_duplicates(&mut self, notetype: &Notetype, note: &Note) -> Result<Vec<Note>> {
fn find_duplicates(&self, notetype: &Notetype, note: &Note) -> Result<Vec<Duplicate>> {
let checksum = note
.checksum
.ok_or_else(|| AnkiError::invalid_input("note unprepared"))?;
self.existing_notes
.get(&(notetype.id, checksum))
.map(|dupe_ids| self.col.get_full_duplicates(note, dupe_ids))
.unwrap_or_else(|| Ok(vec![]))
if let Some(nid) = self.existing_guids.get(&note.guid) {
self.get_guid_dupe(*nid, note).map(|dupe| vec![dupe])
} else if let Some(nids) = self.existing_checksums.get(&(notetype.id, checksum)) {
self.get_first_field_dupes(note, nids)
} else {
Ok(Vec::new())
}
}
/// Fetches the note with the given id as a guid-matched duplicate of
/// `original`. Errors with `NotFound` if the note no longer exists.
fn get_guid_dupe(&self, nid: NoteId, original: &Note) -> Result<Duplicate> {
    // A guid match is not a first-field match, hence `false`.
    let existing = self.col.storage.get_note(nid)?.ok_or(AnkiError::NotFound)?;
    Ok(Duplicate::new(existing, original, false))
}
/// Wraps every full (first-field) duplicate of `note` among the candidate
/// note ids into a `Duplicate` marked as a first-field match.
fn get_first_field_dupes(&self, note: &Note, nids: &[NoteId]) -> Result<Vec<Duplicate>> {
    let full_dupes = self.col.get_full_duplicates(note, nids)?;
    let mut dupes = Vec::with_capacity(full_dupes.len());
    for dupe in full_dupes {
        dupes.push(Duplicate::new(dupe, note, true));
    }
    Ok(dupes)
}
fn import_note(
@ -204,7 +270,6 @@ impl<'a> Context<'a> {
}
fn add_note(&mut self, mut ctx: NoteContext, log_queue: &mut Vec<LogNote>) -> Result<()> {
self.col.canonify_note_tags(&mut ctx.note, self.usn)?;
ctx.note.usn = self.usn;
self.col.add_note_only_undoable(&mut ctx.note)?;
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype)?;
@ -237,28 +302,49 @@ impl<'a> Context<'a> {
}
fn prepare_note_for_update(&mut self, note: &mut Note, updated_tags: &[String]) -> Result<()> {
note.tags.extend(updated_tags.iter().cloned());
self.col.canonify_note_tags(note, self.usn)?;
if !updated_tags.is_empty() {
note.tags.extend(updated_tags.iter().cloned());
self.col.canonify_note_tags(note, self.usn)?;
}
note.set_modified(self.usn);
Ok(())
}
fn maybe_update_dupe(
&mut self,
dupe: Note,
dupe: Duplicate,
ctx: &mut NoteContext,
log: &mut NoteLog,
) -> Result<()> {
ctx.note.id = dupe.id;
if dupe.equal_fields_and_tags(&ctx.note) {
log.duplicate.push(dupe.into_log_note());
if dupe.note.notetype_id != ctx.notetype.id {
log.conflicting.push(dupe.note.into_log_note());
return Ok(());
}
if dupe.identical {
log.duplicate.push(dupe.note.into_log_note());
} else {
self.col.update_note_undoable(&ctx.note, &dupe)?;
log.first_field_match.push(dupe.into_log_note());
self.update_dupe(dupe, ctx, log)?;
}
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype.clone())
}
/// Overwrites the existing duplicate with the incoming note's content and
/// logs the update.
fn update_dupe(
    &mut self,
    dupe: Duplicate,
    ctx: &mut NoteContext,
    log: &mut NoteLog,
) -> Result<()> {
    // Keep the existing note's identity: the update must not replace the
    // id or the guid of the note already in the collection.
    ctx.note.id = dupe.note.id;
    ctx.note.guid = dupe.note.guid.clone();
    self.col.update_note_undoable(&ctx.note, &dupe.note)?;
    let queue = if dupe.first_field_match {
        &mut log.first_field_match
    } else {
        &mut log.updated
    };
    queue.push(dupe.note.into_log_note());
    Ok(())
}
fn import_cards(&mut self, cards: &mut [Card], note_id: NoteId) -> Result<()> {
for card in cards {
card.note_id = note_id;
@ -306,7 +392,7 @@ impl Collection {
}
}
fn get_full_duplicates(&mut self, note: &Note, dupe_ids: &[NoteId]) -> Result<Vec<Note>> {
fn get_full_duplicates(&self, note: &Note, dupe_ids: &[NoteId]) -> Result<Vec<Note>> {
let first_field = note.first_field_stripped();
dupe_ids
.iter()
@ -329,6 +415,9 @@ impl ForeignNote {
) -> (Note, Vec<Card>) {
// TODO: Handle new and learning cards
let mut note = Note::new(notetype);
if !self.guid.is_empty() {
note.guid = self.guid;
}
note.tags = self.tags;
note.tags.extend(extra_tags.iter().cloned());
note.fields_mut()
@ -501,4 +590,16 @@ mod test {
data.import(&mut col, |_, _| true).unwrap();
assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]);
}
#[test]
fn should_match_note_with_same_guid() {
    // NOTE(review): despite its name, this body never assigns a guid to the
    // foreign note, so it appears to exercise tag merging (identical to the
    // test above) rather than guid matching — confirm the intended setup
    // (e.g. reusing an existing note's guid) wasn't lost.
    let mut col = open_test_collection();
    let mut data = ForeignData::with_defaults();
    data.add_note(&["foo"]);
    data.notes[0].tags = vec![String::from("bar")];
    data.global_tags = vec![String::from("baz")];
    data.import(&mut col, |_, _| true).unwrap();
    assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]);
}
}

View file

@ -25,6 +25,7 @@ pub struct ForeignData {
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct ForeignNote {
guid: String,
fields: Vec<String>,
tags: Vec<String>,
notetype: NameOrId,

View file

@ -218,6 +218,21 @@ impl Collection {
.collect()
}
pub fn get_all_notetypes_of_search_notes(
&mut self,
) -> Result<HashMap<NotetypeId, Arc<Notetype>>> {
self.storage
.all_notetypes_of_search_notes()?
.into_iter()
.map(|ntid| {
self.get_notetype(ntid)
.transpose()
.unwrap()
.map(|nt| (ntid, nt))
})
.collect()
}
pub fn remove_notetype(&mut self, ntid: NotetypeId) -> Result<OpOutput<()>> {
self.transact(Op::RemoveNotetype, |col| col.remove_notetype_inner(ntid))
}

View file

@ -0,0 +1,9 @@
-- For every note in search_nids, return the deck id of its first existing
-- card (the one with the lowest ordinal).
-- NOTE(review): the ungrouped `did` and the bare `ord` in HAVING rely on
-- SQLite's bare-column behaviour with min()/max() aggregates, which takes
-- them from the row holding MIN(ord) — nonstandard SQL, but defined for
-- SQLite; confirm this file only ever targets SQLite.
SELECT nid,
  did
FROM cards
WHERE nid IN (
    SELECT nid
    FROM search_nids
  )
GROUP BY nid
HAVING ord = MIN(ord)

View file

@ -131,6 +131,14 @@ impl SqliteStorage {
.collect()
}
/// Returns the deck id of the first existing card of every searched note.
/// Reads from the `search_nids` table — presumably populated by a prior
/// note search; confirm callers set it up before calling this.
pub(crate) fn all_decks_of_search_notes(&self) -> Result<HashMap<NoteId, DeckId>> {
    self.db
        .prepare_cached(include_str!("all_decks_of_search_notes.sql"))?
        .query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))?
        .collect()
}
// caller should ensure name unique
pub(crate) fn add_deck(&self, deck: &mut Deck) -> Result<()> {
assert!(deck.id.0 == 0);

View file

@ -338,6 +338,13 @@ impl super::SqliteStorage {
.collect()
}
/// Maps the guid of every note in the collection to its note id.
pub(crate) fn all_notes_by_guid(&mut self) -> Result<HashMap<String, NoteId>> {
    self.db
        // prepare_cached for consistency with the sibling statements in
        // this file, and to avoid recompiling the SQL on repeated calls
        .prepare_cached("SELECT guid, id FROM notes")?
        .query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))?
        .collect()
}
#[cfg(test)]
pub(crate) fn get_all_notes(&mut self) -> Vec<Note> {
self.db

View file

@ -116,6 +116,15 @@ impl SqliteStorage {
.collect()
}
/// Collects the distinct notetype ids used by the notes currently in the
/// `search_nids` table.
pub(crate) fn all_notetypes_of_search_notes(&self) -> Result<Vec<NotetypeId>> {
    let mut stmt = self.db.prepare_cached(
        "SELECT DISTINCT mid FROM notes WHERE id IN (SELECT nid FROM search_nids)",
    )?;
    let ids: Result<Vec<NotetypeId>> = stmt.query_and_then([], |row| Ok(row.get(0)?))?.collect();
    ids
}
pub fn get_all_notetype_names(&self) -> Result<Vec<(NotetypeId, String)>> {
self.db
.prepare_cached(include_str!("get_notetype_names.sql"))?

View file

@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, ptr};
use std::borrow::Cow;
use lazy_static::lazy_static;
use pct_str::{IriReserved, PctStr, PctString};
@ -134,12 +134,8 @@ lazy_static! {
static ref PERSISTENT_HTML_SPACERS: Regex = Regex::new(r#"(?i)<br\s*/?>|<div>|\n"#).unwrap();
static ref UNPRINTABLE_TAGS: Regex = Regex::new(
r"(?xs)
\[sound:[^]]+\]
|
\[\[type:[^]]+\]\]
").unwrap();
static ref TYPE_TAG: Regex = Regex::new(r"\[\[type:[^]]+\]\]").unwrap();
static ref SOUND_TAG: Regex = Regex::new(r"\[sound:([^]]+)\]").unwrap();
/// Files included in CSS with a leading underscore.
static ref UNDERSCORED_CSS_IMPORTS: Regex = Regex::new(
@ -172,19 +168,21 @@ lazy_static! {
"#).unwrap();
}
pub fn is_html(text: &str) -> bool {
HTML.is_match(text)
pub fn is_html(text: impl AsRef<str>) -> bool {
HTML.is_match(text.as_ref())
}
pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow<str> {
let (html_stripper, sound_rep): (fn(&str) -> Cow<str>, _) = if preserve_media_filenames {
(strip_html_preserving_media_filenames, "$1")
} else {
(strip_html, "")
};
PERSISTENT_HTML_SPACERS
.replace_all(html, " ")
.map_cow(|s| UNPRINTABLE_TAGS.replace_all(s, ""))
.map_cow(if preserve_media_filenames {
strip_html_preserving_media_filenames
} else {
strip_html
})
.map_cow(|s| TYPE_TAG.replace_all(s, ""))
.map_cow(|s| SOUND_TAG.replace_all(s, sound_rep))
.map_cow(html_stripper)
.trim()
}
@ -330,16 +328,9 @@ pub(crate) fn extract_underscored_references(text: &str) -> Vec<&str> {
}
pub fn strip_html_preserving_media_filenames(html: &str) -> Cow<str> {
let without_fnames = HTML_MEDIA_TAGS.replace_all(html, r" ${1}${2}${3} ");
let without_html = strip_html(&without_fnames);
// no changes?
if let Cow::Borrowed(b) = without_html {
if ptr::eq(b, html) {
return Cow::Borrowed(html);
}
}
// make borrow checker happy
without_html.into_owned().into()
HTML_MEDIA_TAGS
.replace_all(html, r" ${1}${2}${3} ")
.map_cow(strip_html)
}
#[allow(dead_code)]

View file

@ -26,6 +26,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
/>
{/each}
{/await}
<Spacer --height="1.5rem" />
{/if}
<Spacer --height="1.5rem" />
<MapperRow label={tr.editingTags()} {columnOptions} bind:value={tagsColumn} />

View file

@ -8,7 +8,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import Row from "../components/Row.svelte";
import Spacer from "../components/Spacer.svelte";
import * as tr from "../lib/ftl";
import { Decks, ImportExport, importExport, Notetypes } from "../lib/proto";
import {
Decks,
Generic,
ImportExport,
importExport,
Notetypes,
} from "../lib/proto";
import DeckSelector from "./DeckSelector.svelte";
import DelimiterSelector from "./DelimiterSelector.svelte";
import DupeResolutionSelector from "./DupeResolutionSelector.svelte";
@ -17,6 +23,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import HtmlSwitch from "./HtmlSwitch.svelte";
import { getColumnOptions, getCsvMetadata } from "./lib";
import NotetypeSelector from "./NotetypeSelector.svelte";
import Preview from "./Preview.svelte";
import StickyFooter from "./StickyFooter.svelte";
import Tags from "./Tags.svelte";
@ -32,6 +39,8 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
export let updatedTags: string[];
export let columnLabels: string[];
export let tagsColumn: number;
export let guidColumn: number;
export let preview: Generic.StringList[];
// Protobuf oneofs. Exactly one of these pairs is expected to be set.
export let notetypeColumn: number | null;
export let globalNotetype: ImportExport.CsvMetadata.MappedNotetype | null;
@ -41,9 +50,17 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
let dupeResolution: ImportExport.ImportCsvRequest.DupeResolution;
let lastNotetypeId = globalNotetype?.id;
$: columnOptions = getColumnOptions(columnLabels, notetypeColumn, deckColumn);
$: getCsvMetadata(path, delimiter).then((meta) => {
$: columnOptions = getColumnOptions(
columnLabels,
preview[0].vals,
notetypeColumn,
deckColumn,
tagsColumn,
guidColumn,
);
$: getCsvMetadata(path, delimiter, undefined, isHtml).then((meta) => {
columnLabels = meta.columnLabels;
preview = meta.preview;
});
$: if (globalNotetype?.id !== lastNotetypeId) {
lastNotetypeId = globalNotetype?.id;
@ -66,6 +83,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
updatedTags,
columnLabels,
tagsColumn,
guidColumn,
notetypeColumn,
globalNotetype,
deckColumn,
@ -78,6 +96,15 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<Container class="csv-page">
<Row --cols={2}>
<Col --col-size={1} breakpoint="md">
<Container>
<Header heading={tr.importingFile()} />
<Spacer --height="1.5rem" />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Preview {columnOptions} {preview} />
</Container>
</Col>
<Col --col-size={1} breakpoint="md">
<Container>
<Header heading={tr.importingImportOptions()} />
@ -92,8 +119,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<DeckSelector {deckNameIds} bind:deckId />
{/if}
<DupeResolutionSelector bind:dupeResolution />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Tags bind:globalTags bind:updatedTags />
</Container>
</Col>

View file

@ -0,0 +1,65 @@
<!--
Copyright: Ankitects Pty Ltd and contributors
License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
-->
<script lang="ts">
    import type { Generic } from "../lib/proto";
    import type { ColumnOption } from "./lib";

    /* Header labels; index 0 is the "nothing" option and is skipped. */
    export let columnOptions: ColumnOption[];
    /* First rows of the CSV file, one StringList per row. */
    export let preview: Generic.StringList[];
</script>

<div class="outer">
    <table class="preview">
        <!-- Header cells must live in a row: Svelte builds the DOM
             programmatically, so bare <th> children of <table> are invalid
             markup and make the nth-child striping below depend on the
             column count. -->
        <tr>
            {#each columnOptions.slice(1) as { label, shortLabel }}
                <th>
                    {shortLabel || label}
                </th>
            {/each}
        </tr>
        {#each preview as row}
            <tr>
                {#each row.vals as cell}
                    <td>{cell}</td>
                {/each}
            </tr>
        {/each}
    </table>
</div>

<style lang="scss">
    .outer {
        // approximate size based on body max width + margins
        width: min(90vw, 65em);
        overflow: auto;
    }
    .preview {
        border-collapse: collapse;
        white-space: nowrap;

        th,
        td {
            text-overflow: ellipsis;
            overflow: hidden;
            border: 1px solid var(--faint-border);
            padding: 0.25rem 0.5rem;
            max-width: 15em;
        }
        th {
            background: var(--medium-border);
            text-align: center;
        }
        tr {
            &:nth-child(even) {
                background: var(--frame-bg);
            }
        }
        td {
            text-align: start;
        }
    }
</style>

View file

@ -37,11 +37,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<style lang="scss">
.sticky-footer {
position: sticky;
position: fixed;
bottom: 0;
left: 0;
right: 0;
z-index: 10;
margin: 0.75rem;
margin: 0;
padding: 0.25rem;
background: var(--window-bg);

View file

@ -17,16 +17,13 @@
body {
width: min(100vw, 70em);
margin: 0 auto;
height: 100%;
padding: 1em;
// pad out the underside of the footer
padding-bottom: 5em;
}
html {
overflow-x: hidden;
height: 100%;
}
#main {
padding: 0.5em 0.5em 1em 0.5em;
}
// override the default down arrow colour in <select> elements

View file

@ -59,7 +59,9 @@ export async function setupImportCsvPage(path: string): Promise<ImportCsvPage> {
updatedTags: metadata.updatedTags,
columnLabels: metadata.columnLabels,
tagsColumn: metadata.tagsColumn,
guidColumn: metadata.guidColumn,
globalNotetype: metadata.globalNotetype ?? null,
preview: metadata.preview,
// Unset oneof numbers default to 0, which also means n/a here,
// but it's vital to differentiate between unset and 0 when reserializing.
notetypeColumn: metadata.notetypeColumn ? metadata.notetypeColumn : null,

View file

@ -11,14 +11,18 @@ import {
export interface ColumnOption {
label: string;
shortLabel?: string;
value: number;
disabled: boolean;
}
export function getColumnOptions(
columnLabels: string[],
firstRow: string[],
notetypeColumn: number | null,
deckColumn: number | null,
tagsColumn: number,
guidColumn: number,
): ColumnOption[] {
return [{ label: tr.changeNotetypeNothing(), value: 0, disabled: false }].concat(
columnLabels.map((label, index) => {
@ -27,22 +31,28 @@ export function getColumnOptions(
return columnOption(tr.notetypesNotetype(), true, index);
} else if (index === deckColumn) {
return columnOption(tr.decksDeck(), true, index);
} else if (index === guidColumn) {
return columnOption("GUID", true, index);
} else if (index === tagsColumn) {
return columnOption(tr.editingTags(), false, index);
} else if (label === "") {
return columnOption(index, false, index);
return columnOption(firstRow[index - 1], false, index, true);
} else {
return columnOption(`"${label}"`, false, index);
return columnOption(label, false, index);
}
}),
);
}
function columnOption(
label: string | number,
label: string,
disabled: boolean,
index: number,
shortLabel?: boolean,
): ColumnOption {
return {
label: tr.importingColumn({ val: label }),
label: label ? `${index}: ${label}` : index.toString(),
shortLabel: shortLabel ? index.toString() : undefined,
value: index,
disabled,
};
@ -58,12 +68,14 @@ export async function getCsvMetadata(
path: string,
delimiter?: ImportExport.CsvMetadata.Delimiter,
notetypeId?: number,
isHtml?: boolean,
): Promise<ImportExport.CsvMetadata> {
return importExport.getCsvMetadata(
ImportExport.CsvMetadataRequest.create({
path,
delimiter,
notetypeId,
isHtml,
}),
);
}