diff --git a/Cargo.lock b/Cargo.lock index 79ca25d45..5d2be794d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,7 +54,7 @@ dependencies = [ "chrono", "coarsetime", "criterion", - "csv", + "csv 1.1.6 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)", "env_logger", "flate2", "fluent", @@ -392,7 +392,7 @@ dependencies = [ "cast", "clap", "criterion-plot", - "csv", + "csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "itertools", "lazy_static", "num-traits", @@ -506,12 +506,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" dependencies = [ "bstr", - "csv-core", + "csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "itoa 0.4.8", "ryu", "serde", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90" +dependencies = [ + "bstr", + "csv-core 0.1.10 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)", + "itoa 1.0.1", + "ryu", + "serde", +] + [[package]] name = "csv-core" version = "0.1.10" @@ -521,6 +533,14 @@ dependencies = [ "memchr", ] +[[package]] +name = "csv-core" +version = "0.1.10" +source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90" +dependencies = [ + "memchr", +] + [[package]] name = "derive_more" version = "0.99.17" diff --git a/cargo/crates.bzl b/cargo/crates.bzl index af8b281be..5b7822884 100644 --- a/cargo/crates.bzl +++ b/cargo/crates.bzl @@ -372,23 +372,23 @@ def raze_fetch_remote_crates(): ) maybe( - http_archive, + new_git_repository, name = "raze__csv__1_1_6", - url = "https://crates.io/api/v1/crates/csv/1.1.6/download", - type = "tar.gz", - sha256 = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1", - strip_prefix = "csv-1.1.6", + remote = "https://github.com/ankitects/rust-csv.git", + shallow_since = "1654675287 +1000", + commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90", build_file = Label("//cargo/remote:BUILD.csv-1.1.6.bazel"), + init_submodules = True, ) maybe( - http_archive, + new_git_repository, name = "raze__csv_core__0_1_10", - url = "https://crates.io/api/v1/crates/csv-core/0.1.10/download", - type = "tar.gz", - sha256 = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90", - strip_prefix = "csv-core-0.1.10", + remote = "https://github.com/ankitects/rust-csv.git", + shallow_since = "1654675287 +1000", + commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90", build_file = Label("//cargo/remote:BUILD.csv-core-0.1.10.bazel"), + init_submodules = True, ) maybe( diff --git a/cargo/remote/BUILD.csv-1.1.6.bazel b/cargo/remote/BUILD.csv-1.1.6.bazel index 2ac7ec9de..f60722f81 100644 --- a/cargo/remote/BUILD.csv-1.1.6.bazel +++ b/cargo/remote/BUILD.csv-1.1.6.bazel @@ -126,7 +126,7 @@ rust_library( deps = [ "@raze__bstr__0_2_17//:bstr", "@raze__csv_core__0_1_10//:csv_core", - "@raze__itoa__0_4_8//:itoa", + "@raze__itoa__1_0_1//:itoa", "@raze__ryu__1_0_9//:ryu", "@raze__serde__1_0_136//:serde", ], diff --git a/cargo/remote/BUILD.csv-core-0.1.10.bazel b/cargo/remote/BUILD.csv-core-0.1.10.bazel index 57d4a98ed..3857bbf6f 100644 --- a/cargo/remote/BUILD.csv-core-0.1.10.bazel +++ b/cargo/remote/BUILD.csv-core-0.1.10.bazel @@ -39,7 +39,7 @@ rust_library( crate_features = [ "default", ], - crate_root = "src/lib.rs", + crate_root = "csv-core/src/lib.rs", data = [], edition = "2018", rustc_flags = [ diff --git a/cargo/update.py b/cargo/update.py index 3d770d394..0f33f8fa9 100755 --- a/cargo/update.py +++ b/cargo/update.py @@ -21,7 +21,10 @@ COMMITS_SHALLOW_SINCE = { "1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000", # pct-str "4adccd8d4a222ab2672350a102f06ae832a0572d": "1605376517 +0100", + # linkcheck "2f20798ce521cc594d510d4e417e76d5eac04d4b": "1626729019 +0200", + # rust-csv + "1c9d3aab6f79a7d815c69f925a46a4590c115f90": "1654675287 +1000", } import glob diff --git a/ftl/core/exporting.ftl b/ftl/core/exporting.ftl index 4c3e30477..d236022be 100644 --- a/ftl/core/exporting.ftl +++ b/ftl/core/exporting.ftl @@ -38,3 +38,6 @@ exporting-processed-media-files = [one] Processed { $count } media file... *[other] Processed { $count } media files... } +exporting-include-deck = Include deck name +exporting-include-notetype = Include notetype name +exporting-include-guid = Include unique note identifier diff --git a/ftl/core/importing.ftl b/ftl/core/importing.ftl index a4e6621ab..fd3788175 100644 --- a/ftl/core/importing.ftl +++ b/ftl/core/importing.ftl @@ -9,7 +9,6 @@ importing-appeared-twice-in-file = Appeared twice in file: { $val } importing-by-default-anki-will-detect-the = By default, Anki will detect the character between fields, such as a tab, comma, and so on. If Anki is detecting the character incorrectly, you can enter it here. Use \t to represent tab. importing-change = Change importing-colon = Colon -importing-column = Column { $val } importing-comma = Comma importing-empty-first-field = Empty first field: { $val } importing-field-separator = Field separator @@ -108,3 +107,4 @@ importing-preserve = Preserve importing-update = Update importing-tag-all-notes = Tag all notes importing-tag-updated-notes = Tag updated notes +importing-file = File diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto index 0ea23d706..ddb3f9662 100644 --- a/proto/anki/import_export.proto +++ b/proto/anki/import_export.proto @@ -119,6 +119,7 @@ message CsvMetadataRequest { string path = 1; optional CsvMetadata.Delimiter delimiter = 2; optional int64 notetype_id = 3; + optional bool is_html = 4; } // Column indices are 1-based to make working with them in TS easier, where @@ -163,6 +164,8 @@ message CsvMetadata { uint32 tags_column = 10; bool force_delimiter = 11; bool force_is_html = 12; + repeated generic.StringList preview = 13; + uint32 guid_column = 14; } message ExportCardCsvRequest { @@ -175,7 +178,10 @@ message ExportNoteCsvRequest { string out_path = 1; bool with_html = 2; bool with_tags = 3; - ExportLimit limit = 4; + bool with_deck = 4; + bool with_notetype = 5; + bool with_guid = 6; + ExportLimit limit = 7; } message ExportLimit { diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index b5c4e79e1..79597a718 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -423,11 +423,17 @@ class Collection(DeprecatedNamesMixin): limit: ExportLimit, with_html: bool, with_tags: bool, + with_deck: bool, + with_notetype: bool, + with_guid: bool, ) -> int: return self._backend.export_note_csv( out_path=out_path, with_html=with_html, with_tags=with_tags, + with_deck=with_deck, + with_notetype=with_notetype, + with_guid=with_guid, limit=pb_export_limit(limit), ) diff --git a/qt/aqt/exporting.py b/qt/aqt/exporting.py index f17c5310d..0ad18c3d5 100644 --- a/qt/aqt/exporting.py +++ b/qt/aqt/exporting.py @@ -98,6 +98,10 @@ class ExportDialog(QDialog): self.frm.includeHTML.setVisible(False) # show deck list? self.frm.deck.setVisible(not self.isVerbatim) + # used by the new export screen + self.frm.includeDeck.setVisible(False) + self.frm.includeNotetype.setVisible(False) + self.frm.includeGuid.setVisible(False) def accept(self) -> None: self.exporter.includeSched = self.frm.includeSched.isChecked() diff --git a/qt/aqt/forms/exporting.ui b/qt/aqt/forms/exporting.ui index 3d39e9416..4b34d3ae1 100644 --- a/qt/aqt/forms/exporting.ui +++ b/qt/aqt/forms/exporting.ui @@ -6,8 +6,8 @@ 0 0 - 563 - 245 + 610 + 348 @@ -77,6 +77,13 @@ + + + + exporting_include_html_and_media_references + + + @@ -88,9 +95,29 @@ - + + + true + - exporting_include_html_and_media_references + exporting_include_deck + + + + + + + true + + + exporting_include_notetype + + + + + + + exporting_include_guid diff --git a/qt/aqt/import_export/exporting.py b/qt/aqt/import_export/exporting.py index 669a2ddc8..aed2e3c54 100644 --- a/qt/aqt/import_export/exporting.py +++ b/qt/aqt/import_export/exporting.py @@ -91,6 +91,9 @@ class ExportDialog(QDialog): self.frm.includeMedia.setVisible(self.exporter.show_include_media) self.frm.includeTags.setVisible(self.exporter.show_include_tags) self.frm.includeHTML.setVisible(self.exporter.show_include_html) + self.frm.includeDeck.setVisible(self.exporter.show_include_deck) + self.frm.includeNotetype.setVisible(self.exporter.show_include_notetype) + self.frm.includeGuid.setVisible(self.exporter.show_include_guid) self.frm.legacy_support.setVisible(self.exporter.show_legacy_support) self.frm.deck.setVisible(self.exporter.show_deck_list) @@ -135,6 +138,9 @@ class ExportDialog(QDialog): include_media=self.frm.includeMedia.isChecked(), include_tags=self.frm.includeTags.isChecked(), include_html=self.frm.includeHTML.isChecked(), + include_deck=self.frm.includeDeck.isChecked(), + include_notetype=self.frm.includeNotetype.isChecked(), + include_guid=self.frm.includeGuid.isChecked(), legacy_support=self.frm.legacy_support.isChecked(), limit=limit, ) @@ -165,6 +171,9 @@ class Options: include_media: bool include_tags: bool include_html: bool + include_deck: bool + include_notetype: bool + include_guid: bool legacy_support: bool limit: ExportLimit @@ -177,6 +186,9 @@ class Exporter(ABC): show_include_tags = False show_include_html = False show_legacy_support = False + show_include_deck = False + show_include_notetype = False + show_include_guid = False @staticmethod @abstractmethod @@ -255,6 +267,9 @@ class NoteCsvExporter(Exporter): show_deck_list = True show_include_html = True show_include_tags = True + show_include_deck = True + show_include_notetype = True + show_include_guid = True @staticmethod def name() -> str: @@ -269,6 +284,9 @@ class NoteCsvExporter(Exporter): limit=options.limit, with_html=options.include_html, with_tags=options.include_tags, + with_deck=options.include_deck, + with_notetype=options.include_notetype, + with_guid=options.include_guid, ), success=lambda count: tooltip( tr.exporting_note_exported(count=count), parent=mw diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml index becb05d56..8d7b1ecc1 100644 --- a/rslib/Cargo.toml +++ b/rslib/Cargo.toml @@ -100,4 +100,4 @@ unic-ucd-category = "0.9.0" id_tree = "1.8.0" zstd = { version="0.10.0", features=["zstdmt"] } num_cpus = "1.13.1" -csv = "1.1.6" +csv = { git="https://github.com/ankitects/rust-csv.git", rev="1c9d3aab6f79a7d815c69f925a46a4590c115f90" } diff --git a/rslib/src/backend/import_export.rs b/rslib/src/backend/import_export.rs index 56773441b..ea3c05447 100644 --- a/rslib/src/backend/import_export.rs +++ b/rslib/src/backend/import_export.rs @@ -75,7 +75,12 @@ impl ImportExportService for Backend { fn get_csv_metadata(&self, input: pb::CsvMetadataRequest) -> Result { let delimiter = input.delimiter.is_some().then(|| input.delimiter()); self.with_col(|col| { - col.get_csv_metadata(&input.path, delimiter, input.notetype_id.map(Into::into)) + col.get_csv_metadata( + &input.path, + delimiter, + input.notetype_id.map(Into::into), + input.is_html, + ) }) } @@ -93,16 +98,8 @@ impl ImportExportService for Backend { } fn export_note_csv(&self, input: pb::ExportNoteCsvRequest) -> Result { - self.with_col(|col| { - col.export_note_csv( - &input.out_path, - SearchNode::from(input.limit.unwrap_or_default()), - input.with_html, - input.with_tags, - self.export_progress_fn(), - ) - }) - .map(Into::into) + self.with_col(|col| col.export_note_csv(input, self.export_progress_fn())) + .map(Into::into) } fn export_card_csv(&self, input: pb::ExportCardCsvRequest) -> Result { diff --git a/rslib/src/import_export/text/csv/export.rs b/rslib/src/import_export/text/csv/export.rs index e1c702be7..6d9072b91 100644 --- a/rslib/src/import_export/text/csv/export.rs +++ b/rslib/src/import_export/text/csv/export.rs @@ -1,7 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -use std::{borrow::Cow, fs::File, io::Write}; +use std::{borrow::Cow, collections::HashMap, fs::File, io::Write, sync::Arc}; use itertools::Itertools; use lazy_static::lazy_static; @@ -9,10 +9,11 @@ use regex::Regex; use super::metadata::Delimiter; use crate::{ + backend_proto::ExportNoteCsvRequest, import_export::{ExportProgress, IncrementableProgress}, notetype::RenderCardOutput, prelude::*, - search::SortMode, + search::{SearchNode, SortMode}, template::RenderedNode, text::{html_to_text_line, CowMapping}, }; @@ -31,7 +32,7 @@ impl Collection { progress.call(ExportProgress::File)?; let mut incrementor = progress.incrementor(ExportProgress::Cards); - let mut writer = file_writer_with_header(path)?; + let mut writer = file_writer_with_header(path, with_html)?; let mut cards = self.search_cards(search, SortMode::NoOrder)?; cards.sort_unstable(); for &card in &cards { @@ -45,21 +46,19 @@ impl Collection { pub fn export_note_csv( &mut self, - path: &str, - search: impl TryIntoSearch, - with_html: bool, - with_tags: bool, + mut request: ExportNoteCsvRequest, progress_fn: impl 'static + FnMut(ExportProgress, bool) -> bool, ) -> Result { let mut progress = IncrementableProgress::new(progress_fn); progress.call(ExportProgress::File)?; let mut incrementor = progress.incrementor(ExportProgress::Notes); - let mut writer = file_writer_with_header(path)?; - self.search_notes_into_table(search)?; + self.search_notes_into_table(request.search_node())?; + let ctx = NoteContext::new(&request, self)?; + let mut writer = note_file_writer_with_header(&request.out_path, &ctx)?; self.storage.for_each_note_in_search(|note| { incrementor.increment()?; - writer.write_record(note_record(¬e, with_html, with_tags))?; + writer.write_record(ctx.record(¬e))?; Ok(()) })?; writer.flush()?; @@ -77,17 +76,46 @@ impl Collection { } } -fn file_writer_with_header(path: &str) -> Result> { +fn file_writer_with_header(path: &str, with_html: bool) -> Result> { let mut file = File::create(path)?; - write_header(&mut file)?; + write_file_header(&mut file, with_html)?; Ok(csv::WriterBuilder::new() .delimiter(DELIMITER.byte()) - .flexible(true) + .comment(Some(b'#')) .from_writer(file)) } -fn write_header(writer: &mut impl Write) -> Result<()> { - write!(writer, "#separator:{}\n#html:true\n", DELIMITER.name())?; +fn write_file_header(writer: &mut impl Write, with_html: bool) -> Result<()> { + writeln!(writer, "#separator:{}", DELIMITER.name())?; + writeln!(writer, "#html:{with_html}")?; + Ok(()) +} + +fn note_file_writer_with_header(path: &str, ctx: &NoteContext) -> Result> { + let mut file = File::create(path)?; + write_note_file_header(&mut file, ctx)?; + Ok(csv::WriterBuilder::new() + .delimiter(DELIMITER.byte()) + .comment(Some(b'#')) + .from_writer(file)) +} + +fn write_note_file_header(writer: &mut impl Write, ctx: &NoteContext) -> Result<()> { + write_file_header(writer, ctx.with_html)?; + write_column_header(ctx, writer) +} + +fn write_column_header(ctx: &NoteContext, writer: &mut impl Write) -> Result<()> { + for (name, column) in [ + ("guid", ctx.guid_column()), + ("notetype", ctx.notetype_column()), + ("deck", ctx.deck_column()), + ("tags", ctx.tags_column()), + ] { + if let Some(index) = column { + writeln!(writer, "#{name} column:{index}")?; + } + } Ok(()) } @@ -117,24 +145,12 @@ fn rendered_nodes_to_str(nodes: &[RenderedNode]) -> String { .join("") } -fn note_record(note: &Note, with_html: bool, with_tags: bool) -> Vec { - let mut fields: Vec<_> = note - .fields() - .iter() - .map(|f| field_to_record_field(f, with_html)) - .collect(); - if with_tags { - fields.push(note.tags.join(" ")); - } - fields -} - -fn field_to_record_field(field: &str, with_html: bool) -> String { +fn field_to_record_field(field: &str, with_html: bool) -> Cow { let mut text = strip_redundant_sections(field); if !with_html { text = text.map_cow(|t| html_to_text_line(t, false)); } - text.into() + text } fn strip_redundant_sections(text: &str) -> Cow { @@ -157,3 +173,110 @@ fn strip_answer_side_question(text: &str) -> Cow { } RE.replace_all(text.as_ref(), "") } + +struct NoteContext { + with_html: bool, + with_tags: bool, + with_deck: bool, + with_notetype: bool, + with_guid: bool, + notetypes: HashMap>, + deck_ids: HashMap, + deck_names: HashMap, + field_columns: usize, +} + +impl NoteContext { + /// Caller must have searched notes into table. + fn new(request: &ExportNoteCsvRequest, col: &mut Collection) -> Result { + let notetypes = col.get_all_notetypes_of_search_notes()?; + let field_columns = notetypes + .values() + .map(|nt| nt.fields.len()) + .max() + .unwrap_or_default(); + let deck_ids = col.storage.all_decks_of_search_notes()?; + let deck_names = HashMap::from_iter(col.storage.get_all_deck_names()?.into_iter()); + + Ok(Self { + with_html: request.with_html, + with_tags: request.with_tags, + with_deck: request.with_deck, + with_notetype: request.with_notetype, + with_guid: request.with_guid, + notetypes, + field_columns, + deck_ids, + deck_names, + }) + } + + fn guid_column(&self) -> Option { + self.with_guid.then(|| 1) + } + + fn notetype_column(&self) -> Option { + self.with_notetype + .then(|| 1 + self.guid_column().unwrap_or_default()) + } + + fn deck_column(&self) -> Option { + self.with_deck + .then(|| 1 + self.notetype_column().unwrap_or_default()) + } + + fn tags_column(&self) -> Option { + self.with_tags + .then(|| 1 + self.deck_column().unwrap_or_default() + self.field_columns) + } + + fn record<'c, 's: 'c, 'n: 'c>(&'s self, note: &'n Note) -> impl Iterator> { + self.with_guid + .then(|| Cow::from(note.guid.as_bytes())) + .into_iter() + .chain(self.notetype_name(note).into_iter()) + .chain(self.deck_name(note).into_iter()) + .chain(self.note_fields(note)) + .chain(self.tags(note).into_iter()) + } + + fn notetype_name(&self, note: &Note) -> Option> { + self.with_notetype.then(|| { + self.notetypes + .get(¬e.notetype_id) + .map_or(Cow::from(vec![]), |nt| Cow::from(nt.name.as_bytes())) + }) + } + + fn deck_name(&self, note: &Note) -> Option> { + self.with_deck.then(|| { + self.deck_ids + .get(¬e.id) + .and_then(|did| self.deck_names.get(did)) + .map_or(Cow::from(vec![]), |name| Cow::from(name.as_bytes())) + }) + } + + fn tags(&self, note: &Note) -> Option> { + self.with_tags + .then(|| Cow::from(note.tags.join(" ").into_bytes())) + } + + fn note_fields<'n>(&self, note: &'n Note) -> impl Iterator> { + let with_html = self.with_html; + note.fields() + .iter() + .map(move |f| field_to_record_field(f, with_html)) + .pad_using(self.field_columns, |_| Cow::from("")) + .map(|cow| match cow { + Cow::Borrowed(s) => Cow::from(s.as_bytes()), + Cow::Owned(s) => Cow::from(s.into_bytes()), + }) + } +} + +impl ExportNoteCsvRequest { + fn search_node(&mut self) -> SearchNode { + SearchNode::from(self.limit.take().unwrap_or_default()) + } +} diff --git a/rslib/src/import_export/text/csv/import.rs b/rslib/src/import_export/text/csv/import.rs index d9eeb9c7c..4b7cfe8d3 100644 --- a/rslib/src/import_export/text/csv/import.rs +++ b/rslib/src/import_export/text/csv/import.rs @@ -113,6 +113,7 @@ type FieldSourceColumns = Vec>; // Column indices are 1-based. struct ColumnContext { tags_column: Option, + guid_column: Option, deck_column: Option, notetype_column: Option, /// Source column indices for the fields of a notetype, identified by its @@ -126,6 +127,7 @@ impl ColumnContext { fn new(metadata: &CsvMetadata) -> Result { Ok(Self { tags_column: (metadata.tags_column > 0).then(|| metadata.tags_column as usize), + guid_column: (metadata.guid_column > 0).then(|| metadata.guid_column as usize), deck_column: metadata.deck()?.column(), notetype_column: metadata.notetype()?.column(), field_source_columns: metadata.field_source_columns()?, @@ -135,16 +137,10 @@ impl ColumnContext { fn deserialize_csv( &mut self, - mut reader: impl Read + Seek, + reader: impl Read + Seek, delimiter: Delimiter, ) -> Result> { - remove_tags_line_from_reader(&mut reader)?; - let mut csv_reader = csv::ReaderBuilder::new() - .has_headers(false) - .flexible(true) - .comment(Some(b'#')) - .delimiter(delimiter.byte()) - .from_reader(reader); + let mut csv_reader = build_csv_reader(reader, delimiter)?; self.deserialize_csv_reader(&mut csv_reader) } @@ -162,34 +158,17 @@ impl ColumnContext { .collect() } - fn foreign_note_from_record(&mut self, record: &csv::StringRecord) -> ForeignNote { - let notetype = self.gather_notetype(record).into(); - let deck = self.gather_deck(record).into(); - let tags = self.gather_tags(record); - let fields = self.gather_note_fields(record); + fn foreign_note_from_record(&self, record: &csv::StringRecord) -> ForeignNote { ForeignNote { - notetype, - fields, - tags, - deck, + notetype: str_from_record_column(self.notetype_column, record).into(), + fields: self.gather_note_fields(record), + tags: self.gather_tags(record), + deck: str_from_record_column(self.deck_column, record).into(), + guid: str_from_record_column(self.guid_column, record), ..Default::default() } } - fn gather_notetype(&self, record: &csv::StringRecord) -> String { - self.notetype_column - .and_then(|i| record.get(i - 1)) - .unwrap_or_default() - .to_string() - } - - fn gather_deck(&self, record: &csv::StringRecord) -> String { - self.deck_column - .and_then(|i| record.get(i - 1)) - .unwrap_or_default() - .to_string() - } - fn gather_tags(&self, record: &csv::StringRecord) -> Vec { self.tags_column .and_then(|i| record.get(i - 1)) @@ -200,7 +179,7 @@ impl ColumnContext { .collect() } - fn gather_note_fields(&mut self, record: &csv::StringRecord) -> Vec { + fn gather_note_fields(&self, record: &csv::StringRecord) -> Vec { let stringify = self.stringify; self.field_source_columns .iter() @@ -210,6 +189,26 @@ impl ColumnContext { } } +fn str_from_record_column(column: Option, record: &csv::StringRecord) -> String { + column + .and_then(|i| record.get(i - 1)) + .unwrap_or_default() + .to_string() +} + +pub(super) fn build_csv_reader( + mut reader: impl Read + Seek, + delimiter: Delimiter, +) -> Result> { + remove_tags_line_from_reader(&mut reader)?; + Ok(csv::ReaderBuilder::new() + .has_headers(false) + .flexible(true) + .comment(Some(b'#')) + .delimiter(delimiter.byte()) + .from_reader(reader)) +} + fn stringify_fn(is_html: bool) -> fn(&str) -> String { if is_html { ToString::to_string @@ -267,6 +266,7 @@ mod test { is_html: false, force_is_html: false, tags_column: 0, + guid_column: 0, global_tags: Vec::new(), updated_tags: Vec::new(), column_labels: vec!["".to_string(); 2], @@ -275,6 +275,7 @@ mod test { id: 1, field_columns: vec![1, 2], })), + preview: Vec::new(), } } } diff --git a/rslib/src/import_export/text/csv/metadata.rs b/rslib/src/import_export/text/csv/metadata.rs index 4ce6c257f..dfa1cff32 100644 --- a/rslib/src/import_export/text/csv/metadata.rs +++ b/rslib/src/import_export/text/csv/metadata.rs @@ -4,65 +4,81 @@ use std::{ collections::{HashMap, HashSet}, fs::File, - io::{BufRead, BufReader}, + io::{BufRead, BufReader, Read, Seek, SeekFrom}, }; +use itertools::Itertools; use strum::IntoEnumIterator; +use super::import::build_csv_reader; pub use crate::backend_proto::import_export::{ csv_metadata::{Deck as CsvDeck, Delimiter, MappedNotetype, Notetype as CsvNotetype}, CsvMetadata, }; use crate::{ - error::ImportError, import_export::text::NameOrId, notetype::NoteField, prelude::*, - text::is_html, + backend_proto::StringList, + error::ImportError, + import_export::text::NameOrId, + notetype::NoteField, + prelude::*, + text::{html_to_text_line, is_html}, }; +/// The maximum number of preview rows. +const PREVIEW_LENGTH: usize = 5; +/// The maximum number of characters per preview field. +const PREVIEW_FIELD_LENGTH: usize = 80; + impl Collection { pub fn get_csv_metadata( &mut self, path: &str, delimiter: Option, notetype_id: Option, + is_html: Option, ) -> Result { - let reader = BufReader::new(File::open(path)?); - self.get_reader_metadata(reader, delimiter, notetype_id) + let mut reader = File::open(path)?; + self.get_reader_metadata(&mut reader, delimiter, notetype_id, is_html) } fn get_reader_metadata( &mut self, - reader: impl BufRead, + mut reader: impl Read + Seek, delimiter: Option, notetype_id: Option, + is_html: Option, ) -> Result { let mut metadata = CsvMetadata::default(); - let line = self.parse_meta_lines(reader, &mut metadata)?; - maybe_set_fallback_delimiter(delimiter, &mut metadata, &line); - maybe_set_fallback_columns(&mut metadata, &line)?; - maybe_set_fallback_is_html(&mut metadata, &line)?; + let meta_len = self.parse_meta_lines(&mut reader, &mut metadata)? as u64; + maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader, meta_len)?; + let records = collect_preview_records(&mut metadata, reader)?; + maybe_set_fallback_is_html(&mut metadata, &records, is_html)?; + set_preview(&mut metadata, &records)?; + maybe_set_fallback_columns(&mut metadata)?; self.maybe_set_fallback_notetype(&mut metadata, notetype_id)?; self.maybe_init_notetype_map(&mut metadata)?; self.maybe_set_fallback_deck(&mut metadata)?; + Ok(metadata) } - /// Parses the meta head of the file, and returns the first content line. - fn parse_meta_lines( - &mut self, - mut reader: impl BufRead, - metadata: &mut CsvMetadata, - ) -> Result { + /// Parses the meta head of the file and returns the total of meta bytes. + fn parse_meta_lines(&mut self, reader: impl Read, metadata: &mut CsvMetadata) -> Result { + let mut meta_len = 0; + let mut reader = BufReader::new(reader); let mut line = String::new(); - reader.read_line(&mut line)?; + let mut line_len = reader.read_line(&mut line)?; if self.parse_first_line(&line, metadata) { + meta_len += line_len; line.clear(); - reader.read_line(&mut line)?; + line_len = reader.read_line(&mut line)?; while self.parse_line(&line, metadata) { + meta_len += line_len; line.clear(); - reader.read_line(&mut line)?; + line_len = reader.read_line(&mut line)?; } } - Ok(line) + Ok(meta_len) } /// True if the line is a meta line, i.e. a comment, or starting with 'tags:'. @@ -103,7 +119,7 @@ impl Collection { } "tags" => metadata.global_tags = collect_tags(value), "columns" => { - if let Ok(columns) = self.parse_columns(value, metadata) { + if let Ok(columns) = parse_columns(value, metadata.delimiter()) { metadata.column_labels = columns; } } @@ -127,21 +143,20 @@ impl Collection { metadata.deck = Some(CsvDeck::DeckColumn(n)); } } + "tags column" => { + if let Ok(n) = value.trim().parse() { + metadata.tags_column = n; + } + } + "guid column" => { + if let Ok(n) = value.trim().parse() { + metadata.guid_column = n; + } + } _ => (), } } - fn parse_columns(&mut self, line: &str, metadata: &mut CsvMetadata) -> Result> { - let delimiter = if metadata.force_delimiter { - metadata.delimiter() - } else { - delimiter_from_line(line) - }; - map_single_record(line, delimiter, |record| { - record.iter().map(ToString::to_string).collect() - }) - } - fn maybe_set_fallback_notetype( &mut self, metadata: &mut CsvMetadata, @@ -161,7 +176,15 @@ impl Collection { metadata .notetype_id() .and_then(|ntid| self.default_deck_for_notetype(ntid).transpose()) - .unwrap_or_else(|| self.get_current_deck().map(|d| d.id))? + .unwrap_or_else(|| { + self.get_current_deck().map(|deck| { + if deck.is_filtered() { + DeckId(1) + } else { + deck.id + } + }) + })? .0, )); } @@ -205,6 +228,61 @@ impl Collection { } } +fn parse_columns(line: &str, delimiter: Delimiter) -> Result> { + map_single_record(line, delimiter, |record| { + record.iter().map(ToString::to_string).collect() + }) +} + +fn collect_preview_records( + metadata: &mut CsvMetadata, + mut reader: impl Read + Seek, +) -> Result> { + reader.rewind()?; + let mut csv_reader = build_csv_reader(reader, metadata.delimiter())?; + csv_reader + .records() + .into_iter() + .take(PREVIEW_LENGTH) + .collect::>() + .map_err(Into::into) +} + +fn set_preview(metadata: &mut CsvMetadata, records: &[csv::StringRecord]) -> Result<()> { + let mut min_len = 1; + metadata.preview = records + .iter() + .enumerate() + .map(|(idx, record)| { + let row = build_preview_row(min_len, record, metadata.is_html); + if idx == 0 { + min_len = row.vals.len(); + } + row + }) + .collect(); + Ok(()) +} + +fn build_preview_row(min_len: usize, record: &csv::StringRecord, strip_html: bool) -> StringList { + StringList { + vals: record + .iter() + .pad_using(min_len, |_| "") + .map(|field| { + if strip_html { + html_to_text_line(field, true) + .chars() + .take(PREVIEW_FIELD_LENGTH) + .collect() + } else { + field.chars().take(PREVIEW_FIELD_LENGTH).collect() + } + }) + .collect(), + } +} + pub(super) fn collect_tags(txt: &str) -> Vec { txt.split_whitespace() .filter(|s| !s.is_empty()) @@ -263,20 +341,23 @@ fn ensure_first_field_is_mapped( Ok(()) } -fn maybe_set_fallback_columns(metadata: &mut CsvMetadata, line: &str) -> Result<()> { +fn maybe_set_fallback_columns(metadata: &mut CsvMetadata) -> Result<()> { if metadata.column_labels.is_empty() { - let columns = map_single_record(line, metadata.delimiter(), |r| r.len())?; - metadata.column_labels = vec![String::new(); columns]; + metadata.column_labels = + vec![String::new(); metadata.preview.get(0).map_or(0, |row| row.vals.len())]; } Ok(()) } -fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result<()> { - // TODO: should probably check more than one line; can reuse preview lines - // when it's implemented - if !metadata.force_is_html { - metadata.is_html = - map_single_record(line, metadata.delimiter(), |r| r.iter().any(is_html))?; +fn maybe_set_fallback_is_html( + metadata: &mut CsvMetadata, + records: &[csv::StringRecord], + is_html_option: Option, +) -> Result<()> { + if let Some(is_html) = is_html_option { + metadata.is_html = is_html; + } else if !metadata.force_is_html { + metadata.is_html = records.iter().flat_map(|record| record.iter()).any(is_html); } Ok(()) } @@ -284,13 +365,16 @@ fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result< fn maybe_set_fallback_delimiter( delimiter: Option, metadata: &mut CsvMetadata, - line: &str, -) { + mut reader: impl Read + Seek, + meta_len: u64, +) -> Result<()> { if let Some(delim) = delimiter { metadata.set_delimiter(delim); } else if !metadata.force_delimiter { - metadata.set_delimiter(delimiter_from_line(line)); + reader.seek(SeekFrom::Start(meta_len))?; + metadata.set_delimiter(delimiter_from_reader(reader)?); } + Ok(()) } fn delimiter_from_value(value: &str) -> Option { @@ -303,14 +387,16 @@ fn delimiter_from_value(value: &str) -> Option { None } -fn delimiter_from_line(line: &str) -> Delimiter { +fn delimiter_from_reader(mut reader: impl Read) -> Result { + let mut buf = [0; 8 * 1024]; + let _ = reader.read(&mut buf)?; // TODO: use smarter heuristic for delimiter in Delimiter::iter() { - if line.contains(delimiter.byte() as char) { - return delimiter; + if buf.contains(&delimiter.byte()) { + return Ok(delimiter); } } - Delimiter::Space + Ok(Delimiter::Space) } fn map_single_record( @@ -384,6 +470,9 @@ impl CsvMetadata { if self.tags_column > 0 { columns.insert(self.tags_column as usize); } + if self.guid_column > 0 { + columns.insert(self.guid_column as usize); + } columns } } @@ -398,8 +487,18 @@ impl NameOrId { } } +impl From for StringList { + fn from(record: csv::StringRecord) -> Self { + Self { + vals: record.iter().map(ToString::to_string).collect(), + } + } +} + #[cfg(test)] mod test { + use std::io::Cursor; + use super::*; use crate::collection::open_test_collection; @@ -408,7 +507,7 @@ mod test { metadata!($col, $csv, None) }; ($col:expr,$csv:expr, $delim:expr) => { - $col.get_reader_metadata(BufReader::new($csv.as_bytes()), $delim, None) + $col.get_reader_metadata(Cursor::new($csv.as_bytes()), $delim, None, None) .unwrap() }; } @@ -561,7 +660,7 @@ mod test { // custom names assert_eq!( - metadata!(col, "#columns:one,two\n").column_labels, + metadata!(col, "#columns:one\ttwo\n").column_labels, ["one", "two"] ); assert_eq!( @@ -570,6 +669,17 @@ mod test { ); } + #[test] + fn should_detect_column_number_despite_escaped_line_breaks() { + let mut col = open_test_collection(); + assert_eq!( + metadata!(col, "\"foo|\nbar\"\tfoo\tbar\n") + .column_labels + .len(), + 3 + ); + } + impl CsvMetadata { fn unwrap_notetype_map(&self) -> &[u32] { match &self.notetype { @@ -589,7 +699,16 @@ mod test { #[test] fn should_map_default_notetype_fields_by_given_column_names() { let mut col = open_test_collection(); - let meta = metadata!(col, "#columns:Back,Front\nfoo,bar,baz\n"); + let meta = metadata!(col, "#columns:Back\tFront\nfoo,bar,baz\n"); assert_eq!(meta.unwrap_notetype_map(), &[2, 1]); } + + #[test] + fn should_gather_first_lines_into_preview() { + let mut col = open_test_collection(); + let meta = metadata!(col, "#separator: \nfoo bar\nbaz
\n"); + assert_eq!(meta.preview[0].vals, ["foo", "bar"]); + // html is stripped + assert_eq!(meta.preview[1].vals, ["baz", ""]); + } } diff --git a/rslib/src/import_export/text/import.rs b/rslib/src/import_export/text/import.rs index ad1427c25..9aeb3a285 100644 --- a/rslib/src/import_export/text/import.rs +++ b/rslib/src/import_export/text/import.rs @@ -1,7 +1,12 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -use std::{borrow::Cow, collections::HashMap, mem, sync::Arc}; +use std::{ + borrow::Cow, + collections::{HashMap, HashSet}, + mem, + sync::Arc, +}; use super::NameOrId; use crate::{ @@ -52,24 +57,75 @@ struct Context<'a> { col: &'a mut Collection, /// Contains the optional default notetype with the default key. notetypes: HashMap>>, - /// Contains the optional default deck id with the default key. - deck_ids: HashMap>, + deck_ids: DeckIdsByNameOrId, usn: Usn, normalize_notes: bool, today: u32, dupe_resolution: DupeResolution, card_gen_ctxs: HashMap<(NotetypeId, DeckId), CardGenContext>>, - existing_notes: HashMap<(NotetypeId, u32), Vec>, + existing_checksums: HashMap<(NotetypeId, u32), Vec>, + existing_guids: HashMap, +} + +struct DeckIdsByNameOrId { + ids: HashSet, + names: HashMap, + default: Option, } struct NoteContext { + /// Prepared and with canonified tags. note: Note, - dupes: Vec, + dupes: Vec, cards: Vec, notetype: Arc, deck_id: DeckId, } +struct Duplicate { + note: Note, + identical: bool, + first_field_match: bool, +} + +impl Duplicate { + fn new(dupe: Note, original: &Note, first_field_match: bool) -> Self { + let identical = dupe.equal_fields_and_tags(original); + Self { + note: dupe, + identical, + first_field_match, + } + } +} + +impl DeckIdsByNameOrId { + fn new(col: &mut Collection, default: &NameOrId) -> Result { + let names: HashMap = col + .get_all_normal_deck_names()? + .into_iter() + .map(|(id, name)| (name, id)) + .collect(); + let ids = names.values().copied().collect(); + let mut new = Self { + ids, + names, + default: None, + }; + new.default = new.get(default); + + Ok(new) + } + + fn get(&self, name_or_id: &NameOrId) -> Option { + match name_or_id { + _ if *name_or_id == NameOrId::default() => self.default, + NameOrId::Id(id) => self.ids.get(&DeckId(*id)).copied(), + NameOrId::Name(name) => self.names.get(name).copied(), + } + } +} + impl<'a> Context<'a> { fn new(data: &ForeignData, col: &'a mut Collection) -> Result { let usn = col.usn()?; @@ -80,12 +136,10 @@ impl<'a> Context<'a> { NameOrId::default(), col.notetype_by_name_or_id(&data.default_notetype)?, ); - let mut deck_ids = HashMap::new(); - deck_ids.insert( - NameOrId::default(), - col.deck_id_by_name_or_id(&data.default_deck)?, - ); - let existing_notes = col.storage.all_notes_by_type_and_checksum()?; + let deck_ids = DeckIdsByNameOrId::new(col, &data.default_deck)?; + let existing_checksums = col.storage.all_notes_by_type_and_checksum()?; + let existing_guids = col.storage.all_notes_by_guid()?; + Ok(Self { col, usn, @@ -95,7 +149,8 @@ impl<'a> Context<'a> { notetypes, deck_ids, card_gen_ctxs: HashMap::new(), - existing_notes, + existing_checksums, + existing_guids, }) } @@ -119,16 +174,6 @@ impl<'a> Context<'a> { }) } - fn deck_id_for_note(&mut self, note: &ForeignNote) -> Result> { - Ok(if let Some(did) = self.deck_ids.get(¬e.deck) { - *did - } else { - let did = self.col.deck_id_by_name_or_id(¬e.deck)?; - self.deck_ids.insert(note.deck.clone(), did); - did - }) - } - fn import_foreign_notes( &mut self, notes: Vec, @@ -145,7 +190,7 @@ impl<'a> Context<'a> { continue; } if let Some(notetype) = self.notetype_for_note(&foreign)? { - if let Some(deck_id) = self.deck_id_for_note(&foreign)? { + if let Some(deck_id) = self.deck_ids.get(&foreign.deck) { let ctx = self.build_note_context(foreign, notetype, deck_id, global_tags)?; self.import_note(ctx, updated_tags, &mut log)?; } else { @@ -167,6 +212,7 @@ impl<'a> Context<'a> { ) -> Result { let (mut note, cards) = foreign.into_native(¬etype, deck_id, self.today, global_tags); note.prepare_for_update(¬etype, self.normalize_notes)?; + self.col.canonify_note_tags(&mut note, self.usn)?; let dupes = self.find_duplicates(¬etype, ¬e)?; Ok(NoteContext { @@ -178,14 +224,34 @@ impl<'a> Context<'a> { }) } - fn find_duplicates(&mut self, notetype: &Notetype, note: &Note) -> Result> { + fn find_duplicates(&self, notetype: &Notetype, note: &Note) -> Result> { let checksum = note .checksum .ok_or_else(|| AnkiError::invalid_input("note unprepared"))?; - self.existing_notes - .get(&(notetype.id, checksum)) - .map(|dupe_ids| self.col.get_full_duplicates(note, dupe_ids)) - .unwrap_or_else(|| Ok(vec![])) + if let Some(nid) = self.existing_guids.get(¬e.guid) { + self.get_guid_dupe(*nid, note).map(|dupe| vec![dupe]) + } else if let Some(nids) = self.existing_checksums.get(&(notetype.id, checksum)) { + self.get_first_field_dupes(note, nids) + } else { + Ok(Vec::new()) + } + } + + fn get_guid_dupe(&self, nid: NoteId, original: &Note) -> Result { + self.col + .storage + .get_note(nid)? + .ok_or(AnkiError::NotFound) + .map(|dupe| Duplicate::new(dupe, original, false)) + } + + fn get_first_field_dupes(&self, note: &Note, nids: &[NoteId]) -> Result> { + Ok(self + .col + .get_full_duplicates(note, nids)? + .into_iter() + .map(|dupe| Duplicate::new(dupe, note, true)) + .collect()) } fn import_note( @@ -204,7 +270,6 @@ impl<'a> Context<'a> { } fn add_note(&mut self, mut ctx: NoteContext, log_queue: &mut Vec) -> Result<()> { - self.col.canonify_note_tags(&mut ctx.note, self.usn)?; ctx.note.usn = self.usn; self.col.add_note_only_undoable(&mut ctx.note)?; self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype)?; @@ -237,28 +302,49 @@ impl<'a> Context<'a> { } fn prepare_note_for_update(&mut self, note: &mut Note, updated_tags: &[String]) -> Result<()> { - note.tags.extend(updated_tags.iter().cloned()); - self.col.canonify_note_tags(note, self.usn)?; + if !updated_tags.is_empty() { + note.tags.extend(updated_tags.iter().cloned()); + self.col.canonify_note_tags(note, self.usn)?; + } note.set_modified(self.usn); Ok(()) } fn maybe_update_dupe( &mut self, - dupe: Note, + dupe: Duplicate, ctx: &mut NoteContext, log: &mut NoteLog, ) -> Result<()> { - ctx.note.id = dupe.id; - if dupe.equal_fields_and_tags(&ctx.note) { - log.duplicate.push(dupe.into_log_note()); + if dupe.note.notetype_id != ctx.notetype.id { + log.conflicting.push(dupe.note.into_log_note()); + return Ok(()); + } + if dupe.identical { + log.duplicate.push(dupe.note.into_log_note()); } else { - self.col.update_note_undoable(&ctx.note, &dupe)?; - log.first_field_match.push(dupe.into_log_note()); + self.update_dupe(dupe, ctx, log)?; } self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype.clone()) } + fn update_dupe( + &mut self, + dupe: Duplicate, + ctx: &mut NoteContext, + log: &mut NoteLog, + ) -> Result<()> { + ctx.note.id = dupe.note.id; + ctx.note.guid = dupe.note.guid.clone(); + self.col.update_note_undoable(&ctx.note, &dupe.note)?; + if dupe.first_field_match { + log.first_field_match.push(dupe.note.into_log_note()); + } else { + log.updated.push(dupe.note.into_log_note()); + } + Ok(()) + } + fn import_cards(&mut self, cards: &mut [Card], note_id: NoteId) -> Result<()> { for card in cards { card.note_id = note_id; @@ -306,7 +392,7 @@ impl Collection { } } - fn get_full_duplicates(&mut self, note: &Note, dupe_ids: &[NoteId]) -> Result> { + fn get_full_duplicates(&self, note: &Note, dupe_ids: &[NoteId]) -> Result> { let first_field = note.first_field_stripped(); dupe_ids .iter() @@ -329,6 +415,9 @@ impl ForeignNote { ) -> (Note, Vec) { // TODO: Handle new and learning cards let mut note = Note::new(notetype); + if !self.guid.is_empty() { + note.guid = self.guid; + } note.tags = self.tags; note.tags.extend(extra_tags.iter().cloned()); note.fields_mut() @@ -501,4 +590,16 @@ mod test { data.import(&mut col, |_, _| true).unwrap(); assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]); } + + #[test] + fn should_match_note_with_same_guid() { + let mut col = open_test_collection(); + let mut data = ForeignData::with_defaults(); + data.add_note(&["foo"]); + data.notes[0].tags = vec![String::from("bar")]; + data.global_tags = vec![String::from("baz")]; + + data.import(&mut col, |_, _| true).unwrap(); + assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]); + } } diff --git a/rslib/src/import_export/text/mod.rs b/rslib/src/import_export/text/mod.rs index f02610265..ff128076b 100644 --- a/rslib/src/import_export/text/mod.rs +++ b/rslib/src/import_export/text/mod.rs @@ -25,6 +25,7 @@ pub struct ForeignData { #[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] #[serde(default)] pub struct ForeignNote { + guid: String, fields: Vec, tags: Vec, notetype: NameOrId, diff --git a/rslib/src/notetype/mod.rs b/rslib/src/notetype/mod.rs index 5cc68c70c..c8e5922a1 100644 --- a/rslib/src/notetype/mod.rs +++ b/rslib/src/notetype/mod.rs @@ -218,6 +218,21 @@ impl Collection { .collect() } + pub fn get_all_notetypes_of_search_notes( + &mut self, + ) -> Result>> { + self.storage + .all_notetypes_of_search_notes()? + .into_iter() + .map(|ntid| { + self.get_notetype(ntid) + .transpose() + .unwrap() + .map(|nt| (ntid, nt)) + }) + .collect() + } + pub fn remove_notetype(&mut self, ntid: NotetypeId) -> Result> { self.transact(Op::RemoveNotetype, |col| col.remove_notetype_inner(ntid)) } diff --git a/rslib/src/storage/deck/all_decks_of_search_notes.sql b/rslib/src/storage/deck/all_decks_of_search_notes.sql new file mode 100644 index 000000000..d6012ef3d --- /dev/null +++ b/rslib/src/storage/deck/all_decks_of_search_notes.sql @@ -0,0 +1,9 @@ +SELECT nid, + did +FROM cards +WHERE nid IN ( + SELECT nid + FROM search_nids + ) +GROUP BY nid +HAVING ord = MIN(ord) \ No newline at end of file diff --git a/rslib/src/storage/deck/mod.rs b/rslib/src/storage/deck/mod.rs index f99467143..cf408a42d 100644 --- a/rslib/src/storage/deck/mod.rs +++ b/rslib/src/storage/deck/mod.rs @@ -131,6 +131,14 @@ impl SqliteStorage { .collect() } + /// Returns the deck id of the first existing card of every searched note. + pub(crate) fn all_decks_of_search_notes(&self) -> Result> { + self.db + .prepare_cached(include_str!("all_decks_of_search_notes.sql"))? + .query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))? + .collect() + } + // caller should ensure name unique pub(crate) fn add_deck(&self, deck: &mut Deck) -> Result<()> { assert!(deck.id.0 == 0); diff --git a/rslib/src/storage/note/mod.rs b/rslib/src/storage/note/mod.rs index d7dc53ffe..11466be46 100644 --- a/rslib/src/storage/note/mod.rs +++ b/rslib/src/storage/note/mod.rs @@ -338,6 +338,13 @@ impl super::SqliteStorage { .collect() } + pub(crate) fn all_notes_by_guid(&mut self) -> Result> { + self.db + .prepare("SELECT guid, id FROM notes")? + .query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))? + .collect() + } + #[cfg(test)] pub(crate) fn get_all_notes(&mut self) -> Vec { self.db diff --git a/rslib/src/storage/notetype/mod.rs b/rslib/src/storage/notetype/mod.rs index f68e636f8..cd73ab0d4 100644 --- a/rslib/src/storage/notetype/mod.rs +++ b/rslib/src/storage/notetype/mod.rs @@ -116,6 +116,15 @@ impl SqliteStorage { .collect() } + pub(crate) fn all_notetypes_of_search_notes(&self) -> Result> { + self.db + .prepare_cached( + "SELECT DISTINCT mid FROM notes WHERE id IN (SELECT nid FROM search_nids)", + )? + .query_and_then([], |r| Ok(r.get(0)?))? + .collect() + } + pub fn get_all_notetype_names(&self) -> Result> { self.db .prepare_cached(include_str!("get_notetype_names.sql"))? diff --git a/rslib/src/text.rs b/rslib/src/text.rs index b46e64ded..cea3c627d 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -1,7 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -use std::{borrow::Cow, ptr}; +use std::borrow::Cow; use lazy_static::lazy_static; use pct_str::{IriReserved, PctStr, PctString}; @@ -134,12 +134,8 @@ lazy_static! { static ref PERSISTENT_HTML_SPACERS: Regex = Regex::new(r#"(?i)|
|\n"#).unwrap(); - static ref UNPRINTABLE_TAGS: Regex = Regex::new( - r"(?xs) - \[sound:[^]]+\] - | - \[\[type:[^]]+\]\] - ").unwrap(); + static ref TYPE_TAG: Regex = Regex::new(r"\[\[type:[^]]+\]\]").unwrap(); + static ref SOUND_TAG: Regex = Regex::new(r"\[sound:([^]]+)\]").unwrap(); /// Files included in CSS with a leading underscore. static ref UNDERSCORED_CSS_IMPORTS: Regex = Regex::new( @@ -172,19 +168,21 @@ lazy_static! { "#).unwrap(); } -pub fn is_html(text: &str) -> bool { - HTML.is_match(text) +pub fn is_html(text: impl AsRef) -> bool { + HTML.is_match(text.as_ref()) } pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow { + let (html_stripper, sound_rep): (fn(&str) -> Cow, _) = if preserve_media_filenames { + (strip_html_preserving_media_filenames, "$1") + } else { + (strip_html, "") + }; PERSISTENT_HTML_SPACERS .replace_all(html, " ") - .map_cow(|s| UNPRINTABLE_TAGS.replace_all(s, "")) - .map_cow(if preserve_media_filenames { - strip_html_preserving_media_filenames - } else { - strip_html - }) + .map_cow(|s| TYPE_TAG.replace_all(s, "")) + .map_cow(|s| SOUND_TAG.replace_all(s, sound_rep)) + .map_cow(html_stripper) .trim() } @@ -330,16 +328,9 @@ pub(crate) fn extract_underscored_references(text: &str) -> Vec<&str> { } pub fn strip_html_preserving_media_filenames(html: &str) -> Cow { - let without_fnames = HTML_MEDIA_TAGS.replace_all(html, r" ${1}${2}${3} "); - let without_html = strip_html(&without_fnames); - // no changes? - if let Cow::Borrowed(b) = without_html { - if ptr::eq(b, html) { - return Cow::Borrowed(html); - } - } - // make borrow checker happy - without_html.into_owned().into() + HTML_MEDIA_TAGS + .replace_all(html, r" ${1}${2}${3} ") + .map_cow(strip_html) } #[allow(dead_code)] diff --git a/ts/import-csv/FieldMapper.svelte b/ts/import-csv/FieldMapper.svelte index 7ace7355d..4a80367f0 100644 --- a/ts/import-csv/FieldMapper.svelte +++ b/ts/import-csv/FieldMapper.svelte @@ -26,6 +26,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html /> {/each} {/await} + {/if} - diff --git a/ts/import-csv/ImportCsvPage.svelte b/ts/import-csv/ImportCsvPage.svelte index ffbbc4195..9ce321142 100644 --- a/ts/import-csv/ImportCsvPage.svelte +++ b/ts/import-csv/ImportCsvPage.svelte @@ -8,7 +8,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html import Row from "../components/Row.svelte"; import Spacer from "../components/Spacer.svelte"; import * as tr from "../lib/ftl"; - import { Decks, ImportExport, importExport, Notetypes } from "../lib/proto"; + import { + Decks, + Generic, + ImportExport, + importExport, + Notetypes, + } from "../lib/proto"; import DeckSelector from "./DeckSelector.svelte"; import DelimiterSelector from "./DelimiterSelector.svelte"; import DupeResolutionSelector from "./DupeResolutionSelector.svelte"; @@ -17,6 +23,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html import HtmlSwitch from "./HtmlSwitch.svelte"; import { getColumnOptions, getCsvMetadata } from "./lib"; import NotetypeSelector from "./NotetypeSelector.svelte"; + import Preview from "./Preview.svelte"; import StickyFooter from "./StickyFooter.svelte"; import Tags from "./Tags.svelte"; @@ -32,6 +39,8 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html export let updatedTags: string[]; export let columnLabels: string[]; export let tagsColumn: number; + export let guidColumn: number; + export let preview: Generic.StringList[]; // Protobuf oneofs. Exactly one of these pairs is expected to be set. export let notetypeColumn: number | null; export let globalNotetype: ImportExport.CsvMetadata.MappedNotetype | null; @@ -41,9 +50,17 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html let dupeResolution: ImportExport.ImportCsvRequest.DupeResolution; let lastNotetypeId = globalNotetype?.id; - $: columnOptions = getColumnOptions(columnLabels, notetypeColumn, deckColumn); - $: getCsvMetadata(path, delimiter).then((meta) => { + $: columnOptions = getColumnOptions( + columnLabels, + preview[0].vals, + notetypeColumn, + deckColumn, + tagsColumn, + guidColumn, + ); + $: getCsvMetadata(path, delimiter, undefined, isHtml).then((meta) => { columnLabels = meta.columnLabels; + preview = meta.preview; }); $: if (globalNotetype?.id !== lastNotetypeId) { lastNotetypeId = globalNotetype?.id; @@ -66,6 +83,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html updatedTags, columnLabels, tagsColumn, + guidColumn, notetypeColumn, globalNotetype, deckColumn, @@ -78,6 +96,15 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + + +
+ + + + + +
@@ -92,8 +119,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html {/if} - - diff --git a/ts/import-csv/Preview.svelte b/ts/import-csv/Preview.svelte new file mode 100644 index 000000000..5b476dd2d --- /dev/null +++ b/ts/import-csv/Preview.svelte @@ -0,0 +1,65 @@ + + + +
+ + {#each columnOptions.slice(1) as { label, shortLabel }} + + {/each} + {#each preview as row} + + {#each row.vals as cell} + + {/each} + + {/each} +
+ {shortLabel || label} +
{cell}
+
+ + diff --git a/ts/import-csv/StickyFooter.svelte b/ts/import-csv/StickyFooter.svelte index 49febd04b..3efc535ee 100644 --- a/ts/import-csv/StickyFooter.svelte +++ b/ts/import-csv/StickyFooter.svelte @@ -37,11 +37,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html