diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto index 7c2679c4a..2e1646ab5 100644 --- a/proto/anki/import_export.proto +++ b/proto/anki/import_export.proto @@ -97,22 +97,20 @@ message MediaEntries { } message ImportCsvRequest { - message CsvColumn { - enum Other { - IGNORE = 0; - TAGS = 1; - } - oneof variant { - uint32 field = 1; - Other other = 2; - } + // Source column indices for note data. Negative values mean n/a. + message Columns { + repeated int32 fields = 1; + int32 tags = 2; + int32 deck = 3; + int32 notetype = 4; } string path = 1; int64 deck_id = 2; int64 notetype_id = 3; - repeated CsvColumn columns = 4; - CsvMetadata.Delimiter delimiter = 5; - bool is_html = 6; + CsvMetadata.Delimiter delimiter = 4; + bool is_html = 5; + Columns columns = 6; + repeated string column_names = 7; } message CsvMetadataRequest { diff --git a/rslib/src/backend/import_export.rs b/rslib/src/backend/import_export.rs index 7664db428..89ca3d153 100644 --- a/rslib/src/backend/import_export.rs +++ b/rslib/src/backend/import_export.rs @@ -6,14 +6,8 @@ use std::path::Path; use super::{progress::Progress, Backend}; pub(super) use crate::backend_proto::importexport_service::Service as ImportExportService; use crate::{ - backend_proto::{ - self as pb, - export_anki_package_request::Selector, - import_csv_request::{csv_column, CsvColumn}, - }, - import_export::{ - package::import_colpkg, text::csv::Column, ExportProgress, ImportProgress, NoteLog, - }, + backend_proto::{self as pb, export_anki_package_request::Selector}, + import_export::{package::import_colpkg, ExportProgress, ImportProgress, NoteLog}, prelude::*, search::SearchNode, }; @@ -93,9 +87,12 @@ impl ImportExportService for Backend { &input.path, input.deck_id.into(), input.notetype_id.into(), - input.columns.into_iter().map(Into::into).collect(), delimiter, input.is_html, + input + .columns + .ok_or_else(|| AnkiError::invalid_input("missing value"))?, + input.column_names, ) }) .map(Into::into) @@ -142,17 +139,3 @@ impl From> for pb::ImportResponse { } } } - -impl From for Column { - fn from(column: CsvColumn) -> Self { - match column.variant.unwrap_or(csv_column::Variant::Other(0)) { - csv_column::Variant::Field(idx) => Column::Field(idx as usize), - csv_column::Variant::Other(i) => { - match csv_column::Other::from_i32(i).unwrap_or_default() { - csv_column::Other::Tags => Column::Tags, - csv_column::Other::Ignore => Column::Ignore, - } - } - } - } -} diff --git a/rslib/src/import_export/text/csv/import.rs b/rslib/src/import_export/text/csv/import.rs index fd11a2379..11536bec1 100644 --- a/rslib/src/import_export/text/csv/import.rs +++ b/rslib/src/import_export/text/csv/import.rs @@ -2,35 +2,35 @@ // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use std::{ + collections::HashMap, fs::File, io::{BufRead, BufReader, Read, Seek, SeekFrom}, }; use crate::{ + backend_proto::import_export::import_csv_request::Columns as ProtoColumns, import_export::{ - text::{ - csv::{metadata::Delimiter, Column}, - ForeignData, ForeignNote, - }, + text::{csv::metadata::Delimiter, import::NotetypeForString, ForeignData, ForeignNote}, NoteLog, }, prelude::*, }; impl Collection { + #[allow(clippy::too_many_arguments)] pub fn import_csv( &mut self, path: &str, deck_id: DeckId, notetype_id: NotetypeId, - columns: Vec, delimiter: Delimiter, is_html: bool, + columns: ProtoColumns, + column_names: Vec, ) -> Result> { - let notetype = self.get_notetype(notetype_id)?.ok_or(AnkiError::NotFound)?; - let fields_len = notetype.fields.len(); let file = File::open(path)?; - let notes = deserialize_csv(file, &columns, fields_len, delimiter, is_html)?; + let mut ctx = ColumnContext::new(columns, column_names, is_html, self); + let notes = ctx.deserialize_csv(file, delimiter)?; ForeignData { // TODO: refactor to allow passing ids directly @@ -43,21 +43,170 @@ impl Collection { } } -fn deserialize_csv( - mut reader: impl Read + Seek, - columns: &[Column], - fields_len: usize, - delimiter: Delimiter, - is_html: bool, -) -> Result> { - remove_tags_line_from_reader(&mut reader)?; - let mut csv_reader = csv::ReaderBuilder::new() - .has_headers(false) - .flexible(true) - .comment(Some(b'#')) - .delimiter(delimiter.byte()) - .from_reader(reader); - deserialize_csv_reader(&mut csv_reader, columns, fields_len, is_html) +/// Column indices for the fields of a notetype. +type FieldSourceColumns = Vec>; + +struct ColumnContext<'a, C: NotetypeForString> { + tags_column: Option, + deck_column: Option, + notetype_column: Option, + /// Source column indices for the fields of a notetype, identified by its + /// name or id as string. The empty string corresponds to the default notetype. + notetype_fields: HashMap, + /// CSV column labels. Used to identify the source columns for the fields + /// of a notetype. + column_names: HashMap, + /// How fields are converted to strings. Used for escaping HTML if appropriate. + stringify: fn(&str) -> String, + col: &'a mut C, +} + +impl<'a, C: NotetypeForString> ColumnContext<'a, C> { + fn new( + columns: ProtoColumns, + column_names: Vec, + is_html: bool, + col: &'a mut C, + ) -> Self { + Self { + tags_column: columns.tags.try_into().ok(), + deck_column: columns.deck.try_into().ok(), + notetype_column: columns.notetype.try_into().ok(), + notetype_fields: notetype_fields_map(&columns.fields), + column_names: column_names_map(column_names), + stringify: stringify_fn(is_html), + col, + } + } + + fn deserialize_csv( + &mut self, + mut reader: impl Read + Seek, + delimiter: Delimiter, + ) -> Result> { + remove_tags_line_from_reader(&mut reader)?; + let mut csv_reader = csv::ReaderBuilder::new() + .has_headers(false) + .flexible(true) + .comment(Some(b'#')) + .delimiter(delimiter.byte()) + .from_reader(reader); + self.deserialize_csv_reader(&mut csv_reader) + } + + fn deserialize_csv_reader( + &mut self, + reader: &mut csv::Reader, + ) -> Result> { + reader + .records() + .into_iter() + .map(|res| { + res.map_err(Into::into) + .and_then(|record| self.foreign_note_from_record(&record)) + }) + .collect() + } + + fn foreign_note_from_record(&mut self, record: &csv::StringRecord) -> Result { + let notetype = self.gather_notetype(record); + let deck = self.gather_deck(record); + let tags = self.gather_tags(record); + let fields = self.gather_note_fields(record, ¬etype)?; + Ok(ForeignNote { + notetype, + fields, + tags, + deck, + ..Default::default() + }) + } + + fn gather_notetype(&self, record: &csv::StringRecord) -> String { + self.notetype_column + .and_then(|i| record.get(i)) + .unwrap_or_default() + .to_string() + } + + fn gather_deck(&self, record: &csv::StringRecord) -> String { + self.deck_column + .and_then(|i| record.get(i)) + .unwrap_or_default() + .to_string() + } + + fn gather_tags(&self, record: &csv::StringRecord) -> Vec { + self.tags_column + .and_then(|i| record.get(i)) + .unwrap_or_default() + .split_whitespace() + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + .collect() + } + + fn gather_note_fields( + &mut self, + record: &csv::StringRecord, + notetype: &str, + ) -> Result> { + let stringify = self.stringify; + Ok(self + .get_notetype_fields(notetype)? + .iter() + .map(|opt| opt.and_then(|idx| record.get(idx)).unwrap_or_default()) + .map(stringify) + .collect()) + } + + fn get_notetype_fields(&mut self, notetype: &str) -> Result<&FieldSourceColumns> { + Ok(if self.notetype_fields.contains_key(notetype) { + // borrow checker doesn't allow to use `if let` here + // https://users.rust-lang.org/t/solved-borrow-doesnt-drop-returning-this-value-requires-that/24182 + self.notetype_fields.get(notetype).unwrap() + } else { + // TODO: more specific errors + let nt = self + .col + .notetype_for_string(notetype)? + .ok_or(AnkiError::NotFound)?; + let map = self.build_notetype_fields_map(&nt); + if map[0].is_none() { + return Err(AnkiError::NotFound); + } + self.notetype_fields + .entry(notetype.to_string()) + .or_insert(map) + }) + } + + fn build_notetype_fields_map(&mut self, notetype: &Notetype) -> FieldSourceColumns { + notetype + .fields + .iter() + .map(|f| self.column_names.get(&f.name).copied()) + .collect() + } +} + +fn column_names_map(column_names: Vec) -> HashMap { + HashMap::from_iter(column_names.into_iter().enumerate().map(|(i, s)| (s, i))) +} + +fn notetype_fields_map(default_fields: &[i32]) -> HashMap { + let default_fields = default_fields.iter().map(|&i| i.try_into().ok()).collect(); + let mut notetype_fields = HashMap::new(); + notetype_fields.insert(String::new(), default_fields); + notetype_fields +} + +fn stringify_fn(is_html: bool) -> fn(&str) -> String { + if is_html { + ToString::to_string + } else { + htmlescape::encode_minimal + } } /// If the reader's first line starts with "tags:", which is allowed for historic @@ -77,161 +226,150 @@ fn remove_tags_line_from_reader(reader: &mut (impl Read + Seek)) -> Result<()> { Ok(()) } -fn deserialize_csv_reader( - reader: &mut csv::Reader, - columns: &[Column], - fields_len: usize, - is_html: bool, -) -> Result> { - reader - .records() - .into_iter() - .map(|res| { - res.map(|record| ForeignNote::from_record(record, columns, fields_len, is_html)) - .map_err(Into::into) - }) - .collect() -} - -impl ForeignNote { - fn from_record( - record: csv::StringRecord, - columns: &[Column], - fields_len: usize, - is_html: bool, - ) -> Self { - let mut note = Self { - fields: vec!["".to_string(); fields_len], - ..Default::default() - }; - for (&column, value) in columns.iter().zip(record.iter()) { - note.add_column_value(column, value, is_html); - } - note - } - - fn add_column_value(&mut self, column: Column, value: &str, is_html: bool) { - match column { - Column::Ignore => (), - Column::Field(idx) => { - if let Some(field) = self.fields.get_mut(idx) { - *field = if is_html { - value.to_string() - } else { - htmlescape::encode_minimal(value) - }; - } - } - Column::Tags => self.tags.extend(value.split(' ').map(ToString::to_string)), - } - } -} - #[cfg(test)] mod test { - use std::io::Cursor; + use std::{io::Cursor, sync::Arc}; use super::*; + use crate::notetype::all_stock_notetypes; + + macro_rules! import { + ($options:expr, $csv:expr) => {{ + let reader = Cursor::new($csv); + let delimiter = $options.delimiter; + let mut ctx = $options.ctx(); + ctx.deserialize_csv(reader, delimiter).unwrap() + }}; + } macro_rules! assert_imported_fields { - ($options:expr,$csv:expr, $expected:expr) => { - let reader = Cursor::new($csv); - let notes = deserialize_csv( - reader, - &$options.columns, - $options.fields_len, - $options.delimiter, - $options.is_html, - ) - .unwrap(); + ($options:expr, $csv:expr, $expected:expr) => { + let notes = import!($options, $csv); let fields: Vec<_> = notes.into_iter().map(|note| note.fields).collect(); assert_eq!(fields, $expected); }; } + struct MockNotetypeForString(HashMap>); + + impl NotetypeForString for MockNotetypeForString { + fn notetype_for_string(&mut self, name_or_id: &str) -> Result>> { + Ok(self.0.get(name_or_id).cloned()) + } + } + + impl MockNotetypeForString { + fn new() -> Self { + Self(HashMap::from_iter( + all_stock_notetypes(&I18n::template_only()) + .into_iter() + .map(|nt| (nt.name.clone(), Arc::new(nt))), + )) + } + } + struct CsvOptions { - columns: Vec, - fields_len: usize, delimiter: Delimiter, is_html: bool, + columns: ProtoColumns, + column_names: Vec, + mock_col: MockNotetypeForString, } impl CsvOptions { fn new() -> Self { Self { - columns: vec![Column::Field(0), Column::Field(1)], - fields_len: 2, delimiter: Delimiter::Comma, is_html: false, + columns: ProtoColumns { + tags: -1, + deck: -1, + notetype: -1, + fields: vec![0, 1], + }, + column_names: vec![], + mock_col: MockNotetypeForString::new(), } } - fn add_column(mut self, column: Column) -> Self { - self.columns.push(column); - self - } - - fn columns(mut self, columns: Vec) -> Self { - self.columns = columns; - self - } - - fn fields_len(mut self, fields_len: usize) -> Self { - self.fields_len = fields_len; - self - } - - fn delimiter(mut self, delimiter: Delimiter) -> Self { - self.delimiter = delimiter; - self - } - - #[allow(clippy::wrong_self_convention)] - fn is_html(mut self, is_html: bool) -> Self { - self.is_html = is_html; - self + fn ctx(&mut self) -> ColumnContext { + ColumnContext::new( + self.columns.clone(), + std::mem::take(&mut self.column_names), + self.is_html, + &mut self.mock_col, + ) } } #[test] fn should_allow_missing_columns() { - let options = CsvOptions::new().add_column(Column::Field(2)).fields_len(4); - assert_imported_fields!( - options, - "front,back\nfoo\n", - &[&["front", "back", "", ""], &["foo", "", "", ""]] - ); + let mut options = CsvOptions::new(); + assert_imported_fields!(options, "foo\n", &[&["foo", ""]]); } #[test] fn should_respect_custom_delimiter() { - let options = CsvOptions::new().delimiter(Delimiter::Pipe); + let mut options = CsvOptions::new(); + options.delimiter = Delimiter::Pipe; assert_imported_fields!(options, "fr,ont|ba,ck\n", &[&["fr,ont", "ba,ck"]]); } #[test] fn should_ignore_first_line_starting_with_tags() { - let options = CsvOptions::new(); + let mut options = CsvOptions::new(); assert_imported_fields!(options, "tags:foo\nfront,back\n", &[&["front", "back"]]); } #[test] fn should_respect_column_remapping() { - let options = - CsvOptions::new().columns(vec![Column::Field(1), Column::Ignore, Column::Field(0)]); + let mut options = CsvOptions::new(); + options.columns.fields = vec![2, 0]; assert_imported_fields!(options, "front,foo,back\n", &[&["back", "front"]]); } #[test] fn should_ignore_lines_starting_with_number_sign() { - let options = CsvOptions::new(); + let mut options = CsvOptions::new(); assert_imported_fields!(options, "#foo\nfront,back\n#bar\n", &[&["front", "back"]]); } #[test] fn should_escape_html_entities_if_csv_is_html() { - assert_imported_fields!(CsvOptions::new(), "
\n", &[&["<hr>", ""]]); - let with_html = CsvOptions::new().is_html(true); - assert_imported_fields!(with_html, "
\n", &[&["
", ""]]); + let mut options = CsvOptions::new(); + assert_imported_fields!(options, "
\n", &[&["<hr>", ""]]); + options.is_html = true; + assert_imported_fields!(options, "
\n", &[&["
", ""]]); + } + + #[test] + fn should_parse_tag_column() { + let mut options = CsvOptions::new(); + options.columns.tags = 2; + let notes = import!(options, "front,back,foo bar\n"); + assert_eq!(notes[0].tags, &["foo", "bar"]); + } + + #[test] + fn should_parse_deck_column() { + let mut options = CsvOptions::new(); + options.columns.deck = 2; + let notes = import!(options, "front,back,foo bar\n"); + assert_eq!(notes[0].deck, "foo bar"); + } + + #[test] + fn should_parse_notes_according_to_their_respective_notetypes() { + let mut options = CsvOptions::new(); + options.columns.notetype = 3; + options.column_names = ["Front", "Back", "Text", "notetype"] + .iter() + .map(ToString::to_string) + .collect(); + assert_imported_fields!( + options, + "front,back,Basic (and reversed card)\n,,foo,Cloze\n", + &[&["front", "back"], &["foo", ""]] + ); } } diff --git a/rslib/src/import_export/text/csv/metadata.rs b/rslib/src/import_export/text/csv/metadata.rs index aac09e322..45b3d2886 100644 --- a/rslib/src/import_export/text/csv/metadata.rs +++ b/rslib/src/import_export/text/csv/metadata.rs @@ -9,7 +9,10 @@ use std::{ use strum::IntoEnumIterator; pub use crate::backend_proto::csv_metadata::Delimiter; -use crate::{backend_proto::CsvMetadata, error::ImportError, prelude::*}; +use crate::{ + backend_proto::CsvMetadata, error::ImportError, import_export::text::import::NotetypeForString, + prelude::*, +}; impl Collection { pub fn get_csv_metadata( diff --git a/rslib/src/import_export/text/csv/mod.rs b/rslib/src/import_export/text/csv/mod.rs index d3a6fb0dd..f6a8e3f16 100644 --- a/rslib/src/import_export/text/csv/mod.rs +++ b/rslib/src/import_export/text/csv/mod.rs @@ -3,10 +3,3 @@ mod import; mod metadata; - -#[derive(Debug, Clone, Copy)] -pub enum Column { - Field(usize), - Ignore, - Tags, -} diff --git a/rslib/src/import_export/text/import.rs b/rslib/src/import_export/text/import.rs index deb0eef8e..e0893353d 100644 --- a/rslib/src/import_export/text/import.rs +++ b/rslib/src/import_export/text/import.rs @@ -143,6 +143,20 @@ impl<'a> Context<'a> { } } +pub(super) trait NotetypeForString { + fn notetype_for_string(&mut self, name_or_id: &str) -> Result>>; +} + +impl NotetypeForString for Collection { + fn notetype_for_string(&mut self, name_or_id: &str) -> Result>> { + if let Some(nt) = self.get_notetype_for_id_string(name_or_id)? { + Ok(Some(nt)) + } else { + self.get_notetype_by_name(name_or_id) + } + } +} + impl Collection { pub(super) fn deck_id_for_string(&mut self, deck: &str) -> Result> { if let Ok(did) = deck.parse::() { @@ -153,14 +167,6 @@ impl Collection { self.get_deck_id(deck) } - pub(super) fn notetype_for_string(&mut self, notetype: &str) -> Result>> { - if let Some(nt) = self.get_notetype_for_id_string(notetype)? { - Ok(Some(nt)) - } else { - self.get_notetype_by_name(notetype) - } - } - fn get_notetype_for_id_string(&mut self, notetype: &str) -> Result>> { notetype .parse::()