From 29c691eabd88e4aec671da7771d4fedb1afdeb43 Mon Sep 17 00:00:00 2001
From: RumovZ
Date: Tue, 3 May 2022 22:08:31 +0200
Subject: [PATCH] Add start of csv importing on backend

---
 proto/anki/import_export.proto      |  20 +++
 rslib/BUILD.bazel                   |   1 +
 rslib/src/backend/import_export.rs  |  43 +++++-
 rslib/src/error/mod.rs              |   6 +
 rslib/src/import_export/mod.rs      |   1 +
 rslib/src/import_export/text/csv.rs | 209 ++++++++++++++++++++++++++++
 rslib/src/import_export/text/mod.rs |  20 +++
 7 files changed, 299 insertions(+), 1 deletion(-)
 create mode 100644 rslib/src/import_export/text/csv.rs
 create mode 100644 rslib/src/import_export/text/mod.rs

diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto
index 2b5bb74ba..05e3df930 100644
--- a/proto/anki/import_export.proto
+++ b/proto/anki/import_export.proto
@@ -17,6 +17,7 @@ service ImportExportService {
   rpc ImportAnkiPackage(ImportAnkiPackageRequest)
       returns (ImportAnkiPackageResponse);
   rpc ExportAnkiPackage(ExportAnkiPackageRequest) returns (generic.UInt32);
+  rpc ImportCsv(ImportCsvRequest) returns (generic.Empty);
 }
 
 message ImportCollectionPackageRequest {
@@ -92,3 +93,22 @@ message MediaEntries {
 
   repeated MediaEntry entries = 1;
 }
+
+message ImportCsvRequest {
+  message Column {
+    enum Other {
+      IGNORE = 0;
+      TAGS = 1;
+    }
+    oneof variant {
+      uint32 field = 1;
+      Other other = 2;
+    }
+  }
+  string path = 1;
+  int64 deck_id = 2;
+  int64 notetype_id = 3;
+  repeated Column columns = 4;
+  string delimiter = 5;
+  bool allow_html = 6;
+}
diff --git a/rslib/BUILD.bazel b/rslib/BUILD.bazel
index 98c57a69c..78f84aaf0 100644
--- a/rslib/BUILD.bazel
+++ b/rslib/BUILD.bazel
@@ -76,6 +76,7 @@ rust_library(
         "//rslib/cargo:bytes",
         "//rslib/cargo:chrono",
         "//rslib/cargo:coarsetime",
+        "//rslib/cargo:csv",
         "//rslib/cargo:flate2",
         "//rslib/cargo:fluent",
         "//rslib/cargo:fnv",
diff --git a/rslib/src/backend/import_export.rs b/rslib/src/backend/import_export.rs
index e17b73ce4..36a93f828 100644
--- a/rslib/src/backend/import_export.rs
+++ b/rslib/src/backend/import_export.rs
@@ -6,9 +6,17 @@ use std::path::Path;
 use super::{progress::Progress, Backend};
 pub(super) use crate::backend_proto::importexport_service::Service as ImportExportService;
 use crate::{
-    backend_proto::{self as pb, export_anki_package_request::Selector},
+    backend_proto::{
+        self as pb,
+        export_anki_package_request::Selector,
+        import_csv_request::{
+            column::{Other as OtherColumn, Variant as ColumnVariant},
+            Column as ProtoColumn,
+        },
+    },
     import_export::{
         package::{import_colpkg, NoteLog},
+        text::csv::Column,
         ExportProgress, ImportProgress,
     },
     prelude::*,
@@ -77,6 +85,21 @@ impl ImportExportService for Backend {
         })
         .map(Into::into)
     }
+
+    fn import_csv(&self, input: pb::ImportCsvRequest) -> Result<pb::Empty> {
+        let out = self.with_col(|col| {
+            col.import_csv(
+                &input.path,
+                input.deck_id.into(),
+                input.notetype_id.into(),
+                input.columns.into_iter().map(Into::into).collect(),
+                byte_from_string(&input.delimiter)?,
+                input.allow_html,
+            )
+        })?;
+        println!("{:?}", out);
+        Ok(pb::Empty {})
+    }
 }
 
 impl SearchNode {
@@ -109,3 +132,21 @@ impl From<OpOutput<NoteLog>> for pb::ImportAnkiPackageResponse {
         }
     }
 }
+
+impl From<ProtoColumn> for Column {
+    fn from(column: ProtoColumn) -> Self {
+        match column.variant.unwrap_or(ColumnVariant::Other(0)) {
+            ColumnVariant::Field(idx) => Column::Field(idx as usize),
+            ColumnVariant::Other(i) => match OtherColumn::from_i32(i).unwrap_or_default() {
+                OtherColumn::Tags => Column::Tags,
+                OtherColumn::Ignore => Column::Ignore,
+            },
+        }
+    }
+}
+
+fn byte_from_string(s: &str) -> Result<u8> {
+    s.bytes()
+        .next()
+        .ok_or_else(|| AnkiError::invalid_input("empty string"))
+}
diff --git a/rslib/src/error/mod.rs b/rslib/src/error/mod.rs
index 4c1eae0a7..96815caf1 100644
--- a/rslib/src/error/mod.rs
+++ b/rslib/src/error/mod.rs
@@ -185,6 +185,12 @@ impl From for AnkiError {
     }
 }
 
+impl From<csv::Error> for AnkiError {
+    fn from(err: csv::Error) -> Self {
+        AnkiError::InvalidInput(err.to_string())
+    }
+}
+
 #[derive(Debug, PartialEq)]
 pub struct CardTypeError {
     pub notetype: String,
diff --git a/rslib/src/import_export/mod.rs b/rslib/src/import_export/mod.rs
index 3ea552ab2..d76d2b351 100644
--- a/rslib/src/import_export/mod.rs
+++ b/rslib/src/import_export/mod.rs
@@ -4,6 +4,7 @@
 mod gather;
 mod insert;
 pub mod package;
+pub mod text;
 
 use std::marker::PhantomData;
 
diff --git a/rslib/src/import_export/text/csv.rs b/rslib/src/import_export/text/csv.rs
new file mode 100644
index 000000000..30ff281f8
--- /dev/null
+++ b/rslib/src/import_export/text/csv.rs
@@ -0,0 +1,209 @@
+// Copyright: Ankitects Pty Ltd and contributors
+// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
+#![allow(dead_code, unused_imports, unused_variables)]
+
+use std::{
+    fs::File,
+    io::{BufRead, BufReader, Read, Seek},
+};
+
+use crate::{
+    import_export::text::{ForeignData, ForeignNote},
+    prelude::*,
+};
+
+#[derive(Debug, Clone, Copy)]
+pub enum Column {
+    Field(usize),
+    Ignore,
+    Tags,
+}
+
+impl Collection {
+    pub fn import_csv(
+        &mut self,
+        path: &str,
+        deck_id: DeckId,
+        notetype_id: NotetypeId,
+        columns: Vec<Column>,
+        delimiter: u8,
+        allow_html: bool,
+    ) -> Result<ForeignData> {
+        let notetype = self.get_notetype(notetype_id)?.ok_or(AnkiError::NotFound)?;
+        let fields_len = notetype.fields.len();
+        let file = File::open(path)?;
+        let notes = deserialize_csv(file, &columns, fields_len, delimiter)?;
+
+        Ok(ForeignData {
+            default_deck: deck_id,
+            default_notetype: notetype_id,
+            notes,
+        })
+    }
+}
+
+fn deserialize_csv(
+    reader: impl Read + Seek,
+    columns: &[Column],
+    fields_len: usize,
+    delimiter: u8,
+) -> Result<Vec<ForeignNote>> {
+    let mut reader = csv::ReaderBuilder::new()
+        .has_headers(false)
+        .flexible(true)
+        .comment(Some(b'#'))
+        .delimiter(delimiter)
+        .trim(csv::Trim::All)
+        .from_reader(reader_without_tags_line(reader)?);
+    deserialize_csv_reader(&mut reader, columns, fields_len)
+}
+
+/// Returns a reader with the first line stripped if it starts with "tags:",
+/// which is allowed for historic reasons.
+fn reader_without_tags_line(reader: impl Read + Seek) -> Result<impl Read + Seek> {
+    let mut buf_reader = BufReader::new(reader);
+    let mut first_line = String::new();
+    buf_reader.read_line(&mut first_line)?;
+    if !first_line.starts_with("tags:") {
+        buf_reader.rewind()?;
+    }
+    Ok(buf_reader)
+}
+
+fn deserialize_csv_reader(
+    reader: &mut csv::Reader<impl Read>,
+    columns: &[Column],
+    fields_len: usize,
+) -> Result<Vec<ForeignNote>> {
+    reader
+        .records()
+        .into_iter()
+        .map(|res| {
+            res.map(|record| ForeignNote::from_record(record, columns, fields_len))
+                .map_err(Into::into)
+        })
+        .collect()
+}
+
+impl ForeignNote {
+    fn from_record(record: csv::StringRecord, columns: &[Column], fields_len: usize) -> Self {
+        let mut note = Self {
+            fields: vec!["".to_string(); fields_len],
+            ..Default::default()
+        };
+        for (&column, value) in columns.iter().zip(record.iter()) {
+            note.add_column_value(column, value);
+        }
+        note
+    }
+
+    fn add_column_value(&mut self, column: Column, value: &str) {
+        match column {
+            Column::Ignore => (),
+            Column::Field(idx) => {
+                if let Some(field) = self.fields.get_mut(idx) {
+                    field.push_str(value)
+                }
+            }
+            Column::Tags => self.tags.extend(value.split(' ').map(ToString::to_string)),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::io::Cursor;
+
+    use super::*;
+
+    macro_rules! assert_imported_fields {
+        ($options:expr,$csv:expr, $expected:expr) => {
+            let reader = Cursor::new($csv);
+            let notes = deserialize_csv(
+                reader,
+                &$options.columns,
+                $options.fields_len,
+                $options.delimiter,
+            )
+            .unwrap();
+            assert_eq!(notes.len(), $expected.len());
+            for (note, fields) in notes.iter().zip($expected.iter()) {
+                assert_eq!(note.fields.len(), fields.len());
+                for (note_field, field) in note.fields.iter().zip(fields.iter()) {
+                    assert_eq!(note_field, field);
+                }
+            }
+        };
+    }
+
+    struct CsvOptions {
+        columns: Vec<Column>,
+        fields_len: usize,
+        delimiter: u8,
+    }
+
+    impl CsvOptions {
+        fn new() -> Self {
+            Self {
+                columns: vec![Column::Field(0), Column::Field(1)],
+                fields_len: 2,
+                delimiter: b',',
+            }
+        }
+
+        fn add_column(mut self, column: Column) -> Self {
+            self.columns.push(column);
+            self
+        }
+
+        fn columns(mut self, columns: Vec<Column>) -> Self {
+            self.columns = columns;
+            self
+        }
+
+        fn fields_len(mut self, fields_len: usize) -> Self {
+            self.fields_len = fields_len;
+            self
+        }
+
+        fn delimiter(mut self, delimiter: u8) -> Self {
+            self.delimiter = delimiter;
+            self
+        }
+    }
+
+    #[test]
+    fn should_allow_missing_columns() {
+        let options = CsvOptions::new().add_column(Column::Field(2)).fields_len(4);
+        assert_imported_fields!(
+            options,
+            "front,back\nfoo\n",
+            &[&["front", "back", "", ""], &["foo", "", "", ""]]
+        );
+    }
+
+    #[test]
+    fn should_respect_custom_delimiter() {
+        let options = CsvOptions::new().delimiter(b'|');
+        assert_imported_fields!(options, "fr,ont|ba,ck\n", &[&["fr,ont", "ba,ck"]]);
+    }
+
+    #[test]
+    fn should_ignore_first_line_starting_with_tags() {
+        let options = CsvOptions::new();
+        assert_imported_fields!(options, "tags:foo\nfront,back\n", &[&["front", "back"]]);
+    }
+
+    #[test]
+    fn should_respect_column_remapping() {
+        let options =
+            CsvOptions::new().columns(vec![Column::Field(1), Column::Ignore, Column::Field(0)]);
+        assert_imported_fields!(options, "front,foo,back\n", &[&["back", "front"]]);
+    }
+
+    #[test]
+    fn should_ignore_lines_starting_with_number_sign() {
+        let options = CsvOptions::new();
+        assert_imported_fields!(options, "#foo\nfront,back\n#bar\n", &[&["front", "back"]]);
+    }
+}
diff --git a/rslib/src/import_export/text/mod.rs b/rslib/src/import_export/text/mod.rs
new file mode 100644
index 000000000..a0b0d3cbd
--- /dev/null
+++ b/rslib/src/import_export/text/mod.rs
@@ -0,0 +1,20 @@
+// Copyright: Ankitects Pty Ltd and contributors
+// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
+#![allow(dead_code, unused_imports, unused_variables)]
+
+pub mod csv;
+
+use crate::prelude::*;
+
+#[derive(Debug)]
+pub struct ForeignData {
+    default_deck: DeckId,
+    default_notetype: NotetypeId,
+    notes: Vec<ForeignNote>,
+}
+
+#[derive(Debug, PartialEq, Default)]
+pub struct ForeignNote {
+    fields: Vec<String>,
+    tags: Vec<String>,
+}
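
Usage note (not part of the patch): a minimal sketch of how a caller inside rslib might build the new ImportCsv request, assuming the prost-generated types follow the message added to import_export.proto above. The file path, deck id, and notetype id are placeholders; only the module paths shown in the patch (backend_proto as pb, import_csv_request::Column and its variant/other enums) are taken from the diff.

    use crate::backend_proto::{
        self as pb,
        import_csv_request::{
            column::{Other, Variant},
            Column,
        },
    };

    /// Illustrative only: maps the first two CSV columns to note fields 0 and 1,
    /// and the third column to tags. `delimiter` travels as a string; the backend
    /// keeps only its first byte via byte_from_string().
    fn example_import_csv_request() -> pb::ImportCsvRequest {
        pb::ImportCsvRequest {
            path: "/tmp/notes.csv".into(), // hypothetical path
            deck_id: 1,                    // hypothetical deck
            notetype_id: 1,                // hypothetical notetype
            columns: vec![
                Column { variant: Some(Variant::Field(0)) },
                Column { variant: Some(Variant::Field(1)) },
                Column { variant: Some(Variant::Other(Other::Tags as i32)) },
            ],
            delimiter: ",".into(),
            allow_html: false,
        }
    }

On the Rust side such a request reaches Collection::import_csv(), which in this patch only parses the file into ForeignData and prints it; turning that data into actual notes is left for follow-up work.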