Add start of csv importing on backend

This commit is contained in:
RumovZ 2022-05-03 22:08:31 +02:00
parent ae940f1faa
commit 29c691eabd
7 changed files with 299 additions and 1 deletion

View file

@ -17,6 +17,7 @@ service ImportExportService {
rpc ImportAnkiPackage(ImportAnkiPackageRequest)
returns (ImportAnkiPackageResponse);
rpc ExportAnkiPackage(ExportAnkiPackageRequest) returns (generic.UInt32);
rpc ImportCsv(ImportCsvRequest) returns (generic.Empty);
}
message ImportCollectionPackageRequest {
@ -92,3 +93,22 @@ message MediaEntries {
repeated MediaEntry entries = 1;
}
// Request to import notes from a delimiter-separated text file.
message ImportCsvRequest {
  // Describes what a single column of the file maps to.
  message Column {
    // Non-field things a column may be mapped to.
    enum Other {
      // Column content is discarded.
      IGNORE = 0;
      // Column content is treated as space-separated tags.
      TAGS = 1;
    }
    oneof variant {
      // 0-based index of the note field this column fills.
      uint32 field = 1;
      Other other = 2;
    }
  }
  // Path of the file to import.
  string path = 1;
  // Deck imported notes are assigned to.
  int64 deck_id = 2;
  // Notetype imported notes are created with.
  int64 notetype_id = 3;
  // One entry per column of the file, in order.
  repeated Column columns = 4;
  // Column separator, passed as a string; expected to encode a single byte
  // (e.g. "," or "\t").
  string delimiter = 5;
  bool allow_html = 6;
}

View file

@ -76,6 +76,7 @@ rust_library(
"//rslib/cargo:bytes",
"//rslib/cargo:chrono",
"//rslib/cargo:coarsetime",
"//rslib/cargo:csv",
"//rslib/cargo:flate2",
"//rslib/cargo:fluent",
"//rslib/cargo:fnv",

View file

@ -6,9 +6,17 @@ use std::path::Path;
use super::{progress::Progress, Backend};
pub(super) use crate::backend_proto::importexport_service::Service as ImportExportService;
use crate::{
backend_proto::{self as pb, export_anki_package_request::Selector},
backend_proto::{
self as pb,
export_anki_package_request::Selector,
import_csv_request::{
column::{Other as OtherColumn, Variant as ColumnVariant},
Column as ProtoColumn,
},
},
import_export::{
package::{import_colpkg, NoteLog},
text::csv::Column,
ExportProgress, ImportProgress,
},
prelude::*,
@ -77,6 +85,21 @@ impl ImportExportService for Backend {
})
.map(Into::into)
}
/// Imports notes from the delimiter-separated file at `input.path` into the
/// given deck/notetype, with columns mapped as described in the request.
///
/// The parsed [`ForeignData`] is currently discarded; only parse errors are
/// surfaced to the caller.
fn import_csv(&self, input: pb::ImportCsvRequest) -> Result<pb::Empty> {
    self.with_col(|col| {
        col.import_csv(
            &input.path,
            input.deck_id.into(),
            input.notetype_id.into(),
            input.columns.into_iter().map(Into::into).collect(),
            byte_from_string(&input.delimiter)?,
            input.allow_html,
        )
    })?;
    // NOTE(review): removed a leftover `println!("{:?}", out)` debug print
    // that wrote the parsed data to stdout on every import.
    Ok(pb::Empty {})
}
}
impl SearchNode {
@ -109,3 +132,21 @@ impl From<OpOutput<NoteLog>> for pb::ImportAnkiPackageResponse {
}
}
}
impl From<ProtoColumn> for Column {
    /// Maps a protobuf column spec onto the internal [`Column`] enum.
    ///
    /// A missing `variant`, or an out-of-range `Other` value, falls back to
    /// `Ignore` (the enum's 0/default variant).
    fn from(column: ProtoColumn) -> Self {
        let variant = column.variant.unwrap_or(ColumnVariant::Other(0));
        match variant {
            ColumnVariant::Field(idx) => Self::Field(idx as usize),
            ColumnVariant::Other(code) => {
                match OtherColumn::from_i32(code).unwrap_or_default() {
                    OtherColumn::Ignore => Self::Ignore,
                    OtherColumn::Tags => Self::Tags,
                }
            }
        }
    }
}
/// Converts the delimiter string from the request into the single byte the
/// csv reader expects.
///
/// Errors on an empty string, and also on input longer than one byte —
/// previously any extra bytes (including the remainder of a multi-byte
/// UTF-8 character such as ';') were silently discarded.
fn byte_from_string(s: &str) -> Result<u8> {
    match s.as_bytes() {
        [byte] => Ok(*byte),
        [] => Err(AnkiError::invalid_input("empty string")),
        _ => Err(AnkiError::invalid_input("delimiter must be a single byte")),
    }
}

View file

@ -185,6 +185,12 @@ impl From<regex::Error> for AnkiError {
}
}
impl From<csv::Error> for AnkiError {
    /// Surfaces csv parse/IO failures as invalid-input errors, preserving
    /// the underlying message for the user.
    fn from(err: csv::Error) -> Self {
        Self::InvalidInput(err.to_string())
    }
}
#[derive(Debug, PartialEq)]
pub struct CardTypeError {
pub notetype: String,

View file

@ -4,6 +4,7 @@
mod gather;
mod insert;
pub mod package;
pub mod text;
use std::marker::PhantomData;

View file

@ -0,0 +1,209 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
#![allow(dead_code, unused_imports, unused_variables)]
use std::{
fs::File,
io::{BufRead, BufReader, Read, Seek},
};
use crate::{
import_export::text::{ForeignData, ForeignNote},
prelude::*,
};
/// How the content of one csv column is interpreted during import.
#[derive(Debug, Clone, Copy)]
pub enum Column {
    /// Write the cell into the note field with this 0-based index.
    Field(usize),
    /// Discard the cell.
    Ignore,
    /// Treat the cell as a space-separated list of tags.
    Tags,
}
impl Collection {
    /// Parses the csv file at `path` into [`ForeignData`], mapping columns
    /// onto the fields of the given notetype as described by `columns`.
    ///
    /// Errors if the notetype does not exist or the file cannot be parsed.
    pub fn import_csv(
        &mut self,
        path: &str,
        deck_id: DeckId,
        notetype_id: NotetypeId,
        columns: Vec<Column>,
        delimiter: u8,
        allow_html: bool,
    ) -> Result<ForeignData> {
        // The notetype determines how many fields each parsed note gets.
        let notetype = self.get_notetype(notetype_id)?.ok_or(AnkiError::NotFound)?;
        let field_count = notetype.fields.len();
        let notes = deserialize_csv(File::open(path)?, &columns, field_count, delimiter)?;
        Ok(ForeignData {
            default_deck: deck_id,
            default_notetype: notetype_id,
            notes,
        })
    }
}
/// Parses the csv data in `reader` into foreign notes, after stripping an
/// optional legacy "tags:" first line.
fn deserialize_csv(
    reader: impl Read + Seek,
    columns: &[Column],
    fields_len: usize,
    delimiter: u8,
) -> Result<Vec<ForeignNote>> {
    // The legacy header must be stripped first, while we still have Seek;
    // the csv reader only requires Read.
    let input = reader_without_tags_line(reader)?;
    let mut csv_reader = csv::ReaderBuilder::new()
        .has_headers(false)
        // Rows may have more or fewer cells than there are columns.
        .flexible(true)
        .comment(Some(b'#'))
        .delimiter(delimiter)
        .trim(csv::Trim::All)
        .from_reader(input);
    deserialize_csv_reader(&mut csv_reader, columns, fields_len)
}
/// Wraps `reader` in a buffered reader, with the first line consumed if it
/// starts with "tags:" (allowed for historic reasons). Otherwise the reader
/// is rewound, so no data is lost.
fn reader_without_tags_line(reader: impl Read + Seek) -> Result<impl Read> {
    let mut buffered = BufReader::new(reader);
    let mut line = String::new();
    buffered.read_line(&mut line)?;
    if !line.starts_with("tags:") {
        // Not the legacy header — seek back so the line is parsed as data.
        buffered.rewind()?;
    }
    Ok(buffered)
}
/// Collects every record of `reader` into a [`ForeignNote`], short-circuiting
/// on the first csv error (converted into an [`AnkiError`]).
fn deserialize_csv_reader(
    reader: &mut csv::Reader<impl Read>,
    columns: &[Column],
    fields_len: usize,
) -> Result<Vec<ForeignNote>> {
    // Note: `records()` is already an iterator; the previous `.into_iter()`
    // call on it was a no-op (clippy: useless_conversion).
    reader
        .records()
        .map(|res| {
            res.map(|record| ForeignNote::from_record(record, columns, fields_len))
                .map_err(Into::into)
        })
        .collect()
}
impl ForeignNote {
    /// Builds a note from one csv record, writing each cell into the field
    /// (or tags) its column is mapped to. Records with fewer cells than
    /// columns leave the remaining fields empty.
    fn from_record(record: csv::StringRecord, columns: &[Column], fields_len: usize) -> Self {
        let mut note = Self {
            fields: vec!["".to_string(); fields_len],
            ..Default::default()
        };
        for (&column, value) in columns.iter().zip(record.iter()) {
            note.add_column_value(column, value);
        }
        note
    }

    /// Applies a single cell's content according to its column mapping.
    fn add_column_value(&mut self, column: Column, value: &str) {
        match column {
            Column::Ignore => (),
            Column::Field(idx) => {
                // Columns mapped beyond the notetype's field count are dropped.
                if let Some(field) = self.fields.get_mut(idx) {
                    field.push_str(value)
                }
            }
            // Tags are space-separated. Skip empty entries, so an empty cell
            // or repeated spaces no longer produce "" tags.
            Column::Tags => self.tags.extend(
                value
                    .split(' ')
                    .filter(|tag| !tag.is_empty())
                    .map(ToString::to_string),
            ),
        }
    }
}
#[cfg(test)]
mod test {
    use std::io::Cursor;

    use super::*;

    /// Runs `deserialize_csv` on `$csv` with the given `CsvOptions` and
    /// asserts that the fields of the resulting notes equal `$expected`
    /// (one slice of field strings per expected note).
    macro_rules! assert_imported_fields {
        ($options:expr,$csv:expr, $expected:expr) => {
            let reader = Cursor::new($csv);
            let notes = deserialize_csv(
                reader,
                &$options.columns,
                $options.fields_len,
                $options.delimiter,
            )
            .unwrap();

            // Same note count, then field-for-field equality per note.
            assert_eq!(notes.len(), $expected.len());
            for (note, fields) in notes.iter().zip($expected.iter()) {
                assert_eq!(note.fields.len(), fields.len());
                for (note_field, field) in note.fields.iter().zip(fields.iter()) {
                    assert_eq!(note_field, field);
                }
            }
        };
    }

    /// Builder for `deserialize_csv` arguments; defaults to two columns
    /// mapped to the first two of two fields, comma-delimited.
    struct CsvOptions {
        columns: Vec<Column>,
        fields_len: usize,
        delimiter: u8,
    }

    impl CsvOptions {
        fn new() -> Self {
            Self {
                columns: vec![Column::Field(0), Column::Field(1)],
                fields_len: 2,
                delimiter: b',',
            }
        }

        // Appends one column mapping to the defaults.
        fn add_column(mut self, column: Column) -> Self {
            self.columns.push(column);
            self
        }

        // Replaces the column mappings entirely.
        fn columns(mut self, columns: Vec<Column>) -> Self {
            self.columns = columns;
            self
        }

        fn fields_len(mut self, fields_len: usize) -> Self {
            self.fields_len = fields_len;
            self
        }

        fn delimiter(mut self, delimiter: u8) -> Self {
            self.delimiter = delimiter;
            self
        }
    }

    #[test]
    fn should_allow_missing_columns() {
        // Rows shorter than the column list still import; unmapped and
        // missing fields stay empty.
        let options = CsvOptions::new().add_column(Column::Field(2)).fields_len(4);
        assert_imported_fields!(
            options,
            "front,back\nfoo\n",
            &[&["front", "back", "", ""], &["foo", "", "", ""]]
        );
    }

    #[test]
    fn should_respect_custom_delimiter() {
        // With '|' as the delimiter, commas are ordinary field content.
        let options = CsvOptions::new().delimiter(b'|');
        assert_imported_fields!(options, "fr,ont|ba,ck\n", &[&["fr,ont", "ba,ck"]]);
    }

    #[test]
    fn should_ignore_first_line_starting_with_tags() {
        // Legacy header: a first line beginning with "tags:" is skipped.
        let options = CsvOptions::new();
        assert_imported_fields!(options, "tags:foo\nfront,back\n", &[&["front", "back"]]);
    }

    #[test]
    fn should_respect_column_remapping() {
        // Column 0 → field 1, column 1 ignored, column 2 → field 0.
        let options =
            CsvOptions::new().columns(vec![Column::Field(1), Column::Ignore, Column::Field(0)]);
        assert_imported_fields!(options, "front,foo,back\n", &[&["back", "front"]]);
    }

    #[test]
    fn should_ignore_lines_starting_with_number_sign() {
        // '#' is configured as the comment character in deserialize_csv.
        let options = CsvOptions::new();
        assert_imported_fields!(options, "#foo\nfront,back\n#bar\n", &[&["front", "back"]]);
    }
}

View file

@ -0,0 +1,20 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
#![allow(dead_code, unused_imports, unused_variables)]
pub mod csv;
use crate::prelude::*;
/// Notes parsed from an external text format, plus the defaults to apply
/// when adding them to the collection.
#[derive(Debug)]
pub struct ForeignData {
    // Deck parsed notes are assigned to.
    default_deck: DeckId,
    // Notetype parsed notes are created with.
    default_notetype: NotetypeId,
    notes: Vec<ForeignNote>,
}
/// A single note parsed from an external source, before conversion into a
/// real collection note.
#[derive(Debug, PartialEq, Default)]
pub struct ForeignNote {
    // One entry per notetype field; importers pad missing values with "".
    fields: Vec<String>,
    tags: Vec<String>,
}