From ee2570624aa8c7cfefb4d009c8c52e9bca9008f0 Mon Sep 17 00:00:00 2001 From: RumovZ Date: Fri, 3 Jun 2022 11:07:24 +0200 Subject: [PATCH] Add CSV preview --- ftl/core/importing.ftl | 1 + proto/anki/import_export.proto | 1 + rslib/src/import_export/text/csv/import.rs | 24 ++-- rslib/src/import_export/text/csv/metadata.rs | 113 +++++++++++++------ rslib/src/text.rs | 4 +- ts/import-csv/ImportCsvPage.svelte | 22 +++- ts/import-csv/Preview.svelte | 55 +++++++++ ts/import-csv/index.ts | 1 + 8 files changed, 172 insertions(+), 49 deletions(-) create mode 100644 ts/import-csv/Preview.svelte diff --git a/ftl/core/importing.ftl b/ftl/core/importing.ftl index a4e6621ab..4e06fac78 100644 --- a/ftl/core/importing.ftl +++ b/ftl/core/importing.ftl @@ -108,3 +108,4 @@ importing-preserve = Preserve importing-update = Update importing-tag-all-notes = Tag all notes importing-tag-updated-notes = Tag updated notes +importing-file = File diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto index 0ea23d706..41dc02652 100644 --- a/proto/anki/import_export.proto +++ b/proto/anki/import_export.proto @@ -163,6 +163,7 @@ message CsvMetadata { uint32 tags_column = 10; bool force_delimiter = 11; bool force_is_html = 12; + repeated generic.StringList preview = 13; } message ExportCardCsvRequest { diff --git a/rslib/src/import_export/text/csv/import.rs b/rslib/src/import_export/text/csv/import.rs index d9eeb9c7c..a71af9d3d 100644 --- a/rslib/src/import_export/text/csv/import.rs +++ b/rslib/src/import_export/text/csv/import.rs @@ -135,16 +135,10 @@ impl ColumnContext { fn deserialize_csv( &mut self, - mut reader: impl Read + Seek, + reader: impl Read + Seek, delimiter: Delimiter, ) -> Result> { - remove_tags_line_from_reader(&mut reader)?; - let mut csv_reader = csv::ReaderBuilder::new() - .has_headers(false) - .flexible(true) - .comment(Some(b'#')) - .delimiter(delimiter.byte()) - .from_reader(reader); + let mut csv_reader = build_csv_reader(reader, delimiter)?; self.deserialize_csv_reader(&mut csv_reader) } @@ -210,6 +204,19 @@ impl ColumnContext { } } +pub(super) fn build_csv_reader( + mut reader: impl Read + Seek, + delimiter: Delimiter, +) -> Result> { + remove_tags_line_from_reader(&mut reader)?; + Ok(csv::ReaderBuilder::new() + .has_headers(false) + .flexible(true) + .comment(Some(b'#')) + .delimiter(delimiter.byte()) + .from_reader(reader)) +} + fn stringify_fn(is_html: bool) -> fn(&str) -> String { if is_html { ToString::to_string @@ -275,6 +282,7 @@ mod test { id: 1, field_columns: vec![1, 2], })), + preview: Vec::new(), } } } diff --git a/rslib/src/import_export/text/csv/metadata.rs b/rslib/src/import_export/text/csv/metadata.rs index eade48fb7..019457f77 100644 --- a/rslib/src/import_export/text/csv/metadata.rs +++ b/rslib/src/import_export/text/csv/metadata.rs @@ -7,17 +7,24 @@ use std::{ io::{BufRead, BufReader, Read, Seek, SeekFrom}, }; +use itertools::Itertools; use strum::IntoEnumIterator; +use super::import::build_csv_reader; pub use crate::backend_proto::import_export::{ csv_metadata::{Deck as CsvDeck, Delimiter, MappedNotetype, Notetype as CsvNotetype}, CsvMetadata, }; use crate::{ - error::ImportError, import_export::text::NameOrId, notetype::NoteField, prelude::*, - text::is_html, + backend_proto::StringList, error::ImportError, import_export::text::NameOrId, + notetype::NoteField, prelude::*, text::is_html, }; +/// The maximum number of preview rows. +const PREVIEW_LENGTH: usize = 5; +/// The maximum number of characters per preview field. +const PREVIEW_FIELD_LENGTH: usize = 80; + impl Collection { pub fn get_csv_metadata( &mut self, @@ -25,30 +32,22 @@ impl Collection { delimiter: Option, notetype_id: Option, ) -> Result { - let reader = BufReader::new(File::open(path)?); - self.get_reader_metadata(reader, delimiter, notetype_id) + let mut reader = File::open(path)?; + self.get_reader_metadata(&mut reader, delimiter, notetype_id) } fn get_reader_metadata( &mut self, - mut reader: impl BufRead + Seek, + mut reader: impl Read + Seek, delimiter: Option, notetype_id: Option, ) -> Result { let mut metadata = CsvMetadata::default(); let meta_len = self.parse_meta_lines(&mut reader, &mut metadata)? as u64; - - reader.seek(SeekFrom::Start(meta_len))?; - maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader)?; - - reader.seek(SeekFrom::Start(meta_len))?; - let mut csv_reader = csv::ReaderBuilder::new() - .delimiter(metadata.delimiter().byte()) - .from_reader(reader); - let record = csv_reader.headers()?; - - maybe_set_fallback_columns(&mut metadata, record)?; - maybe_set_fallback_is_html(&mut metadata, record)?; + maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader, meta_len)?; + set_preview(&mut metadata, reader)?; + maybe_set_fallback_columns(&mut metadata)?; + maybe_set_fallback_is_html(&mut metadata)?; self.maybe_set_fallback_notetype(&mut metadata, notetype_id)?; self.maybe_init_notetype_map(&mut metadata)?; self.maybe_set_fallback_deck(&mut metadata)?; @@ -57,12 +56,9 @@ impl Collection { } /// Parses the meta head of the file and returns the total of meta bytes. - fn parse_meta_lines( - &mut self, - mut reader: impl BufRead, - metadata: &mut CsvMetadata, - ) -> Result { + fn parse_meta_lines(&mut self, reader: impl Read, metadata: &mut CsvMetadata) -> Result { let mut meta_len = 0; + let mut reader = BufReader::new(reader); let mut line = String::new(); let mut line_len = reader.read_line(&mut line)?; if self.parse_first_line(&line, metadata) { @@ -213,6 +209,32 @@ fn parse_columns(line: &str, delimiter: Delimiter) -> Result> { }) } +fn set_preview(metadata: &mut CsvMetadata, mut reader: impl Read + Seek) -> Result<()> { + reader.rewind()?; + let mut csv_reader = build_csv_reader(reader, metadata.delimiter())?; + let mut records = csv_reader.records().into_iter().take(PREVIEW_LENGTH); + + let first = records.next().transpose()?.unwrap_or_default(); + metadata.preview.push(build_preview_row(1, &first)); + + let min_len = metadata.preview[0].vals.len(); + for record in records { + metadata.preview.push(build_preview_row(min_len, &record?)); + } + + Ok(()) +} + +fn build_preview_row(min_len: usize, record: &csv::StringRecord) -> StringList { + StringList { + vals: record + .iter() + .pad_using(min_len, |_| "") + .map(|field| field.chars().take(PREVIEW_FIELD_LENGTH).collect()) + .collect(), + } +} + pub(super) fn collect_tags(txt: &str) -> Vec { txt.split_whitespace() .filter(|s| !s.is_empty()) @@ -271,24 +293,17 @@ fn ensure_first_field_is_mapped( Ok(()) } -fn maybe_set_fallback_columns( - metadata: &mut CsvMetadata, - record: &csv::StringRecord, -) -> Result<()> { +fn maybe_set_fallback_columns(metadata: &mut CsvMetadata) -> Result<()> { if metadata.column_labels.is_empty() { - metadata.column_labels = vec![String::new(); record.len()]; + metadata.column_labels = vec![String::new(); metadata.iter_preview_fields(1).count()]; } Ok(()) } -fn maybe_set_fallback_is_html( - metadata: &mut CsvMetadata, - record: &csv::StringRecord, -) -> Result<()> { - // TODO: should probably check more than one line; can reuse preview lines - // when it's implemented +fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata) -> Result<()> { if !metadata.force_is_html { - metadata.is_html = record.iter().any(is_html); + let is_html = metadata.iter_preview_fields(PREVIEW_LENGTH).any(is_html); + metadata.is_html = is_html; } Ok(()) } @@ -296,11 +311,13 @@ fn maybe_set_fallback_is_html( fn maybe_set_fallback_delimiter( delimiter: Option, metadata: &mut CsvMetadata, - reader: impl Read, + mut reader: impl Read + Seek, + meta_len: u64, ) -> Result<()> { if let Some(delim) = delimiter { metadata.set_delimiter(delim); } else if !metadata.force_delimiter { + reader.seek(SeekFrom::Start(meta_len))?; metadata.set_delimiter(delimiter_from_reader(reader)?); } Ok(()) @@ -401,6 +418,14 @@ impl CsvMetadata { } columns } + + fn iter_preview_fields(&self, rows: usize) -> impl Iterator { + self.preview + .iter() + .take(rows) + .map(|row| row.vals.iter()) + .flatten() + } } impl NameOrId { @@ -413,6 +438,14 @@ impl NameOrId { } } +impl From for StringList { + fn from(record: csv::StringRecord) -> Self { + Self { + vals: record.iter().map(ToString::to_string).collect(), + } + } +} + #[cfg(test)] mod test { use std::io::Cursor; @@ -425,7 +458,7 @@ mod test { metadata!($col, $csv, None) }; ($col:expr,$csv:expr, $delim:expr) => { - $col.get_reader_metadata(BufReader::new(Cursor::new($csv.as_bytes())), $delim, None) + $col.get_reader_metadata(Cursor::new($csv.as_bytes()), $delim, None) .unwrap() }; } @@ -620,4 +653,12 @@ mod test { let meta = metadata!(col, "#columns:Back\tFront\nfoo,bar,baz\n"); assert_eq!(meta.unwrap_notetype_map(), &[2, 1]); } + + #[test] + fn should_gather_first_lines_into_preview() { + let mut col = open_test_collection(); + let meta = metadata!(col, "#separator: \nfoo bar\nbaz\n"); + assert_eq!(meta.preview[0].vals, ["foo", "bar"]); + assert_eq!(meta.preview[1].vals, ["baz", ""]); + } } diff --git a/rslib/src/text.rs b/rslib/src/text.rs index b46e64ded..e7b947511 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -172,8 +172,8 @@ lazy_static! { "#).unwrap(); } -pub fn is_html(text: &str) -> bool { - HTML.is_match(text) +pub fn is_html(text: impl AsRef) -> bool { + HTML.is_match(text.as_ref()) } pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow { diff --git a/ts/import-csv/ImportCsvPage.svelte b/ts/import-csv/ImportCsvPage.svelte index ffbbc4195..1a7b126c0 100644 --- a/ts/import-csv/ImportCsvPage.svelte +++ b/ts/import-csv/ImportCsvPage.svelte @@ -8,7 +8,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html import Row from "../components/Row.svelte"; import Spacer from "../components/Spacer.svelte"; import * as tr from "../lib/ftl"; - import { Decks, ImportExport, importExport, Notetypes } from "../lib/proto"; + import { + Decks, + Generic, + ImportExport, + importExport, + Notetypes, + } from "../lib/proto"; import DeckSelector from "./DeckSelector.svelte"; import DelimiterSelector from "./DelimiterSelector.svelte"; import DupeResolutionSelector from "./DupeResolutionSelector.svelte"; @@ -17,6 +23,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html import HtmlSwitch from "./HtmlSwitch.svelte"; import { getColumnOptions, getCsvMetadata } from "./lib"; import NotetypeSelector from "./NotetypeSelector.svelte"; + import Preview from "./Preview.svelte"; import StickyFooter from "./StickyFooter.svelte"; import Tags from "./Tags.svelte"; @@ -32,6 +39,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html export let updatedTags: string[]; export let columnLabels: string[]; export let tagsColumn: number; + export let preview: Generic.StringList[]; // Protobuf oneofs. Exactly one of these pairs is expected to be set. export let notetypeColumn: number | null; export let globalNotetype: ImportExport.CsvMetadata.MappedNotetype | null; @@ -44,6 +52,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html $: columnOptions = getColumnOptions(columnLabels, notetypeColumn, deckColumn); $: getCsvMetadata(path, delimiter).then((meta) => { columnLabels = meta.columnLabels; + preview = meta.preview; }); $: if (globalNotetype?.id !== lastNotetypeId) { lastNotetypeId = globalNotetype?.id; @@ -78,6 +87,15 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + + +
+ + + + + +
@@ -92,8 +110,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html {/if} - - diff --git a/ts/import-csv/Preview.svelte b/ts/import-csv/Preview.svelte new file mode 100644 index 000000000..a5ee43c3f --- /dev/null +++ b/ts/import-csv/Preview.svelte @@ -0,0 +1,55 @@ + + + + + {#each columnOptions.slice(1) as { label }} + + {/each} + {#each preview as row} + + {#each row.vals as cell} + + {/each} + + {/each} +
+ {label} +
{cell}
+ + diff --git a/ts/import-csv/index.ts b/ts/import-csv/index.ts index 551ce2496..20c3ff6a2 100644 --- a/ts/import-csv/index.ts +++ b/ts/import-csv/index.ts @@ -60,6 +60,7 @@ export async function setupImportCsvPage(path: string): Promise { columnLabels: metadata.columnLabels, tagsColumn: metadata.tagsColumn, globalNotetype: metadata.globalNotetype ?? null, + preview: metadata.preview, // Unset oneof numbers default to 0, which also means n/a here, // but it's vital to differentiate between unset and 0 when reserializing. notetypeColumn: metadata.notetypeColumn ? metadata.notetypeColumn : null,