Add CSV preview

This commit is contained in:
RumovZ 2022-06-03 11:07:24 +02:00
parent d04926e30f
commit ee2570624a
8 changed files with 172 additions and 49 deletions

View file

@ -108,3 +108,4 @@ importing-preserve = Preserve
importing-update = Update
importing-tag-all-notes = Tag all notes
importing-tag-updated-notes = Tag updated notes
importing-file = File

View file

@ -163,6 +163,7 @@ message CsvMetadata {
uint32 tags_column = 10;
bool force_delimiter = 11;
bool force_is_html = 12;
repeated generic.StringList preview = 13;
}
message ExportCardCsvRequest {

View file

@ -135,16 +135,10 @@ impl ColumnContext {
fn deserialize_csv(
&mut self,
mut reader: impl Read + Seek,
reader: impl Read + Seek,
delimiter: Delimiter,
) -> Result<Vec<ForeignNote>> {
remove_tags_line_from_reader(&mut reader)?;
let mut csv_reader = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.comment(Some(b'#'))
.delimiter(delimiter.byte())
.from_reader(reader);
let mut csv_reader = build_csv_reader(reader, delimiter)?;
self.deserialize_csv_reader(&mut csv_reader)
}
@ -210,6 +204,19 @@ impl ColumnContext {
}
}
/// Wraps `reader` in a CSV reader that splits fields on `delimiter`.
///
/// `remove_tags_line_from_reader` is applied to the input first. The CSV
/// reader is configured to treat the first row as data (no header row), to
/// accept records with differing field counts, and to skip lines starting
/// with `#` as comments.
pub(super) fn build_csv_reader(
    mut reader: impl Read + Seek,
    delimiter: Delimiter,
) -> Result<csv::Reader<impl Read + Seek>> {
    remove_tags_line_from_reader(&mut reader)?;

    let mut builder = csv::ReaderBuilder::new();
    builder
        .has_headers(false)
        .flexible(true)
        .comment(Some(b'#'))
        .delimiter(delimiter.byte());
    Ok(builder.from_reader(reader))
}
fn stringify_fn(is_html: bool) -> fn(&str) -> String {
if is_html {
ToString::to_string
@ -275,6 +282,7 @@ mod test {
id: 1,
field_columns: vec![1, 2],
})),
preview: Vec::new(),
}
}
}

View file

@ -7,17 +7,24 @@ use std::{
io::{BufRead, BufReader, Read, Seek, SeekFrom},
};
use itertools::Itertools;
use strum::IntoEnumIterator;
use super::import::build_csv_reader;
pub use crate::backend_proto::import_export::{
csv_metadata::{Deck as CsvDeck, Delimiter, MappedNotetype, Notetype as CsvNotetype},
CsvMetadata,
};
use crate::{
error::ImportError, import_export::text::NameOrId, notetype::NoteField, prelude::*,
text::is_html,
backend_proto::StringList, error::ImportError, import_export::text::NameOrId,
notetype::NoteField, prelude::*, text::is_html,
};
/// The maximum number of preview rows.
const PREVIEW_LENGTH: usize = 5;
/// The maximum number of characters per preview field.
const PREVIEW_FIELD_LENGTH: usize = 80;
impl Collection {
pub fn get_csv_metadata(
&mut self,
@ -25,30 +32,22 @@ impl Collection {
delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>,
) -> Result<CsvMetadata> {
let reader = BufReader::new(File::open(path)?);
self.get_reader_metadata(reader, delimiter, notetype_id)
let mut reader = File::open(path)?;
self.get_reader_metadata(&mut reader, delimiter, notetype_id)
}
fn get_reader_metadata(
&mut self,
mut reader: impl BufRead + Seek,
mut reader: impl Read + Seek,
delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>,
) -> Result<CsvMetadata> {
let mut metadata = CsvMetadata::default();
let meta_len = self.parse_meta_lines(&mut reader, &mut metadata)? as u64;
reader.seek(SeekFrom::Start(meta_len))?;
maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader)?;
reader.seek(SeekFrom::Start(meta_len))?;
let mut csv_reader = csv::ReaderBuilder::new()
.delimiter(metadata.delimiter().byte())
.from_reader(reader);
let record = csv_reader.headers()?;
maybe_set_fallback_columns(&mut metadata, record)?;
maybe_set_fallback_is_html(&mut metadata, record)?;
maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader, meta_len)?;
set_preview(&mut metadata, reader)?;
maybe_set_fallback_columns(&mut metadata)?;
maybe_set_fallback_is_html(&mut metadata)?;
self.maybe_set_fallback_notetype(&mut metadata, notetype_id)?;
self.maybe_init_notetype_map(&mut metadata)?;
self.maybe_set_fallback_deck(&mut metadata)?;
@ -57,12 +56,9 @@ impl Collection {
}
/// Parses the meta head of the file and returns the total of meta bytes.
fn parse_meta_lines(
&mut self,
mut reader: impl BufRead,
metadata: &mut CsvMetadata,
) -> Result<usize> {
fn parse_meta_lines(&mut self, reader: impl Read, metadata: &mut CsvMetadata) -> Result<usize> {
let mut meta_len = 0;
let mut reader = BufReader::new(reader);
let mut line = String::new();
let mut line_len = reader.read_line(&mut line)?;
if self.parse_first_line(&line, metadata) {
@ -213,6 +209,32 @@ fn parse_columns(line: &str, delimiter: Delimiter) -> Result<Vec<String>> {
})
}
/// Appends up to `PREVIEW_LENGTH` rows from the start of `reader` to
/// `metadata.preview`.
///
/// The first row is padded to at least one column, so an input without any
/// records still produces a single row. Every following row is padded to at
/// least the width of that first row.
///
/// # Errors
///
/// Fails if the reader cannot be rewound, the CSV reader cannot be built, or
/// a record cannot be parsed.
fn set_preview(metadata: &mut CsvMetadata, mut reader: impl Read + Seek) -> Result<()> {
    reader.rewind()?;
    let mut csv_reader = build_csv_reader(reader, metadata.delimiter())?;
    // `records()` already yields an iterator, so no conversion is required.
    let mut records = csv_reader.records().take(PREVIEW_LENGTH);

    let first = records.next().transpose()?.unwrap_or_default();
    let first_row = build_preview_row(1, &first);
    // Take the width from the row built here instead of indexing
    // `metadata.preview[0]`, which is only the same row if the preview was
    // empty on entry.
    let min_len = first_row.vals.len();
    metadata.preview.push(first_row);

    for record in records {
        metadata.preview.push(build_preview_row(min_len, &record?));
    }
    Ok(())
}
/// Converts a CSV record into one preview row.
///
/// Rows with fewer than `min_len` fields are padded with empty strings, and
/// each field is cut off after `PREVIEW_FIELD_LENGTH` characters.
fn build_preview_row(min_len: usize, record: &csv::StringRecord) -> StringList {
    // Truncation counts `char`s, so multi-byte characters are never split.
    let truncate = |field: &str| -> String { field.chars().take(PREVIEW_FIELD_LENGTH).collect() };
    let vals = record
        .iter()
        .pad_using(min_len, |_| "")
        .map(truncate)
        .collect();
    StringList { vals }
}
pub(super) fn collect_tags(txt: &str) -> Vec<String> {
txt.split_whitespace()
.filter(|s| !s.is_empty())
@ -271,24 +293,17 @@ fn ensure_first_field_is_mapped(
Ok(())
}
fn maybe_set_fallback_columns(
metadata: &mut CsvMetadata,
record: &csv::StringRecord,
) -> Result<()> {
fn maybe_set_fallback_columns(metadata: &mut CsvMetadata) -> Result<()> {
if metadata.column_labels.is_empty() {
metadata.column_labels = vec![String::new(); record.len()];
metadata.column_labels = vec![String::new(); metadata.iter_preview_fields(1).count()];
}
Ok(())
}
fn maybe_set_fallback_is_html(
metadata: &mut CsvMetadata,
record: &csv::StringRecord,
) -> Result<()> {
// TODO: should probably check more than one line; can reuse preview lines
// when it's implemented
fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata) -> Result<()> {
if !metadata.force_is_html {
metadata.is_html = record.iter().any(is_html);
let is_html = metadata.iter_preview_fields(PREVIEW_LENGTH).any(is_html);
metadata.is_html = is_html;
}
Ok(())
}
@ -296,11 +311,13 @@ fn maybe_set_fallback_is_html(
fn maybe_set_fallback_delimiter(
delimiter: Option<Delimiter>,
metadata: &mut CsvMetadata,
reader: impl Read,
mut reader: impl Read + Seek,
meta_len: u64,
) -> Result<()> {
if let Some(delim) = delimiter {
metadata.set_delimiter(delim);
} else if !metadata.force_delimiter {
reader.seek(SeekFrom::Start(meta_len))?;
metadata.set_delimiter(delimiter_from_reader(reader)?);
}
Ok(())
@ -401,6 +418,14 @@ impl CsvMetadata {
}
columns
}
/// Iterates over every field of the first `rows` preview rows, row by row.
fn iter_preview_fields(&self, rows: usize) -> impl Iterator<Item = &String> {
    self.preview
        .iter()
        .take(rows)
        .flat_map(|row| row.vals.iter())
}
}
impl NameOrId {
@ -413,6 +438,14 @@ impl NameOrId {
}
}
/// Copies every field of a CSV record, in order, into an owned `StringList`.
impl From<csv::StringRecord> for StringList {
    fn from(record: csv::StringRecord) -> Self {
        let vals = record.iter().map(String::from).collect();
        Self { vals }
    }
}
#[cfg(test)]
mod test {
use std::io::Cursor;
@ -425,7 +458,7 @@ mod test {
metadata!($col, $csv, None)
};
($col:expr,$csv:expr, $delim:expr) => {
$col.get_reader_metadata(BufReader::new(Cursor::new($csv.as_bytes())), $delim, None)
$col.get_reader_metadata(Cursor::new($csv.as_bytes()), $delim, None)
.unwrap()
};
}
@ -620,4 +653,12 @@ mod test {
let meta = metadata!(col, "#columns:Back\tFront\nfoo,bar,baz\n");
assert_eq!(meta.unwrap_notetype_map(), &[2, 1]);
}
#[test]
fn should_gather_first_lines_into_preview() {
    let mut col = open_test_collection();
    let meta = metadata!(col, "#separator: \nfoo bar\nbaz\n");
    // The second line has a single field and is expected to be padded to the
    // width of the first line.
    let expected = [["foo", "bar"], ["baz", ""]];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(meta.preview[idx].vals, *want);
    }
}
}

View file

@ -172,8 +172,8 @@ lazy_static! {
"#).unwrap();
}
pub fn is_html(text: &str) -> bool {
HTML.is_match(text)
pub fn is_html(text: impl AsRef<str>) -> bool {
HTML.is_match(text.as_ref())
}
pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow<str> {

View file

@ -8,7 +8,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import Row from "../components/Row.svelte";
import Spacer from "../components/Spacer.svelte";
import * as tr from "../lib/ftl";
import { Decks, ImportExport, importExport, Notetypes } from "../lib/proto";
import {
Decks,
Generic,
ImportExport,
importExport,
Notetypes,
} from "../lib/proto";
import DeckSelector from "./DeckSelector.svelte";
import DelimiterSelector from "./DelimiterSelector.svelte";
import DupeResolutionSelector from "./DupeResolutionSelector.svelte";
@ -17,6 +23,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import HtmlSwitch from "./HtmlSwitch.svelte";
import { getColumnOptions, getCsvMetadata } from "./lib";
import NotetypeSelector from "./NotetypeSelector.svelte";
import Preview from "./Preview.svelte";
import StickyFooter from "./StickyFooter.svelte";
import Tags from "./Tags.svelte";
@ -32,6 +39,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
export let updatedTags: string[];
export let columnLabels: string[];
export let tagsColumn: number;
export let preview: Generic.StringList[];
// Protobuf oneofs. Exactly one of these pairs is expected to be set.
export let notetypeColumn: number | null;
export let globalNotetype: ImportExport.CsvMetadata.MappedNotetype | null;
@ -44,6 +52,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
$: columnOptions = getColumnOptions(columnLabels, notetypeColumn, deckColumn);
$: getCsvMetadata(path, delimiter).then((meta) => {
columnLabels = meta.columnLabels;
preview = meta.preview;
});
$: if (globalNotetype?.id !== lastNotetypeId) {
lastNotetypeId = globalNotetype?.id;
@ -78,6 +87,15 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<Container class="csv-page">
<Row --cols={2}>
<Col --col-size={1} breakpoint="md">
<Container>
<Header heading={tr.importingFile()} />
<Spacer --height="1.5rem" />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Preview {columnOptions} {preview} />
</Container>
</Col>
<Col --col-size={1} breakpoint="md">
<Container>
<Header heading={tr.importingImportOptions()} />
@ -92,8 +110,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<DeckSelector {deckNameIds} bind:deckId />
{/if}
<DupeResolutionSelector bind:dupeResolution />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Tags bind:globalTags bind:updatedTags />
</Container>
</Col>

View file

@ -0,0 +1,55 @@
<!--
Copyright: Ankitects Pty Ltd and contributors
License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
-->
<script lang="ts">
import type { Generic } from "../lib/proto";
import type { ColumnOption } from "./lib";
export let columnOptions: ColumnOption[];
export let preview: Generic.StringList[];
</script>
<!--
    Header cells are wrapped in their own row: <th> directly inside <table> is
    not valid table structure, and it made the position of the data rows (and
    therefore the even-row striping) depend on the number of header cells.
-->
<table id="preview">
    <tr>
        {#each columnOptions.slice(1) as { label }}
            <th>
                {label}
            </th>
        {/each}
    </tr>
    {#each preview as row}
        <tr>
            {#each row.vals as cell}
                <td>{cell}</td>
            {/each}
        </tr>
    {/each}
</table>
<style lang="scss">
#preview {
border-collapse: collapse;
width: 100%;
white-space: nowrap;
th,
td {
border: 1px solid var(--faint-border);
padding: 0.25rem 0.5rem;
}
th {
background: var(--medium-border);
text-align: center;
}
tr {
&:nth-child(even) {
background: var(--frame-bg);
}
}
td {
text-align: start;
}
}
</style>

View file

@ -60,6 +60,7 @@ export async function setupImportCsvPage(path: string): Promise<ImportCsvPage> {
columnLabels: metadata.columnLabels,
tagsColumn: metadata.tagsColumn,
globalNotetype: metadata.globalNotetype ?? null,
preview: metadata.preview,
// Unset oneof numbers default to 0, which also means n/a here,
// but it's vital to differentiate between unset and 0 when reserializing.
notetypeColumn: metadata.notetypeColumn ? metadata.notetypeColumn : null,