mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 09:16:38 -04:00
Only allow fixed set of CSV delimiters
This commit is contained in:
parent
c565f77c23
commit
74ce551155
8 changed files with 139 additions and 97 deletions
|
@ -110,17 +110,28 @@ message ImportCsvRequest {
|
|||
int64 deck_id = 2;
|
||||
int64 notetype_id = 3;
|
||||
repeated CsvColumn columns = 4;
|
||||
uint32 delimiter = 5;
|
||||
CsvMetadata.Delimiter delimiter = 5;
|
||||
bool is_html = 6;
|
||||
}
|
||||
|
||||
message CsvMetadataRequest {
|
||||
string path = 1;
|
||||
optional uint32 delimiter = 2;
|
||||
optional CsvMetadata.Delimiter delimiter = 2;
|
||||
}
|
||||
|
||||
message CsvMetadata {
|
||||
uint32 delimiter = 1;
|
||||
// Order roughly in ascending expected frequency in note text, because the
|
||||
// delimiter detection algorithm is stupidly picking the first one it
|
||||
// encounters.
|
||||
enum Delimiter {
|
||||
TAB = 0;
|
||||
PIPE = 1;
|
||||
SEMICOLON = 2;
|
||||
COLON = 3;
|
||||
COMMA = 4;
|
||||
SPACE = 5;
|
||||
}
|
||||
Delimiter delimiter = 1;
|
||||
string tags = 2;
|
||||
repeated string columns = 3;
|
||||
int64 deck_id = 4;
|
||||
|
|
|
@ -36,6 +36,7 @@ StripHtmlMode = card_rendering_pb2.StripHtmlRequest
|
|||
ImportLogWithChanges = import_export_pb2.ImportResponse
|
||||
CsvColumn = import_export_pb2.ImportCsvRequest.CsvColumn
|
||||
CsvMetadata = import_export_pb2.CsvMetadata
|
||||
Delimiter = import_export_pb2.CsvMetadata.Delimiter
|
||||
|
||||
import copy
|
||||
import os
|
||||
|
@ -405,7 +406,7 @@ class Collection(DeprecatedNamesMixin):
|
|||
request.whole_collection.SetInParent()
|
||||
return self._backend.export_anki_package(request)
|
||||
|
||||
def get_csv_metadata(self, path: str, delimiter: int | None) -> CsvMetadata:
|
||||
def get_csv_metadata(self, path: str, delimiter: Delimiter.V | None) -> CsvMetadata:
|
||||
request = import_export_pb2.CsvMetadataRequest(path=path, delimiter=delimiter)
|
||||
return self._backend.get_csv_metadata(request)
|
||||
|
||||
|
@ -415,7 +416,7 @@ class Collection(DeprecatedNamesMixin):
|
|||
deck_id: DeckId,
|
||||
notetype_id: NotetypeId,
|
||||
columns: list[CsvColumn],
|
||||
delimiter: int,
|
||||
delimiter: Delimiter.V,
|
||||
is_html: bool,
|
||||
) -> ImportLogWithChanges:
|
||||
return self._backend.import_csv(
|
||||
|
|
|
@ -9,7 +9,7 @@ from typing import Optional, Sequence
|
|||
|
||||
import aqt.forms
|
||||
import aqt.main
|
||||
from anki.collection import CsvColumn, CsvMetadata
|
||||
from anki.collection import CsvColumn, CsvMetadata, Delimiter
|
||||
from anki.decks import DeckId
|
||||
from anki.models import NotetypeDict, NotetypeId
|
||||
from aqt.import_export.importing import import_progress_update, show_import_log
|
||||
|
@ -17,6 +17,15 @@ from aqt.operations import CollectionOp, QueryOp
|
|||
from aqt.qt import *
|
||||
from aqt.utils import HelpPage, disable_help_button, getText, openHelp, showWarning, tr
|
||||
|
||||
DELIMITERS = (
|
||||
("\\t", tr.importing_tab(), Delimiter.TAB),
|
||||
(",", tr.importing_comma(), Delimiter.COMMA),
|
||||
(" ", tr.studying_space(), Delimiter.SPACE),
|
||||
(";", tr.importing_semicolon(), Delimiter.SEMICOLON),
|
||||
(":", tr.importing_colon(), Delimiter.COLON),
|
||||
("|", tr.importing_colon(), Delimiter.PIPE),
|
||||
)
|
||||
|
||||
|
||||
class ChangeMap(QDialog):
|
||||
def __init__(self, mw: aqt.main.AnkiQt, model: dict, current: str) -> None:
|
||||
|
@ -99,7 +108,7 @@ class ImportDialog(QDialog):
|
|||
self._setup_choosers()
|
||||
self.column_map = ColumnMap(self.columns, self.model)
|
||||
self._render_mapping()
|
||||
self._set_delimiter_button_text()
|
||||
self._set_delimiter()
|
||||
self.frm.allowHTML.setChecked(self.is_html)
|
||||
self.frm.importMode.setCurrentIndex(self.mw.pm.profile.get("importMode", 1))
|
||||
self.frm.tagModified.setText(self.tags)
|
||||
|
@ -142,6 +151,13 @@ class ImportDialog(QDialog):
|
|||
)
|
||||
self.deck = aqt.deckchooser.DeckChooser(self.mw, self.frm.deckArea, label=False)
|
||||
|
||||
def _set_delimiter(self) -> None:
|
||||
for delimiter in DELIMITERS:
|
||||
if delimiter[2] == self.delimiter:
|
||||
txt = tr.importing_fields_separated_by(val=delimiter[1])
|
||||
self.frm.autoDetect.setText(txt)
|
||||
return
|
||||
|
||||
def onDelimiter(self) -> None:
|
||||
# Open a modal dialog to enter an delimiter
|
||||
# Todo/Idea Constrain the maximum width, so it doesnt take up that much screen space
|
||||
|
@ -154,20 +170,20 @@ class ImportDialog(QDialog):
|
|||
if not ok:
|
||||
return
|
||||
# Check if the entered value is valid and if not fallback to default
|
||||
# at the moment every single character entry as well as '\t' is valid
|
||||
delim = delim if len(delim) > 0 else "\t"
|
||||
delim = delim.replace("\\t", "\t") # un-escape it
|
||||
delimiter = ord(delim)
|
||||
if delimiter > 255:
|
||||
|
||||
txt = ""
|
||||
for delimiter in DELIMITERS:
|
||||
if delimiter[0] == delim:
|
||||
txt = tr.importing_fields_separated_by(val=delimiter[1])
|
||||
self.delimiter = delimiter[2]
|
||||
break
|
||||
if not txt:
|
||||
showWarning(
|
||||
tr.importing_multicharacter_separators_are_not_supported_please()
|
||||
)
|
||||
return
|
||||
|
||||
# self.hideMapping()
|
||||
# self.showMapping(hook=_update)
|
||||
self.delimiter = delimiter
|
||||
self._set_delimiter_button_text()
|
||||
self.frm.autoDetect.setText(txt)
|
||||
|
||||
def _update_columns(options: CsvMetadata) -> None:
|
||||
self.columns = options.columns
|
||||
|
@ -176,27 +192,10 @@ class ImportDialog(QDialog):
|
|||
|
||||
QueryOp(
|
||||
parent=self,
|
||||
op=lambda col: col.get_csv_metadata(self.path, delimiter),
|
||||
op=lambda col: col.get_csv_metadata(self.path, self.delimiter),
|
||||
success=_update_columns,
|
||||
).run_in_background()
|
||||
|
||||
def _set_delimiter_button_text(self) -> None:
|
||||
d = chr(self.delimiter)
|
||||
if d == "\t":
|
||||
d = tr.importing_tab()
|
||||
elif d == ",":
|
||||
d = tr.importing_comma()
|
||||
elif d == " ":
|
||||
d = tr.studying_space()
|
||||
elif d == ";":
|
||||
d = tr.importing_semicolon()
|
||||
elif d == ":":
|
||||
d = tr.importing_colon()
|
||||
else:
|
||||
d = repr(d)
|
||||
txt = tr.importing_fields_separated_by(val=d)
|
||||
self.frm.autoDetect.setText(txt)
|
||||
|
||||
def accept(self) -> None:
|
||||
# self.mw.pm.profile["importMode"] = self.importer.importMode
|
||||
self.mw.pm.profile["allowHTML"] = self.frm.allowHTML.isChecked()
|
||||
|
|
|
@ -106,6 +106,7 @@ pub fn write_backend_proto_rs() {
|
|||
"#[derive(strum::EnumIter)]",
|
||||
)
|
||||
.type_attribute("HelpPageLinkRequest.HelpPage", "#[derive(strum::EnumIter)]")
|
||||
.type_attribute("CsvMetadata.Delimiter", "#[derive(strum::EnumIter)]")
|
||||
.type_attribute(
|
||||
"Preferences.BackupLimits",
|
||||
"#[derive(Copy, serde_derive::Deserialize, serde_derive::Serialize)]",
|
||||
|
|
|
@ -82,18 +82,19 @@ impl ImportExportService for Backend {
|
|||
}
|
||||
|
||||
fn get_csv_metadata(&self, input: pb::CsvMetadataRequest) -> Result<pb::CsvMetadata> {
|
||||
let delimiter = input.delimiter.map(try_into_byte).transpose()?;
|
||||
let delimiter = input.delimiter.is_some().then(|| input.delimiter());
|
||||
self.with_col(|col| col.get_csv_metadata(&input.path, delimiter))
|
||||
}
|
||||
|
||||
fn import_csv(&self, input: pb::ImportCsvRequest) -> Result<pb::ImportResponse> {
|
||||
self.with_col(|col| {
|
||||
let delimiter = input.delimiter();
|
||||
col.import_csv(
|
||||
&input.path,
|
||||
input.deck_id.into(),
|
||||
input.notetype_id.into(),
|
||||
input.columns.into_iter().map(Into::into).collect(),
|
||||
try_into_byte(input.delimiter)?,
|
||||
delimiter,
|
||||
input.is_html,
|
||||
)
|
||||
})
|
||||
|
@ -150,8 +151,3 @@ impl From<CsvColumn> for Column {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn try_into_byte(u: impl TryInto<u8>) -> Result<u8> {
|
||||
u.try_into()
|
||||
.map_err(|_| AnkiError::invalid_input("expected single byte"))
|
||||
}
|
||||
|
|
|
@ -8,7 +8,10 @@ use std::{
|
|||
|
||||
use crate::{
|
||||
import_export::{
|
||||
text::{csv::Column, ForeignData, ForeignNote},
|
||||
text::{
|
||||
csv::{metadata::Delimiter, Column},
|
||||
ForeignData, ForeignNote,
|
||||
},
|
||||
NoteLog,
|
||||
},
|
||||
prelude::*,
|
||||
|
@ -21,7 +24,7 @@ impl Collection {
|
|||
deck_id: DeckId,
|
||||
notetype_id: NotetypeId,
|
||||
columns: Vec<Column>,
|
||||
delimiter: u8,
|
||||
delimiter: Delimiter,
|
||||
is_html: bool,
|
||||
) -> Result<OpOutput<NoteLog>> {
|
||||
let notetype = self.get_notetype(notetype_id)?.ok_or(AnkiError::NotFound)?;
|
||||
|
@ -44,7 +47,7 @@ fn deserialize_csv(
|
|||
mut reader: impl Read + Seek,
|
||||
columns: &[Column],
|
||||
fields_len: usize,
|
||||
delimiter: u8,
|
||||
delimiter: Delimiter,
|
||||
is_html: bool,
|
||||
) -> Result<Vec<ForeignNote>> {
|
||||
remove_tags_line_from_reader(&mut reader)?;
|
||||
|
@ -52,7 +55,7 @@ fn deserialize_csv(
|
|||
.has_headers(false)
|
||||
.flexible(true)
|
||||
.comment(Some(b'#'))
|
||||
.delimiter(delimiter)
|
||||
.delimiter(delimiter.byte())
|
||||
.trim(csv::Trim::All)
|
||||
.from_reader(reader);
|
||||
deserialize_csv_reader(&mut csv_reader, columns, fields_len, is_html)
|
||||
|
@ -150,7 +153,7 @@ mod test {
|
|||
struct CsvOptions {
|
||||
columns: Vec<Column>,
|
||||
fields_len: usize,
|
||||
delimiter: u8,
|
||||
delimiter: Delimiter,
|
||||
is_html: bool,
|
||||
}
|
||||
|
||||
|
@ -159,7 +162,7 @@ mod test {
|
|||
Self {
|
||||
columns: vec![Column::Field(0), Column::Field(1)],
|
||||
fields_len: 2,
|
||||
delimiter: b',',
|
||||
delimiter: Delimiter::Comma,
|
||||
is_html: false,
|
||||
}
|
||||
}
|
||||
|
@ -179,7 +182,7 @@ mod test {
|
|||
self
|
||||
}
|
||||
|
||||
fn delimiter(mut self, delimiter: u8) -> Self {
|
||||
fn delimiter(mut self, delimiter: Delimiter) -> Self {
|
||||
self.delimiter = delimiter;
|
||||
self
|
||||
}
|
||||
|
@ -203,7 +206,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn should_respect_custom_delimiter() {
|
||||
let options = CsvOptions::new().delimiter(b'|');
|
||||
let options = CsvOptions::new().delimiter(Delimiter::Pipe);
|
||||
assert_imported_fields!(options, "fr,ont|ba,ck\n", &[&["fr,ont", "ba,ck"]]);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,10 +6,17 @@ use std::{
|
|||
io::{BufRead, BufReader},
|
||||
};
|
||||
|
||||
use strum::IntoEnumIterator;
|
||||
|
||||
pub use crate::backend_proto::csv_metadata::Delimiter;
|
||||
use crate::{backend_proto::CsvMetadata, error::ImportError, prelude::*};
|
||||
|
||||
impl Collection {
|
||||
pub fn get_csv_metadata(&mut self, path: &str, delimiter: Option<u8>) -> Result<CsvMetadata> {
|
||||
pub fn get_csv_metadata(
|
||||
&mut self,
|
||||
path: &str,
|
||||
delimiter: Option<Delimiter>,
|
||||
) -> Result<CsvMetadata> {
|
||||
let reader = BufReader::new(File::open(path)?);
|
||||
self.get_reader_metadata(reader, delimiter)
|
||||
}
|
||||
|
@ -17,9 +24,13 @@ impl Collection {
|
|||
fn get_reader_metadata(
|
||||
&mut self,
|
||||
reader: impl BufRead,
|
||||
delimiter: Option<u8>,
|
||||
delimiter: Option<Delimiter>,
|
||||
) -> Result<CsvMetadata> {
|
||||
let mut metadata = CsvMetadata::default();
|
||||
let mut metadata = CsvMetadata {
|
||||
// mark as unset
|
||||
delimiter: -1,
|
||||
..Default::default()
|
||||
};
|
||||
let line = self.parse_meta_lines(reader, &mut metadata)?;
|
||||
set_delimiter(delimiter, &mut metadata, &line);
|
||||
set_columns(&mut metadata, &line, &self.tr)?;
|
||||
|
@ -71,7 +82,7 @@ impl Collection {
|
|||
match key.trim().to_ascii_lowercase().as_str() {
|
||||
"delimiter" => {
|
||||
if let Some(delimiter) = delimter_from_value(value) {
|
||||
metadata.delimiter = delimiter as u32;
|
||||
metadata.delimiter = delimiter as i32;
|
||||
}
|
||||
}
|
||||
"tags" => metadata.tags = value.trim().to_owned(),
|
||||
|
@ -97,8 +108,8 @@ impl Collection {
|
|||
}
|
||||
|
||||
fn parse_columns(&mut self, line: &str, metadata: &mut CsvMetadata) -> Result<Vec<String>> {
|
||||
let delimiter = if metadata.delimiter != 0 {
|
||||
metadata.delimiter as u8
|
||||
let delimiter = if metadata.delimiter != -1 {
|
||||
metadata.delimiter()
|
||||
} else {
|
||||
delimiter_from_line(line)
|
||||
};
|
||||
|
@ -122,7 +133,7 @@ impl Collection {
|
|||
|
||||
fn set_columns(metadata: &mut CsvMetadata, line: &str, tr: &I18n) -> Result<()> {
|
||||
if metadata.columns.is_empty() {
|
||||
let columns = map_single_record(line, metadata.delimiter as u8, |r| r.len())?;
|
||||
let columns = map_single_record(line, metadata.delimiter(), |r| r.len())?;
|
||||
metadata.columns = (0..columns)
|
||||
.map(|i| tr.importing_column(i + 1).to_string())
|
||||
.collect();
|
||||
|
@ -130,47 +141,41 @@ fn set_columns(metadata: &mut CsvMetadata, line: &str, tr: &I18n) -> Result<()>
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn set_delimiter(delimiter: Option<u8>, metadata: &mut CsvMetadata, line: &str) {
|
||||
fn set_delimiter(delimiter: Option<Delimiter>, metadata: &mut CsvMetadata, line: &str) {
|
||||
if let Some(delim) = delimiter {
|
||||
metadata.delimiter = delim as u32;
|
||||
} else if metadata.delimiter == 0 {
|
||||
// XXX: should '#delimiter:[NUL]' be supported?
|
||||
metadata.delimiter = delimiter_from_line(line) as u32;
|
||||
metadata.set_delimiter(delim);
|
||||
} else if metadata.delimiter == -1 {
|
||||
metadata.set_delimiter(delimiter_from_line(line));
|
||||
}
|
||||
}
|
||||
|
||||
fn delimter_from_value(value: &str) -> Option<u8> {
|
||||
// FIXME: bytes like '\n', '#' and '"' will likely cause issues
|
||||
Some(if value.as_bytes().len() == 1 {
|
||||
value.as_bytes()[0]
|
||||
} else {
|
||||
match value.trim().to_ascii_lowercase().as_str() {
|
||||
"tab" | "\\t" => b'\t',
|
||||
"semicolon" => b';',
|
||||
"comma" => b',',
|
||||
"space" => b' ',
|
||||
_ => return None,
|
||||
fn delimter_from_value(value: &str) -> Option<Delimiter> {
|
||||
let normed = value.to_ascii_lowercase();
|
||||
for delimiter in Delimiter::iter() {
|
||||
if normed.trim() == delimiter.name() || normed.as_bytes() == [delimiter.byte()] {
|
||||
return Some(delimiter);
|
||||
}
|
||||
})
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn delimiter_from_line(line: &str) -> u8 {
|
||||
fn delimiter_from_line(line: &str) -> Delimiter {
|
||||
// TODO: use smarter heuristic
|
||||
for byte in [b'\t', b';', b','] {
|
||||
if line.contains(byte as char) {
|
||||
return byte;
|
||||
for delimiter in Delimiter::iter() {
|
||||
if line.contains(delimiter.byte() as char) {
|
||||
return delimiter;
|
||||
}
|
||||
}
|
||||
b' '
|
||||
Delimiter::Space
|
||||
}
|
||||
|
||||
fn map_single_record<T>(
|
||||
line: &str,
|
||||
delimiter: u8,
|
||||
delimiter: Delimiter,
|
||||
op: impl FnOnce(&csv::StringRecord) -> T,
|
||||
) -> Result<T> {
|
||||
csv::ReaderBuilder::new()
|
||||
.delimiter(delimiter)
|
||||
.delimiter(delimiter.byte())
|
||||
.from_reader(line.as_bytes())
|
||||
.headers()
|
||||
.map_err(|_| AnkiError::ImportError(ImportError::Corrupt))
|
||||
|
@ -182,6 +187,30 @@ fn strip_line_ending(line: &str) -> &str {
|
|||
.unwrap_or_else(|| line.strip_suffix('\n').unwrap_or(line))
|
||||
}
|
||||
|
||||
impl Delimiter {
|
||||
pub fn byte(self) -> u8 {
|
||||
match self {
|
||||
Delimiter::Comma => b',',
|
||||
Delimiter::Semicolon => b';',
|
||||
Delimiter::Tab => b'\t',
|
||||
Delimiter::Space => b' ',
|
||||
Delimiter::Pipe => b'|',
|
||||
Delimiter::Colon => b':',
|
||||
}
|
||||
}
|
||||
|
||||
fn name(self) -> &'static str {
|
||||
match self {
|
||||
Delimiter::Comma => "comma",
|
||||
Delimiter::Semicolon => "semicolon",
|
||||
Delimiter::Tab => "tab",
|
||||
Delimiter::Space => "space",
|
||||
Delimiter::Pipe => "pipe",
|
||||
Delimiter::Colon => "colon",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
@ -218,27 +247,29 @@ mod test {
|
|||
#[test]
|
||||
fn should_detect_valid_delimiters() {
|
||||
let mut col = open_test_collection();
|
||||
assert_eq!(metadata!(col, "#delimiter: \n").delimiter, ' ' as u32);
|
||||
assert_eq!(metadata!(col, "#delimiter:space\n").delimiter, ' ' as u32);
|
||||
assert_eq!(metadata!(col, "#delimiter:\t\n").delimiter, '\t' as u32);
|
||||
assert_eq!(metadata!(col, "#delimiter:Tab\n").delimiter, '\t' as u32);
|
||||
assert_eq!(metadata!(col, "#delimiter:;\n").delimiter, ';' as u32);
|
||||
assert_eq!(
|
||||
metadata!(col, "#delimiter:SEMICOLON\n").delimiter,
|
||||
';' as u32
|
||||
metadata!(col, "#delimiter:comma\n").delimiter(),
|
||||
Delimiter::Comma
|
||||
);
|
||||
assert_eq!(
|
||||
metadata!(col, "#delimiter:\t\n").delimiter(),
|
||||
Delimiter::Tab
|
||||
);
|
||||
assert_eq!(metadata!(col, "#delimiter:,\n").delimiter, ',' as u32);
|
||||
assert_eq!(metadata!(col, "#delimiter:comma\n").delimiter, ',' as u32);
|
||||
assert_eq!(metadata!(col, "#delimiter:|\n").delimiter, '|' as u32);
|
||||
// fallback
|
||||
assert_eq!(metadata!(col, "#delimiter:foo\n").delimiter, ' ' as u32);
|
||||
assert_eq!(metadata!(col, "#delimiter:♥\n").delimiter, ' ' as u32);
|
||||
assert_eq!(
|
||||
metadata!(col, "#delimiter:foo\n").delimiter(),
|
||||
Delimiter::Space
|
||||
);
|
||||
assert_eq!(
|
||||
metadata!(col, "#delimiter:♥\n").delimiter(),
|
||||
Delimiter::Space
|
||||
);
|
||||
// pick up from first line
|
||||
assert_eq!(metadata!(col, "foo\tbar\n").delimiter, '\t' as u32);
|
||||
assert_eq!(metadata!(col, "foo\tbar\n").delimiter(), Delimiter::Tab);
|
||||
// override with provided
|
||||
assert_eq!(
|
||||
metadata!(col, "#delimiter: \nfoo\tbar\n", Some(b'|')).delimiter,
|
||||
'|' as u32
|
||||
metadata!(col, "#delimiter: \nfoo\tbar\n", Some(Delimiter::Pipe)).delimiter(),
|
||||
Delimiter::Pipe
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -281,7 +312,7 @@ mod test {
|
|||
);
|
||||
// override
|
||||
assert_eq!(
|
||||
metadata!(col, "#delimiter:;\nfoo;bar\n", Some(b'|')).columns,
|
||||
metadata!(col, "#delimiter:;\nfoo;bar\n", Some(Delimiter::Pipe)).columns,
|
||||
["Column 1"]
|
||||
);
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
pub mod csv;
|
||||
pub mod import;
|
||||
mod import;
|
||||
mod json;
|
||||
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
|
|
Loading…
Reference in a new issue