mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 14:02:21 -04:00
Loosen csv metadata parsing (#3862)
* add qsv-sniffer crate * use qsv-sniffer before falling back to old delimiter heuristic * update test metadata macro * revert impl * trim potential suffixed delimiters from non-freeform meta lines * add test
This commit is contained in:
parent
ab8692a91e
commit
d8c83ac075
1 changed files with 49 additions and 6 deletions
|
@ -121,19 +121,34 @@ impl Collection {
|
|||
}
|
||||
|
||||
fn parse_meta_value(&mut self, key: &str, value: &str, metadata: &mut CsvMetadata) {
|
||||
// trim potential delimiters past the first char* if
|
||||
// metadata line was mistakenly exported as a record
|
||||
// *to allow cases like #separator:,
|
||||
// ASSUMPTION: delimiters are not ascii-alphanumeric
|
||||
let trimmed_value = value
|
||||
.char_indices()
|
||||
.nth(1)
|
||||
.and_then(|(i, _)| {
|
||||
value[i..] // SAFETY: char_indices are on char boundaries
|
||||
.find(|c| !char::is_ascii_alphanumeric(&c))
|
||||
.map(|j| value.split_at(i + j).0)
|
||||
})
|
||||
.unwrap_or(value);
|
||||
|
||||
match key.trim().to_ascii_lowercase().as_str() {
|
||||
"separator" => {
|
||||
if let Some(delimiter) = delimiter_from_value(value) {
|
||||
if let Some(delimiter) = delimiter_from_value(trimmed_value) {
|
||||
metadata.delimiter = delimiter as i32;
|
||||
metadata.force_delimiter = true;
|
||||
}
|
||||
}
|
||||
"html" => {
|
||||
if let Ok(is_html) = value.to_lowercase().parse() {
|
||||
if let Ok(is_html) = trimmed_value.to_lowercase().parse() {
|
||||
metadata.is_html = is_html;
|
||||
metadata.force_is_html = true;
|
||||
}
|
||||
}
|
||||
// freeform values cannot be trimmed thus without knowing the exact delimiter
|
||||
"tags" => metadata.global_tags = collect_tags(value),
|
||||
"columns" => {
|
||||
if let Ok(columns) = parse_columns(value, metadata.delimiter()) {
|
||||
|
@ -151,22 +166,22 @@ impl Collection {
|
|||
}
|
||||
}
|
||||
"notetype column" => {
|
||||
if let Ok(n) = value.trim().parse() {
|
||||
if let Ok(n) = trimmed_value.trim().parse() {
|
||||
metadata.notetype = Some(CsvNotetype::NotetypeColumn(n));
|
||||
}
|
||||
}
|
||||
"deck column" => {
|
||||
if let Ok(n) = value.trim().parse() {
|
||||
if let Ok(n) = trimmed_value.trim().parse() {
|
||||
metadata.deck = Some(CsvDeck::DeckColumn(n));
|
||||
}
|
||||
}
|
||||
"tags column" => {
|
||||
if let Ok(n) = value.trim().parse() {
|
||||
if let Ok(n) = trimmed_value.trim().parse() {
|
||||
metadata.tags_column = n;
|
||||
}
|
||||
}
|
||||
"guid column" => {
|
||||
if let Ok(n) = value.trim().parse() {
|
||||
if let Ok(n) = trimmed_value.trim().parse() {
|
||||
metadata.guid_column = n;
|
||||
}
|
||||
}
|
||||
|
@ -891,4 +906,32 @@ pub(in crate::import_export) mod test {
|
|||
maybe_set_tags_column(&mut metadata, &meta_columns);
|
||||
assert_eq!(metadata.tags_column, 4);
|
||||
}
|
||||
|
||||
// Checks that metadata lines followed by stray delimiter characters (as
// happens when a metadata line was exported as a CSV record) are still read
// for the non-freeform keys exercised here (separator, html, guid column),
// while the freeform `tags` value is left untouched.
#[test]
|
||||
fn should_allow_non_freeform_metadata_lines_to_be_suffixed_by_delimiters() {
|
||||
let mut col = Collection::new();
|
||||
let metadata = metadata!(
|
||||
col,
|
||||
r#"
|
||||
#separator:Pipe,,,,,,,
|
||||
#html:true|||||
|
||||
#tags:foo bar::世界,,,
|
||||
#guid column:8
|
||||
#tags column:123abc
|
||||
"#
|
||||
.trim()
|
||||
);
|
||||
// trailing `,` / `|` runs after the value must not prevent these keys from
// being recognised
assert_eq!(metadata.delimiter(), Delimiter::Pipe);
|
||||
assert!(metadata.is_html);
|
||||
assert_eq!(metadata.guid_column, 8);
|
||||
// tags is freeform: potential delimiters aren't trimmed, so the trailing
// ",,," stays part of the last tag
|
||||
assert_eq!(metadata.global_tags, ["foo", "bar::世界,,,"]);
|
||||
// ascii alphanumerics aren't trimmed away, so "123abc" is passed on whole;
// tags_column is expected to stay 0 (presumably because "123abc" does not
// parse as a column number — confirm against the field type)
|
||||
assert_eq!(metadata.tags_column, 0);
|
||||
|
||||
// the first character of the value is never trimmed, so a separator value
// that is itself followed by other delimiter characters ("\t|,:") still
// resolves to its first character, Tab
assert_eq!(
|
||||
metadata!(col, "#separator:\t|,:\n").delimiter(),
|
||||
Delimiter::Tab
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue