mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 22:42:25 -04:00
Support UTF-8-BOM when importing CSV (#2360)
The csv crate already supports it, but the meta line parsing didn't.
This commit is contained in:
parent
b97d1ac074
commit
f3ef242bc5
3 changed files with 27 additions and 1 deletions
|
@ -18,6 +18,7 @@ use crate::import_export::ImportProgress;
|
||||||
use crate::import_export::NoteLog;
|
use crate::import_export::NoteLog;
|
||||||
use crate::io::open_file;
|
use crate::io::open_file;
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
|
use crate::text::strip_utf8_bom;
|
||||||
|
|
||||||
impl Collection {
|
impl Collection {
|
||||||
pub fn import_csv(
|
pub fn import_csv(
|
||||||
|
@ -222,7 +223,7 @@ fn remove_tags_line_from_reader(reader: &mut (impl Read + Seek)) -> Result<()> {
|
||||||
let mut buf_reader = BufReader::new(reader);
|
let mut buf_reader = BufReader::new(reader);
|
||||||
let mut first_line = String::new();
|
let mut first_line = String::new();
|
||||||
buf_reader.read_line(&mut first_line)?;
|
buf_reader.read_line(&mut first_line)?;
|
||||||
let offset = if first_line.starts_with("tags:") {
|
let offset = if strip_utf8_bom(&first_line).starts_with("tags:") {
|
||||||
first_line.as_bytes().len()
|
first_line.as_bytes().len()
|
||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
|
@ -380,4 +381,13 @@ mod test {
|
||||||
assert_field_eq!(notes[1].fields, [Some("foo"), Some("bar")]);
|
assert_field_eq!(notes[1].fields, [Some("foo"), Some("bar")]);
|
||||||
assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze")));
|
assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A leading U+FEFF (byte order mark) must not change how the first line of
/// the input is treated during import.
#[test]
fn should_ignore_bom() {
    let metadata = CsvMetadata::defaults_for_testing();
    // A regular record preceded by a BOM: the BOM must not end up in the first field.
    assert_imported_fields!(metadata, "\u{feff}foo,bar\n", [[Some("foo"), Some("bar")]]);
    // Input consisting only of a BOM-prefixed `#` line or `tags:` line yields
    // no imported notes.
    assert!(import!(metadata, "\u{feff}#foo\n").is_empty());
    assert!(import!(metadata, "\u{feff}#html:true\n").is_empty());
    assert!(import!(metadata, "\u{feff}tags:foo\n").is_empty());
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,6 +28,7 @@ pub use crate::pb::import_export::CsvMetadata;
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use crate::text::html_to_text_line;
|
use crate::text::html_to_text_line;
|
||||||
use crate::text::is_html;
|
use crate::text::is_html;
|
||||||
|
use crate::text::strip_utf8_bom;
|
||||||
|
|
||||||
/// The maximum number of preview rows.
|
/// The maximum number of preview rows.
|
||||||
const PREVIEW_LENGTH: usize = 5;
|
const PREVIEW_LENGTH: usize = 5;
|
||||||
|
@ -96,6 +97,7 @@ impl Collection {
|
||||||
/// True if the line is a meta line, i.e. a comment, or starting with
|
/// True if the line is a meta line, i.e. a comment, or starting with
|
||||||
/// 'tags:'.
|
/// 'tags:'.
|
||||||
fn parse_first_line(&mut self, line: &str, metadata: &mut CsvMetadata) -> bool {
|
fn parse_first_line(&mut self, line: &str, metadata: &mut CsvMetadata) -> bool {
|
||||||
|
let line = strip_utf8_bom(line);
|
||||||
if let Some(tags) = line.strip_prefix("tags:") {
|
if let Some(tags) = line.strip_prefix("tags:") {
|
||||||
metadata.global_tags = collect_tags(tags);
|
metadata.global_tags = collect_tags(tags);
|
||||||
true
|
true
|
||||||
|
@ -739,4 +741,14 @@ mod test {
|
||||||
// html is stripped
|
// html is stripped
|
||||||
assert_eq!(meta.preview[1].vals, ["baz", ""]);
|
assert_eq!(meta.preview[1].vals, ["baz", ""]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Meta lines on the first line of the input must still be recognised when
/// the input starts with U+FEFF (byte order mark).
#[test]
fn should_parse_first_first_line_despite_bom() {
    let mut col = open_test_collection();
    // `#separator:` header directly after the BOM.
    assert_eq!(
        metadata!(col, "\u{feff}#separator:tab\n").delimiter(),
        Delimiter::Tab
    );
    // `tags:` line directly after the BOM.
    assert_eq!(metadata!(col, "\u{feff}tags:foo\n").global_tags, ["foo"]);
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,6 +62,10 @@ impl<'a, B: ?Sized + 'a + ToOwned> CowMapping<'a, B> for Cow<'a, B> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns `s` without its leading byte order mark (U+FEFF), if it has one.
///
/// At most one leading U+FEFF is removed; occurrences elsewhere in the string
/// are left untouched. When `s` does not start with U+FEFF it is returned
/// unchanged. The result always borrows from `s`, so no allocation happens.
pub(crate) fn strip_utf8_bom(s: &str) -> &str {
    s.strip_prefix('\u{feff}').unwrap_or(s)
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum AvTag {
|
pub enum AvTag {
|
||||||
SoundOrVideo(String),
|
SoundOrVideo(String),
|
||||||
|
|
Loading…
Reference in a new issue