Support UTF-8-BOM when importing CSV (#2360)

The csv crate already supports it, but the meta line parsing didn't.
This commit is contained in:
RumovZ 2023-02-05 02:53:21 +01:00 committed by GitHub
parent b97d1ac074
commit f3ef242bc5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 1 deletions

View file

@ -18,6 +18,7 @@ use crate::import_export::ImportProgress;
use crate::import_export::NoteLog; use crate::import_export::NoteLog;
use crate::io::open_file; use crate::io::open_file;
use crate::prelude::*; use crate::prelude::*;
use crate::text::strip_utf8_bom;
impl Collection { impl Collection {
pub fn import_csv( pub fn import_csv(
@ -222,7 +223,7 @@ fn remove_tags_line_from_reader(reader: &mut (impl Read + Seek)) -> Result<()> {
let mut buf_reader = BufReader::new(reader); let mut buf_reader = BufReader::new(reader);
let mut first_line = String::new(); let mut first_line = String::new();
buf_reader.read_line(&mut first_line)?; buf_reader.read_line(&mut first_line)?;
let offset = if first_line.starts_with("tags:") { let offset = if strip_utf8_bom(&first_line).starts_with("tags:") {
first_line.as_bytes().len() first_line.as_bytes().len()
} else { } else {
0 0
@ -380,4 +381,13 @@ mod test {
assert_field_eq!(notes[1].fields, [Some("foo"), Some("bar")]); assert_field_eq!(notes[1].fields, [Some("foo"), Some("bar")]);
assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze"))); assert_eq!(notes[1].notetype, NameOrId::Name(String::from("Cloze")));
} }
/// Input that starts with a U+FEFF byte order mark must import the same way
/// as input without one, both for regular rows and for leading meta lines.
#[test]
fn should_ignore_bom() {
let metadata = CsvMetadata::defaults_for_testing();
// A BOM in front of a data row must not leak into the first field: the row
// is expected to import as exactly `foo` and `bar`.
assert_imported_fields!(metadata, "\u{feff}foo,bar\n", [[Some("foo"), Some("bar")]]);
// BOM-prefixed lines starting with `#` or `tags:` are expected to yield an
// empty import result, i.e. they must still be recognised behind the BOM.
assert!(import!(metadata, "\u{feff}#foo\n").is_empty());
assert!(import!(metadata, "\u{feff}#html:true\n").is_empty());
assert!(import!(metadata, "\u{feff}tags:foo\n").is_empty());
}
} }

View file

@ -28,6 +28,7 @@ pub use crate::pb::import_export::CsvMetadata;
use crate::prelude::*; use crate::prelude::*;
use crate::text::html_to_text_line; use crate::text::html_to_text_line;
use crate::text::is_html; use crate::text::is_html;
use crate::text::strip_utf8_bom;
/// The maximum number of preview rows. /// The maximum number of preview rows.
const PREVIEW_LENGTH: usize = 5; const PREVIEW_LENGTH: usize = 5;
@ -96,6 +97,7 @@ impl Collection {
/// True if the line is a meta line, i.e. a comment, or starting with /// True if the line is a meta line, i.e. a comment, or starting with
/// 'tags:'. /// 'tags:'.
fn parse_first_line(&mut self, line: &str, metadata: &mut CsvMetadata) -> bool { fn parse_first_line(&mut self, line: &str, metadata: &mut CsvMetadata) -> bool {
let line = strip_utf8_bom(line);
if let Some(tags) = line.strip_prefix("tags:") { if let Some(tags) = line.strip_prefix("tags:") {
metadata.global_tags = collect_tags(tags); metadata.global_tags = collect_tags(tags);
true true
@ -739,4 +741,14 @@ mod test {
// html is stripped // html is stripped
assert_eq!(meta.preview[1].vals, ["baz", ""]); assert_eq!(meta.preview[1].vals, ["baz", ""]);
} }
/// A U+FEFF byte order mark in front of the first line must not prevent that
/// line from being parsed as a meta line when metadata is gathered.
// NOTE(review): the doubled "first_first" in the test name looks like a typo.
#[test]
fn should_parse_first_first_line_despite_bom() {
let mut col = open_test_collection();
// A BOM-prefixed `#separator:tab` line must still set the delimiter to tab.
assert_eq!(
metadata!(col, "\u{feff}#separator:tab\n").delimiter(),
Delimiter::Tab
);
// A BOM-prefixed `tags:` line must still populate the global tags.
assert_eq!(metadata!(col, "\u{feff}tags:foo\n").global_tags, ["foo"]);
}
} }

View file

@ -62,6 +62,10 @@ impl<'a, B: ?Sized + 'a + ToOwned> CowMapping<'a, B> for Cow<'a, B> {
} }
} }
/// Returns `s` without its leading byte order mark (U+FEFF), if it has one.
///
/// At most one leading mark is removed; a mark anywhere else in the string is
/// left untouched. The result borrows from `s`, so nothing is allocated.
pub(crate) fn strip_utf8_bom(s: &str) -> &str {
    const BOM: char = '\u{feff}';

    let mut rest = s.chars();
    if rest.next() == Some(BOM) {
        // The iterator now sits just past the mark; hand back what remains.
        rest.as_str()
    } else {
        s
    }
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum AvTag { pub enum AvTag {
SoundOrVideo(String), SoundOrVideo(String),