Escape HTML entities if CSV is not HTML

Also use name 'is_html' consistently.
This commit is contained in:
RumovZ 2022-05-10 09:59:40 +02:00
parent f7ac4497b4
commit 8f1bba6c07
6 changed files with 51 additions and 29 deletions

View file

@ -111,7 +111,7 @@ message ImportCsvRequest {
int64 notetype_id = 3; int64 notetype_id = 3;
repeated CsvColumn columns = 4; repeated CsvColumn columns = 4;
uint32 delimiter = 5; uint32 delimiter = 5;
bool allow_html = 6; bool is_html = 6;
} }
message CsvMetadataRequest { message CsvMetadataRequest {
@ -125,5 +125,5 @@ message CsvMetadata {
repeated string columns = 3; repeated string columns = 3;
int64 deck_id = 4; int64 deck_id = 4;
int64 notetype_id = 5; int64 notetype_id = 5;
optional bool html = 6; optional bool is_html = 6;
} }

View file

@ -416,7 +416,7 @@ class Collection(DeprecatedNamesMixin):
notetype_id: NotetypeId, notetype_id: NotetypeId,
columns: list[CsvColumn], columns: list[CsvColumn],
delimiter: int, delimiter: int,
allow_html: bool, is_html: bool,
) -> ImportLogWithChanges: ) -> ImportLogWithChanges:
return self._backend.import_csv( return self._backend.import_csv(
path=path, path=path,
@ -424,7 +424,7 @@ class Collection(DeprecatedNamesMixin):
notetype_id=notetype_id, notetype_id=notetype_id,
delimiter=delimiter, delimiter=delimiter,
columns=columns, columns=columns,
allow_html=allow_html, is_html=is_html,
) )
def import_json(self, json: str) -> ImportLogWithChanges: def import_json(self, json: str) -> ImportLogWithChanges:

View file

@ -100,7 +100,7 @@ class ImportDialog(QDialog):
self.column_map = ColumnMap(self.columns, self.model) self.column_map = ColumnMap(self.columns, self.model)
self._render_mapping() self._render_mapping()
self._set_delimiter_button_text() self._set_delimiter_button_text()
self.frm.allowHTML.setChecked(self.html) self.frm.allowHTML.setChecked(self.is_html)
self.frm.importMode.setCurrentIndex(self.mw.pm.profile.get("importMode", 1)) self.frm.importMode.setCurrentIndex(self.mw.pm.profile.get("importMode", 1))
self.frm.tagModified.setText(self.tags) self.frm.tagModified.setText(self.tags)
self.frm.tagModified.setCol(self.mw.col) self.frm.tagModified.setCol(self.mw.col)
@ -119,10 +119,10 @@ class ImportDialog(QDialog):
else: else:
self.model = self.mw.col.models.current() self.model = self.mw.col.models.current()
self.notetype_id = self.model["id"] self.notetype_id = self.model["id"]
if self.options.html is None: if self.options.is_html is None:
self.html = self.mw.pm.profile.get("allowHTML", True) self.is_html = self.mw.pm.profile.get("allowHTML", True)
else: else:
self.html = self.options.html self.is_html = self.options.is_html
def _setup_choosers(self) -> None: def _setup_choosers(self) -> None:
import aqt.deckchooser import aqt.deckchooser
@ -214,7 +214,7 @@ class ImportDialog(QDialog):
notetype_id=self.model["id"], notetype_id=self.model["id"],
delimiter=self.delimiter, delimiter=self.delimiter,
columns=self.column_map.csv_columns(), columns=self.column_map.csv_columns(),
allow_html=self.frm.allowHTML.isChecked(), is_html=self.frm.allowHTML.isChecked(),
), ),
).with_backend_progress(import_progress_update).success( ).with_backend_progress(import_progress_update).success(
show_import_log show_import_log

View file

@ -94,7 +94,7 @@ impl ImportExportService for Backend {
input.notetype_id.into(), input.notetype_id.into(),
input.columns.into_iter().map(Into::into).collect(), input.columns.into_iter().map(Into::into).collect(),
try_into_byte(input.delimiter)?, try_into_byte(input.delimiter)?,
//input.allow_html, input.is_html,
) )
}) })
.map(Into::into) .map(Into::into)

View file

@ -22,12 +22,12 @@ impl Collection {
notetype_id: NotetypeId, notetype_id: NotetypeId,
columns: Vec<Column>, columns: Vec<Column>,
delimiter: u8, delimiter: u8,
//allow_html: bool, is_html: bool,
) -> Result<OpOutput<NoteLog>> { ) -> Result<OpOutput<NoteLog>> {
let notetype = self.get_notetype(notetype_id)?.ok_or(AnkiError::NotFound)?; let notetype = self.get_notetype(notetype_id)?.ok_or(AnkiError::NotFound)?;
let fields_len = notetype.fields.len(); let fields_len = notetype.fields.len();
let file = File::open(path)?; let file = File::open(path)?;
let notes = deserialize_csv(file, &columns, fields_len, delimiter)?; let notes = deserialize_csv(file, &columns, fields_len, delimiter, is_html)?;
ForeignData { ForeignData {
// TODO: refactor to allow passing ids directly // TODO: refactor to allow passing ids directly
@ -45,6 +45,7 @@ fn deserialize_csv(
columns: &[Column], columns: &[Column],
fields_len: usize, fields_len: usize,
delimiter: u8, delimiter: u8,
is_html: bool,
) -> Result<Vec<ForeignNote>> { ) -> Result<Vec<ForeignNote>> {
remove_tags_line_from_reader(&mut reader)?; remove_tags_line_from_reader(&mut reader)?;
let mut csv_reader = csv::ReaderBuilder::new() let mut csv_reader = csv::ReaderBuilder::new()
@ -54,7 +55,7 @@ fn deserialize_csv(
.delimiter(delimiter) .delimiter(delimiter)
.trim(csv::Trim::All) .trim(csv::Trim::All)
.from_reader(reader); .from_reader(reader);
deserialize_csv_reader(&mut csv_reader, columns, fields_len) deserialize_csv_reader(&mut csv_reader, columns, fields_len, is_html)
} }
/// If the reader's first line starts with "tags:", which is allowed for historic /// If the reader's first line starts with "tags:", which is allowed for historic
@ -78,35 +79,45 @@ fn deserialize_csv_reader(
reader: &mut csv::Reader<impl Read>, reader: &mut csv::Reader<impl Read>,
columns: &[Column], columns: &[Column],
fields_len: usize, fields_len: usize,
is_html: bool,
) -> Result<Vec<ForeignNote>> { ) -> Result<Vec<ForeignNote>> {
reader reader
.records() .records()
.into_iter() .into_iter()
.map(|res| { .map(|res| {
res.map(|record| ForeignNote::from_record(record, columns, fields_len)) res.map(|record| ForeignNote::from_record(record, columns, fields_len, is_html))
.map_err(Into::into) .map_err(Into::into)
}) })
.collect() .collect()
} }
impl ForeignNote { impl ForeignNote {
fn from_record(record: csv::StringRecord, columns: &[Column], fields_len: usize) -> Self { fn from_record(
record: csv::StringRecord,
columns: &[Column],
fields_len: usize,
is_html: bool,
) -> Self {
let mut note = Self { let mut note = Self {
fields: vec!["".to_string(); fields_len], fields: vec!["".to_string(); fields_len],
..Default::default() ..Default::default()
}; };
for (&column, value) in columns.iter().zip(record.iter()) { for (&column, value) in columns.iter().zip(record.iter()) {
note.add_column_value(column, value); note.add_column_value(column, value, is_html);
} }
note note
} }
fn add_column_value(&mut self, column: Column, value: &str) { fn add_column_value(&mut self, column: Column, value: &str, is_html: bool) {
match column { match column {
Column::Ignore => (), Column::Ignore => (),
Column::Field(idx) => { Column::Field(idx) => {
if let Some(field) = self.fields.get_mut(idx) { if let Some(field) = self.fields.get_mut(idx) {
field.push_str(value) *field = if is_html {
value.to_string()
} else {
htmlescape::encode_minimal(value)
};
} }
} }
Column::Tags => self.tags.extend(value.split(' ').map(ToString::to_string)), Column::Tags => self.tags.extend(value.split(' ').map(ToString::to_string)),
@ -128,15 +139,11 @@ mod test {
&$options.columns, &$options.columns,
$options.fields_len, $options.fields_len,
$options.delimiter, $options.delimiter,
$options.is_html,
) )
.unwrap(); .unwrap();
assert_eq!(notes.len(), $expected.len()); let fields: Vec<_> = notes.into_iter().map(|note| note.fields).collect();
for (note, fields) in notes.iter().zip($expected.iter()) { assert_eq!(fields, $expected);
assert_eq!(note.fields.len(), fields.len());
for (note_field, field) in note.fields.iter().zip(fields.iter()) {
assert_eq!(note_field, field);
}
}
}; };
} }
@ -144,6 +151,7 @@ mod test {
columns: Vec<Column>, columns: Vec<Column>,
fields_len: usize, fields_len: usize,
delimiter: u8, delimiter: u8,
is_html: bool,
} }
impl CsvOptions { impl CsvOptions {
@ -152,6 +160,7 @@ mod test {
columns: vec![Column::Field(0), Column::Field(1)], columns: vec![Column::Field(0), Column::Field(1)],
fields_len: 2, fields_len: 2,
delimiter: b',', delimiter: b',',
is_html: false,
} }
} }
@ -174,6 +183,12 @@ mod test {
self.delimiter = delimiter; self.delimiter = delimiter;
self self
} }
#[allow(clippy::wrong_self_convention)]
fn is_html(mut self, is_html: bool) -> Self {
self.is_html = is_html;
self
}
} }
#[test] #[test]
@ -210,4 +225,11 @@ mod test {
let options = CsvOptions::new(); let options = CsvOptions::new();
assert_imported_fields!(options, "#foo\nfront,back\n#bar\n", &[&["front", "back"]]); assert_imported_fields!(options, "#foo\nfront,back\n#bar\n", &[&["front", "back"]]);
} }
#[test]
fn should_escape_html_entities_if_csv_is_html() {
assert_imported_fields!(CsvOptions::new(), "<hr>\n", &[&["&lt;hr&gt;", ""]]);
let with_html = CsvOptions::new().is_html(true);
assert_imported_fields!(with_html, "<hr>\n", &[&["<hr>", ""]]);
}
} }

View file

@ -90,7 +90,7 @@ impl Collection {
metadata.notetype_id = nt.id.0; metadata.notetype_id = nt.id.0;
} }
} }
"html" => metadata.html = value.to_lowercase().parse::<bool>().ok(), "is_html" => metadata.is_html = value.to_lowercase().parse::<bool>().ok(),
_ => (), _ => (),
} }
@ -245,9 +245,9 @@ mod test {
#[test] #[test]
fn should_detect_valid_html_toggle() { fn should_detect_valid_html_toggle() {
let mut col = open_test_collection(); let mut col = open_test_collection();
assert_eq!(metadata!(col, "#html:true\n").html, Some(true)); assert_eq!(metadata!(col, "#is_html:true\n").is_html, Some(true));
assert_eq!(metadata!(col, "#html:FALSE\n").html, Some(false)); assert_eq!(metadata!(col, "#is_html:FALSE\n").is_html, Some(false));
assert_eq!(metadata!(col, "#html:maybe\n").html, None); assert_eq!(metadata!(col, "#is_html:maybe\n").is_html, None);
} }
#[test] #[test]