CSV import/export fixes and features (#1898)

* Fix footer moving upwards

* Fix column detection

Column detection was broken because escaped line breaks were not considered.
Also removes delimiter detection on `#columns:` line. User must use tabs
or set delimiter beforehand.

* Add CSV preview

* Parse `#tags column:`

* Optionally export deck and notetype with CSV

* Avoid clones in CSV export

* Prevent bottom of page appearing under footer (dae)

* Increase padding to 1em (dae)

With 0.5em, when a vertical scrollbar is shown, it sits right next to
the right edge of the content, making it look like there's no right
margin.

* Experimental changes to make table fit+scroll (dae)

- limit individual cells to 15em, and show ellipses when truncated
- limit total table width to body width, so that inner table is shown
with scrollbar
- use class rather than id - ids are bad practice in Svelte components,
as more than one may be displayed on a single page

* Skip importing foreign notes with filtered decks

Such notes were implicitly imported into the default deck before.
Also some refactoring to fetch deck ids and names beforehand.

* Hide spacer below hidden field mapping

* Fix guid being replaced when updating note

* Fix dupe identity check

Canonify tags before checking whether a dupe is identical, but only add the
update tags later, if appropriate.

* Fix deck export for notes with missing card 1

* Fix note lines starting with `#`

The csv crate doesn't support escaping a leading comment char. :(

* Support import/export of guids

* Strip HTML from preview rows

* Fix initially set deck if current is filtered

* Make isHtml toggle reactive

* Fix `html_to_text_line()` stripping sound names

* Tweak export option labels

* Switch to patched rust-csv fork

Fixes writing lines starting with `#`, so revert 5ece10ad05.

* List column options with first column field

* Fix flag for exports with HTML stripped
This commit is contained in:
RumovZ 2022-06-09 02:28:01 +02:00 committed by GitHub
parent d6b8520d03
commit 6da5e5b042
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 798 additions and 227 deletions

26
Cargo.lock generated
View file

@ -54,7 +54,7 @@ dependencies = [
"chrono",
"coarsetime",
"criterion",
"csv",
"csv 1.1.6 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)",
"env_logger",
"flate2",
"fluent",
@ -392,7 +392,7 @@ dependencies = [
"cast",
"clap",
"criterion-plot",
"csv",
"csv 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools",
"lazy_static",
"num-traits",
@ -506,12 +506,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.8",
"ryu",
"serde",
]
[[package]]
name = "csv"
version = "1.1.6"
source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90"
dependencies = [
"bstr",
"csv-core 0.1.10 (git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90)",
"itoa 1.0.1",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
@ -521,6 +533,14 @@ dependencies = [
"memchr",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "git+https://github.com/ankitects/rust-csv.git?rev=1c9d3aab6f79a7d815c69f925a46a4590c115f90#1c9d3aab6f79a7d815c69f925a46a4590c115f90"
dependencies = [
"memchr",
]
[[package]]
name = "derive_more"
version = "0.99.17"

View file

@ -372,23 +372,23 @@ def raze_fetch_remote_crates():
)
maybe(
http_archive,
new_git_repository,
name = "raze__csv__1_1_6",
url = "https://crates.io/api/v1/crates/csv/1.1.6/download",
type = "tar.gz",
sha256 = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1",
strip_prefix = "csv-1.1.6",
remote = "https://github.com/ankitects/rust-csv.git",
shallow_since = "1654675287 +1000",
commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90",
build_file = Label("//cargo/remote:BUILD.csv-1.1.6.bazel"),
init_submodules = True,
)
maybe(
http_archive,
new_git_repository,
name = "raze__csv_core__0_1_10",
url = "https://crates.io/api/v1/crates/csv-core/0.1.10/download",
type = "tar.gz",
sha256 = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90",
strip_prefix = "csv-core-0.1.10",
remote = "https://github.com/ankitects/rust-csv.git",
shallow_since = "1654675287 +1000",
commit = "1c9d3aab6f79a7d815c69f925a46a4590c115f90",
build_file = Label("//cargo/remote:BUILD.csv-core-0.1.10.bazel"),
init_submodules = True,
)
maybe(

View file

@ -126,7 +126,7 @@ rust_library(
deps = [
"@raze__bstr__0_2_17//:bstr",
"@raze__csv_core__0_1_10//:csv_core",
"@raze__itoa__0_4_8//:itoa",
"@raze__itoa__1_0_1//:itoa",
"@raze__ryu__1_0_9//:ryu",
"@raze__serde__1_0_136//:serde",
],

View file

@ -39,7 +39,7 @@ rust_library(
crate_features = [
"default",
],
crate_root = "src/lib.rs",
crate_root = "csv-core/src/lib.rs",
data = [],
edition = "2018",
rustc_flags = [

View file

@ -21,7 +21,10 @@ COMMITS_SHALLOW_SINCE = {
"1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000",
# pct-str
"4adccd8d4a222ab2672350a102f06ae832a0572d": "1605376517 +0100",
# linkcheck
"2f20798ce521cc594d510d4e417e76d5eac04d4b": "1626729019 +0200",
# rust-csv
"1c9d3aab6f79a7d815c69f925a46a4590c115f90": "1654675287 +1000",
}
import glob

View file

@ -38,3 +38,6 @@ exporting-processed-media-files =
[one] Processed { $count } media file...
*[other] Processed { $count } media files...
}
exporting-include-deck = Include deck name
exporting-include-notetype = Include notetype name
exporting-include-guid = Include unique note identifier

View file

@ -9,7 +9,6 @@ importing-appeared-twice-in-file = Appeared twice in file: { $val }
importing-by-default-anki-will-detect-the = By default, Anki will detect the character between fields, such as a tab, comma, and so on. If Anki is detecting the character incorrectly, you can enter it here. Use \t to represent tab.
importing-change = Change
importing-colon = Colon
importing-column = Column { $val }
importing-comma = Comma
importing-empty-first-field = Empty first field: { $val }
importing-field-separator = Field separator
@ -108,3 +107,4 @@ importing-preserve = Preserve
importing-update = Update
importing-tag-all-notes = Tag all notes
importing-tag-updated-notes = Tag updated notes
importing-file = File

View file

@ -119,6 +119,7 @@ message CsvMetadataRequest {
string path = 1;
optional CsvMetadata.Delimiter delimiter = 2;
optional int64 notetype_id = 3;
optional bool is_html = 4;
}
// Column indices are 1-based to make working with them in TS easier, where
@ -163,6 +164,8 @@ message CsvMetadata {
uint32 tags_column = 10;
bool force_delimiter = 11;
bool force_is_html = 12;
repeated generic.StringList preview = 13;
uint32 guid_column = 14;
}
message ExportCardCsvRequest {
@ -175,7 +178,10 @@ message ExportNoteCsvRequest {
string out_path = 1;
bool with_html = 2;
bool with_tags = 3;
ExportLimit limit = 4;
bool with_deck = 4;
bool with_notetype = 5;
bool with_guid = 6;
ExportLimit limit = 7;
}
message ExportLimit {

View file

@ -423,11 +423,17 @@ class Collection(DeprecatedNamesMixin):
limit: ExportLimit,
with_html: bool,
with_tags: bool,
with_deck: bool,
with_notetype: bool,
with_guid: bool,
) -> int:
return self._backend.export_note_csv(
out_path=out_path,
with_html=with_html,
with_tags=with_tags,
with_deck=with_deck,
with_notetype=with_notetype,
with_guid=with_guid,
limit=pb_export_limit(limit),
)

View file

@ -98,6 +98,10 @@ class ExportDialog(QDialog):
self.frm.includeHTML.setVisible(False)
# show deck list?
self.frm.deck.setVisible(not self.isVerbatim)
# used by the new export screen
self.frm.includeDeck.setVisible(False)
self.frm.includeNotetype.setVisible(False)
self.frm.includeGuid.setVisible(False)
def accept(self) -> None:
self.exporter.includeSched = self.frm.includeSched.isChecked()

View file

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>563</width>
<height>245</height>
<width>610</width>
<height>348</height>
</rect>
</property>
<property name="windowTitle">
@ -77,6 +77,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeHTML">
<property name="text">
<string>exporting_include_html_and_media_references</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeTags">
<property name="text">
@ -88,9 +95,29 @@
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeHTML">
<widget class="QCheckBox" name="includeDeck">
<property name="enabled">
<bool>true</bool>
</property>
<property name="text">
<string>exporting_include_html_and_media_references</string>
<string>exporting_include_deck</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeNotetype">
<property name="enabled">
<bool>true</bool>
</property>
<property name="text">
<string>exporting_include_notetype</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="includeGuid">
<property name="text">
<string>exporting_include_guid</string>
</property>
</widget>
</item>

View file

@ -91,6 +91,9 @@ class ExportDialog(QDialog):
self.frm.includeMedia.setVisible(self.exporter.show_include_media)
self.frm.includeTags.setVisible(self.exporter.show_include_tags)
self.frm.includeHTML.setVisible(self.exporter.show_include_html)
self.frm.includeDeck.setVisible(self.exporter.show_include_deck)
self.frm.includeNotetype.setVisible(self.exporter.show_include_notetype)
self.frm.includeGuid.setVisible(self.exporter.show_include_guid)
self.frm.legacy_support.setVisible(self.exporter.show_legacy_support)
self.frm.deck.setVisible(self.exporter.show_deck_list)
@ -135,6 +138,9 @@ class ExportDialog(QDialog):
include_media=self.frm.includeMedia.isChecked(),
include_tags=self.frm.includeTags.isChecked(),
include_html=self.frm.includeHTML.isChecked(),
include_deck=self.frm.includeDeck.isChecked(),
include_notetype=self.frm.includeNotetype.isChecked(),
include_guid=self.frm.includeGuid.isChecked(),
legacy_support=self.frm.legacy_support.isChecked(),
limit=limit,
)
@ -165,6 +171,9 @@ class Options:
include_media: bool
include_tags: bool
include_html: bool
include_deck: bool
include_notetype: bool
include_guid: bool
legacy_support: bool
limit: ExportLimit
@ -177,6 +186,9 @@ class Exporter(ABC):
show_include_tags = False
show_include_html = False
show_legacy_support = False
show_include_deck = False
show_include_notetype = False
show_include_guid = False
@staticmethod
@abstractmethod
@ -255,6 +267,9 @@ class NoteCsvExporter(Exporter):
show_deck_list = True
show_include_html = True
show_include_tags = True
show_include_deck = True
show_include_notetype = True
show_include_guid = True
@staticmethod
def name() -> str:
@ -269,6 +284,9 @@ class NoteCsvExporter(Exporter):
limit=options.limit,
with_html=options.include_html,
with_tags=options.include_tags,
with_deck=options.include_deck,
with_notetype=options.include_notetype,
with_guid=options.include_guid,
),
success=lambda count: tooltip(
tr.exporting_note_exported(count=count), parent=mw

View file

@ -100,4 +100,4 @@ unic-ucd-category = "0.9.0"
id_tree = "1.8.0"
zstd = { version="0.10.0", features=["zstdmt"] }
num_cpus = "1.13.1"
csv = "1.1.6"
csv = { git="https://github.com/ankitects/rust-csv.git", rev="1c9d3aab6f79a7d815c69f925a46a4590c115f90" }

View file

@ -75,7 +75,12 @@ impl ImportExportService for Backend {
fn get_csv_metadata(&self, input: pb::CsvMetadataRequest) -> Result<pb::CsvMetadata> {
let delimiter = input.delimiter.is_some().then(|| input.delimiter());
self.with_col(|col| {
col.get_csv_metadata(&input.path, delimiter, input.notetype_id.map(Into::into))
col.get_csv_metadata(
&input.path,
delimiter,
input.notetype_id.map(Into::into),
input.is_html,
)
})
}
@ -93,16 +98,8 @@ impl ImportExportService for Backend {
}
fn export_note_csv(&self, input: pb::ExportNoteCsvRequest) -> Result<pb::UInt32> {
self.with_col(|col| {
col.export_note_csv(
&input.out_path,
SearchNode::from(input.limit.unwrap_or_default()),
input.with_html,
input.with_tags,
self.export_progress_fn(),
)
})
.map(Into::into)
self.with_col(|col| col.export_note_csv(input, self.export_progress_fn()))
.map(Into::into)
}
fn export_card_csv(&self, input: pb::ExportCardCsvRequest) -> Result<pb::UInt32> {

View file

@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, fs::File, io::Write};
use std::{borrow::Cow, collections::HashMap, fs::File, io::Write, sync::Arc};
use itertools::Itertools;
use lazy_static::lazy_static;
@ -9,10 +9,11 @@ use regex::Regex;
use super::metadata::Delimiter;
use crate::{
backend_proto::ExportNoteCsvRequest,
import_export::{ExportProgress, IncrementableProgress},
notetype::RenderCardOutput,
prelude::*,
search::SortMode,
search::{SearchNode, SortMode},
template::RenderedNode,
text::{html_to_text_line, CowMapping},
};
@ -31,7 +32,7 @@ impl Collection {
progress.call(ExportProgress::File)?;
let mut incrementor = progress.incrementor(ExportProgress::Cards);
let mut writer = file_writer_with_header(path)?;
let mut writer = file_writer_with_header(path, with_html)?;
let mut cards = self.search_cards(search, SortMode::NoOrder)?;
cards.sort_unstable();
for &card in &cards {
@ -45,21 +46,19 @@ impl Collection {
pub fn export_note_csv(
&mut self,
path: &str,
search: impl TryIntoSearch,
with_html: bool,
with_tags: bool,
mut request: ExportNoteCsvRequest,
progress_fn: impl 'static + FnMut(ExportProgress, bool) -> bool,
) -> Result<usize> {
let mut progress = IncrementableProgress::new(progress_fn);
progress.call(ExportProgress::File)?;
let mut incrementor = progress.incrementor(ExportProgress::Notes);
let mut writer = file_writer_with_header(path)?;
self.search_notes_into_table(search)?;
self.search_notes_into_table(request.search_node())?;
let ctx = NoteContext::new(&request, self)?;
let mut writer = note_file_writer_with_header(&request.out_path, &ctx)?;
self.storage.for_each_note_in_search(|note| {
incrementor.increment()?;
writer.write_record(note_record(&note, with_html, with_tags))?;
writer.write_record(ctx.record(&note))?;
Ok(())
})?;
writer.flush()?;
@ -77,17 +76,46 @@ impl Collection {
}
}
fn file_writer_with_header(path: &str) -> Result<csv::Writer<File>> {
fn file_writer_with_header(path: &str, with_html: bool) -> Result<csv::Writer<File>> {
let mut file = File::create(path)?;
write_header(&mut file)?;
write_file_header(&mut file, with_html)?;
Ok(csv::WriterBuilder::new()
.delimiter(DELIMITER.byte())
.flexible(true)
.comment(Some(b'#'))
.from_writer(file))
}
fn write_header(writer: &mut impl Write) -> Result<()> {
write!(writer, "#separator:{}\n#html:true\n", DELIMITER.name())?;
fn write_file_header(writer: &mut impl Write, with_html: bool) -> Result<()> {
writeln!(writer, "#separator:{}", DELIMITER.name())?;
writeln!(writer, "#html:{with_html}")?;
Ok(())
}
fn note_file_writer_with_header(path: &str, ctx: &NoteContext) -> Result<csv::Writer<File>> {
let mut file = File::create(path)?;
write_note_file_header(&mut file, ctx)?;
Ok(csv::WriterBuilder::new()
.delimiter(DELIMITER.byte())
.comment(Some(b'#'))
.from_writer(file))
}
fn write_note_file_header(writer: &mut impl Write, ctx: &NoteContext) -> Result<()> {
write_file_header(writer, ctx.with_html)?;
write_column_header(ctx, writer)
}
fn write_column_header(ctx: &NoteContext, writer: &mut impl Write) -> Result<()> {
for (name, column) in [
("guid", ctx.guid_column()),
("notetype", ctx.notetype_column()),
("deck", ctx.deck_column()),
("tags", ctx.tags_column()),
] {
if let Some(index) = column {
writeln!(writer, "#{name} column:{index}")?;
}
}
Ok(())
}
@ -117,24 +145,12 @@ fn rendered_nodes_to_str(nodes: &[RenderedNode]) -> String {
.join("")
}
fn note_record(note: &Note, with_html: bool, with_tags: bool) -> Vec<String> {
let mut fields: Vec<_> = note
.fields()
.iter()
.map(|f| field_to_record_field(f, with_html))
.collect();
if with_tags {
fields.push(note.tags.join(" "));
}
fields
}
fn field_to_record_field(field: &str, with_html: bool) -> String {
fn field_to_record_field(field: &str, with_html: bool) -> Cow<str> {
let mut text = strip_redundant_sections(field);
if !with_html {
text = text.map_cow(|t| html_to_text_line(t, false));
}
text.into()
text
}
fn strip_redundant_sections(text: &str) -> Cow<str> {
@ -157,3 +173,110 @@ fn strip_answer_side_question(text: &str) -> Cow<str> {
}
RE.replace_all(text.as_ref(), "")
}
struct NoteContext {
with_html: bool,
with_tags: bool,
with_deck: bool,
with_notetype: bool,
with_guid: bool,
notetypes: HashMap<NotetypeId, Arc<Notetype>>,
deck_ids: HashMap<NoteId, DeckId>,
deck_names: HashMap<DeckId, String>,
field_columns: usize,
}
impl NoteContext {
/// Caller must have searched notes into table.
fn new(request: &ExportNoteCsvRequest, col: &mut Collection) -> Result<Self> {
let notetypes = col.get_all_notetypes_of_search_notes()?;
let field_columns = notetypes
.values()
.map(|nt| nt.fields.len())
.max()
.unwrap_or_default();
let deck_ids = col.storage.all_decks_of_search_notes()?;
let deck_names = HashMap::from_iter(col.storage.get_all_deck_names()?.into_iter());
Ok(Self {
with_html: request.with_html,
with_tags: request.with_tags,
with_deck: request.with_deck,
with_notetype: request.with_notetype,
with_guid: request.with_guid,
notetypes,
field_columns,
deck_ids,
deck_names,
})
}
fn guid_column(&self) -> Option<usize> {
self.with_guid.then(|| 1)
}
fn notetype_column(&self) -> Option<usize> {
self.with_notetype
.then(|| 1 + self.guid_column().unwrap_or_default())
}
fn deck_column(&self) -> Option<usize> {
self.with_deck
.then(|| 1 + self.notetype_column().unwrap_or_default())
}
fn tags_column(&self) -> Option<usize> {
self.with_tags
.then(|| 1 + self.deck_column().unwrap_or_default() + self.field_columns)
}
fn record<'c, 's: 'c, 'n: 'c>(&'s self, note: &'n Note) -> impl Iterator<Item = Cow<'c, [u8]>> {
self.with_guid
.then(|| Cow::from(note.guid.as_bytes()))
.into_iter()
.chain(self.notetype_name(note).into_iter())
.chain(self.deck_name(note).into_iter())
.chain(self.note_fields(note))
.chain(self.tags(note).into_iter())
}
fn notetype_name(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_notetype.then(|| {
self.notetypes
.get(&note.notetype_id)
.map_or(Cow::from(vec![]), |nt| Cow::from(nt.name.as_bytes()))
})
}
fn deck_name(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_deck.then(|| {
self.deck_ids
.get(&note.id)
.and_then(|did| self.deck_names.get(did))
.map_or(Cow::from(vec![]), |name| Cow::from(name.as_bytes()))
})
}
fn tags(&self, note: &Note) -> Option<Cow<[u8]>> {
self.with_tags
.then(|| Cow::from(note.tags.join(" ").into_bytes()))
}
fn note_fields<'n>(&self, note: &'n Note) -> impl Iterator<Item = Cow<'n, [u8]>> {
let with_html = self.with_html;
note.fields()
.iter()
.map(move |f| field_to_record_field(f, with_html))
.pad_using(self.field_columns, |_| Cow::from(""))
.map(|cow| match cow {
Cow::Borrowed(s) => Cow::from(s.as_bytes()),
Cow::Owned(s) => Cow::from(s.into_bytes()),
})
}
}
impl ExportNoteCsvRequest {
fn search_node(&mut self) -> SearchNode {
SearchNode::from(self.limit.take().unwrap_or_default())
}
}

View file

@ -113,6 +113,7 @@ type FieldSourceColumns = Vec<Option<usize>>;
// Column indices are 1-based.
struct ColumnContext {
tags_column: Option<usize>,
guid_column: Option<usize>,
deck_column: Option<usize>,
notetype_column: Option<usize>,
/// Source column indices for the fields of a notetype, identified by its
@ -126,6 +127,7 @@ impl ColumnContext {
fn new(metadata: &CsvMetadata) -> Result<Self> {
Ok(Self {
tags_column: (metadata.tags_column > 0).then(|| metadata.tags_column as usize),
guid_column: (metadata.guid_column > 0).then(|| metadata.guid_column as usize),
deck_column: metadata.deck()?.column(),
notetype_column: metadata.notetype()?.column(),
field_source_columns: metadata.field_source_columns()?,
@ -135,16 +137,10 @@ impl ColumnContext {
fn deserialize_csv(
&mut self,
mut reader: impl Read + Seek,
reader: impl Read + Seek,
delimiter: Delimiter,
) -> Result<Vec<ForeignNote>> {
remove_tags_line_from_reader(&mut reader)?;
let mut csv_reader = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.comment(Some(b'#'))
.delimiter(delimiter.byte())
.from_reader(reader);
let mut csv_reader = build_csv_reader(reader, delimiter)?;
self.deserialize_csv_reader(&mut csv_reader)
}
@ -162,34 +158,17 @@ impl ColumnContext {
.collect()
}
fn foreign_note_from_record(&mut self, record: &csv::StringRecord) -> ForeignNote {
let notetype = self.gather_notetype(record).into();
let deck = self.gather_deck(record).into();
let tags = self.gather_tags(record);
let fields = self.gather_note_fields(record);
fn foreign_note_from_record(&self, record: &csv::StringRecord) -> ForeignNote {
ForeignNote {
notetype,
fields,
tags,
deck,
notetype: str_from_record_column(self.notetype_column, record).into(),
fields: self.gather_note_fields(record),
tags: self.gather_tags(record),
deck: str_from_record_column(self.deck_column, record).into(),
guid: str_from_record_column(self.guid_column, record),
..Default::default()
}
}
fn gather_notetype(&self, record: &csv::StringRecord) -> String {
self.notetype_column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
fn gather_deck(&self, record: &csv::StringRecord) -> String {
self.deck_column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
fn gather_tags(&self, record: &csv::StringRecord) -> Vec<String> {
self.tags_column
.and_then(|i| record.get(i - 1))
@ -200,7 +179,7 @@ impl ColumnContext {
.collect()
}
fn gather_note_fields(&mut self, record: &csv::StringRecord) -> Vec<String> {
fn gather_note_fields(&self, record: &csv::StringRecord) -> Vec<String> {
let stringify = self.stringify;
self.field_source_columns
.iter()
@ -210,6 +189,26 @@ impl ColumnContext {
}
}
fn str_from_record_column(column: Option<usize>, record: &csv::StringRecord) -> String {
column
.and_then(|i| record.get(i - 1))
.unwrap_or_default()
.to_string()
}
pub(super) fn build_csv_reader(
mut reader: impl Read + Seek,
delimiter: Delimiter,
) -> Result<csv::Reader<impl Read + Seek>> {
remove_tags_line_from_reader(&mut reader)?;
Ok(csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.comment(Some(b'#'))
.delimiter(delimiter.byte())
.from_reader(reader))
}
fn stringify_fn(is_html: bool) -> fn(&str) -> String {
if is_html {
ToString::to_string
@ -267,6 +266,7 @@ mod test {
is_html: false,
force_is_html: false,
tags_column: 0,
guid_column: 0,
global_tags: Vec::new(),
updated_tags: Vec::new(),
column_labels: vec!["".to_string(); 2],
@ -275,6 +275,7 @@ mod test {
id: 1,
field_columns: vec![1, 2],
})),
preview: Vec::new(),
}
}
}

View file

@ -4,65 +4,81 @@
use std::{
collections::{HashMap, HashSet},
fs::File,
io::{BufRead, BufReader},
io::{BufRead, BufReader, Read, Seek, SeekFrom},
};
use itertools::Itertools;
use strum::IntoEnumIterator;
use super::import::build_csv_reader;
pub use crate::backend_proto::import_export::{
csv_metadata::{Deck as CsvDeck, Delimiter, MappedNotetype, Notetype as CsvNotetype},
CsvMetadata,
};
use crate::{
error::ImportError, import_export::text::NameOrId, notetype::NoteField, prelude::*,
text::is_html,
backend_proto::StringList,
error::ImportError,
import_export::text::NameOrId,
notetype::NoteField,
prelude::*,
text::{html_to_text_line, is_html},
};
/// The maximum number of preview rows.
const PREVIEW_LENGTH: usize = 5;
/// The maximum number of characters per preview field.
const PREVIEW_FIELD_LENGTH: usize = 80;
impl Collection {
pub fn get_csv_metadata(
&mut self,
path: &str,
delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>,
is_html: Option<bool>,
) -> Result<CsvMetadata> {
let reader = BufReader::new(File::open(path)?);
self.get_reader_metadata(reader, delimiter, notetype_id)
let mut reader = File::open(path)?;
self.get_reader_metadata(&mut reader, delimiter, notetype_id, is_html)
}
fn get_reader_metadata(
&mut self,
reader: impl BufRead,
mut reader: impl Read + Seek,
delimiter: Option<Delimiter>,
notetype_id: Option<NotetypeId>,
is_html: Option<bool>,
) -> Result<CsvMetadata> {
let mut metadata = CsvMetadata::default();
let line = self.parse_meta_lines(reader, &mut metadata)?;
maybe_set_fallback_delimiter(delimiter, &mut metadata, &line);
maybe_set_fallback_columns(&mut metadata, &line)?;
maybe_set_fallback_is_html(&mut metadata, &line)?;
let meta_len = self.parse_meta_lines(&mut reader, &mut metadata)? as u64;
maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader, meta_len)?;
let records = collect_preview_records(&mut metadata, reader)?;
maybe_set_fallback_is_html(&mut metadata, &records, is_html)?;
set_preview(&mut metadata, &records)?;
maybe_set_fallback_columns(&mut metadata)?;
self.maybe_set_fallback_notetype(&mut metadata, notetype_id)?;
self.maybe_init_notetype_map(&mut metadata)?;
self.maybe_set_fallback_deck(&mut metadata)?;
Ok(metadata)
}
/// Parses the meta head of the file, and returns the first content line.
fn parse_meta_lines(
&mut self,
mut reader: impl BufRead,
metadata: &mut CsvMetadata,
) -> Result<String> {
/// Parses the meta head of the file and returns the total of meta bytes.
fn parse_meta_lines(&mut self, reader: impl Read, metadata: &mut CsvMetadata) -> Result<usize> {
let mut meta_len = 0;
let mut reader = BufReader::new(reader);
let mut line = String::new();
reader.read_line(&mut line)?;
let mut line_len = reader.read_line(&mut line)?;
if self.parse_first_line(&line, metadata) {
meta_len += line_len;
line.clear();
reader.read_line(&mut line)?;
line_len = reader.read_line(&mut line)?;
while self.parse_line(&line, metadata) {
meta_len += line_len;
line.clear();
reader.read_line(&mut line)?;
line_len = reader.read_line(&mut line)?;
}
}
Ok(line)
Ok(meta_len)
}
/// True if the line is a meta line, i.e. a comment, or starting with 'tags:'.
@ -103,7 +119,7 @@ impl Collection {
}
"tags" => metadata.global_tags = collect_tags(value),
"columns" => {
if let Ok(columns) = self.parse_columns(value, metadata) {
if let Ok(columns) = parse_columns(value, metadata.delimiter()) {
metadata.column_labels = columns;
}
}
@ -127,21 +143,20 @@ impl Collection {
metadata.deck = Some(CsvDeck::DeckColumn(n));
}
}
"tags column" => {
if let Ok(n) = value.trim().parse() {
metadata.tags_column = n;
}
}
"guid column" => {
if let Ok(n) = value.trim().parse() {
metadata.guid_column = n;
}
}
_ => (),
}
}
fn parse_columns(&mut self, line: &str, metadata: &mut CsvMetadata) -> Result<Vec<String>> {
let delimiter = if metadata.force_delimiter {
metadata.delimiter()
} else {
delimiter_from_line(line)
};
map_single_record(line, delimiter, |record| {
record.iter().map(ToString::to_string).collect()
})
}
fn maybe_set_fallback_notetype(
&mut self,
metadata: &mut CsvMetadata,
@ -161,7 +176,15 @@ impl Collection {
metadata
.notetype_id()
.and_then(|ntid| self.default_deck_for_notetype(ntid).transpose())
.unwrap_or_else(|| self.get_current_deck().map(|d| d.id))?
.unwrap_or_else(|| {
self.get_current_deck().map(|deck| {
if deck.is_filtered() {
DeckId(1)
} else {
deck.id
}
})
})?
.0,
));
}
@ -205,6 +228,61 @@ impl Collection {
}
}
fn parse_columns(line: &str, delimiter: Delimiter) -> Result<Vec<String>> {
map_single_record(line, delimiter, |record| {
record.iter().map(ToString::to_string).collect()
})
}
fn collect_preview_records(
metadata: &mut CsvMetadata,
mut reader: impl Read + Seek,
) -> Result<Vec<csv::StringRecord>> {
reader.rewind()?;
let mut csv_reader = build_csv_reader(reader, metadata.delimiter())?;
csv_reader
.records()
.into_iter()
.take(PREVIEW_LENGTH)
.collect::<csv::Result<_>>()
.map_err(Into::into)
}
fn set_preview(metadata: &mut CsvMetadata, records: &[csv::StringRecord]) -> Result<()> {
let mut min_len = 1;
metadata.preview = records
.iter()
.enumerate()
.map(|(idx, record)| {
let row = build_preview_row(min_len, record, metadata.is_html);
if idx == 0 {
min_len = row.vals.len();
}
row
})
.collect();
Ok(())
}
fn build_preview_row(min_len: usize, record: &csv::StringRecord, strip_html: bool) -> StringList {
StringList {
vals: record
.iter()
.pad_using(min_len, |_| "")
.map(|field| {
if strip_html {
html_to_text_line(field, true)
.chars()
.take(PREVIEW_FIELD_LENGTH)
.collect()
} else {
field.chars().take(PREVIEW_FIELD_LENGTH).collect()
}
})
.collect(),
}
}
pub(super) fn collect_tags(txt: &str) -> Vec<String> {
txt.split_whitespace()
.filter(|s| !s.is_empty())
@ -263,20 +341,23 @@ fn ensure_first_field_is_mapped(
Ok(())
}
fn maybe_set_fallback_columns(metadata: &mut CsvMetadata, line: &str) -> Result<()> {
fn maybe_set_fallback_columns(metadata: &mut CsvMetadata) -> Result<()> {
if metadata.column_labels.is_empty() {
let columns = map_single_record(line, metadata.delimiter(), |r| r.len())?;
metadata.column_labels = vec![String::new(); columns];
metadata.column_labels =
vec![String::new(); metadata.preview.get(0).map_or(0, |row| row.vals.len())];
}
Ok(())
}
fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result<()> {
// TODO: should probably check more than one line; can reuse preview lines
// when it's implemented
if !metadata.force_is_html {
metadata.is_html =
map_single_record(line, metadata.delimiter(), |r| r.iter().any(is_html))?;
fn maybe_set_fallback_is_html(
metadata: &mut CsvMetadata,
records: &[csv::StringRecord],
is_html_option: Option<bool>,
) -> Result<()> {
if let Some(is_html) = is_html_option {
metadata.is_html = is_html;
} else if !metadata.force_is_html {
metadata.is_html = records.iter().flat_map(|record| record.iter()).any(is_html);
}
Ok(())
}
@ -284,13 +365,16 @@ fn maybe_set_fallback_is_html(metadata: &mut CsvMetadata, line: &str) -> Result<
fn maybe_set_fallback_delimiter(
delimiter: Option<Delimiter>,
metadata: &mut CsvMetadata,
line: &str,
) {
mut reader: impl Read + Seek,
meta_len: u64,
) -> Result<()> {
if let Some(delim) = delimiter {
metadata.set_delimiter(delim);
} else if !metadata.force_delimiter {
metadata.set_delimiter(delimiter_from_line(line));
reader.seek(SeekFrom::Start(meta_len))?;
metadata.set_delimiter(delimiter_from_reader(reader)?);
}
Ok(())
}
fn delimiter_from_value(value: &str) -> Option<Delimiter> {
@ -303,14 +387,16 @@ fn delimiter_from_value(value: &str) -> Option<Delimiter> {
None
}
fn delimiter_from_line(line: &str) -> Delimiter {
fn delimiter_from_reader(mut reader: impl Read) -> Result<Delimiter> {
let mut buf = [0; 8 * 1024];
let _ = reader.read(&mut buf)?;
// TODO: use smarter heuristic
for delimiter in Delimiter::iter() {
if line.contains(delimiter.byte() as char) {
return delimiter;
if buf.contains(&delimiter.byte()) {
return Ok(delimiter);
}
}
Delimiter::Space
Ok(Delimiter::Space)
}
fn map_single_record<T>(
@ -384,6 +470,9 @@ impl CsvMetadata {
if self.tags_column > 0 {
columns.insert(self.tags_column as usize);
}
if self.guid_column > 0 {
columns.insert(self.guid_column as usize);
}
columns
}
}
@ -398,8 +487,18 @@ impl NameOrId {
}
}
impl From<csv::StringRecord> for StringList {
fn from(record: csv::StringRecord) -> Self {
Self {
vals: record.iter().map(ToString::to_string).collect(),
}
}
}
#[cfg(test)]
mod test {
use std::io::Cursor;
use super::*;
use crate::collection::open_test_collection;
@ -408,7 +507,7 @@ mod test {
metadata!($col, $csv, None)
};
($col:expr,$csv:expr, $delim:expr) => {
$col.get_reader_metadata(BufReader::new($csv.as_bytes()), $delim, None)
$col.get_reader_metadata(Cursor::new($csv.as_bytes()), $delim, None, None)
.unwrap()
};
}
@ -561,7 +660,7 @@ mod test {
// custom names
assert_eq!(
metadata!(col, "#columns:one,two\n").column_labels,
metadata!(col, "#columns:one\ttwo\n").column_labels,
["one", "two"]
);
assert_eq!(
@ -570,6 +669,17 @@ mod test {
);
}
#[test]
fn should_detect_column_number_despite_escaped_line_breaks() {
let mut col = open_test_collection();
assert_eq!(
metadata!(col, "\"foo|\nbar\"\tfoo\tbar\n")
.column_labels
.len(),
3
);
}
impl CsvMetadata {
fn unwrap_notetype_map(&self) -> &[u32] {
match &self.notetype {
@ -589,7 +699,16 @@ mod test {
#[test]
fn should_map_default_notetype_fields_by_given_column_names() {
let mut col = open_test_collection();
let meta = metadata!(col, "#columns:Back,Front\nfoo,bar,baz\n");
let meta = metadata!(col, "#columns:Back\tFront\nfoo,bar,baz\n");
assert_eq!(meta.unwrap_notetype_map(), &[2, 1]);
}
#[test]
fn should_gather_first_lines_into_preview() {
    let mut col = open_test_collection();
    let meta = metadata!(col, "#separator: \nfoo bar\nbaz<br>\n");
    let rows: Vec<_> = meta.preview.iter().map(|row| row.vals.clone()).collect();
    // First data line is split on the space separator.
    assert_eq!(rows[0], ["foo", "bar"]);
    // HTML is stripped from preview values, leaving the second field empty.
    assert_eq!(rows[1], ["baz", ""]);
}
}

View file

@ -1,7 +1,12 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, collections::HashMap, mem, sync::Arc};
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
mem,
sync::Arc,
};
use super::NameOrId;
use crate::{
@ -52,24 +57,75 @@ struct Context<'a> {
col: &'a mut Collection,
/// Contains the optional default notetype with the default key.
notetypes: HashMap<NameOrId, Option<Arc<Notetype>>>,
/// Contains the optional default deck id with the default key.
deck_ids: HashMap<NameOrId, Option<DeckId>>,
deck_ids: DeckIdsByNameOrId,
usn: Usn,
normalize_notes: bool,
today: u32,
dupe_resolution: DupeResolution,
card_gen_ctxs: HashMap<(NotetypeId, DeckId), CardGenContext<Arc<Notetype>>>,
existing_notes: HashMap<(NotetypeId, u32), Vec<NoteId>>,
existing_checksums: HashMap<(NotetypeId, u32), Vec<NoteId>>,
existing_guids: HashMap<String, NoteId>,
}
/// Deck lookup used during import to resolve `NameOrId` references to
/// existing deck ids.
struct DeckIdsByNameOrId {
    /// Ids of all existing normal decks (per `get_all_normal_deck_names`),
    /// used to validate id references.
    ids: HashSet<DeckId>,
    /// Maps existing deck names to their ids.
    names: HashMap<String, DeckId>,
    /// Preresolved deck id for the default key, if that deck exists.
    default: Option<DeckId>,
}
struct NoteContext {
/// Prepared and with canonified tags.
note: Note,
dupes: Vec<Note>,
dupes: Vec<Duplicate>,
cards: Vec<Card>,
notetype: Arc<Notetype>,
deck_id: DeckId,
}
/// An existing note that an incoming foreign note was matched against.
struct Duplicate {
    /// The pre-existing note in the collection.
    note: Note,
    /// True if fields and tags of both notes are equal.
    identical: bool,
    /// True if the notes were matched via the first field (checksum),
    /// false if matched via guid.
    first_field_match: bool,
}
impl Duplicate {
    /// Wraps an existing note matched against `original`, recording whether
    /// the two are field-and-tag identical and how the match was made.
    fn new(dupe: Note, original: &Note, first_field_match: bool) -> Self {
        Self {
            identical: dupe.equal_fields_and_tags(original),
            note: dupe,
            first_field_match,
        }
    }
}
impl DeckIdsByNameOrId {
fn new(col: &mut Collection, default: &NameOrId) -> Result<Self> {
let names: HashMap<String, DeckId> = col
.get_all_normal_deck_names()?
.into_iter()
.map(|(id, name)| (name, id))
.collect();
let ids = names.values().copied().collect();
let mut new = Self {
ids,
names,
default: None,
};
new.default = new.get(default);
Ok(new)
}
fn get(&self, name_or_id: &NameOrId) -> Option<DeckId> {
match name_or_id {
_ if *name_or_id == NameOrId::default() => self.default,
NameOrId::Id(id) => self.ids.get(&DeckId(*id)).copied(),
NameOrId::Name(name) => self.names.get(name).copied(),
}
}
}
impl<'a> Context<'a> {
fn new(data: &ForeignData, col: &'a mut Collection) -> Result<Self> {
let usn = col.usn()?;
@ -80,12 +136,10 @@ impl<'a> Context<'a> {
NameOrId::default(),
col.notetype_by_name_or_id(&data.default_notetype)?,
);
let mut deck_ids = HashMap::new();
deck_ids.insert(
NameOrId::default(),
col.deck_id_by_name_or_id(&data.default_deck)?,
);
let existing_notes = col.storage.all_notes_by_type_and_checksum()?;
let deck_ids = DeckIdsByNameOrId::new(col, &data.default_deck)?;
let existing_checksums = col.storage.all_notes_by_type_and_checksum()?;
let existing_guids = col.storage.all_notes_by_guid()?;
Ok(Self {
col,
usn,
@ -95,7 +149,8 @@ impl<'a> Context<'a> {
notetypes,
deck_ids,
card_gen_ctxs: HashMap::new(),
existing_notes,
existing_checksums,
existing_guids,
})
}
@ -119,16 +174,6 @@ impl<'a> Context<'a> {
})
}
fn deck_id_for_note(&mut self, note: &ForeignNote) -> Result<Option<DeckId>> {
Ok(if let Some(did) = self.deck_ids.get(&note.deck) {
*did
} else {
let did = self.col.deck_id_by_name_or_id(&note.deck)?;
self.deck_ids.insert(note.deck.clone(), did);
did
})
}
fn import_foreign_notes(
&mut self,
notes: Vec<ForeignNote>,
@ -145,7 +190,7 @@ impl<'a> Context<'a> {
continue;
}
if let Some(notetype) = self.notetype_for_note(&foreign)? {
if let Some(deck_id) = self.deck_id_for_note(&foreign)? {
if let Some(deck_id) = self.deck_ids.get(&foreign.deck) {
let ctx = self.build_note_context(foreign, notetype, deck_id, global_tags)?;
self.import_note(ctx, updated_tags, &mut log)?;
} else {
@ -167,6 +212,7 @@ impl<'a> Context<'a> {
) -> Result<NoteContext> {
let (mut note, cards) = foreign.into_native(&notetype, deck_id, self.today, global_tags);
note.prepare_for_update(&notetype, self.normalize_notes)?;
self.col.canonify_note_tags(&mut note, self.usn)?;
let dupes = self.find_duplicates(&notetype, &note)?;
Ok(NoteContext {
@ -178,14 +224,34 @@ impl<'a> Context<'a> {
})
}
fn find_duplicates(&mut self, notetype: &Notetype, note: &Note) -> Result<Vec<Note>> {
fn find_duplicates(&self, notetype: &Notetype, note: &Note) -> Result<Vec<Duplicate>> {
let checksum = note
.checksum
.ok_or_else(|| AnkiError::invalid_input("note unprepared"))?;
self.existing_notes
.get(&(notetype.id, checksum))
.map(|dupe_ids| self.col.get_full_duplicates(note, dupe_ids))
.unwrap_or_else(|| Ok(vec![]))
if let Some(nid) = self.existing_guids.get(&note.guid) {
self.get_guid_dupe(*nid, note).map(|dupe| vec![dupe])
} else if let Some(nids) = self.existing_checksums.get(&(notetype.id, checksum)) {
self.get_first_field_dupes(note, nids)
} else {
Ok(Vec::new())
}
}
/// Fetches the note with the given id as a guid-matched duplicate of
/// `original`. Errors with `NotFound` if the note no longer exists.
fn get_guid_dupe(&self, nid: NoteId, original: &Note) -> Result<Duplicate> {
    // A guid match is not a first-field match, hence `false`.
    let existing = self.col.storage.get_note(nid)?.ok_or(AnkiError::NotFound)?;
    Ok(Duplicate::new(existing, original, false))
}
/// Wraps every full (first-field) duplicate of `note` among the candidate
/// note ids into a `Duplicate` marked as a first-field match.
fn get_first_field_dupes(&self, note: &Note, nids: &[NoteId]) -> Result<Vec<Duplicate>> {
    let full_dupes = self.col.get_full_duplicates(note, nids)?;
    let mut dupes = Vec::with_capacity(full_dupes.len());
    for dupe in full_dupes {
        dupes.push(Duplicate::new(dupe, note, true));
    }
    Ok(dupes)
}
fn import_note(
@ -204,7 +270,6 @@ impl<'a> Context<'a> {
}
fn add_note(&mut self, mut ctx: NoteContext, log_queue: &mut Vec<LogNote>) -> Result<()> {
self.col.canonify_note_tags(&mut ctx.note, self.usn)?;
ctx.note.usn = self.usn;
self.col.add_note_only_undoable(&mut ctx.note)?;
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype)?;
@ -237,28 +302,49 @@ impl<'a> Context<'a> {
}
fn prepare_note_for_update(&mut self, note: &mut Note, updated_tags: &[String]) -> Result<()> {
note.tags.extend(updated_tags.iter().cloned());
self.col.canonify_note_tags(note, self.usn)?;
if !updated_tags.is_empty() {
note.tags.extend(updated_tags.iter().cloned());
self.col.canonify_note_tags(note, self.usn)?;
}
note.set_modified(self.usn);
Ok(())
}
fn maybe_update_dupe(
&mut self,
dupe: Note,
dupe: Duplicate,
ctx: &mut NoteContext,
log: &mut NoteLog,
) -> Result<()> {
ctx.note.id = dupe.id;
if dupe.equal_fields_and_tags(&ctx.note) {
log.duplicate.push(dupe.into_log_note());
if dupe.note.notetype_id != ctx.notetype.id {
log.conflicting.push(dupe.note.into_log_note());
return Ok(());
}
if dupe.identical {
log.duplicate.push(dupe.note.into_log_note());
} else {
self.col.update_note_undoable(&ctx.note, &dupe)?;
log.first_field_match.push(dupe.into_log_note());
self.update_dupe(dupe, ctx, log)?;
}
self.add_cards(&mut ctx.cards, &ctx.note, ctx.deck_id, ctx.notetype.clone())
}
/// Overwrites the existing duplicate with the incoming note's content and
/// logs the update.
fn update_dupe(
    &mut self,
    dupe: Duplicate,
    ctx: &mut NoteContext,
    log: &mut NoteLog,
) -> Result<()> {
    // Keep the existing note's identity: the update must not replace the
    // id or the guid of the note already in the collection.
    ctx.note.id = dupe.note.id;
    ctx.note.guid = dupe.note.guid.clone();
    self.col.update_note_undoable(&ctx.note, &dupe.note)?;
    let queue = if dupe.first_field_match {
        &mut log.first_field_match
    } else {
        &mut log.updated
    };
    queue.push(dupe.note.into_log_note());
    Ok(())
}
fn import_cards(&mut self, cards: &mut [Card], note_id: NoteId) -> Result<()> {
for card in cards {
card.note_id = note_id;
@ -306,7 +392,7 @@ impl Collection {
}
}
fn get_full_duplicates(&mut self, note: &Note, dupe_ids: &[NoteId]) -> Result<Vec<Note>> {
fn get_full_duplicates(&self, note: &Note, dupe_ids: &[NoteId]) -> Result<Vec<Note>> {
let first_field = note.first_field_stripped();
dupe_ids
.iter()
@ -329,6 +415,9 @@ impl ForeignNote {
) -> (Note, Vec<Card>) {
// TODO: Handle new and learning cards
let mut note = Note::new(notetype);
if !self.guid.is_empty() {
note.guid = self.guid;
}
note.tags = self.tags;
note.tags.extend(extra_tags.iter().cloned());
note.fields_mut()
@ -501,4 +590,16 @@ mod test {
data.import(&mut col, |_, _| true).unwrap();
assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]);
}
#[test]
fn should_match_note_with_same_guid() {
    // NOTE(review): despite its name, this body never assigns a guid to the
    // foreign note, so it appears to exercise tag merging (identical to the
    // test above) rather than guid matching — confirm the intended setup
    // (e.g. reusing an existing note's guid) wasn't lost.
    let mut col = open_test_collection();
    let mut data = ForeignData::with_defaults();
    data.add_note(&["foo"]);
    data.notes[0].tags = vec![String::from("bar")];
    data.global_tags = vec![String::from("baz")];
    data.import(&mut col, |_, _| true).unwrap();
    assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]);
}
}

View file

@ -25,6 +25,7 @@ pub struct ForeignData {
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct ForeignNote {
guid: String,
fields: Vec<String>,
tags: Vec<String>,
notetype: NameOrId,

View file

@ -218,6 +218,21 @@ impl Collection {
.collect()
}
pub fn get_all_notetypes_of_search_notes(
&mut self,
) -> Result<HashMap<NotetypeId, Arc<Notetype>>> {
self.storage
.all_notetypes_of_search_notes()?
.into_iter()
.map(|ntid| {
self.get_notetype(ntid)
.transpose()
.unwrap()
.map(|nt| (ntid, nt))
})
.collect()
}
pub fn remove_notetype(&mut self, ntid: NotetypeId) -> Result<OpOutput<()>> {
self.transact(Op::RemoveNotetype, |col| col.remove_notetype_inner(ntid))
}

View file

@ -0,0 +1,9 @@
-- For every note in search_nids, return the deck id of its first existing
-- card (the one with the lowest ordinal).
-- NOTE(review): the ungrouped `did` and the bare `ord` in HAVING rely on
-- SQLite's bare-column behaviour with min()/max() aggregates, which takes
-- them from the row holding MIN(ord) — nonstandard SQL, but defined for
-- SQLite; confirm this file only ever targets SQLite.
SELECT nid,
  did
FROM cards
WHERE nid IN (
    SELECT nid
    FROM search_nids
  )
GROUP BY nid
HAVING ord = MIN(ord)

View file

@ -131,6 +131,14 @@ impl SqliteStorage {
.collect()
}
/// Returns the deck id of the first existing card of every searched note.
/// Reads from the `search_nids` table — presumably populated by a prior
/// note search; confirm callers set it up before calling this.
pub(crate) fn all_decks_of_search_notes(&self) -> Result<HashMap<NoteId, DeckId>> {
    self.db
        .prepare_cached(include_str!("all_decks_of_search_notes.sql"))?
        .query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))?
        .collect()
}
// caller should ensure name unique
pub(crate) fn add_deck(&self, deck: &mut Deck) -> Result<()> {
assert!(deck.id.0 == 0);

View file

@ -338,6 +338,13 @@ impl super::SqliteStorage {
.collect()
}
/// Maps the guid of every note in the collection to its note id.
pub(crate) fn all_notes_by_guid(&mut self) -> Result<HashMap<String, NoteId>> {
    self.db
        // prepare_cached for consistency with the sibling statements in
        // this file, and to avoid recompiling the SQL on repeated calls
        .prepare_cached("SELECT guid, id FROM notes")?
        .query_and_then([], |r| Ok((r.get(0)?, r.get(1)?)))?
        .collect()
}
#[cfg(test)]
pub(crate) fn get_all_notes(&mut self) -> Vec<Note> {
self.db

View file

@ -116,6 +116,15 @@ impl SqliteStorage {
.collect()
}
/// Collects the distinct notetype ids used by the notes currently in the
/// `search_nids` table.
pub(crate) fn all_notetypes_of_search_notes(&self) -> Result<Vec<NotetypeId>> {
    let mut stmt = self.db.prepare_cached(
        "SELECT DISTINCT mid FROM notes WHERE id IN (SELECT nid FROM search_nids)",
    )?;
    let ids: Result<Vec<NotetypeId>> = stmt.query_and_then([], |row| Ok(row.get(0)?))?.collect();
    ids
}
pub fn get_all_notetype_names(&self) -> Result<Vec<(NotetypeId, String)>> {
self.db
.prepare_cached(include_str!("get_notetype_names.sql"))?

View file

@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{borrow::Cow, ptr};
use std::borrow::Cow;
use lazy_static::lazy_static;
use pct_str::{IriReserved, PctStr, PctString};
@ -134,12 +134,8 @@ lazy_static! {
static ref PERSISTENT_HTML_SPACERS: Regex = Regex::new(r#"(?i)<br\s*/?>|<div>|\n"#).unwrap();
static ref UNPRINTABLE_TAGS: Regex = Regex::new(
r"(?xs)
\[sound:[^]]+\]
|
\[\[type:[^]]+\]\]
").unwrap();
static ref TYPE_TAG: Regex = Regex::new(r"\[\[type:[^]]+\]\]").unwrap();
static ref SOUND_TAG: Regex = Regex::new(r"\[sound:([^]]+)\]").unwrap();
/// Files included in CSS with a leading underscore.
static ref UNDERSCORED_CSS_IMPORTS: Regex = Regex::new(
@ -172,19 +168,21 @@ lazy_static! {
"#).unwrap();
}
pub fn is_html(text: &str) -> bool {
HTML.is_match(text)
pub fn is_html(text: impl AsRef<str>) -> bool {
HTML.is_match(text.as_ref())
}
pub fn html_to_text_line(html: &str, preserve_media_filenames: bool) -> Cow<str> {
let (html_stripper, sound_rep): (fn(&str) -> Cow<str>, _) = if preserve_media_filenames {
(strip_html_preserving_media_filenames, "$1")
} else {
(strip_html, "")
};
PERSISTENT_HTML_SPACERS
.replace_all(html, " ")
.map_cow(|s| UNPRINTABLE_TAGS.replace_all(s, ""))
.map_cow(if preserve_media_filenames {
strip_html_preserving_media_filenames
} else {
strip_html
})
.map_cow(|s| TYPE_TAG.replace_all(s, ""))
.map_cow(|s| SOUND_TAG.replace_all(s, sound_rep))
.map_cow(html_stripper)
.trim()
}
@ -330,16 +328,9 @@ pub(crate) fn extract_underscored_references(text: &str) -> Vec<&str> {
}
pub fn strip_html_preserving_media_filenames(html: &str) -> Cow<str> {
let without_fnames = HTML_MEDIA_TAGS.replace_all(html, r" ${1}${2}${3} ");
let without_html = strip_html(&without_fnames);
// no changes?
if let Cow::Borrowed(b) = without_html {
if ptr::eq(b, html) {
return Cow::Borrowed(html);
}
}
// make borrow checker happy
without_html.into_owned().into()
HTML_MEDIA_TAGS
.replace_all(html, r" ${1}${2}${3} ")
.map_cow(strip_html)
}
#[allow(dead_code)]

View file

@ -26,6 +26,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
/>
{/each}
{/await}
<Spacer --height="1.5rem" />
{/if}
<Spacer --height="1.5rem" />
<MapperRow label={tr.editingTags()} {columnOptions} bind:value={tagsColumn} />

View file

@ -8,7 +8,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import Row from "../components/Row.svelte";
import Spacer from "../components/Spacer.svelte";
import * as tr from "../lib/ftl";
import { Decks, ImportExport, importExport, Notetypes } from "../lib/proto";
import {
Decks,
Generic,
ImportExport,
importExport,
Notetypes,
} from "../lib/proto";
import DeckSelector from "./DeckSelector.svelte";
import DelimiterSelector from "./DelimiterSelector.svelte";
import DupeResolutionSelector from "./DupeResolutionSelector.svelte";
@ -17,6 +23,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import HtmlSwitch from "./HtmlSwitch.svelte";
import { getColumnOptions, getCsvMetadata } from "./lib";
import NotetypeSelector from "./NotetypeSelector.svelte";
import Preview from "./Preview.svelte";
import StickyFooter from "./StickyFooter.svelte";
import Tags from "./Tags.svelte";
@ -32,6 +39,8 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
export let updatedTags: string[];
export let columnLabels: string[];
export let tagsColumn: number;
export let guidColumn: number;
export let preview: Generic.StringList[];
// Protobuf oneofs. Exactly one of these pairs is expected to be set.
export let notetypeColumn: number | null;
export let globalNotetype: ImportExport.CsvMetadata.MappedNotetype | null;
@ -41,9 +50,17 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
let dupeResolution: ImportExport.ImportCsvRequest.DupeResolution;
let lastNotetypeId = globalNotetype?.id;
$: columnOptions = getColumnOptions(columnLabels, notetypeColumn, deckColumn);
$: getCsvMetadata(path, delimiter).then((meta) => {
$: columnOptions = getColumnOptions(
columnLabels,
preview[0].vals,
notetypeColumn,
deckColumn,
tagsColumn,
guidColumn,
);
$: getCsvMetadata(path, delimiter, undefined, isHtml).then((meta) => {
columnLabels = meta.columnLabels;
preview = meta.preview;
});
$: if (globalNotetype?.id !== lastNotetypeId) {
lastNotetypeId = globalNotetype?.id;
@ -66,6 +83,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
updatedTags,
columnLabels,
tagsColumn,
guidColumn,
notetypeColumn,
globalNotetype,
deckColumn,
@ -78,6 +96,15 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<Container class="csv-page">
<Row --cols={2}>
<Col --col-size={1} breakpoint="md">
<Container>
<Header heading={tr.importingFile()} />
<Spacer --height="1.5rem" />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Preview {columnOptions} {preview} />
</Container>
</Col>
<Col --col-size={1} breakpoint="md">
<Container>
<Header heading={tr.importingImportOptions()} />
@ -92,8 +119,6 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<DeckSelector {deckNameIds} bind:deckId />
{/if}
<DupeResolutionSelector bind:dupeResolution />
<DelimiterSelector bind:delimiter disabled={forceDelimiter} />
<HtmlSwitch bind:isHtml disabled={forceIsHtml} />
<Tags bind:globalTags bind:updatedTags />
</Container>
</Col>

View file

@ -0,0 +1,65 @@
<!--
Copyright: Ankitects Pty Ltd and contributors
License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
-->
<script lang="ts">
    import type { Generic } from "../lib/proto";
    import type { ColumnOption } from "./lib";

    /* Header labels; index 0 is the "nothing" option and is skipped. */
    export let columnOptions: ColumnOption[];
    /* First rows of the CSV file, one StringList per row. */
    export let preview: Generic.StringList[];
</script>

<div class="outer">
    <table class="preview">
        <!-- Header cells must live in a row: Svelte builds the DOM
             programmatically, so bare <th> children of <table> are invalid
             markup and make the nth-child striping below depend on the
             column count. -->
        <tr>
            {#each columnOptions.slice(1) as { label, shortLabel }}
                <th>
                    {shortLabel || label}
                </th>
            {/each}
        </tr>
        {#each preview as row}
            <tr>
                {#each row.vals as cell}
                    <td>{cell}</td>
                {/each}
            </tr>
        {/each}
    </table>
</div>

<style lang="scss">
    .outer {
        // approximate size based on body max width + margins
        width: min(90vw, 65em);
        overflow: auto;
    }
    .preview {
        border-collapse: collapse;
        white-space: nowrap;

        th,
        td {
            text-overflow: ellipsis;
            overflow: hidden;
            border: 1px solid var(--faint-border);
            padding: 0.25rem 0.5rem;
            max-width: 15em;
        }
        th {
            background: var(--medium-border);
            text-align: center;
        }
        tr {
            &:nth-child(even) {
                background: var(--frame-bg);
            }
        }
        td {
            text-align: start;
        }
    }
</style>

View file

@ -37,11 +37,13 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
<style lang="scss">
.sticky-footer {
position: sticky;
position: fixed;
bottom: 0;
left: 0;
right: 0;
z-index: 10;
margin: 0.75rem;
margin: 0;
padding: 0.25rem;
background: var(--window-bg);

View file

@ -17,16 +17,13 @@
body {
width: min(100vw, 70em);
margin: 0 auto;
height: 100%;
padding: 1em;
// pad out the underside of the footer
padding-bottom: 5em;
}
html {
overflow-x: hidden;
height: 100%;
}
#main {
padding: 0.5em 0.5em 1em 0.5em;
}
// override the default down arrow colour in <select> elements

View file

@ -59,7 +59,9 @@ export async function setupImportCsvPage(path: string): Promise<ImportCsvPage> {
updatedTags: metadata.updatedTags,
columnLabels: metadata.columnLabels,
tagsColumn: metadata.tagsColumn,
guidColumn: metadata.guidColumn,
globalNotetype: metadata.globalNotetype ?? null,
preview: metadata.preview,
// Unset oneof numbers default to 0, which also means n/a here,
// but it's vital to differentiate between unset and 0 when reserializing.
notetypeColumn: metadata.notetypeColumn ? metadata.notetypeColumn : null,

View file

@ -11,14 +11,18 @@ import {
export interface ColumnOption {
label: string;
shortLabel?: string;
value: number;
disabled: boolean;
}
export function getColumnOptions(
columnLabels: string[],
firstRow: string[],
notetypeColumn: number | null,
deckColumn: number | null,
tagsColumn: number,
guidColumn: number,
): ColumnOption[] {
return [{ label: tr.changeNotetypeNothing(), value: 0, disabled: false }].concat(
columnLabels.map((label, index) => {
@ -27,22 +31,28 @@ export function getColumnOptions(
return columnOption(tr.notetypesNotetype(), true, index);
} else if (index === deckColumn) {
return columnOption(tr.decksDeck(), true, index);
} else if (index === guidColumn) {
return columnOption("GUID", true, index);
} else if (index === tagsColumn) {
return columnOption(tr.editingTags(), false, index);
} else if (label === "") {
return columnOption(index, false, index);
return columnOption(firstRow[index - 1], false, index, true);
} else {
return columnOption(`"${label}"`, false, index);
return columnOption(label, false, index);
}
}),
);
}
function columnOption(
label: string | number,
label: string,
disabled: boolean,
index: number,
shortLabel?: boolean,
): ColumnOption {
return {
label: tr.importingColumn({ val: label }),
label: label ? `${index}: ${label}` : index.toString(),
shortLabel: shortLabel ? index.toString() : undefined,
value: index,
disabled,
};
@ -58,12 +68,14 @@ export async function getCsvMetadata(
path: string,
delimiter?: ImportExport.CsvMetadata.Delimiter,
notetypeId?: number,
isHtml?: boolean,
): Promise<ImportExport.CsvMetadata> {
return importExport.getCsvMetadata(
ImportExport.CsvMetadataRequest.create({
path,
delimiter,
notetypeId,
isHtml,
}),
);
}