From 5a53da23ca86397345b45cf7bce6785f832d1381 Mon Sep 17 00:00:00 2001 From: RumovZ Date: Thu, 16 Feb 2023 08:53:36 +0100 Subject: [PATCH] Deck scoped dupe check (#2372) * Support limiting dupe check to deck * Expose deck limiting dupe check on frontend * Make CSV dupe options configurable with headers * Rename duplicate file headers * Change dupe check limit to enum --- ftl/core/importing.ftl | 2 + proto/anki/import_export.proto | 9 +- qt/aqt/import_export/importing.py | 4 +- rslib/build/protobuf.rs | 4 + rslib/src/config/number.rs | 1 + rslib/src/import_export/text/csv/import.rs | 20 ++-- rslib/src/import_export/text/csv/metadata.rs | 60 ++++++++++-- rslib/src/import_export/text/import.rs | 98 ++++++++++++++++--- rslib/src/import_export/text/mod.rs | 2 + rslib/src/storage/note/mod.rs | 22 ++++- .../note/notes_types_checksums_decks.sql | 9 ++ rslib/src/tests.rs | 34 +++++++ ts/import-csv/DeckDupeCheckSwitch.svelte | 41 ++++++++ ts/import-csv/DupeResolutionSelector.svelte | 4 +- ts/import-csv/ImportCsvPage.svelte | 4 + ts/import-csv/index.ts | 1 + 16 files changed, 275 insertions(+), 40 deletions(-) create mode 100644 rslib/src/storage/note/notes_types_checksums_decks.sql create mode 100644 ts/import-csv/DeckDupeCheckSwitch.svelte diff --git a/ftl/core/importing.ftl b/ftl/core/importing.ftl index c895fad1e..6c0ab352b 100644 --- a/ftl/core/importing.ftl +++ b/ftl/core/importing.ftl @@ -106,6 +106,8 @@ importing-update = Update importing-tag-all-notes = Tag all notes importing-tag-updated-notes = Tag updated notes importing-file = File +importing-match-scope = Match scope +importing-notetype-and-deck = Notetype and deck ## NO NEED TO TRANSLATE. This text is no longer used by Anki, and will be removed in the future. diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto index fbe00086d..c2d475564 100644 --- a/proto/anki/import_export.proto +++ b/proto/anki/import_export.proto @@ -122,8 +122,8 @@ message CsvMetadataRequest { message CsvMetadata { enum DupeResolution { UPDATE = 0; - IGNORE = 1; - ADD = 2; + PRESERVE = 1; + DUPLICATE = 2; // UPDATE_IF_NEWER = 3; } // Order roughly in ascending expected frequency in note text, because the @@ -161,6 +161,10 @@ message CsvMetadata { // One-based. 0 means n/a. uint32 notetype_column = 9; } + enum MatchScope { + NOTETYPE = 0; + NOTETYPE_AND_DECK = 1; + } // One-based. 0 means n/a. uint32 tags_column = 10; bool force_delimiter = 11; @@ -168,6 +172,7 @@ message CsvMetadata { repeated generic.StringList preview = 13; uint32 guid_column = 14; DupeResolution dupe_resolution = 15; + MatchScope match_scope = 16; } message ExportCardCsvRequest { diff --git a/qt/aqt/import_export/importing.py b/qt/aqt/import_export/importing.py index 597e5e73c..7609a86f0 100644 --- a/qt/aqt/import_export/importing.py +++ b/qt/aqt/import_export/importing.py @@ -273,10 +273,10 @@ class LogQueue: def first_field_queue(log: ImportLogWithChanges.Log) -> LogQueue: - if log.dupe_resolution == DupeResolution.ADD: + if log.dupe_resolution == DupeResolution.DUPLICATE: summary_template = tr.importing_added_duplicate_with_first_field action_string = tr.adding_added() - elif log.dupe_resolution == DupeResolution.IGNORE: + elif log.dupe_resolution == DupeResolution.PRESERVE: summary_template = tr.importing_first_field_matched action_string = tr.importing_skipped() else: diff --git a/rslib/build/protobuf.rs b/rslib/build/protobuf.rs index 4f8b3b3ca..18f67880e 100644 --- a/rslib/build/protobuf.rs +++ b/rslib/build/protobuf.rs @@ -118,6 +118,10 @@ pub fn write_backend_proto_rs() { "CsvMetadata.DupeResolution", "#[derive(serde_derive::Deserialize, serde_derive::Serialize)]", ) + .type_attribute( + "CsvMetadata.MatchScope", + "#[derive(serde_derive::Deserialize, serde_derive::Serialize)]", + ) .compile_protos(paths.as_slice(), &[proto_dir]) .unwrap(); } diff --git a/rslib/src/config/number.rs b/rslib/src/config/number.rs index 7769b5449..9a13c39b7 100644 --- a/rslib/src/config/number.rs +++ b/rslib/src/config/number.rs @@ -9,6 +9,7 @@ use crate::prelude::*; #[strum(serialize_all = "camelCase")] pub enum I32ConfigKey { CsvDuplicateResolution, + MatchScope, } impl Collection { diff --git a/rslib/src/import_export/text/csv/import.rs b/rslib/src/import_export/text/csv/import.rs index 5adaf0633..eff82e7d8 100644 --- a/rslib/src/import_export/text/csv/import.rs +++ b/rslib/src/import_export/text/csv/import.rs @@ -28,21 +28,28 @@ impl Collection { progress_fn: impl 'static + FnMut(ImportProgress, bool) -> bool, ) -> Result> { let file = open_file(path)?; - let default_deck = metadata.deck()?.name_or_id(); - let default_notetype = metadata.notetype()?.name_or_id(); let mut ctx = ColumnContext::new(&metadata)?; let notes = ctx.deserialize_csv(file, metadata.delimiter())?; + let mut data = ForeignData::from(metadata); + data.notes = notes; + data.import(self, progress_fn) + } +} +impl From for ForeignData { + fn from(metadata: CsvMetadata) -> Self { ForeignData { dupe_resolution: metadata.dupe_resolution(), - default_deck, - default_notetype, - notes, + match_scope: metadata.match_scope(), + default_deck: metadata.deck().map(|d| d.name_or_id()).unwrap_or_default(), + default_notetype: metadata + .notetype() + .map(|nt| nt.name_or_id()) + .unwrap_or_default(), global_tags: metadata.global_tags, updated_tags: metadata.updated_tags, ..Default::default() } - .import(self, progress_fn) } } @@ -289,6 +296,7 @@ mod test { })), preview: Vec::new(), dupe_resolution: 0, + match_scope: 0, } } } diff --git a/rslib/src/import_export/text/csv/metadata.rs b/rslib/src/import_export/text/csv/metadata.rs index 190b8603d..dece5a6b5 100644 --- a/rslib/src/import_export/text/csv/metadata.rs +++ b/rslib/src/import_export/text/csv/metadata.rs @@ -23,6 +23,7 @@ pub use crate::pb::import_export::csv_metadata::Deck as CsvDeck; pub use crate::pb::import_export::csv_metadata::Delimiter; pub use crate::pb::import_export::csv_metadata::DupeResolution; pub use crate::pb::import_export::csv_metadata::MappedNotetype; +pub use crate::pb::import_export::csv_metadata::MatchScope; pub use crate::pb::import_export::csv_metadata::Notetype as CsvNotetype; pub use crate::pb::import_export::CsvMetadata; use crate::prelude::*; @@ -54,14 +55,7 @@ impl Collection { notetype_id: Option, is_html: Option, ) -> Result { - let dupe_resolution = - DupeResolution::from_i32(self.get_config_i32(I32ConfigKey::CsvDuplicateResolution)) - .map(|r| r as i32) - .unwrap_or_default(); - let mut metadata = CsvMetadata { - dupe_resolution, - ..Default::default() - }; + let mut metadata = CsvMetadata::from_config(self); let meta_len = self.parse_meta_lines(&mut reader, &mut metadata)? as u64; maybe_set_fallback_delimiter(delimiter, &mut metadata, &mut reader, meta_len)?; let records = collect_preview_records(&mut metadata, reader)?; @@ -168,6 +162,16 @@ impl Collection { metadata.guid_column = n; } } + "match scope" => { + if let Some(scope) = MatchScope::from_text(value) { + metadata.match_scope = scope as i32; + } + } + "if matches" => { + if let Some(resolution) = DupeResolution::from_text(value) { + metadata.dupe_resolution = resolution as i32; + } + } _ => (), } } @@ -244,6 +248,46 @@ impl Collection { } } +impl CsvMetadata { + /// Defaults with config values filled in. + fn from_config(col: &Collection) -> Self { + Self { + dupe_resolution: DupeResolution::from_config(col) as i32, + match_scope: MatchScope::from_config(col) as i32, + ..Default::default() + } + } +} + +impl DupeResolution { + fn from_config(col: &Collection) -> Self { + Self::from_i32(col.get_config_i32(I32ConfigKey::CsvDuplicateResolution)).unwrap_or_default() + } + + fn from_text(text: &str) -> Option { + match text { + "update current" => Some(Self::Update), + "keep current" => Some(Self::Preserve), + "keep both" => Some(Self::Duplicate), + _ => None, + } + } +} + +impl MatchScope { + fn from_config(col: &Collection) -> Self { + Self::from_i32(col.get_config_i32(I32ConfigKey::MatchScope)).unwrap_or_default() + } + + fn from_text(text: &str) -> Option { + match text { + "notetype" => Some(Self::Notetype), + "notetype + deck" => Some(Self::NotetypeAndDeck), + _ => None, + } + } +} + fn parse_columns(line: &str, delimiter: Delimiter) -> Result> { map_single_record(line, delimiter, |record| { record.iter().map(ToString::to_string).collect() diff --git a/rslib/src/import_export/text/import.rs b/rslib/src/import_export/text/import.rs index fde83056a..934254602 100644 --- a/rslib/src/import_export/text/import.rs +++ b/rslib/src/import_export/text/import.rs @@ -16,6 +16,7 @@ use crate::import_export::text::ForeignData; use crate::import_export::text::ForeignNote; use crate::import_export::text::ForeignNotetype; use crate::import_export::text::ForeignTemplate; +use crate::import_export::text::MatchScope; use crate::import_export::ImportProgress; use crate::import_export::IncrementableProgress; use crate::import_export::NoteLog; @@ -37,10 +38,7 @@ impl ForeignData { let mut progress = IncrementableProgress::new(progress_fn); progress.call(ImportProgress::File)?; col.transact(Op::Import, |col| { - col.set_config_i32_inner( - I32ConfigKey::CsvDuplicateResolution, - self.dupe_resolution as i32, - )?; + self.update_config(col)?; let mut ctx = Context::new(&self, col)?; ctx.import_foreign_notetypes(self.notetypes)?; ctx.import_foreign_notes( @@ -51,6 +49,15 @@ impl ForeignData { ) }) } + + fn update_config(&self, col: &mut Collection) -> Result<()> { + col.set_config_i32_inner( + I32ConfigKey::CsvDuplicateResolution, + self.dupe_resolution as i32, + )?; + col.set_config_i32_inner(I32ConfigKey::MatchScope, self.match_scope as i32)?; + Ok(()) + } } impl NoteLog { @@ -73,7 +80,7 @@ struct Context<'a> { today: u32, dupe_resolution: DupeResolution, card_gen_ctxs: HashMap<(NotetypeId, DeckId), CardGenContext>>, - existing_checksums: HashMap<(NotetypeId, u32), Vec>, + existing_checksums: ExistingChecksums, existing_guids: HashMap, } @@ -83,6 +90,37 @@ struct DeckIdsByNameOrId { default: Option, } +/// Notes in the collection indexed by notetype, checksum and optionally deck. +/// With deck, a note will be included in as many entries as its cards +/// have different original decks. +#[derive(Debug)] +enum ExistingChecksums { + ByNotetype(HashMap<(NotetypeId, u32), Vec>), + ByNotetypeAndDeck(HashMap<(NotetypeId, u32, DeckId), Vec>), +} + +impl ExistingChecksums { + fn new(col: &mut Collection, match_scope: MatchScope) -> Result { + match match_scope { + MatchScope::Notetype => col + .storage + .all_notes_by_type_and_checksum() + .map(Self::ByNotetype), + MatchScope::NotetypeAndDeck => col + .storage + .all_notes_by_type_checksum_and_deck() + .map(Self::ByNotetypeAndDeck), + } + } + + fn get(&self, notetype: NotetypeId, checksum: u32, deck: DeckId) -> Option<&Vec> { + match self { + Self::ByNotetype(map) => map.get(&(notetype, checksum)), + Self::ByNotetypeAndDeck(map) => map.get(&(notetype, checksum, deck)), + } + } +} + struct NoteContext<'a> { note: ForeignNote, dupes: Vec, @@ -152,7 +190,7 @@ impl<'a> Context<'a> { col.notetype_by_name_or_id(&data.default_notetype)?, ); let deck_ids = DeckIdsByNameOrId::new(col, &data.default_deck)?; - let existing_checksums = col.storage.all_notes_by_type_and_checksum()?; + let existing_checksums = ExistingChecksums::new(col, data.match_scope)?; let existing_guids = col.storage.all_notes_by_guid()?; Ok(Self { @@ -247,7 +285,7 @@ impl<'a> Context<'a> { updated_tags: &'tags [String], ) -> Result> { self.prepare_foreign_note(&mut note)?; - let dupes = self.find_duplicates(¬etype, ¬e)?; + let dupes = self.find_duplicates(¬etype, ¬e, deck_id)?; Ok(NoteContext { note, dupes, @@ -263,11 +301,16 @@ impl<'a> Context<'a> { self.col.canonify_foreign_tags(note, self.usn) } - fn find_duplicates(&self, notetype: &Notetype, note: &ForeignNote) -> Result> { + fn find_duplicates( + &self, + notetype: &Notetype, + note: &ForeignNote, + deck_id: DeckId, + ) -> Result> { if note.guid.is_empty() { if let Some(nids) = note .checksum() - .and_then(|csum| self.existing_checksums.get(&(notetype.id, csum))) + .and_then(|csum| self.existing_checksums.get(notetype.id, csum, deck_id)) { return self.get_first_field_dupes(note, nids); } @@ -297,15 +340,15 @@ impl<'a> Context<'a> { fn import_note(&mut self, ctx: NoteContext, log: &mut NoteLog) -> Result<()> { match self.dupe_resolution { _ if !ctx.is_dupe() => self.add_note(ctx, log)?, - DupeResolution::Add if ctx.is_guid_dupe() => { + DupeResolution::Duplicate if ctx.is_guid_dupe() => { log.duplicate.push(ctx.note.into_log_note()) } - DupeResolution::Add if !ctx.has_first_field() => { + DupeResolution::Duplicate if !ctx.has_first_field() => { log.empty_first_field.push(ctx.note.into_log_note()) } - DupeResolution::Add => self.add_note(ctx, log)?, + DupeResolution::Duplicate => self.add_note(ctx, log)?, DupeResolution::Update => self.update_with_note(ctx, log)?, - DupeResolution::Ignore => log.first_field_match.push(ctx.note.into_log_note()), + DupeResolution::Preserve => log.first_field_match.push(ctx.note.into_log_note()), } Ok(()) } @@ -588,6 +631,8 @@ impl ForeignTemplate { mod test { use super::*; use crate::collection::open_test_collection; + use crate::tests::DeckAdder; + use crate::tests::NoteAdder; impl ForeignData { fn with_defaults() -> Self { @@ -611,7 +656,7 @@ mod test { let mut col = open_test_collection(); let mut data = ForeignData::with_defaults(); data.add_note(&["same", "old"]); - data.dupe_resolution = DupeResolution::Add; + data.dupe_resolution = DupeResolution::Duplicate; data.clone().import(&mut col, |_, _| true).unwrap(); data.import(&mut col, |_, _| true).unwrap(); @@ -623,7 +668,7 @@ mod test { let mut col = open_test_collection(); let mut data = ForeignData::with_defaults(); data.add_note(&["same", "old"]); - data.dupe_resolution = DupeResolution::Ignore; + data.dupe_resolution = DupeResolution::Preserve; data.clone().import(&mut col, |_, _| true).unwrap(); assert_eq!(col.storage.notes_table_len(), 1); @@ -711,4 +756,27 @@ mod test { data.import(&mut col, |_, _| true).unwrap(); assert_eq!(col.storage.get_all_notes()[0].tags, ["bar", "baz"]); } + + #[test] + fn should_only_update_duplicates_in_same_deck_if_limit_is_enabled() { + let mut col = open_test_collection(); + let other_deck_id = DeckAdder::new("other").add(&mut col).id; + NoteAdder::basic(&mut col) + .fields(&["foo", "old"]) + .add(&mut col); + NoteAdder::basic(&mut col) + .fields(&["foo", "old"]) + .deck(other_deck_id) + .add(&mut col); + let mut data = ForeignData::with_defaults(); + data.match_scope = MatchScope::NotetypeAndDeck; + data.add_note(&["foo", "new"]); + + data.import(&mut col, |_, _| true).unwrap(); + let notes = col.storage.get_all_notes(); + // same deck, should be updated + assert_eq!(notes[0].fields()[1], "new"); + // other deck, should be unchanged + assert_eq!(notes[1].fields()[1], "old"); + } } diff --git a/rslib/src/import_export/text/mod.rs b/rslib/src/import_export/text/mod.rs index 4e4a8120c..dc64903ed 100644 --- a/rslib/src/import_export/text/mod.rs +++ b/rslib/src/import_export/text/mod.rs @@ -10,11 +10,13 @@ use serde_derive::Serialize; use super::LogNote; use crate::pb::import_export::csv_metadata::DupeResolution; +use crate::pb::import_export::csv_metadata::MatchScope; #[derive(Debug, Clone, Default, Serialize, Deserialize)] #[serde(default)] pub struct ForeignData { dupe_resolution: DupeResolution, + match_scope: MatchScope, default_deck: NameOrId, default_notetype: NameOrId, notes: Vec, diff --git a/rslib/src/storage/note/mod.rs b/rslib/src/storage/note/mod.rs index e2e23c548..c5b16df9c 100644 --- a/rslib/src/storage/note/mod.rs +++ b/rslib/src/storage/note/mod.rs @@ -7,15 +7,11 @@ use std::collections::HashSet; use rusqlite::params; use rusqlite::Row; -use crate::error::Result; use crate::import_export::package::NoteMeta; -use crate::notes::Note; -use crate::notes::NoteId; use crate::notes::NoteTags; -use crate::notetype::NotetypeId; +use crate::prelude::*; use crate::tags::join_tags; use crate::tags::split_tags; -use crate::timestamp::TimestampMillis; pub(crate) fn split_fields(fields: &str) -> Vec { fields.split('\x1f').map(Into::into).collect() @@ -195,6 +191,22 @@ impl super::SqliteStorage { Ok(map) } + pub(crate) fn all_notes_by_type_checksum_and_deck( + &self, + ) -> Result>> { + let mut map = HashMap::new(); + let mut stmt = self + .db + .prepare(include_str!("notes_types_checksums_decks.sql"))?; + let mut rows = stmt.query([])?; + while let Some(row) = rows.next()? { + map.entry((row.get(1)?, row.get(2)?, row.get(3)?)) + .or_insert_with(Vec::new) + .push(row.get(0)?); + } + Ok(map) + } + /// Return total number of notes. Slow. pub(crate) fn total_notes(&self) -> Result { self.db diff --git a/rslib/src/storage/note/notes_types_checksums_decks.sql b/rslib/src/storage/note/notes_types_checksums_decks.sql new file mode 100644 index 000000000..21514235b --- /dev/null +++ b/rslib/src/storage/note/notes_types_checksums_decks.sql @@ -0,0 +1,9 @@ +SELECT DISTINCT notes.id, + notes.mid, + notes.csum, + CASE + WHEN cards.odid = 0 THEN cards.did + ELSE cards.odid + END AS did +FROM notes + JOIN cards ON notes.id = cards.nid \ No newline at end of file diff --git a/rslib/src/tests.rs b/rslib/src/tests.rs index 3baf66365..0540d589c 100644 --- a/rslib/src/tests.rs +++ b/rslib/src/tests.rs @@ -173,3 +173,37 @@ impl DeckAdder { deck } } + +#[derive(Debug, Clone)] +pub(crate) struct NoteAdder { + note: Note, + deck: DeckId, +} + +impl NoteAdder { + pub(crate) fn new(col: &mut Collection, notetype: &str) -> Self { + Self { + note: col.new_note(notetype), + deck: DeckId(1), + } + } + + pub(crate) fn basic(col: &mut Collection) -> Self { + Self::new(col, "basic") + } + + pub(crate) fn fields(mut self, fields: &[&str]) -> Self { + *self.note.fields_mut() = fields.iter().map(ToString::to_string).collect(); + self + } + + pub(crate) fn deck(mut self, deck: DeckId) -> Self { + self.deck = deck; + self + } + + pub(crate) fn add(mut self, col: &mut Collection) -> Note { + col.add_note(&mut self.note, self.deck).unwrap(); + self.note + } +} diff --git a/ts/import-csv/DeckDupeCheckSwitch.svelte b/ts/import-csv/DeckDupeCheckSwitch.svelte new file mode 100644 index 000000000..5f1da1bd8 --- /dev/null +++ b/ts/import-csv/DeckDupeCheckSwitch.svelte @@ -0,0 +1,41 @@ + + + + + + {tr.importingMatchScope()} + + + + + diff --git a/ts/import-csv/DupeResolutionSelector.svelte b/ts/import-csv/DupeResolutionSelector.svelte index a5d23ee73..c222a8b8a 100644 --- a/ts/import-csv/DupeResolutionSelector.svelte +++ b/ts/import-csv/DupeResolutionSelector.svelte @@ -19,11 +19,11 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html label: tr.importingUpdate(), }, { - value: ImportExport.CsvMetadata.DupeResolution.ADD, + value: ImportExport.CsvMetadata.DupeResolution.DUPLICATE, label: tr.importingDuplicate(), }, { - value: ImportExport.CsvMetadata.DupeResolution.IGNORE, + value: ImportExport.CsvMetadata.DupeResolution.PRESERVE, label: tr.importingPreserve(), }, ]; diff --git a/ts/import-csv/ImportCsvPage.svelte b/ts/import-csv/ImportCsvPage.svelte index b8f321f36..bc6fff47f 100644 --- a/ts/import-csv/ImportCsvPage.svelte +++ b/ts/import-csv/ImportCsvPage.svelte @@ -11,6 +11,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html import Container from "../components/Container.svelte"; import Row from "../components/Row.svelte"; import Spacer from "../components/Spacer.svelte"; + import DeckDupeCheckSwitch from "./DeckDupeCheckSwitch.svelte"; import DeckSelector from "./DeckSelector.svelte"; import DelimiterSelector from "./DelimiterSelector.svelte"; import DupeResolutionSelector from "./DupeResolutionSelector.svelte"; @@ -27,6 +28,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html export let notetypeNameIds: Notetypes.NotetypeNameId[]; export let deckNameIds: Decks.DeckNameId[]; export let dupeResolution: ImportExport.CsvMetadata.DupeResolution; + export let matchScope: ImportExport.CsvMetadata.MatchScope; export let delimiter: ImportExport.CsvMetadata.Delimiter; export let forceDelimiter: boolean; @@ -73,6 +75,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html path, metadata: ImportExport.CsvMetadata.create({ dupeResolution, + matchScope, delimiter, forceDelimiter, isHtml, @@ -119,6 +122,7 @@ License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html {/if} + diff --git a/ts/import-csv/index.ts b/ts/import-csv/index.ts index 49d75ed2e..a723f2ee4 100644 --- a/ts/import-csv/index.ts +++ b/ts/import-csv/index.ts @@ -48,6 +48,7 @@ export async function setupImportCsvPage(path: string): Promise { deckNameIds: decks.entries, notetypeNameIds: notetypes.entries, dupeResolution: metadata.dupeResolution, + matchScope: metadata.matchScope, delimiter: metadata.delimiter, forceDelimiter: metadata.forceDelimiter, isHtml: metadata.isHtml,