Anki/rslib/src/dbcheck.rs
Damien Elmes dac532953e
Refactor progress handling (#2549)
Previously it was Backend's responsibility to store the last progress,
and when calling routines in Collection, one had to construct and pass
in a Fn, which wasn't the most ergonomic. This PR adds the last progress
state to the collection, so that the routines no longer need a separate
progress arg, and makes some other tweaks to improve ergonomics.

ThrottlingProgressHandler has been tweaked so that it now stores the
current state, so that callers don't need to store it separately. When
a long-running routine starts, it calls col.new_progress_handler(),
which automatically initializes the data to defaults, and updates the
shared UI state, so we no longer need to manually update the state at
the start of an operation.

The backend shares the Arc<Mutex<>> with the collection, so it can get
at the current state, and so we can update the state when importing a
backup.

Other tweaks:

- The current Incrementor was awkward to use in the media check, which
uses a single incrementing value across multiple method calls, so I've
added a simpler alternative for such cases. The old incrementor method
has been kept, but implemented directly on ThrottlingProgressHandler.
- The full sync code was passing the progress handler in a complicated
way that may once have been required, but no longer is.
- On the Qt side, timers are now stopped before deletion, or they keep
running for a few seconds.
- I left the ChangeTracker using a closure, as it's used for both importing
and syncing.
2023-06-19 13:48:32 +10:00

664 lines
20 KiB
Rust

// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashSet;
use std::sync::Arc;
use anki_i18n::I18n;
use itertools::Itertools;
use tracing::debug;
use crate::collection::Collection;
use crate::config::SchedulerVersion;
use crate::error::AnkiError;
use crate::error::DbError;
use crate::error::DbErrorKind;
use crate::error::Result;
use crate::notetype::all_stock_notetypes;
use crate::notetype::AlreadyGeneratedCardInfo;
use crate::notetype::CardGenContext;
use crate::notetype::Notetype;
use crate::notetype::NotetypeId;
use crate::notetype::NotetypeKind;
use crate::prelude::*;
use crate::progress::ThrottlingProgressHandler;
use crate::timestamp::TimestampMillis;
use crate::timestamp::TimestampSecs;
#[derive(Debug, Default, PartialEq, Eq)]
pub struct CheckDatabaseOutput {
card_properties_invalid: usize,
card_position_too_high: usize,
cards_missing_note: usize,
decks_missing: usize,
revlog_properties_invalid: usize,
templates_missing: usize,
card_ords_duplicated: usize,
field_count_mismatch: usize,
notetypes_recovered: usize,
invalid_utf8: usize,
invalid_ids: usize,
}
#[derive(Debug, Clone, Copy, Default)]
pub enum DatabaseCheckProgress {
#[default]
Integrity,
Optimize,
Cards,
Notes {
current: usize,
total: usize,
},
History,
}
impl CheckDatabaseOutput {
pub fn to_i18n_strings(&self, tr: &I18n) -> Vec<String> {
let mut probs = Vec::new();
if self.notetypes_recovered > 0 {
probs.push(tr.database_check_notetypes_recovered());
}
if self.card_position_too_high > 0 {
probs.push(tr.database_check_new_card_high_due(self.card_position_too_high));
}
if self.card_properties_invalid > 0 {
probs.push(tr.database_check_card_properties(self.card_properties_invalid));
}
if self.cards_missing_note > 0 {
probs.push(tr.database_check_card_missing_note(self.cards_missing_note));
}
if self.decks_missing > 0 {
probs.push(tr.database_check_missing_decks(self.decks_missing));
}
if self.field_count_mismatch > 0 {
probs.push(tr.database_check_field_count(self.field_count_mismatch));
}
if self.card_ords_duplicated > 0 {
probs.push(tr.database_check_duplicate_card_ords(self.card_ords_duplicated));
}
if self.templates_missing > 0 {
probs.push(tr.database_check_missing_templates(self.templates_missing));
}
if self.revlog_properties_invalid > 0 {
probs.push(tr.database_check_revlog_properties(self.revlog_properties_invalid));
}
if self.invalid_utf8 > 0 {
probs.push(tr.database_check_notes_with_invalid_utf8(self.invalid_utf8));
}
if self.invalid_ids > 0 {
probs.push(tr.database_check_fixed_invalid_ids(self.invalid_ids));
}
probs.into_iter().map(Into::into).collect()
}
}
impl Collection {
/// Check the database, returning a list of problems that were fixed.
pub(crate) fn check_database(&mut self) -> Result<CheckDatabaseOutput> {
let mut progress = self.new_progress_handler();
progress.set(DatabaseCheckProgress::Integrity)?;
debug!("quick check");
if self.storage.quick_check_corrupt() {
debug!("quick check failed");
return Err(AnkiError::db_error(
self.tr.database_check_corrupt(),
DbErrorKind::Corrupt,
));
}
progress.set(DatabaseCheckProgress::Optimize)?;
debug!("optimize");
self.storage.optimize()?;
self.transact_no_undo(|col| col.check_database_inner(progress))
}
fn check_database_inner(
&mut self,
mut progress: ThrottlingProgressHandler<DatabaseCheckProgress>,
) -> Result<CheckDatabaseOutput> {
let mut out = CheckDatabaseOutput::default();
// cards first, as we need to be able to read them to process notes
progress.set(DatabaseCheckProgress::Cards)?;
debug!("check cards");
self.check_card_properties(&mut out)?;
self.check_orphaned_cards(&mut out)?;
debug!("check decks");
self.check_missing_deck_ids(&mut out)?;
self.check_filtered_cards(&mut out)?;
debug!("check notetypes");
self.check_notetypes(&mut out, &mut progress)?;
progress.set(DatabaseCheckProgress::History)?;
debug!("check review log");
self.check_revlog(&mut out)?;
debug!("missing decks");
self.check_missing_deck_names(&mut out)?;
self.update_next_new_position()?;
debug!("invalid ids");
out.invalid_ids = self.maybe_fix_invalid_ids()?;
debug!("db check finished: {:#?}", out);
Ok(out)
}
fn check_card_properties(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let timing = self.timing_today()?;
let (new_cnt, other_cnt) = self.storage.fix_card_properties(
timing.days_elapsed,
TimestampSecs::now(),
self.usn()?,
self.scheduler_version() == SchedulerVersion::V1,
)?;
out.card_position_too_high = new_cnt;
out.card_properties_invalid += other_cnt;
Ok(())
}
fn check_orphaned_cards(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let cnt = self.storage.delete_orphaned_cards()?;
if cnt > 0 {
self.set_schema_modified()?;
out.cards_missing_note = cnt;
}
Ok(())
}
fn check_missing_deck_ids(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let usn = self.usn()?;
for did in self.storage.missing_decks()? {
self.recover_missing_deck(did, usn)?;
out.decks_missing += 1;
}
Ok(())
}
fn check_filtered_cards(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let decks = self.storage.get_decks_map()?;
let mut wrong = 0;
for (cid, did) in self.storage.all_filtered_cards_by_deck()? {
// we expect calling code to ensure all decks already exist
if let Some(deck) = decks.get(&did) {
if !deck.is_filtered() {
let mut card = self.storage.get_card(cid)?.unwrap();
card.original_deck_id.0 = 0;
card.original_due = 0;
self.storage.update_card(&card)?;
wrong += 1;
}
}
}
if wrong > 0 {
self.set_schema_modified()?;
out.card_properties_invalid += wrong;
}
Ok(())
}
fn check_notetypes(
&mut self,
out: &mut CheckDatabaseOutput,
progress: &mut ThrottlingProgressHandler<DatabaseCheckProgress>,
) -> Result<()> {
let nids_by_notetype = self.storage.all_note_ids_by_notetype()?;
let norm = self.get_config_bool(BoolKey::NormalizeNoteText);
let usn = self.usn()?;
let stamp_millis = TimestampMillis::now();
let stamp_secs = TimestampSecs::now();
let expanded_tags = self.storage.expanded_tags()?;
self.storage.clear_all_tags()?;
let total_notes = self.storage.total_notes()?;
progress.set(DatabaseCheckProgress::Notes {
current: 0,
total: total_notes as usize,
})?;
for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) {
debug!("check notetype: {}", ntid);
let mut group = group.peekable();
let nt = match self.get_notetype(ntid)? {
None => {
let first_note = self.storage.get_note(group.peek().unwrap().1)?.unwrap();
out.notetypes_recovered += 1;
self.recover_notetype(stamp_millis, first_note.fields().len(), ntid)?
}
Some(nt) => nt,
};
let mut genctx = None;
for (_, nid) in group {
progress.increment(|p| {
let DatabaseCheckProgress::Notes { current, .. } = p else { unreachable!() };
current
})?;
let mut note = self.get_note_fixing_invalid_utf8(nid, out)?;
let original = note.clone();
let cards = self.storage.existing_cards_for_note(nid)?;
out.card_ords_duplicated += self.remove_duplicate_card_ordinals(&cards)?;
out.templates_missing += self.remove_cards_without_template(&nt, &cards)?;
// fix fields
if note.fields().len() != nt.fields.len() {
note.fix_field_count(&nt);
note.tags.push("db-check".into());
out.field_count_mismatch += 1;
}
if note.mtime > stamp_secs {
note.mtime = stamp_secs;
}
// note type ID may have changed if we created a recovery notetype
note.notetype_id = nt.id;
// write note, updating tags and generating missing cards
let ctx = genctx.get_or_insert_with(|| {
CardGenContext::new(
nt.as_ref(),
self.get_last_deck_added_to_for_notetype(nt.id),
usn,
)
});
self.update_note_inner_generating_cards(
ctx, &mut note, &original, false, norm, true,
)?;
}
}
// the note rebuilding process took care of adding tags back, so we just need
// to ensure to restore the collapse state
self.storage.restore_expanded_tags(&expanded_tags)?;
// if the collection is empty and the user has deleted all note types, ensure at
// least one note type exists
if self.storage.get_all_notetype_names()?.is_empty() {
let mut nt = all_stock_notetypes(&self.tr).remove(0);
self.add_notetype_inner(&mut nt, usn, true)?;
}
if out.card_ords_duplicated > 0
|| out.field_count_mismatch > 0
|| out.templates_missing > 0
|| out.notetypes_recovered > 0
{
self.set_schema_modified()?;
}
Ok(())
}
fn get_note_fixing_invalid_utf8(
&self,
nid: NoteId,
out: &mut CheckDatabaseOutput,
) -> Result<Note> {
match self.storage.get_note(nid) {
Ok(note) => Ok(note.unwrap()),
Err(err) => match err {
AnkiError::DbError {
source:
DbError {
kind: DbErrorKind::Utf8,
..
},
} => {
// fix note then fetch again
self.storage.fix_invalid_utf8_in_note(nid)?;
out.invalid_utf8 += 1;
Ok(self.storage.get_note(nid)?.unwrap())
}
// other errors are unhandled
_ => Err(err),
},
}
}
fn remove_duplicate_card_ordinals(
&mut self,
cards: &[AlreadyGeneratedCardInfo],
) -> Result<usize> {
let mut ords = HashSet::new();
let mut removed = 0;
for card in cards {
if !ords.insert(card.ord) {
self.storage.remove_card(card.id)?;
removed += 1;
}
}
Ok(removed)
}
fn remove_cards_without_template(
&mut self,
nt: &Notetype,
cards: &[AlreadyGeneratedCardInfo],
) -> Result<usize> {
if nt.config.kind() == NotetypeKind::Cloze {
return Ok(0);
}
let mut removed = 0;
for card in cards {
if card.ord as usize >= nt.templates.len() {
self.storage.remove_card(card.id)?;
removed += 1;
}
}
Ok(removed)
}
fn recover_notetype(
&mut self,
stamp: TimestampMillis,
field_count: usize,
previous_id: NotetypeId,
) -> Result<Arc<Notetype>> {
debug!("create recovery notetype");
let extra_cards_required = self
.storage
.highest_card_ordinal_for_notetype(previous_id)?;
let mut basic = all_stock_notetypes(&self.tr).remove(0);
let mut field = 3;
while basic.fields.len() < field_count {
basic.add_field(format!("{}", field));
field += 1;
}
basic.name = format!("db-check-{}-{}", stamp, field_count);
let qfmt = basic.templates[0].config.q_format.clone();
let afmt = basic.templates[0].config.a_format.clone();
for n in 0..extra_cards_required {
basic.add_template(&format!("Card {}", n + 2), &qfmt, &afmt);
}
self.add_notetype(&mut basic, true)?;
Ok(Arc::new(basic))
}
fn check_revlog(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let cnt = self.storage.fix_revlog_properties()?;
if cnt > 0 {
self.set_schema_modified()?;
out.revlog_properties_invalid = cnt;
}
Ok(())
}
fn check_missing_deck_names(&mut self, out: &mut CheckDatabaseOutput) -> Result<()> {
let names = self.storage.get_all_deck_names()?;
out.decks_missing += self.add_missing_deck_names(&names)?;
Ok(())
}
fn update_next_new_position(&mut self) -> Result<()> {
let pos = self.storage.max_new_card_position().unwrap_or(0);
self.set_next_card_position(pos)
}
pub(crate) fn maybe_fix_invalid_ids(&mut self) -> Result<usize> {
let now = TimestampMillis::now();
let tomorrow = now.adding_secs(24 * 60 * 60).0;
let num_invalid_ids = self.storage.invalid_ids(tomorrow)?;
if num_invalid_ids > 0 {
self.storage.fix_invalid_ids(tomorrow, now.0)?;
self.set_schema_modified()?;
}
Ok(num_invalid_ids)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::decks::DeckId;
use crate::search::SortMode;
#[test]
fn cards() -> Result<()> {
let mut col = Collection::new();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
col.add_note(&mut note, DeckId(1))?;
// card properties
col.storage
.db
.execute_batch("update cards set ivl=1.5,due=2000000,odue=1.5")?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
card_properties_invalid: 2,
card_position_too_high: 1,
..Default::default()
}
);
// should be idempotent
assert_eq!(col.check_database()?, Default::default());
// missing deck
col.storage.db.execute_batch("update cards set did=123")?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
decks_missing: 1,
..Default::default()
}
);
assert_eq!(
col.storage
.get_deck(DeckId(123))?
.unwrap()
.name
.as_native_str(),
"recovered123"
);
// missing note
col.storage.remove_note(note.id)?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
cards_missing_note: 1,
..Default::default()
}
);
assert_eq!(
col.storage.db_scalar::<u32>("select count(*) from cards")?,
0
);
Ok(())
}
#[test]
fn revlog() -> Result<()> {
let mut col = Collection::new();
col.storage.db.execute_batch(
"
insert into revlog (id,cid,usn,ease,ivl,lastIvl,factor,time,type)
values (0,0,0,0,1.5,1.5,0,0,0)",
)?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
revlog_properties_invalid: 1,
..Default::default()
}
);
assert!(col
.storage
.db_scalar::<bool>("select ivl = lastIvl = 1 from revlog")?);
Ok(())
}
#[test]
fn note_card_link() -> Result<()> {
let mut col = Collection::new();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
col.add_note(&mut note, DeckId(1))?;
// duplicate ordinals
let cid = col.search_cards("", SortMode::NoOrder)?[0];
let mut card = col.storage.get_card(cid)?.unwrap();
card.id.0 += 1;
col.storage.add_card(&mut card)?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
card_ords_duplicated: 1,
..Default::default()
}
);
assert_eq!(
col.storage.db_scalar::<u32>("select count(*) from cards")?,
1
);
// missing templates
let cid = col.search_cards("", SortMode::NoOrder)?[0];
let mut card = col.storage.get_card(cid)?.unwrap();
card.id.0 += 1;
card.template_idx = 10;
col.storage.add_card(&mut card)?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
templates_missing: 1,
..Default::default()
}
);
assert_eq!(
col.storage.db_scalar::<u32>("select count(*) from cards")?,
1
);
Ok(())
}
#[test]
fn note_fields() -> Result<()> {
let mut col = Collection::new();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
col.add_note(&mut note, DeckId(1))?;
// excess fields get joined into the last one
col.storage
.db
.execute_batch("update notes set flds = 'a\x1fb\x1fc\x1fd'")?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
field_count_mismatch: 1,
..Default::default()
}
);
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(&note.fields()[..], &["a", "b; c; d"]);
// missing fields get filled with blanks
col.storage
.db
.execute_batch("update notes set flds = 'a'")?;
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
field_count_mismatch: 1,
..Default::default()
}
);
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(&note.fields()[..], &["a", ""]);
Ok(())
}
#[test]
fn deck_names() -> Result<()> {
let mut col = Collection::new();
let deck = col.get_or_create_normal_deck("foo::bar::baz")?;
// includes default
assert_eq!(col.storage.get_all_deck_names()?.len(), 4);
col.storage
.db
.prepare("delete from decks where id != ? and id != 1")?
.execute([deck.id])?;
assert_eq!(col.storage.get_all_deck_names()?.len(), 2);
let out = col.check_database()?;
assert_eq!(
out,
CheckDatabaseOutput {
decks_missing: 1, // only counts the immediate parent that was missing
..Default::default()
}
);
assert_eq!(
&col.storage
.get_all_deck_names()?
.iter()
.map(|(_, name)| name)
.collect::<Vec<_>>(),
&["Default", "foo", "foo::bar", "foo::bar::baz"]
);
Ok(())
}
#[test]
fn tags() -> Result<()> {
let mut col = Collection::new();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
note.tags.push("one".into());
note.tags.push("two".into());
col.add_note(&mut note, DeckId(1))?;
col.set_tag_collapsed("one", false)?;
col.check_database()?;
assert!(col.storage.get_tag("one")?.unwrap().expanded);
assert!(!col.storage.get_tag("two")?.unwrap().expanded);
Ok(())
}
}