diff --git a/CONTRIBUTORS b/CONTRIBUTORS index b03108e16..de57518b2 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -242,6 +242,7 @@ Lee Doughty <32392044+leedoughty@users.noreply.github.com> memchr Max Romanowski Aldlss +Daniel Pechersky ******************** diff --git a/Cargo.lock b/Cargo.lock index fe88eb3ab..333cd6110 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -124,6 +124,7 @@ dependencies = [ "once_cell", "pbkdf2", "percent-encoding-iri", + "permutation", "phf 0.11.3", "pin-project", "prettyplease", @@ -2233,9 +2234,8 @@ dependencies = [ [[package]] name = "fsrs" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04954cc67c3c11ee342a2ee1f5222bf76d73f7772df08d37dc9a6cdd73c467eb" +version = "5.2.0" +source = "git+https://github.com/open-spaced-repetition/fsrs-rs.git#1e271981367454468391f1c686af03a0aa7aab3c" dependencies = [ "burn", "itertools 0.14.0", @@ -4561,6 +4561,12 @@ name = "percent-encoding-iri" version = "2.2.0" source = "git+https://github.com/ankitects/rust-url.git?rev=bb930b8d089f4d30d7d19c12e54e66191de47b88#bb930b8d089f4d30d7d19c12e54e66191de47b88" +[[package]] +name = "permutation" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" + [[package]] name = "pest" version = "2.8.1" diff --git a/Cargo.toml b/Cargo.toml index 2e9489cb8..35511e64f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,8 +33,8 @@ git = "https://github.com/ankitects/linkcheck.git" rev = "184b2ca50ed39ca43da13f0b830a463861adb9ca" [workspace.dependencies.fsrs] -version = "5.1.0" -# git = "https://github.com/open-spaced-repetition/fsrs-rs.git" +# version = "5.1.0" +git = "https://github.com/open-spaced-repetition/fsrs-rs.git" # path = "../open-spaced-repetition/fsrs-rs" [workspace.dependencies] @@ -99,6 +99,7 @@ num_cpus = "1.17.0" num_enum = "0.7.3" once_cell = "1.21.3" pbkdf2 = { version = "0.12", features = ["simple"] } +permutation = "0.4.1" phf = { version = "0.11.3", features = ["macros"] } pin-project = "1.1.10" prettyplease = "0.2.34" diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml index 9be9e8d87..efb9b998e 100644 --- a/rslib/Cargo.toml +++ b/rslib/Cargo.toml @@ -76,6 +76,7 @@ num_enum.workspace = true once_cell.workspace = true pbkdf2.workspace = true percent-encoding-iri.workspace = true +permutation.workspace = true phf.workspace = true pin-project.workspace = true prost.workspace = true diff --git a/rslib/src/scheduler/fsrs/memory_state.rs b/rslib/src/scheduler/fsrs/memory_state.rs index 420ead5a3..a65c1c79f 100644 --- a/rslib/src/scheduler/fsrs/memory_state.rs +++ b/rslib/src/scheduler/fsrs/memory_state.rs @@ -9,6 +9,7 @@ use fsrs::MemoryState; use fsrs::FSRS; use fsrs::FSRS5_DEFAULT_DECAY; use fsrs::FSRS6_DEFAULT_DECAY; +use itertools::Either; use itertools::Itertools; use super::params::ignore_revlogs_before_ms_from_config; @@ -58,6 +59,18 @@ pub(crate) struct UpdateMemoryStateEntry { pub ignore_before: TimestampMillis, } +trait ChunkIntoVecs { + fn chunk_into_vecs(&mut self, chunk_size: usize) -> impl Iterator>; +} + +impl ChunkIntoVecs for Vec { + fn chunk_into_vecs(&mut self, chunk_size: usize) -> impl Iterator> { + std::iter::from_fn(move || { + (!self.is_empty()).then(|| self.drain(..chunk_size.min(self.len())).collect()) + }) + } +} + impl Collection { /// For each provided set of params, locate cards with the provided search, /// and update their memory state. @@ -79,124 +92,247 @@ impl Collection { let search = SearchBuilder::all([search.into(), SearchNode::State(StateKind::New).negated()]); let revlog = self.revlog_for_srs(search)?; - let reschedule = req.as_ref().map(|e| e.reschedule).unwrap_or_default(); - let last_revlog_info = if reschedule { - Some(get_last_revlog_info(&revlog)) - } else { - None + + let Some(req) = &req else { + let items = fsrs_items_for_memory_states( + &FSRS::new(Some([].as_slice()))?, + revlog, + timing.next_day_at, + 0.9, + ignore_before, + )?; + + let on_updated_card = self.create_progress_closure(items.len())?; + + // clear FSRS data if FSRS is disabled + self.clear_fsrs_data_for_cards( + items.into_iter().map(|(card_id, _)| card_id), + usn, + on_updated_card, + )?; + continue; }; - let mut rescheduler = self - .get_config_bool(BoolKey::LoadBalancerEnabled) - .then(|| Rescheduler::new(self)) - .transpose()?; - let fsrs = FSRS::new(req.as_ref().map(|w| &w.params[..]).or(Some([].as_slice())))?; - let decay = req.as_ref().map(|w| get_decay_from_params(&w.params)); - let historical_retention = req.as_ref().map(|w| w.historical_retention); + + let fsrs = FSRS::new(Some(&req.params[..]))?; + let last_revlog_info = req.reschedule.then(|| get_last_revlog_info(&revlog)); + let items = fsrs_items_for_memory_states( &fsrs, revlog, timing.next_day_at, - historical_retention.unwrap_or(0.9), + req.historical_retention, ignore_before, )?; - let mut progress = self.new_progress_handler::(); - progress.update(false, |s| s.total_cards = items.len() as u32)?; - for (idx, (card_id, item)) in items.into_iter().enumerate() { - progress.update(true, |state| state.current_cards = idx as u32 + 1)?; + + let mut on_updated_card = self.create_progress_closure(items.len())?; + + let (items, cards_without_items): (Vec<(CardId, FsrsItemForMemoryState)>, Vec) = + items.into_iter().partition_map(|(card_id, item)| { + if let Some(item) = item { + Either::Left((card_id, item)) + } else { + Either::Right(card_id) + } + }); + + let decay = get_decay_from_params(&req.params); + + // Store decay and desired retention in the card so that add-ons, card info, + // stats and browser search/sorts don't need to access the deck config. + // Unlike memory states, scheduler doesn't use decay and dr stored in the card. + let set_decay_and_desired_retention = move |card: &mut Card| { + let deck_id = card.original_or_current_deck_id(); + + let desired_retention = *req + .deck_desired_retention + .get(&deck_id) + .unwrap_or(&req.preset_desired_retention); + + card.desired_retention = Some(desired_retention); + card.decay = Some(decay); + }; + + self.update_memory_state_for_itemless_cards( + cards_without_items, + set_decay_and_desired_retention, + usn, + &mut on_updated_card, + )?; + + let mut rescheduler = self + .get_config_bool(BoolKey::LoadBalancerEnabled) + .then(|| Rescheduler::new(self)) + .transpose()?; + + let reschedule = move |card: &mut Card, + collection: &mut Self, + fsrs: &FSRS| + -> Result<()> { + // we are rescheduling + let Some(last_revlog_info) = &last_revlog_info else { + return Ok(()); + }; + + // we have a last review time for the card + let Some(last_info) = last_revlog_info.get(&card.id) else { + return Ok(()); + }; + let Some(last_review) = &last_info.last_reviewed_at else { + return Ok(()); + }; + // the card isn't in (re)learning + if card.ctype != CardType::Review { + return Ok(()); + }; + + let deck = collection + .get_deck(card.original_or_current_deck_id())? + .or_not_found(card.original_or_current_deck_id())?; + let deckconfig_id = deck.config_id().unwrap(); + // reschedule it + let days_elapsed = timing.next_day_at.elapsed_days_since(*last_review) as i32; + let original_interval = card.interval; + let interval = fsrs.next_interval( + Some( + card.memory_state + .expect("We set it before this function is called") + .stability, + ), + card.desired_retention + .expect("We set it before this function is called"), + 0, + ); + card.interval = rescheduler + .as_mut() + .and_then(|r| { + r.find_interval( + interval, + 1, + req.max_interval, + days_elapsed as u32, + deckconfig_id, + get_fuzz_seed(card, true), + ) + }) + .unwrap_or_else(|| { + with_review_fuzz(card.get_fuzz_factor(true), interval, 1, req.max_interval) + }); + let due = if card.original_due != 0 { + &mut card.original_due + } else { + &mut card.due + }; + let new_due = (timing.days_elapsed as i32) - days_elapsed + card.interval as i32; + if let Some(rescheduler) = &mut rescheduler { + rescheduler.update_due_cnt_per_day(*due, new_due, deckconfig_id); + } + *due = new_due; + // Add a rescheduled revlog entry + collection.log_rescheduled_review(card, original_interval, usn)?; + + Ok(()) + }; + + self.update_memory_state_for_cards_with_items( + items, + &fsrs, + set_decay_and_desired_retention, + reschedule, + usn, + on_updated_card, + )?; + } + Ok(()) + } + + fn create_progress_closure(&self, item_count: usize) -> Result Result<()>> { + let mut progress = self.new_progress_handler::(); + progress.update(false, |s| { + s.total_cards = item_count as u32; + s.current_cards = 1; + })?; + let on_updated_card = move || progress.update(true, |p| p.current_cards += 1); + Ok(on_updated_card) + } + + fn clear_fsrs_data_for_cards( + &mut self, + cards: impl Iterator, + usn: Usn, + mut on_updated_card: impl FnMut() -> Result<()>, + ) -> Result<()> { + for card_id in cards { + let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?; + let original = card.clone(); + card.clear_fsrs_data(); + self.update_card_inner(&mut card, original, usn)?; + on_updated_card()? + } + Ok(()) + } + + fn update_memory_state_for_itemless_cards( + &mut self, + cards: Vec, + mut set_decay_and_desired_retention: impl FnMut(&mut Card), + usn: Usn, + mut on_updated_card: impl FnMut() -> Result<()>, + ) -> Result<()> { + for card_id in cards { + let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?; + let original = card.clone(); + set_decay_and_desired_retention(&mut card); + card.memory_state = None; + self.update_card_inner(&mut card, original, usn)?; + on_updated_card()?; + } + Ok(()) + } + + fn update_memory_state_for_cards_with_items( + &mut self, + items: Vec<(CardId, FsrsItemForMemoryState)>, + fsrs: &FSRS, + mut set_decay_and_desired_retention: impl FnMut(&mut Card), + mut maybe_reschedule_card: impl FnMut(&mut Card, &mut Self, &FSRS) -> Result<()>, + usn: Usn, + mut on_updated_card: impl FnMut() -> Result<()>, + ) -> Result<()> { + const FSRS_BATCH_SIZE: usize = 1000; + + let mut to_update = Vec::new(); + let mut fsrs_items = Vec::new(); + let mut starting_states = Vec::new(); + + for (card_id, item) in items.into_iter() { + to_update.push(card_id); + fsrs_items.push(item.item); + starting_states.push(item.starting_state); + } + + // fsrs.memory_state_batch is O(nm) where n is the number of cards and m is the + // max review count between all items. Therefore we want to pass batches + // to fsrs.memory_state_batch where the review count is relatively even. + let mut p = permutation::sort_unstable_by_key(&fsrs_items, |item| item.reviews.len()); + p.apply_slice_in_place(&mut to_update); + p.apply_slice_in_place(&mut fsrs_items); + p.apply_slice_in_place(&mut starting_states); + + for ((to_update, fsrs_items), starting_states) in to_update + .chunk_into_vecs(FSRS_BATCH_SIZE) + .zip_eq(fsrs_items.chunk_into_vecs(FSRS_BATCH_SIZE)) + .zip_eq(starting_states.chunk_into_vecs(FSRS_BATCH_SIZE)) + { + let memory_states = fsrs.memory_state_batch(fsrs_items, starting_states)?; + + for (card_id, memory_state) in to_update.into_iter().zip_eq(memory_states) { let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?; let original = card.clone(); - if let Some(req) = &req { - let preset_desired_retention = req.preset_desired_retention; - // Store decay and desired retention in the card so that add-ons, card info, - // stats and browser search/sorts don't need to access the deck config. - // Unlike memory states, scheduler doesn't use decay and dr stored in the card. - let deck_id = card.original_or_current_deck_id(); - let desired_retention = *req - .deck_desired_retention - .get(&deck_id) - .unwrap_or(&preset_desired_retention); - card.desired_retention = Some(desired_retention); - card.decay = decay; - if let Some(item) = item { - card.set_memory_state(&fsrs, Some(item), historical_retention.unwrap())?; - // if rescheduling - if let Some(reviews) = &last_revlog_info { - // and we have a last review time for the card - if let Some(last_info) = reviews.get(&card.id) { - if let Some(last_review) = &last_info.last_reviewed_at { - let days_elapsed = - timing.next_day_at.elapsed_days_since(*last_review) as i32; - // and the card's not new - if let Some(state) = &card.memory_state { - // or in (re)learning - if card.ctype == CardType::Review { - let deck = self - .get_deck(card.original_or_current_deck_id())? - .or_not_found(card.original_or_current_deck_id())?; - let deckconfig_id = deck.config_id().unwrap(); - // reschedule it - let original_interval = card.interval; - let interval = fsrs.next_interval( - Some(state.stability), - desired_retention, - 0, - ); - card.interval = rescheduler - .as_mut() - .and_then(|r| { - r.find_interval( - interval, - 1, - req.max_interval, - days_elapsed as u32, - deckconfig_id, - get_fuzz_seed(&card, true), - ) - }) - .unwrap_or_else(|| { - with_review_fuzz( - card.get_fuzz_factor(true), - interval, - 1, - req.max_interval, - ) - }); - let due = if card.original_due != 0 { - &mut card.original_due - } else { - &mut card.due - }; - let new_due = (timing.days_elapsed as i32) - - days_elapsed - + card.interval as i32; - if let Some(rescheduler) = &mut rescheduler { - rescheduler.update_due_cnt_per_day( - *due, - new_due, - deckconfig_id, - ); - } - *due = new_due; - // Add a rescheduled revlog entry - self.log_rescheduled_review( - &card, - original_interval, - usn, - )?; - } - } - } - } - } - } else { - // clear memory states if item is None - card.memory_state = None; - } - } else { - // clear FSRS data if FSRS is disabled - card.clear_fsrs_data(); - } + set_decay_and_desired_retention(&mut card); + card.memory_state = Some(memory_state.into()); + maybe_reschedule_card(&mut card, self, fsrs)?; self.update_card_inner(&mut card, original, usn)?; + on_updated_card()?; } } Ok(()) @@ -269,7 +405,7 @@ impl Card { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct FsrsItemForMemoryState { pub item: FSRSItem, /// When revlogs have been truncated, this stores the initial state at first @@ -504,4 +640,26 @@ mod tests { ); Ok(()) } + + mod update_memory_state { + use super::*; + use crate::collection::CollectionBuilder; + + #[test] + fn smoke() { + let mut collection = CollectionBuilder::default().build().unwrap(); + let entry = UpdateMemoryStateEntry { + req: None, + search: SearchNode::WholeCollection, + ignore_before: TimestampMillis(0), + }; + + collection + .transact(Op::UpdateDeckConfig, |collection| { + collection.update_memory_state(vec![entry]).unwrap(); + Ok(()) + }) + .unwrap(); + } + } }