This commit is contained in:
Daniel Pechersky 2025-09-16 01:37:45 +07:00
parent f54e79c737
commit 4670e1efc3

View file

@ -13,8 +13,6 @@ use itertools::Itertools;
use super::params::ignore_revlogs_before_ms_from_config; use super::params::ignore_revlogs_before_ms_from_config;
use super::rescheduler::Rescheduler; use super::rescheduler::Rescheduler;
use crate::card::CardType;
use crate::prelude::*;
use crate::revlog::RevlogEntry; use crate::revlog::RevlogEntry;
use crate::scheduler::answering::get_fuzz_seed; use crate::scheduler::answering::get_fuzz_seed;
use crate::scheduler::fsrs::params::reviews_for_fsrs; use crate::scheduler::fsrs::params::reviews_for_fsrs;
@ -23,6 +21,8 @@ use crate::scheduler::states::fuzz::with_review_fuzz;
use crate::search::Negated; use crate::search::Negated;
use crate::search::SearchNode; use crate::search::SearchNode;
use crate::search::StateKind; use crate::search::StateKind;
use crate::{card::CardType, progress::ThrottlingProgressHandler};
use crate::{prelude::*, scheduler::timing::SchedTimingToday};
#[derive(Debug, Clone, Copy, Default)] #[derive(Debug, Clone, Copy, Default)]
pub struct ComputeMemoryProgress { pub struct ComputeMemoryProgress {
@ -80,9 +80,6 @@ impl Collection {
&mut self, &mut self,
entries: Vec<UpdateMemoryStateEntry>, entries: Vec<UpdateMemoryStateEntry>,
) -> Result<()> { ) -> Result<()> {
const ITEM_CHUNK_SIZE: usize = 100_000;
const FSRS_CHUNK_SIZE: usize = 1000;
let timing = self.timing_today()?; let timing = self.timing_today()?;
let usn = self.usn()?; let usn = self.usn()?;
for UpdateMemoryStateEntry { for UpdateMemoryStateEntry {
@ -94,159 +91,197 @@ impl Collection {
let search = let search =
SearchBuilder::all([search.into(), SearchNode::State(StateKind::New).negated()]); SearchBuilder::all([search.into(), SearchNode::State(StateKind::New).negated()]);
let revlog = self.revlog_for_srs(search)?; let revlog = self.revlog_for_srs(search)?;
let reschedule = req.as_ref().map(|e| e.reschedule).unwrap_or_default(); let reschedule = req.as_ref().map(|e| e.reschedule).unwrap_or(false);
let last_revlog_info = reschedule.then(|| get_last_revlog_info(&revlog)); let last_revlog_info = reschedule.then(|| get_last_revlog_info(&revlog));
let mut rescheduler = self let rescheduler = self
.get_config_bool(BoolKey::LoadBalancerEnabled) .get_config_bool(BoolKey::LoadBalancerEnabled)
.then(|| Rescheduler::new(self)) .then(|| Rescheduler::new(self))
.transpose()?; .transpose()?;
let fsrs = FSRS::new(req.as_ref().map(|w| &w.params[..]).or(Some([].as_slice())))?; let fsrs = FSRS::new(req.as_ref().map(|w| &w.params[..]).or(Some([].as_slice())))?;
let decay = req.as_ref().map(|w| get_decay_from_params(&w.params));
let historical_retention = req.as_ref().map(|w| w.historical_retention); let historical_retention = req.as_ref().map(|w| w.historical_retention);
let mut items = fsrs_items_for_memory_states( let items = fsrs_items_for_memory_states(
&fsrs, &fsrs,
revlog, revlog,
timing.next_day_at, timing.next_day_at,
historical_retention.unwrap_or(0.9), historical_retention.unwrap_or(0.9),
ignore_before, ignore_before,
)?; )?;
let mut progress = self.new_progress_handler::<ComputeMemoryProgress>(); let mut progress = self.new_progress_handler::<ComputeMemoryProgress>();
progress.update(false, |s| s.total_cards = items.len() as u32)?; progress.update(false, |s| s.total_cards = items.len() as u32)?;
let Some(req) = &req else { let Some(req) = &req else {
// clear FSRS data if FSRS is disabled // clear FSRS data if FSRS is disabled
for (idx, (card_id, _)) in items.into_iter().enumerate() { self.clear_fsrs_data_for_cards(
progress.update(true, |state| state.current_cards = idx as u32 + 1)?; items.into_iter().map(|(card_id, _)| card_id),
let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?; usn,
let original = card.clone(); &mut progress,
card.clear_fsrs_data(); )?;
self.update_card_inner(&mut card, original, usn)?;
}
return Ok(()); return Ok(());
}; };
let preset_desired_retention = req.preset_desired_retention; let last_revlog_info = last_revlog_info.unwrap();
let mut to_update = Vec::new(); self.update_memory_state_for_items(
let mut fsrs_items = Vec::new(); items,
let mut starting_states = Vec::new(); last_revlog_info,
for (i, items) in items.chunk_into_vecs(ITEM_CHUNK_SIZE).enumerate() { req,
progress.update(true, |state| { &fsrs,
let end_of_chunk_index = i * ITEM_CHUNK_SIZE + items.len(); rescheduler,
state.current_cards = end_of_chunk_index as u32 + 1 &mut progress,
})?; timing,
for (card_id, item) in items.into_iter() { usn,
let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?; )?;
let original = card.clone(); }
Ok(())
}
// Store decay and desired retention in the card so that add-ons, card info, fn clear_fsrs_data_for_cards(
// stats and browser search/sorts don't need to access the deck config. &mut self,
// Unlike memory states, scheduler doesn't use decay and dr stored in the card. cards: impl Iterator<Item = CardId>,
let deck_id = card.original_or_current_deck_id(); usn: Usn,
let desired_retention = *req progress: &mut ThrottlingProgressHandler<ComputeMemoryProgress>,
.deck_desired_retention ) -> Result<()> {
.get(&deck_id) for (idx, card_id) in cards.enumerate() {
.unwrap_or(&preset_desired_retention); progress.update(true, |state| state.current_cards = idx as u32 + 1)?;
card.desired_retention = Some(desired_retention); let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?;
card.decay = decay; let original = card.clone();
if let Some(item) = item { card.clear_fsrs_data();
to_update.push((card, original)); self.update_card_inner(&mut card, original, usn)?;
fsrs_items.push(item.item); }
starting_states.push(item.starting_state); Ok(())
} else { }
// clear memory states if item is None
card.memory_state = None; #[expect(clippy::too_many_arguments)]
self.update_card_inner(&mut card, original, usn)?; fn update_memory_state_for_items(
} &mut self,
mut items: Vec<(CardId, Option<FsrsItemForMemoryState>)>,
last_revlog_info: HashMap<CardId, LastRevlogInfo>,
req: &UpdateMemoryStateRequest,
fsrs: &FSRS,
mut rescheduler: Option<Rescheduler>,
progress: &mut ThrottlingProgressHandler<ComputeMemoryProgress>,
timing: SchedTimingToday,
usn: Usn,
) -> Result<()> {
const ITEM_CHUNK_SIZE: usize = 100_000;
const FSRS_CHUNK_SIZE: usize = 1000;
let decay = get_decay_from_params(&req.params);
let mut to_update = Vec::new();
let mut fsrs_items = Vec::new();
let mut starting_states = Vec::new();
for (i, items) in items.chunk_into_vecs(ITEM_CHUNK_SIZE).enumerate() {
progress.update(true, |state| {
let end_of_chunk_index = i * ITEM_CHUNK_SIZE + items.len();
state.current_cards = end_of_chunk_index as u32 + 1
})?;
for (card_id, item) in items.into_iter() {
let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?;
let original = card.clone();
// Store decay and desired retention in the card so that add-ons, card info,
// stats and browser search/sorts don't need to access the deck config.
// Unlike memory states, scheduler doesn't use decay and dr stored in the card.
let deck_id = card.original_or_current_deck_id();
let desired_retention = *req
.deck_desired_retention
.get(&deck_id)
.unwrap_or(&req.preset_desired_retention);
card.desired_retention = Some(desired_retention);
card.decay = Some(decay);
if let Some(item) = item {
to_update.push((card, original));
fsrs_items.push(item.item);
starting_states.push(item.starting_state);
} else {
// clear memory states if item is None
card.memory_state = None;
self.update_card_inner(&mut card, original, usn)?;
} }
}
// fsrs.memory_state_batch is O(nm) where n is the number of cards and m is the max review count between all items. // fsrs.memory_state_batch is O(nm) where n is the number of cards and m is the max review count between all items.
// Therefore we want to pass batches to fsrs.memory_state_batch where the review count is relatively even. // Therefore we want to pass batches to fsrs.memory_state_batch where the review count is relatively even.
let mut p = let mut p = permutation::sort_unstable_by_key(&fsrs_items, |item| item.reviews.len());
permutation::sort_unstable_by_key(&fsrs_items, |item| item.reviews.len()); p.apply_slice_in_place(&mut to_update);
p.apply_slice_in_place(&mut to_update); p.apply_slice_in_place(&mut fsrs_items);
p.apply_slice_in_place(&mut fsrs_items); p.apply_slice_in_place(&mut starting_states);
p.apply_slice_in_place(&mut starting_states);
for ((to_update, fsrs_items), starting_states) in to_update for ((to_update, fsrs_items), starting_states) in to_update
.chunk_into_vecs(FSRS_CHUNK_SIZE) .chunk_into_vecs(FSRS_CHUNK_SIZE)
.zip_eq(fsrs_items.chunk_into_vecs(FSRS_CHUNK_SIZE)) .zip_eq(fsrs_items.chunk_into_vecs(FSRS_CHUNK_SIZE))
.zip_eq(starting_states.chunk_into_vecs(FSRS_CHUNK_SIZE)) .zip_eq(starting_states.chunk_into_vecs(FSRS_CHUNK_SIZE))
{
let memory_states = fsrs.memory_state_batch(fsrs_items, starting_states)?;
for ((mut card, original), memory_state) in to_update.into_iter().zip(memory_states)
{ {
let memory_states = fsrs.memory_state_batch(fsrs_items, starting_states)?; card.memory_state = Some(memory_state.into());
for ((mut card, original), memory_state) in 'reschedule_card: {
to_update.into_iter().zip(memory_states) // we have a last review time for the card
{ let Some(last_info) = last_revlog_info.get(&card.id) else {
card.memory_state = Some(memory_state.into()); break 'reschedule_card;
};
let Some(last_review) = &last_info.last_reviewed_at else {
break 'reschedule_card;
};
// the card isn't in (re)learning
if card.ctype != CardType::Review {
break 'reschedule_card;
};
'reschedule_card: { let deck = self
// if rescheduling .get_deck(card.original_or_current_deck_id())?
let Some(reviews) = &last_revlog_info else { .or_not_found(card.original_or_current_deck_id())?;
break 'reschedule_card; let deckconfig_id = deck.config_id().unwrap();
}; // reschedule it
let days_elapsed =
// and we have a last review time for the card timing.next_day_at.elapsed_days_since(*last_review) as i32;
let Some(last_info) = reviews.get(&card.id) else { let original_interval = card.interval;
break 'reschedule_card; let interval = fsrs.next_interval(
}; Some(memory_state.stability),
let Some(last_review) = &last_info.last_reviewed_at else { card.desired_retention
break 'reschedule_card; .expect("We set desired retention above"),
}; 0,
// or in (re)learning );
if card.ctype != CardType::Review { card.interval = rescheduler
break 'reschedule_card; .as_mut()
}; .and_then(|r| {
r.find_interval(
let deck = self interval,
.get_deck(card.original_or_current_deck_id())? 1,
.or_not_found(card.original_or_current_deck_id())?; req.max_interval,
let deckconfig_id = deck.config_id().unwrap(); days_elapsed as u32,
// reschedule it deckconfig_id,
let days_elapsed = get_fuzz_seed(&card, true),
timing.next_day_at.elapsed_days_since(*last_review) as i32; )
let original_interval = card.interval; })
let interval = fsrs.next_interval( .unwrap_or_else(|| {
Some(memory_state.stability), with_review_fuzz(
card.desired_retention card.get_fuzz_factor(true),
.expect("We set desired retention above"), interval,
0, 1,
); req.max_interval,
card.interval = rescheduler )
.as_mut() });
.and_then(|r| { let due = if card.original_due != 0 {
r.find_interval( &mut card.original_due
interval, } else {
1, &mut card.due
req.max_interval, };
days_elapsed as u32, let new_due =
deckconfig_id, (timing.days_elapsed as i32) - days_elapsed + card.interval as i32;
get_fuzz_seed(&card, true), if let Some(rescheduler) = &mut rescheduler {
) rescheduler.update_due_cnt_per_day(*due, new_due, deckconfig_id);
})
.unwrap_or_else(|| {
with_review_fuzz(
card.get_fuzz_factor(true),
interval,
1,
req.max_interval,
)
});
let due = if card.original_due != 0 {
&mut card.original_due
} else {
&mut card.due
};
let new_due =
(timing.days_elapsed as i32) - days_elapsed + card.interval as i32;
if let Some(rescheduler) = &mut rescheduler {
rescheduler.update_due_cnt_per_day(*due, new_due, deckconfig_id);
}
*due = new_due;
// Add a rescheduled revlog entry
self.log_rescheduled_review(&card, original_interval, usn)?;
} }
self.update_card_inner(&mut card, original, usn)?; *due = new_due;
// Add a rescheduled revlog entry
self.log_rescheduled_review(&card, original_interval, usn)?;
} }
self.update_card_inner(&mut card, original, usn)?;
} }
} }
} }