Batch both max # of items processed and max # of items passed to fsrs

2025-09-18 14:02:21 -04:00 · 2025-09-15 20:59:32 +07:00 · 2025-09-15 20:59:32 +07:00 · f54e79c737
commit f54e79c737
parent dda1324872
4 changed files with 140 additions and 94 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -124,6 +124,7 @@ dependencies = [
 "once_cell",
 "pbkdf2",
 "percent-encoding-iri",
 "permutation",
 "phf 0.11.3",
 "pin-project",
 "prettyplease",
@ -4560,6 +4561,12 @@ name = "percent-encoding-iri"
 version = "2.2.0"
 source = "git+https://github.com/ankitects/rust-url.git?rev=bb930b8d089f4d30d7d19c12e54e66191de47b88#bb930b8d089f4d30d7d19c12e54e66191de47b88"
 [[package]]
 name = "permutation"
 version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7"
 [[package]]
 name = "pest"
 version = "2.8.1"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -102,6 +102,7 @@ num_cpus = "1.17.0"
 num_enum = "0.7.3"
 once_cell = "1.21.3"
 pbkdf2 = { version = "0.12", features = ["simple"] }
 permutation = "0.4.1"
 phf = { version = "0.11.3", features = ["macros"] }
 pin-project = "1.1.10"
 prettyplease = "0.2.34"
--- a/rslib/Cargo.toml
+++ b/rslib/Cargo.toml
@ -76,6 +76,7 @@ num_enum.workspace = true
 once_cell.workspace = true
 pbkdf2.workspace = true
 percent-encoding-iri.workspace = true
 permutation.workspace = true
 phf.workspace = true
 pin-project.workspace = true
 prost.workspace = true
--- a/rslib/src/scheduler/fsrs/memory_state.rs
+++ b/rslib/src/scheduler/fsrs/memory_state.rs
@ -58,6 +58,18 @@ pub(crate) struct UpdateMemoryStateEntry {
    pub ignore_before: TimestampMillis,
 }
 /// Splits a `Vec` into consecutive owned chunks, emptying it as the returned
 /// iterator is consumed.
 trait ChunkIntoVecs<T> {
    /// Returns an iterator that, on each step, removes up to `chunk_size`
    /// elements from the front of `self` and yields them as a new `Vec`,
    /// preserving the original order. Iteration ends once `self` is empty;
    /// the final chunk may be shorter than `chunk_size`.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero, as no progress could ever be made.
    fn chunk_into_vecs(&mut self, chunk_size: usize) -> impl Iterator<Item = Vec<T>>;
 }
 impl<T> ChunkIntoVecs<T> for Vec<T> {
    fn chunk_into_vecs(&mut self, chunk_size: usize) -> impl Iterator<Item = Vec<T>> {
        assert!(chunk_size != 0, "chunk size must be non-zero");
        std::iter::from_fn(move || {
            // `drain(..n)` removes the *leading* n elements. `split_off(n)` would
            // instead hand back the tail and leave the head behind, which yields
            // out-of-order chunks and never empties `self` once its length is
            // no greater than `chunk_size`.
            (!self.is_empty()).then(|| {
                let take = chunk_size.min(self.len());
                self.drain(..take).collect::<Vec<T>>()
            })
        })
    }
 }
 impl Collection {
    /// For each provided set of params, locate cards with the provided search,
    /// and update their memory state.
@ -68,6 +80,9 @@ impl Collection {
        &mut self,
        entries: Vec<UpdateMemoryStateEntry>,
    ) -> Result<()> {
        const ITEM_CHUNK_SIZE: usize = 100_000;
        const FSRS_CHUNK_SIZE: usize = 1000;
        let timing = self.timing_today()?;
        let usn = self.usn()?;
        for UpdateMemoryStateEntry {
@ -88,7 +103,7 @@ impl Collection {
            let fsrs = FSRS::new(req.as_ref().map(|w| &w.params[..]).or(Some([].as_slice())))?;
            let decay = req.as_ref().map(|w| get_decay_from_params(&w.params));
            let historical_retention = req.as_ref().map(|w| w.historical_retention);
-            let items = fsrs_items_for_memory_states(
+            let mut items = fsrs_items_for_memory_states(
                &fsrs,
                revlog,
                timing.next_day_at,
@ -114,8 +129,12 @@ impl Collection {
            let mut to_update = Vec::new();
            let mut fsrs_items = Vec::new();
            let mut starting_states = Vec::new();
-            for (idx, (card_id, item)) in items.into_iter().enumerate() {
+            for (i, items) in items.chunk_into_vecs(ITEM_CHUNK_SIZE).enumerate() {
-                progress.update(true, |state| state.current_cards = idx as u32 + 1)?;
+                progress.update(true, |state| {
                    let end_of_chunk_index = i * ITEM_CHUNK_SIZE + items.len();
                    state.current_cards = end_of_chunk_index as u32 + 1
                })?;
                for (card_id, item) in items.into_iter() {
                    let mut card = self.storage.get_card(card_id)?.or_not_found(card_id)?;
                    let original = card.clone();
@ -140,9 +159,24 @@ impl Collection {
                    }
                }
                // fsrs.memory_state_batch is O(nm) where n is the number of cards and m is the max review count between all items.
                // Therefore we want to pass batches to fsrs.memory_state_batch where the review count is relatively even.
                let mut p =
                    permutation::sort_unstable_by_key(&fsrs_items, |item| item.reviews.len());
                p.apply_slice_in_place(&mut to_update);
                p.apply_slice_in_place(&mut fsrs_items);
                p.apply_slice_in_place(&mut starting_states);
                for ((to_update, fsrs_items), starting_states) in to_update
                    .chunk_into_vecs(FSRS_CHUNK_SIZE)
                    .zip_eq(fsrs_items.chunk_into_vecs(FSRS_CHUNK_SIZE))
                    .zip_eq(starting_states.chunk_into_vecs(FSRS_CHUNK_SIZE))
                {
                    let memory_states = fsrs.memory_state_batch(fsrs_items, starting_states)?;
-            for ((mut card, original), memory_state) in to_update.into_iter().zip(memory_states) {
+                    for ((mut card, original), memory_state) in
                        to_update.into_iter().zip(memory_states)
                    {
                        card.memory_state = Some(memory_state.into());
                        'reschedule_card: {
@ -168,7 +202,8 @@ impl Collection {
                                .or_not_found(card.original_or_current_deck_id())?;
                            let deckconfig_id = deck.config_id().unwrap();
                            // reschedule it
-                    let days_elapsed = timing.next_day_at.elapsed_days_since(*last_review) as i32;
+                            let days_elapsed =
                                timing.next_day_at.elapsed_days_since(*last_review) as i32;
                            let original_interval = card.interval;
                            let interval = fsrs.next_interval(
                                Some(memory_state.stability),
@ -213,6 +248,8 @@ impl Collection {
                        self.update_card_inner(&mut card, original, usn)?;
                    }
                }
            }
        }
        Ok(())
    }