diff --git a/rslib/src/scheduler/answering/mod.rs b/rslib/src/scheduler/answering/mod.rs index 2d3c34a28..7c25f4a6a 100644 --- a/rslib/src/scheduler/answering/mod.rs +++ b/rslib/src/scheduler/answering/mod.rs @@ -32,7 +32,7 @@ use crate::deckconfig::DeckConfig; use crate::deckconfig::LeechAction; use crate::decks::Deck; use crate::prelude::*; -use crate::scheduler::fsrs::memory_state::single_card_revlog_to_item; +use crate::scheduler::fsrs::memory_state::fsrs_item_for_memory_state; use crate::scheduler::states::PreviewState; use crate::search::SearchNode; @@ -437,7 +437,7 @@ impl Collection { // and will need its initial memory state to be calculated based on review // history. let revlog = self.revlog_for_srs(SearchNode::CardIds(card.id.to_string()))?; - let item = single_card_revlog_to_item( + let item = fsrs_item_for_memory_state( &fsrs, revlog, timing.next_day_at, diff --git a/rslib/src/scheduler/fsrs/memory_state.rs b/rslib/src/scheduler/fsrs/memory_state.rs index 4f1ea90c8..4cb5bb8fe 100644 --- a/rslib/src/scheduler/fsrs/memory_state.rs +++ b/rslib/src/scheduler/fsrs/memory_state.rs @@ -14,7 +14,7 @@ use crate::card::CardType; use crate::prelude::*; use crate::revlog::RevlogEntry; use crate::revlog::RevlogReviewKind; -use crate::scheduler::fsrs::params::single_card_revlog_to_items; +use crate::scheduler::fsrs::params::reviews_for_fsrs; use crate::scheduler::fsrs::params::Params; use crate::scheduler::states::fuzz::with_review_fuzz; use crate::search::Negated; @@ -71,7 +71,7 @@ impl Collection { }; let fsrs = FSRS::new(req.as_ref().map(|w| &w.params[..]).or(Some([].as_slice())))?; let historical_retention = req.as_ref().map(|w| w.historical_retention); - let items = fsrs_items_for_memory_state( + let items = fsrs_items_for_memory_states( &fsrs, revlog, timing.next_day_at, @@ -156,7 +156,7 @@ impl Collection { let historical_retention = config.inner.historical_retention; let fsrs = FSRS::new(Some(config.fsrs_params()))?; let revlog = self.revlog_for_srs(SearchNode::CardIds(card.id.to_string()))?; - let item = single_card_revlog_to_item( + let item = fsrs_item_for_memory_state( &fsrs, revlog, self.timing_today()?.next_day_at, @@ -175,7 +175,7 @@ impl Card { pub(crate) fn set_memory_state( &mut self, fsrs: &FSRS, - item: Option, + item: Option, historical_retention: f32, ) -> Result<()> { let memory_state = if let Some(i) = item { @@ -196,22 +196,21 @@ impl Card { } #[derive(Debug)] -pub(crate) struct FsrsItemWithStartingState { +pub(crate) struct FsrsItemForMemoryState { pub item: FSRSItem, /// When revlogs have been truncated, this stores the initial state at first /// review pub starting_state: Option, } -/// When updating memory state, FSRS only requires the last FSRSItem that -/// contains the full history. -pub(crate) fn fsrs_items_for_memory_state( +/// Like [fsrs_item_for_memory_state], but for updating multiple cards at once. +pub(crate) fn fsrs_items_for_memory_states( fsrs: &FSRS, revlogs: Vec, next_day_at: TimestampSecs, historical_retention: f32, ignore_revlogs_before: TimestampMillis, -) -> Result)>> { +) -> Result)>> { revlogs .into_iter() .chunk_by(|r| r.cid) @@ -219,7 +218,7 @@ pub(crate) fn fsrs_items_for_memory_state( .map(|(card_id, group)| { Ok(( card_id, - single_card_revlog_to_item( + fsrs_item_for_memory_state( fsrs, group.collect(), next_day_at, @@ -273,41 +272,35 @@ fn get_last_revlog_info(revlogs: &[RevlogEntry]) -> HashMap, next_day_at: TimestampSecs, historical_retention: f32, ignore_revlogs_before: TimestampMillis, -) -> Result> { +) -> Result> { struct FirstReview { interval: f32, ease_factor: f32, } - let first_review = entries - .iter() - // ignore manual and rescheduled revlogs and revlogs before the cutoff - .find(|e| e.button_chosen > 0 && e.id.0 >= ignore_revlogs_before.0) - .map(|e| FirstReview { - interval: e.interval.max(1) as f32, - ease_factor: if e.ease_factor == 0 { - 2500 - } else { - e.ease_factor - } as f32 - / 1000.0, - }); - if let Some((mut items, revlogs_complete, _)) = - single_card_revlog_to_items(entries, next_day_at, false, ignore_revlogs_before) - { - let mut item = items.pop().unwrap(); - if revlogs_complete { - Ok(Some(FsrsItemWithStartingState { + if let Some(mut output) = reviews_for_fsrs(entries, next_day_at, false, ignore_revlogs_before) { + let mut item = output.fsrs_items.pop().unwrap(); + if output.revlogs_complete { + Ok(Some(FsrsItemForMemoryState { item, starting_state: None, })) - } else if let Some(first_review) = first_review { + } else if let Some(first_user_grade) = output.filtered_revlogs.first() { // the revlog has been truncated, but not fully + let first_review = FirstReview { + interval: first_user_grade.interval.max(1) as f32, + ease_factor: if first_user_grade.ease_factor == 0 { + 2500 + } else { + first_user_grade.ease_factor + } as f32 + / 1000.0, + }; let mut starting_state = fsrs.memory_state_from_sm2( first_review.ease_factor, first_review.interval, @@ -317,15 +310,12 @@ pub(crate) fn single_card_revlog_to_item( if first_review.ease_factor <= 1.1 { starting_state.difficulty = (first_review.ease_factor - 0.1) * 9.0 + 1.0; } + // remove the first review because it has been converted to the starting state item.reviews.remove(0); - if item.reviews.is_empty() { - Ok(None) - } else { - Ok(Some(FsrsItemWithStartingState { - item, - starting_state: Some(starting_state), - })) - } + Ok(Some(FsrsItemForMemoryState { + item, + starting_state: Some(starting_state), + })) } else { // only manual and rescheduled revlogs; treat like empty Ok(None) @@ -360,15 +350,15 @@ mod tests { // cards without any learning steps due to truncated history still have memory // state calculated let fsrs = FSRS::new(Some(&[])).unwrap(); - let item = single_card_revlog_to_item( + let item = fsrs_item_for_memory_state( &fsrs, vec![ RevlogEntry { ease_factor: 2500, interval: 100, - ..revlog(RevlogReviewKind::Review, 100) + ..revlog(RevlogReviewKind::Review, 99) }, - revlog(RevlogReviewKind::Review, 1), + revlog(RevlogReviewKind::Review, 0), ], TimestampSecs::now(), 0.9, @@ -394,9 +384,9 @@ mod tests { difficulty: 5.7909784, }), ); - // but if there's only a single revlog entry, we'll fall back on current card - // state - let item = single_card_revlog_to_item( + // cards with a single review-type entry also get memory states from revlog + // rather than card states + let item = fsrs_item_for_memory_state( &fsrs, vec![RevlogEntry { ease_factor: 2500, @@ -406,17 +396,15 @@ mod tests { TimestampSecs::now(), 0.9, 0.into(), - )?; - assert!(item.is_none()); - card.interval = 123; - card.ease_factor = 2000; - card.ctype = CardType::Review; - card.set_memory_state(&fsrs, item, 0.9)?; + )? + .unwrap(); + assert!(item.item.reviews.is_empty()); + card.set_memory_state(&fsrs, Some(item), 0.9)?; assert_int_eq( card.memory_state, Some(FsrsMemoryState { - stability: 122.99994, - difficulty: 7.334526, + stability: 99.999954, + difficulty: 5.840841, }), ); Ok(()) diff --git a/rslib/src/scheduler/fsrs/params.rs b/rslib/src/scheduler/fsrs/params.rs index e01b260d6..1b282b0fb 100644 --- a/rslib/src/scheduler/fsrs/params.rs +++ b/rslib/src/scheduler/fsrs/params.rs @@ -229,43 +229,56 @@ fn fsrs_items_for_training( .chunk_by(|r| r.cid) .into_iter() .filter_map(|(_cid, entries)| { - single_card_revlog_to_items(entries.collect(), next_day_at, true, review_revlogs_before) + reviews_for_fsrs(entries.collect(), next_day_at, true, review_revlogs_before) }) .flat_map(|i| { - review_count += i.2; + review_count += i.filtered_revlogs.len(); - i.0 + i.fsrs_items }) .collect_vec(); revlogs.sort_by_cached_key(|r| r.reviews.len()); (revlogs, review_count) } -/// Transform the revlog history for a card into a list of FSRSItems. FSRS -/// expects multiple items for a given card when training - for revlog -/// `[1,2,3]`, we create FSRSItems corresponding to `[1,2]` and `[1,2,3]` -/// in training, and `[1]`, [1,2]` and `[1,2,3]` when calculating memory -/// state. +pub(crate) struct ReviewsForFsrs { + /// The revlog entries that remain after filtering (e.g. excluding + /// review entries prior to a card being reset). + pub filtered_revlogs: Vec, + /// FSRS items derived from the filtered revlogs. + pub fsrs_items: Vec, + /// True if there is enough history to derive memory state from history + /// alone. If false, memory state will be derived from SM2. + pub revlogs_complete: bool, +} + +/// Filter out unwanted revlog entries, then create a series of FSRS items for +/// training/memory state calculation. /// -/// Returns (items, revlog_complete, review_count). -/// revlog_complete is assumed when the revlogs have a learning step, or start -/// with manual scheduling. When revlogs are incomplete, the starting difficulty -/// is later inferred from the SM2 data, instead of using the standard FSRS -/// initial difficulty. review_count is the number of reviews used after -/// filtering out unwanted ones. -pub(crate) fn single_card_revlog_to_items( +/// Filtering consists of removing revlog entries before the supplied timestamp, +/// and removing items such as reviews that happened prior to a card being reset +/// to new. +pub(crate) fn reviews_for_fsrs( mut entries: Vec, next_day_at: TimestampSecs, training: bool, ignore_revlogs_before: TimestampMillis, -) -> Option<(Vec, bool, usize)> { +) -> Option { let mut first_of_last_learn_entries = None; + let mut first_user_grade_idx = None; let mut revlogs_complete = false; + // Working backwards from the latest review... for (index, entry) in entries.iter().enumerate().rev() { - if matches!( - (entry.review_kind, entry.button_chosen), - (RevlogReviewKind::Learning, 1..=4) - ) { + if entry.review_kind == RevlogReviewKind::Filtered && entry.ease_factor == 0 { + continue; + } + let within_cutoff = entry.id.0 > ignore_revlogs_before.0; + let user_graded = matches!(entry.button_chosen, 1..=4); + if user_graded && within_cutoff { + first_user_grade_idx = Some(index); + } + + if user_graded && entry.review_kind == RevlogReviewKind::Learning { first_of_last_learn_entries = Some(index); revlogs_complete = true; } else if first_of_last_learn_entries.is_some() { @@ -274,32 +287,25 @@ pub(crate) fn single_card_revlog_to_items( (entry.review_kind, entry.ease_factor), (RevlogReviewKind::Manual, 0) ) { - // If we find a `Learn` entry after the `Forget` entry, we should - // ignore the entries before the `Forget` entry + // Ignore entries prior to a `Reset` if a learning step has come after, + // but consider revlogs complete. if first_of_last_learn_entries.is_some() { + revlogs_complete = true; + break; + // Ignore entries prior to a `Reset` if the user has graded a card + // after the reset. + } else if first_user_grade_idx.is_some() { revlogs_complete = false; break; - // If we don't find a `Learn` entry after the `Forget` entry, it's - // a new card and we should ignore all entries + // User has not graded the card since it was reset, so all history + // filtered out. } else { return None; } } } - if !revlogs_complete { - revlogs_complete = matches!( - entries.first(), - Some(RevlogEntry { - review_kind: RevlogReviewKind::Manual, - .. - }) | Some(RevlogEntry { - review_kind: RevlogReviewKind::Rescheduled, - .. - }) - ); - } if training { - // While training ignore the entire card if the first learning step of the last + // While training, ignore the entire card if the first learning step of the last // group of learning steps is before the ignore_revlogs_before date if let Some(idx) = first_of_last_learn_entries { if entries[idx].id.0 < ignore_revlogs_before.0 { @@ -307,38 +313,29 @@ pub(crate) fn single_card_revlog_to_items( } } } else { - // While reviewing if the first learning step is before the ignore date, - // ignore every review before and including the last learning step + // While reviewing, if the first learning step is before the ignore date, + // we ignore it, and will fall back on SM2 info and the last user grade below. if let Some(idx) = first_of_last_learn_entries { if entries[idx].id.0 < ignore_revlogs_before.0 && idx < entries.len() - 1 { - let last_learn_entry = entries - .iter() - .enumerate() - .rev() - .find(|(_idx, e)| e.review_kind == RevlogReviewKind::Learning) - .map(|(idx, _)| idx); - - entries.drain(..(last_learn_entry? + 1)); revlogs_complete = false; first_of_last_learn_entries = None; } } } - let first_relearn = entries - .iter() - .enumerate() - .find(|(_idx, e)| { - e.id.0 > ignore_revlogs_before.0 && e.review_kind == RevlogReviewKind::Relearning - }) - .map(|(idx, _)| idx); - if let Some(idx) = first_of_last_learn_entries.or(first_relearn) { - // start from the (re)learning step + if let Some(idx) = first_of_last_learn_entries { + // start from the learning step if idx > 0 { entries.drain(..idx); } } else if training { // when training, we ignore cards that don't have any learning steps return None; + } else if let Some(idx) = first_user_grade_idx { + // if there are no learning entries, but the user has reviewed the card, + // we ignore all entries before the first grade + if idx > 0 { + entries.drain(..idx); + } } // Filter out unwanted entries @@ -384,7 +381,11 @@ pub(crate) fn single_card_revlog_to_items( if items.is_empty() { None } else { - Some((items, revlogs_complete, entries.len())) + Some(ReviewsForFsrs { + fsrs_items: items, + revlogs_complete, + filtered_revlogs: entries, + }) } } @@ -443,8 +444,8 @@ pub(crate) mod tests { training: bool, ignore_before: TimestampMillis, ) -> Option> { - single_card_revlog_to_items(revlog.to_vec(), NEXT_DAY_AT, training, ignore_before) - .map(|i| i.0) + reviews_for_fsrs(revlog.to_vec(), NEXT_DAY_AT, training, ignore_before) + .map(|i| i.fsrs_items) } pub(crate) fn convert(revlog: &[RevlogEntry], training: bool) -> Option> { @@ -598,6 +599,19 @@ pub(crate) mod tests { ); } + #[test] + fn partially_ignored_learning_steps_terminate_training() { + let revlogs = &[ + revlog(RevlogReviewKind::Learning, 10), + revlog(RevlogReviewKind::Learning, 8), + revlog(RevlogReviewKind::Review, 6), + ]; + // | = Ignore before + // L = learning step + // L | L R + assert_eq!(convert_ignore_before(revlogs, true, days_ago_ms(9)), None); + } + #[test] fn ignore_before_date_between_learning_steps_when_reviewing() { let revlogs = &[ @@ -614,7 +628,7 @@ pub(crate) mod tests { convert_ignore_before(revlogs, false, days_ago_ms(9)) .unwrap() .len(), - 1 + 2 ); // | L L R assert_eq!( diff --git a/rslib/src/stats/card.rs b/rslib/src/stats/card.rs index c2bf9e562..1db368445 100644 --- a/rslib/src/stats/card.rs +++ b/rslib/src/stats/card.rs @@ -6,7 +6,7 @@ use fsrs::FSRS; use crate::card::CardType; use crate::prelude::*; use crate::revlog::RevlogEntry; -use crate::scheduler::fsrs::memory_state::single_card_revlog_to_item; +use crate::scheduler::fsrs::memory_state::fsrs_item_for_memory_state; use crate::scheduler::fsrs::params::ignore_revlogs_before_ms_from_config; use crate::scheduler::timing::is_unix_epoch_timestamp; @@ -144,7 +144,7 @@ impl Collection { for entry in revlog { accumulated_revlog.push(entry.clone()); - let item = single_card_revlog_to_item( + let item = fsrs_item_for_memory_state( &fsrs, accumulated_revlog.clone(), next_day_at,