Fix/fallback to non-manual entry when first_of_last_learn_entries non found (#3639)

* Fix/fallback to non-manual entry when first_of_last_learn_entries non found

* refactor single_card_revlog_to_item(s)

* update unit test of bypassed_learning_is_handled

* move comment line

* remove first_relearn_entries

* skip cram entry

* only pick non_manual_entries after ignore date

* fallback to non_manual_entries if the first learning step is before the ignore date

* pass ci

* update ignore_before_date_between_learning_steps_when_reviewing

* shorten the comment

* Minor refactoring

- fsrs_items_for_memory_state - fsrs_items_for_memory_states
- single_card_revlog_to_item -> fsrs_item_for_memory_state
(to match fsrs_items_for_training)
- single_card_revlog_to_items -> reviews_for_fsrs
- Use struct instead of tuple for reviews_for_fsrs output
- Don't return count, since we're already returning the filtered list

* More renaming/comment tweaks

- non_manual_entries -> first_user_grade_idx
- change comments to reflect the fact that we're working backwards
- Use "user-graded" rather than "non-manual"

* Add extra unit test

* Some wording tweaks
This commit is contained in:
Jarrett Ye 2024-12-17 20:34:19 +08:00 committed by GitHub
parent 4d20945319
commit 474dbc2812
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 120 additions and 118 deletions

View file

@ -32,7 +32,7 @@ use crate::deckconfig::DeckConfig;
use crate::deckconfig::LeechAction;
use crate::decks::Deck;
use crate::prelude::*;
use crate::scheduler::fsrs::memory_state::single_card_revlog_to_item;
use crate::scheduler::fsrs::memory_state::fsrs_item_for_memory_state;
use crate::scheduler::states::PreviewState;
use crate::search::SearchNode;
@ -437,7 +437,7 @@ impl Collection {
// and will need its initial memory state to be calculated based on review
// history.
let revlog = self.revlog_for_srs(SearchNode::CardIds(card.id.to_string()))?;
let item = single_card_revlog_to_item(
let item = fsrs_item_for_memory_state(
&fsrs,
revlog,
timing.next_day_at,

View file

@ -14,7 +14,7 @@ use crate::card::CardType;
use crate::prelude::*;
use crate::revlog::RevlogEntry;
use crate::revlog::RevlogReviewKind;
use crate::scheduler::fsrs::params::single_card_revlog_to_items;
use crate::scheduler::fsrs::params::reviews_for_fsrs;
use crate::scheduler::fsrs::params::Params;
use crate::scheduler::states::fuzz::with_review_fuzz;
use crate::search::Negated;
@ -71,7 +71,7 @@ impl Collection {
};
let fsrs = FSRS::new(req.as_ref().map(|w| &w.params[..]).or(Some([].as_slice())))?;
let historical_retention = req.as_ref().map(|w| w.historical_retention);
let items = fsrs_items_for_memory_state(
let items = fsrs_items_for_memory_states(
&fsrs,
revlog,
timing.next_day_at,
@ -156,7 +156,7 @@ impl Collection {
let historical_retention = config.inner.historical_retention;
let fsrs = FSRS::new(Some(config.fsrs_params()))?;
let revlog = self.revlog_for_srs(SearchNode::CardIds(card.id.to_string()))?;
let item = single_card_revlog_to_item(
let item = fsrs_item_for_memory_state(
&fsrs,
revlog,
self.timing_today()?.next_day_at,
@ -175,7 +175,7 @@ impl Card {
pub(crate) fn set_memory_state(
&mut self,
fsrs: &FSRS,
item: Option<FsrsItemWithStartingState>,
item: Option<FsrsItemForMemoryState>,
historical_retention: f32,
) -> Result<()> {
let memory_state = if let Some(i) = item {
@ -196,22 +196,21 @@ impl Card {
}
#[derive(Debug)]
pub(crate) struct FsrsItemWithStartingState {
pub(crate) struct FsrsItemForMemoryState {
pub item: FSRSItem,
/// When revlogs have been truncated, this stores the initial state at first
/// review
pub starting_state: Option<MemoryState>,
}
/// When updating memory state, FSRS only requires the last FSRSItem that
/// contains the full history.
pub(crate) fn fsrs_items_for_memory_state(
/// Like [fsrs_item_for_memory_state], but for updating multiple cards at once.
pub(crate) fn fsrs_items_for_memory_states(
fsrs: &FSRS,
revlogs: Vec<RevlogEntry>,
next_day_at: TimestampSecs,
historical_retention: f32,
ignore_revlogs_before: TimestampMillis,
) -> Result<Vec<(CardId, Option<FsrsItemWithStartingState>)>> {
) -> Result<Vec<(CardId, Option<FsrsItemForMemoryState>)>> {
revlogs
.into_iter()
.chunk_by(|r| r.cid)
@ -219,7 +218,7 @@ pub(crate) fn fsrs_items_for_memory_state(
.map(|(card_id, group)| {
Ok((
card_id,
single_card_revlog_to_item(
fsrs_item_for_memory_state(
fsrs,
group.collect(),
next_day_at,
@ -273,41 +272,35 @@ fn get_last_revlog_info(revlogs: &[RevlogEntry]) -> HashMap<CardId, LastRevlogIn
/// a truncated revlog), we return the starting state inferred from the first
/// revlog entry, so that the first review is not treated as if started from
/// scratch.
pub(crate) fn single_card_revlog_to_item(
pub(crate) fn fsrs_item_for_memory_state(
fsrs: &FSRS,
entries: Vec<RevlogEntry>,
next_day_at: TimestampSecs,
historical_retention: f32,
ignore_revlogs_before: TimestampMillis,
) -> Result<Option<FsrsItemWithStartingState>> {
) -> Result<Option<FsrsItemForMemoryState>> {
struct FirstReview {
interval: f32,
ease_factor: f32,
}
let first_review = entries
.iter()
// ignore manual and rescheduled revlogs and revlogs before the cutoff
.find(|e| e.button_chosen > 0 && e.id.0 >= ignore_revlogs_before.0)
.map(|e| FirstReview {
interval: e.interval.max(1) as f32,
ease_factor: if e.ease_factor == 0 {
2500
} else {
e.ease_factor
} as f32
/ 1000.0,
});
if let Some((mut items, revlogs_complete, _)) =
single_card_revlog_to_items(entries, next_day_at, false, ignore_revlogs_before)
{
let mut item = items.pop().unwrap();
if revlogs_complete {
Ok(Some(FsrsItemWithStartingState {
if let Some(mut output) = reviews_for_fsrs(entries, next_day_at, false, ignore_revlogs_before) {
let mut item = output.fsrs_items.pop().unwrap();
if output.revlogs_complete {
Ok(Some(FsrsItemForMemoryState {
item,
starting_state: None,
}))
} else if let Some(first_review) = first_review {
} else if let Some(first_user_grade) = output.filtered_revlogs.first() {
// the revlog has been truncated, but not fully
let first_review = FirstReview {
interval: first_user_grade.interval.max(1) as f32,
ease_factor: if first_user_grade.ease_factor == 0 {
2500
} else {
first_user_grade.ease_factor
} as f32
/ 1000.0,
};
let mut starting_state = fsrs.memory_state_from_sm2(
first_review.ease_factor,
first_review.interval,
@ -317,15 +310,12 @@ pub(crate) fn single_card_revlog_to_item(
if first_review.ease_factor <= 1.1 {
starting_state.difficulty = (first_review.ease_factor - 0.1) * 9.0 + 1.0;
}
// remove the first review because it has been converted to the starting state
item.reviews.remove(0);
if item.reviews.is_empty() {
Ok(None)
} else {
Ok(Some(FsrsItemWithStartingState {
item,
starting_state: Some(starting_state),
}))
}
Ok(Some(FsrsItemForMemoryState {
item,
starting_state: Some(starting_state),
}))
} else {
// only manual and rescheduled revlogs; treat like empty
Ok(None)
@ -360,15 +350,15 @@ mod tests {
// cards without any learning steps due to truncated history still have memory
// state calculated
let fsrs = FSRS::new(Some(&[])).unwrap();
let item = single_card_revlog_to_item(
let item = fsrs_item_for_memory_state(
&fsrs,
vec![
RevlogEntry {
ease_factor: 2500,
interval: 100,
..revlog(RevlogReviewKind::Review, 100)
..revlog(RevlogReviewKind::Review, 99)
},
revlog(RevlogReviewKind::Review, 1),
revlog(RevlogReviewKind::Review, 0),
],
TimestampSecs::now(),
0.9,
@ -394,9 +384,9 @@ mod tests {
difficulty: 5.7909784,
}),
);
// but if there's only a single revlog entry, we'll fall back on current card
// state
let item = single_card_revlog_to_item(
// cards with a single review-type entry also get memory states from revlog
// rather than card states
let item = fsrs_item_for_memory_state(
&fsrs,
vec![RevlogEntry {
ease_factor: 2500,
@ -406,17 +396,15 @@ mod tests {
TimestampSecs::now(),
0.9,
0.into(),
)?;
assert!(item.is_none());
card.interval = 123;
card.ease_factor = 2000;
card.ctype = CardType::Review;
card.set_memory_state(&fsrs, item, 0.9)?;
)?
.unwrap();
assert!(item.item.reviews.is_empty());
card.set_memory_state(&fsrs, Some(item), 0.9)?;
assert_int_eq(
card.memory_state,
Some(FsrsMemoryState {
stability: 122.99994,
difficulty: 7.334526,
stability: 99.999954,
difficulty: 5.840841,
}),
);
Ok(())

View file

@ -229,43 +229,56 @@ fn fsrs_items_for_training(
.chunk_by(|r| r.cid)
.into_iter()
.filter_map(|(_cid, entries)| {
single_card_revlog_to_items(entries.collect(), next_day_at, true, review_revlogs_before)
reviews_for_fsrs(entries.collect(), next_day_at, true, review_revlogs_before)
})
.flat_map(|i| {
review_count += i.2;
review_count += i.filtered_revlogs.len();
i.0
i.fsrs_items
})
.collect_vec();
revlogs.sort_by_cached_key(|r| r.reviews.len());
(revlogs, review_count)
}
/// Transform the revlog history for a card into a list of FSRSItems. FSRS
/// expects multiple items for a given card when training - for revlog
/// `[1,2,3]`, we create FSRSItems corresponding to `[1,2]` and `[1,2,3]`
/// in training, and `[1]`, [1,2]` and `[1,2,3]` when calculating memory
/// state.
pub(crate) struct ReviewsForFsrs {
/// The revlog entries that remain after filtering (e.g. excluding
/// review entries prior to a card being reset).
pub filtered_revlogs: Vec<RevlogEntry>,
/// FSRS items derived from the filtered revlogs.
pub fsrs_items: Vec<FSRSItem>,
/// True if there is enough history to derive memory state from history
/// alone. If false, memory state will be derived from SM2.
pub revlogs_complete: bool,
}
/// Filter out unwanted revlog entries, then create a series of FSRS items for
/// training/memory state calculation.
///
/// Returns (items, revlog_complete, review_count).
/// revlog_complete is assumed when the revlogs have a learning step, or start
/// with manual scheduling. When revlogs are incomplete, the starting difficulty
/// is later inferred from the SM2 data, instead of using the standard FSRS
/// initial difficulty. review_count is the number of reviews used after
/// filtering out unwanted ones.
pub(crate) fn single_card_revlog_to_items(
/// Filtering consists of removing revlog entries before the supplied timestamp,
/// and removing items such as reviews that happened prior to a card being reset
/// to new.
pub(crate) fn reviews_for_fsrs(
mut entries: Vec<RevlogEntry>,
next_day_at: TimestampSecs,
training: bool,
ignore_revlogs_before: TimestampMillis,
) -> Option<(Vec<FSRSItem>, bool, usize)> {
) -> Option<ReviewsForFsrs> {
let mut first_of_last_learn_entries = None;
let mut first_user_grade_idx = None;
let mut revlogs_complete = false;
// Working backwards from the latest review...
for (index, entry) in entries.iter().enumerate().rev() {
if matches!(
(entry.review_kind, entry.button_chosen),
(RevlogReviewKind::Learning, 1..=4)
) {
if entry.review_kind == RevlogReviewKind::Filtered && entry.ease_factor == 0 {
continue;
}
let within_cutoff = entry.id.0 > ignore_revlogs_before.0;
let user_graded = matches!(entry.button_chosen, 1..=4);
if user_graded && within_cutoff {
first_user_grade_idx = Some(index);
}
if user_graded && entry.review_kind == RevlogReviewKind::Learning {
first_of_last_learn_entries = Some(index);
revlogs_complete = true;
} else if first_of_last_learn_entries.is_some() {
@ -274,32 +287,25 @@ pub(crate) fn single_card_revlog_to_items(
(entry.review_kind, entry.ease_factor),
(RevlogReviewKind::Manual, 0)
) {
// If we find a `Learn` entry after the `Forget` entry, we should
// ignore the entries before the `Forget` entry
// Ignore entries prior to a `Reset` if a learning step has come after,
// but consider revlogs complete.
if first_of_last_learn_entries.is_some() {
revlogs_complete = true;
break;
// Ignore entries prior to a `Reset` if the user has graded a card
// after the reset.
} else if first_user_grade_idx.is_some() {
revlogs_complete = false;
break;
// If we don't find a `Learn` entry after the `Forget` entry, it's
// a new card and we should ignore all entries
// User has not graded the card since it was reset, so all history
// filtered out.
} else {
return None;
}
}
}
if !revlogs_complete {
revlogs_complete = matches!(
entries.first(),
Some(RevlogEntry {
review_kind: RevlogReviewKind::Manual,
..
}) | Some(RevlogEntry {
review_kind: RevlogReviewKind::Rescheduled,
..
})
);
}
if training {
// While training ignore the entire card if the first learning step of the last
// While training, ignore the entire card if the first learning step of the last
// group of learning steps is before the ignore_revlogs_before date
if let Some(idx) = first_of_last_learn_entries {
if entries[idx].id.0 < ignore_revlogs_before.0 {
@ -307,38 +313,29 @@ pub(crate) fn single_card_revlog_to_items(
}
}
} else {
// While reviewing if the first learning step is before the ignore date,
// ignore every review before and including the last learning step
// While reviewing, if the first learning step is before the ignore date,
// we ignore it, and will fall back on SM2 info and the last user grade below.
if let Some(idx) = first_of_last_learn_entries {
if entries[idx].id.0 < ignore_revlogs_before.0 && idx < entries.len() - 1 {
let last_learn_entry = entries
.iter()
.enumerate()
.rev()
.find(|(_idx, e)| e.review_kind == RevlogReviewKind::Learning)
.map(|(idx, _)| idx);
entries.drain(..(last_learn_entry? + 1));
revlogs_complete = false;
first_of_last_learn_entries = None;
}
}
}
let first_relearn = entries
.iter()
.enumerate()
.find(|(_idx, e)| {
e.id.0 > ignore_revlogs_before.0 && e.review_kind == RevlogReviewKind::Relearning
})
.map(|(idx, _)| idx);
if let Some(idx) = first_of_last_learn_entries.or(first_relearn) {
// start from the (re)learning step
if let Some(idx) = first_of_last_learn_entries {
// start from the learning step
if idx > 0 {
entries.drain(..idx);
}
} else if training {
// when training, we ignore cards that don't have any learning steps
return None;
} else if let Some(idx) = first_user_grade_idx {
// if there are no learning entries, but the user has reviewed the card,
// we ignore all entries before the first grade
if idx > 0 {
entries.drain(..idx);
}
}
// Filter out unwanted entries
@ -384,7 +381,11 @@ pub(crate) fn single_card_revlog_to_items(
if items.is_empty() {
None
} else {
Some((items, revlogs_complete, entries.len()))
Some(ReviewsForFsrs {
fsrs_items: items,
revlogs_complete,
filtered_revlogs: entries,
})
}
}
@ -443,8 +444,8 @@ pub(crate) mod tests {
training: bool,
ignore_before: TimestampMillis,
) -> Option<Vec<FSRSItem>> {
single_card_revlog_to_items(revlog.to_vec(), NEXT_DAY_AT, training, ignore_before)
.map(|i| i.0)
reviews_for_fsrs(revlog.to_vec(), NEXT_DAY_AT, training, ignore_before)
.map(|i| i.fsrs_items)
}
pub(crate) fn convert(revlog: &[RevlogEntry], training: bool) -> Option<Vec<FSRSItem>> {
@ -598,6 +599,19 @@ pub(crate) mod tests {
);
}
#[test]
fn partially_ignored_learning_steps_terminate_training() {
let revlogs = &[
revlog(RevlogReviewKind::Learning, 10),
revlog(RevlogReviewKind::Learning, 8),
revlog(RevlogReviewKind::Review, 6),
];
// | = Ignore before
// L = learning step
// L | L R
assert_eq!(convert_ignore_before(revlogs, true, days_ago_ms(9)), None);
}
#[test]
fn ignore_before_date_between_learning_steps_when_reviewing() {
let revlogs = &[
@ -614,7 +628,7 @@ pub(crate) mod tests {
convert_ignore_before(revlogs, false, days_ago_ms(9))
.unwrap()
.len(),
1
2
);
// | L L R
assert_eq!(

View file

@ -6,7 +6,7 @@ use fsrs::FSRS;
use crate::card::CardType;
use crate::prelude::*;
use crate::revlog::RevlogEntry;
use crate::scheduler::fsrs::memory_state::single_card_revlog_to_item;
use crate::scheduler::fsrs::memory_state::fsrs_item_for_memory_state;
use crate::scheduler::fsrs::params::ignore_revlogs_before_ms_from_config;
use crate::scheduler::timing::is_unix_epoch_timestamp;
@ -144,7 +144,7 @@ impl Collection {
for entry in revlog {
accumulated_revlog.push(entry.clone());
let item = single_card_revlog_to_item(
let item = fsrs_item_for_memory_state(
&fsrs,
accumulated_revlog.clone(),
next_day_at,