Rework v3 fuzzing (#1474)

* Remove flooring in v3 scheduler code
  It is no longer supposed to be an exact port of the old Python code.
* Rework v3 fuzzing
  https://github.com/ankitects/anki/issues/1416#issuecomment-958208149
* Ensure length of fuzz range is larger than 1
  Only for new intervals larger than 1 and respecting max review interval.
* add the beginnings of a unit test
* Clarify `fuzz_factor` doc string
* Fix Python tests for 2021 scheduler
* Fix fuzz test
  1.0 is not a valid fuzz factor.
* Add tests for fuzzing in Rust
* Use range notation in fuzz factor doc
* Strip redundant tests
2025-12-21 10:52:57 -05:00 · 2021-11-06 01:39:24 +01:00 · 2021-11-06 01:39:24 +01:00 · 283776d8e7
commit 283776d8e7
parent 5a9a03e65a
4 changed files with 164 additions and 55 deletions
--- a/pylib/tests/test_schedv2.py
+++ b/pylib/tests/test_schedv2.py
@ -38,6 +38,8 @@ def test_clock():


 def checkRevIvl(col, c, targetIvl):
+    if is_2021():
+        return
    min, max = col.sched._fuzzIvlRange(targetIvl)
    assert min <= c.ivl <= max

@ -715,6 +717,7 @@ def test_suspend():


 def test_filt_reviewing_early_normal():
+    to_int = round if is_2021() else int
    col = getEmptyCol()
    note = col.newNote()
    note["Front"] = "one"
@ -743,9 +746,9 @@ def test_filt_reviewing_early_normal():
    c = col.sched.getCard()
    assert col.sched.answerButtons(c) == 4
    assert col.sched.nextIvl(c, 1) == 600
-    assert col.sched.nextIvl(c, 2) == int(75 * 1.2) * 86400
-    assert col.sched.nextIvl(c, 3) == int(75 * 2.5) * 86400
-    assert col.sched.nextIvl(c, 4) == int(75 * 2.5 * 1.15) * 86400
+    assert col.sched.nextIvl(c, 2) == to_int(75 * 1.2) * 86400
+    assert col.sched.nextIvl(c, 3) == to_int(75 * 2.5) * 86400
+    assert col.sched.nextIvl(c, 4) == to_int(75 * 2.5 * 1.15) * 86400

    # answer 'good'
    col.sched.answerCard(c, 3)
@ -765,9 +768,9 @@ def test_filt_reviewing_early_normal():
    col.reset()
    c = col.sched.getCard()

-    assert col.sched.nextIvl(c, 2) == 60 * 86400
+    assert col.sched.nextIvl(c, 2) == 100 * 1.2 / 2 * 86400
    assert col.sched.nextIvl(c, 3) == 100 * 86400
-    assert col.sched.nextIvl(c, 4) == 114 * 86400
+    assert col.sched.nextIvl(c, 4) == to_int(100 * (1.3 - (1.3 - 1) / 2)) * 86400


 def test_filt_keep_lrn_state():
--- a/rslib/src/scheduler/answering/mod.rs
+++ b/rslib/src/scheduler/answering/mod.rs
@ -8,6 +8,8 @@ mod relearning;
 mod review;
 mod revlog;

+use rand::{prelude::*, rngs::StdRng};
+
 use revlog::RevlogEntryPartial;

 use super::{
@ -59,7 +61,7 @@ impl CardStateUpdater {
    /// state handling code from the rest of the Anki codebase.
    pub(crate) fn state_context(&self) -> StateContext<'_> {
        StateContext {
-            fuzz_seed: self.fuzz_seed,
+            fuzz_factor: get_fuzz_factor(self.fuzz_seed),
            steps: self.learn_steps(),
            graduating_interval_good: self.config.inner.graduating_interval_good,
            graduating_interval_easy: self.config.inner.graduating_interval_easy,
@ -428,6 +430,12 @@ fn get_fuzz_seed(card: &Card) -> Option<u64> {
    }
 }

+/// Return a fuzz factor from the range `0.0..1.0`, using the provided seed.
+/// None if seed is None.
+fn get_fuzz_factor(seed: Option<u64>) -> Option<f32> {
+    seed.map(|s| StdRng::seed_from_u64(s).gen_range(0.0..1.0))
+}
+
 #[cfg(test)]
 mod test {
    use super::*;
--- a/rslib/src/scheduler/states/mod.rs
+++ b/rslib/src/scheduler/states/mod.rs
@ -18,7 +18,6 @@ pub use learning::LearnState;
 pub use new::NewState;
 pub use normal::NormalState;
 pub use preview_filter::PreviewState;
-use rand::{prelude::*, rngs::StdRng};
 pub use relearning::RelearnState;
 pub use rescheduling_filter::ReschedulingFilterState;
 pub use review::ReviewState;
@ -69,7 +68,8 @@ impl CardState {

 /// Info required during state transitions.
 pub(crate) struct StateContext<'a> {
-    pub fuzz_seed: Option<u64>,
+    /// In range `0.0..1.0`. Used to pick the final interval from the fuzz range.
+    pub fuzz_factor: Option<f32>,

    // learning
    pub steps: LearningSteps<'a>,
@ -95,45 +95,90 @@ pub(crate) struct StateContext<'a> {
 }

 impl<'a> StateContext<'a> {
-    pub(crate) fn with_review_fuzz(&self, interval: f32) -> u32 {
-        // fixme: floor() is to match python
-        let interval = interval.floor();
-        if let Some(seed) = self.fuzz_seed {
-            let mut rng = StdRng::seed_from_u64(seed);
-            let (lower, upper) = if interval < 2.0 {
-                (1.0, 1.0)
-            } else if interval < 3.0 {
-                (2.0, 3.0)
-            } else if interval < 7.0 {
-                fuzz_range(interval, 0.25, 0.0)
-            } else if interval < 30.0 {
-                fuzz_range(interval, 0.15, 2.0)
-            } else {
-                fuzz_range(interval, 0.05, 4.0)
-            };
-            if lower >= upper {
-                lower
-            } else {
-                rng.gen_range(lower..upper)
-            }
+    /// Return the minimum and maximum review intervals.
+    /// - `maximum` is `self.maximum_review_interval`, but at least 1.
+    /// - `minimum` is as passed, but at least 1, and at most `maximum`.
+    pub(crate) fn min_and_max_review_intervals(&self, minimum: u32) -> (u32, u32) {
+        let maximum = self.maximum_review_interval.max(1);
+        let minimum = minimum.max(1).min(maximum);
+        (minimum, maximum)
+    }
+
+    /// Apply fuzz, respecting the passed bounds.
+    /// Caller must ensure reasonable bounds.
+    pub(crate) fn with_review_fuzz(&self, interval: f32, minimum: u32, maximum: u32) -> u32 {
+        if let Some(fuzz_factor) = self.fuzz_factor {
+            let (lower, upper) = constrained_fuzz_bounds(interval, minimum, maximum);
+            (lower as f32 + fuzz_factor * ((1 + upper - lower) as f32)).floor() as u32
        } else {
-            interval
+            (interval.round() as u32).max(minimum).min(maximum)
        }
-        .round() as u32
    }

    pub(crate) fn fuzzed_graduating_interval_good(&self) -> u32 {
-        self.with_review_fuzz(self.graduating_interval_good as f32)
+        let (minimum, maximum) = self.min_and_max_review_intervals(1);
+        self.with_review_fuzz(self.graduating_interval_good as f32, minimum, maximum)
    }

    pub(crate) fn fuzzed_graduating_interval_easy(&self) -> u32 {
-        self.with_review_fuzz(self.graduating_interval_easy as f32)
+        let (minimum, maximum) = self.min_and_max_review_intervals(1);
+        self.with_review_fuzz(self.graduating_interval_easy as f32, minimum, maximum)
+    }
+
+    #[cfg(test)]
+    pub(crate) fn defaults_for_testing() -> Self {
+        Self {
+            fuzz_factor: None,
+            steps: LearningSteps::new(&[60.0, 600.0]),
+            graduating_interval_good: 1,
+            graduating_interval_easy: 4,
+            initial_ease_factor: 2.5,
+            hard_multiplier: 1.2,
+            easy_multiplier: 1.3,
+            interval_multiplier: 1.0,
+            maximum_review_interval: 36500,
+            leech_threshold: 8,
+            relearn_steps: LearningSteps::new(&[600.0]),
+            lapse_multiplier: 0.0,
+            minimum_lapse_interval: 1,
+            in_filtered_deck: false,
+            preview_step: 10,
+        }
    }
 }

-fn fuzz_range(interval: f32, factor: f32, minimum: f32) -> (f32, f32) {
+/// Return the bounds of the fuzz range, respecting `minimum` and `maximum`.
+/// Ensure the upper bound is larger than the lower bound, if `maximum` allows
+/// it and it is larger than 1.
+fn constrained_fuzz_bounds(interval: f32, minimum: u32, maximum: u32) -> (u32, u32) {
+    let (lower, mut upper) = fuzz_bounds(interval);
+    let lower = lower.max(minimum);
+    if upper == lower && upper != 1 {
+        upper = lower + 1;
+    };
+    (lower, upper.min(maximum))
+}
+
+fn fuzz_bounds(interval: f32) -> (u32, u32) {
+    if interval < 2.0 {
+        (1, 1)
+    } else if interval < 3.0 {
+        (2, 3)
+    } else if interval < 7.0 {
+        fuzz_range(interval, 0.25, 0.0)
+    } else if interval < 30.0 {
+        fuzz_range(interval, 0.15, 2.0)
+    } else {
+        fuzz_range(interval, 0.05, 4.0)
+    }
+}
+
+fn fuzz_range(interval: f32, factor: f32, minimum: f32) -> (u32, u32) {
    let delta = (interval * factor).max(minimum).max(1.0);
-    (interval - delta, interval + delta + 1.0)
+    (
+        (interval - delta).round() as u32,
+        (interval + delta).round() as u32,
+    )
 }

 #[derive(Debug, Clone)]
@ -186,3 +231,66 @@ impl From<ReschedulingFilterState> for CardState {
        CardState::Filtered(FilteredState::Rescheduling(state))
    }
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn min_and_max_review_intervals() {
+        let mut ctx = StateContext::defaults_for_testing();
+        ctx.maximum_review_interval = 0;
+        assert_eq!(ctx.min_and_max_review_intervals(0), (1, 1));
+        assert_eq!(ctx.min_and_max_review_intervals(2), (1, 1));
+        ctx.maximum_review_interval = 3;
+        assert_eq!(ctx.min_and_max_review_intervals(0), (1, 3));
+        assert_eq!(ctx.min_and_max_review_intervals(2), (2, 3));
+        assert_eq!(ctx.min_and_max_review_intervals(4), (3, 3));
+    }
+
+    fn assert_lower_middle_upper(
+        ctx: &mut StateContext,
+        interval: f32,
+        minimum: u32,
+        maximum: u32,
+        lower: u32,
+        middle: u32,
+        upper: u32,
+    ) {
+        ctx.fuzz_factor = Some(0.0);
+        assert_eq!(ctx.with_review_fuzz(interval, minimum, maximum), lower);
+        ctx.fuzz_factor = Some(0.5);
+        assert_eq!(ctx.with_review_fuzz(interval, minimum, maximum), middle);
+        ctx.fuzz_factor = Some(0.99);
+        assert_eq!(ctx.with_review_fuzz(interval, minimum, maximum), upper);
+    }
+
+    #[test]
+    fn with_review_fuzz() {
+        let mut ctx = StateContext::defaults_for_testing();
+
+        // no fuzz
+        assert_eq!(ctx.with_review_fuzz(1.5, 1, 100), 2);
+        assert_eq!(ctx.with_review_fuzz(0.1, 1, 100), 1);
+        assert_eq!(ctx.with_review_fuzz(101.0, 1, 100), 100);
+
+        // no fuzzing for an interval of 1
+        assert_lower_middle_upper(&mut ctx, 1.0, 1, 1000, 1, 1, 1);
+        // fuzz range is (2, 3) for an interval of 2
+        assert_lower_middle_upper(&mut ctx, 2.0, 1, 1000, 2, 3, 3);
+        // 25%, 15%, 5% percent fuzz, but at least 1, 2, 4
+        assert_lower_middle_upper(&mut ctx, 5.0, 1, 1000, 4, 5, 6);
+        assert_lower_middle_upper(&mut ctx, 20.0, 1, 1000, 17, 20, 23);
+        assert_lower_middle_upper(&mut ctx, 100.0, 1, 1000, 95, 100, 105);
+
+        // ensure fuzz range of at least 2, if allowed
+        assert_lower_middle_upper(&mut ctx, 2.0, 2, 1000, 2, 3, 3);
+        assert_lower_middle_upper(&mut ctx, 2.0, 3, 1000, 3, 4, 4);
+        assert_lower_middle_upper(&mut ctx, 2.0, 3, 3, 3, 3, 3);
+
+        // respect limits and preserve uniform distribution of valid intervals
+        assert_lower_middle_upper(&mut ctx, 100.0, 101, 1000, 101, 103, 105);
+        assert_lower_middle_upper(&mut ctx, 100.0, 1, 99, 95, 97, 99);
+        assert_lower_middle_upper(&mut ctx, 100.0, 97, 103, 97, 100, 103);
+    }
+}
--- a/rslib/src/scheduler/states/review.rs
+++ b/rslib/src/scheduler/states/review.rs
@ -65,8 +65,7 @@ impl ReviewState {
    }

    pub(crate) fn failing_review_interval(self, ctx: &StateContext) -> u32 {
-        // fixme: floor() is for python
-        (((self.scheduled_days as f32) * ctx.lapse_multiplier).floor() as u32)
+        (((self.scheduled_days as f32) * ctx.lapse_multiplier) as u32)
            .max(ctx.minimum_lapse_interval)
            .max(1)
    }
@ -141,13 +140,11 @@ impl ReviewState {
            self.scheduled_days + 1
        };

-        // fixme: floor() is to match python
-
        let hard_interval =
            constrain_passing_interval(ctx, current_interval * hard_factor, hard_minimum, true);
        let good_interval = constrain_passing_interval(
            ctx,
-            (current_interval + (days_late / 2.0).floor()) * self.ease_factor,
+            (current_interval + days_late / 2.0) * self.ease_factor,
            hard_interval + 1,
            true,
        );
@ -184,13 +181,10 @@ impl ReviewState {
            constrain_passing_interval(ctx, (elapsed * self.ease_factor).max(scheduled), 0, false);

        let easy_interval = {
-            // currently flooring() f64s to match python output
-            let easy_mult = ctx.easy_multiplier as f64;
-            let reduced_bonus = easy_mult - (easy_mult - 1.0) / 2.0;
+            let reduced_bonus = ctx.easy_multiplier - (ctx.easy_multiplier - 1.0) / 2.0;
            constrain_passing_interval(
                ctx,
-                ((elapsed as f64 * self.ease_factor as f64).max(scheduled as f64) * reduced_bonus)
-                    .floor() as f32,
+                (elapsed * self.ease_factor).max(scheduled) * reduced_bonus,
                0,
                false,
            )
@ -218,17 +212,13 @@ fn leech_threshold_met(lapses: u32, threshold: u32) -> bool {
 /// - Ensure it is at least `minimum`, and at least 1.
 /// - Ensure it is at or below the configured maximum interval.
 fn constrain_passing_interval(ctx: &StateContext, interval: f32, minimum: u32, fuzz: bool) -> u32 {
-    // fixme: floor is to match python
-    let interval = interval.floor() * ctx.interval_multiplier;
-    let interval = if fuzz {
-        ctx.with_review_fuzz(interval)
+    let interval = interval * ctx.interval_multiplier;
+    let (minimum, maximum) = ctx.min_and_max_review_intervals(minimum);
+    if fuzz {
+        ctx.with_review_fuzz(interval, minimum, maximum)
    } else {
-        interval.floor() as u32
-    };
-    interval
-        .max(minimum)
-        .min(ctx.maximum_review_interval)
-        .max(1)
+        (interval.round() as u32).max(minimum).min(maximum)
+    }
 }

 #[cfg(test)]