load balancer! (#3230)

* start of load balancer

* add configuration options; option to load balance per deck

* formatting

* clippy

* add myself to contributors

* cleanup

* cargo fmt

* copyright header on load_balancer.rs

* remove extra space

* more formatting

* python formatting

* ignore this being None

only doing this because Python has awful lambdas and can't
loop in a meaningful way without doing this

* only calculate notes on each day if we are trying to avoid siblings

* don't fuzz intervals if the load balancer is enabled

* force generator to eval so this actually happens

* load balance instead of fuzzing, rather than in addition to

* use builtin fuzz_bounds rather than reinvent something new

* print some debug info on how it's load balancing

* clippy

* more accurately load balance only when we want to fuzz

* incorrectly doublechecking the presence of the load balancer

* more printfs for debugging

* avoid siblings -> disperse siblings

* load balance learning graduating intervals

* load balancer: respect min/max intervals; graduating easy should be at least +1 good

* filter out after-days under minimum interval

* this is an inclusive check

* switch load balancer to caching instead of on the fly calculation

* handle case where load balancer would balance outside of its bounds

* disable lb when unselecting it in preferences

* call load_balancer in StateContext::with_review_fuzz instead of next to

* rebuild load balancer when card queue is rebuilt

* remove now-unused configuration options

* add note option to notetype to enable/disable sibling dispersion

* add options to exclude decks from load balancing

* there's a lint checking that the link actually exists so I guess I'll add the anchor back in later?

* how did I even update this

* move load balancer to cardqueue

* remove per-deck balancing options

* improve determining whether to disperse siblings when load balancing

* don't recalculate notes on days every time

* remove debug code

* remove all configuration; load balancer enabled by default; disperse siblings if bury_reviews is set

* didn't fully remove caring about decks from load balancer sql query

* load balancer should only count cards in the same preset

* fuzz interval if it's outside of load balancer's range

* also check minimum when bailing out of load balancer

* cleanup; make tests happy

* experimental weight-based load balance fuzzing

* take into account interval when weighting as it seems to help

* if there are no cards the interval weight is just 1.0

* make load balancer disableable through debug console

* remove debug prints

* typo

* remove debugging print

* explain a bit how load balancer works

* properly balance per preset

* use inclusive range rather than +1

* -1 type cast

* move type hint somewhere less ugly; fix comment typo

* Reuse existing deck list from parent function (dae)

Minor optimisation
This commit is contained in:
Jake Probst 2024-08-16 22:50:54 -07:00 committed by GitHub
parent a87a44da2c
commit c6cb4e4373
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 369 additions and 5 deletions

View file

@ -181,6 +181,7 @@ James Elmore <email@jameselmore.org>
Ian Samir Yep Manzano <https://github.com/isym444>
David Culley <6276049+davidculley@users.noreply.github.com>
Rastislav Kish <rastislav.kish@protonmail.com>
jake <jake@sharnoth.com>
Expertium <https://github.com/Expertium>
Christian Donat <https://github.com/cdonat2>
Asuka Minato <https://asukaminato.eu.org>

View file

@ -54,6 +54,7 @@ message ConfigKey {
RANDOM_ORDER_REPOSITION = 23;
SHIFT_POSITION_OF_EXISTING_CARDS = 24;
RENDER_LATEX = 25;
LOAD_BALANCER_ENABLED = 26;
}
enum String {
SET_DUE_BROWSER = 0;
@ -115,6 +116,7 @@ message Preferences {
bool show_remaining_due_counts = 3;
bool show_intervals_on_buttons = 4;
uint32 time_limit_secs = 5;
bool load_balancer_enabled = 6;
}
message Editing {
bool adding_defaults_to_current_deck = 1;

View file

@ -972,6 +972,16 @@ class Collection(DeprecatedNamesMixin):
)
return self.set_config(key, value, undoable=undoable)
def _get_enable_load_balancer(self) -> bool:
return self.get_config_bool(Config.Bool.LOAD_BALANCER_ENABLED)
def _set_enable_load_balancer(self, value: bool) -> None:
self.set_config_bool(Config.Bool.LOAD_BALANCER_ENABLED, value)
load_balancer_enabled = property(
fget=_get_enable_load_balancer, fset=_set_enable_load_balancer
)
# Stats
##########################################################################

View file

@ -37,6 +37,7 @@ impl From<BoolKeyProto> for BoolKey {
BoolKeyProto::RandomOrderReposition => BoolKey::RandomOrderReposition,
BoolKeyProto::ShiftPositionOfExistingCards => BoolKey::ShiftPositionOfExistingCards,
BoolKeyProto::RenderLatex => BoolKey::RenderLatex,
BoolKeyProto::LoadBalancerEnabled => BoolKey::LoadBalancerEnabled,
}
}
}

View file

@ -40,6 +40,7 @@ pub enum BoolKey {
WithScheduling,
WithDeckConfigs,
Fsrs,
LoadBalancerEnabled,
#[strum(to_string = "normalize_note_text")]
NormalizeNoteText,
#[strum(to_string = "dayLearnFirst")]
@ -73,6 +74,7 @@ impl Collection {
| BoolKey::CardCountsSeparateInactive
| BoolKey::RestorePositionBrowser
| BoolKey::RestorePositionReviewer
| BoolKey::LoadBalancerEnabled
| BoolKey::NormalizeNoteText => self.get_config_optional(key).unwrap_or(true),
// other options default to false

View file

@ -98,6 +98,7 @@ impl Collection {
show_intervals_on_buttons: self
.get_config_bool(BoolKey::ShowIntervalsAboveAnswerButtons),
time_limit_secs: self.get_answer_time_limit_secs(),
load_balancer_enabled: self.get_config_bool(BoolKey::LoadBalancerEnabled),
})
}
@ -117,6 +118,8 @@ impl Collection {
s.show_intervals_on_buttons,
)?;
self.set_answer_time_limit_secs(s.time_limit_secs)?;
self.set_config_bool_inner(BoolKey::LoadBalancerEnabled, s.load_balancer_enabled)?;
Ok(())
}

View file

@ -16,6 +16,7 @@ use revlog::RevlogEntryPartial;
use super::fsrs::weights::ignore_revlogs_before_ms_from_config;
use super::queue::BuryMode;
use super::states::load_balancer::LoadBalancerContext;
use super::states::steps::LearningSteps;
use super::states::CardState;
use super::states::FilteredState;
@ -26,6 +27,7 @@ use super::timespan::answer_button_time_collapsible;
use super::timing::SchedTimingToday;
use crate::card::CardQueue;
use crate::card::CardType;
use crate::config::BoolKey;
use crate::deckconfig::DeckConfig;
use crate::deckconfig::LeechAction;
use crate::decks::Deck;
@ -77,7 +79,10 @@ impl CardStateUpdater {
/// Returns information required when transitioning from one card state to
/// another with `next_states()`. This separate structure decouples the
/// state handling code from the rest of the Anki codebase.
pub(crate) fn state_context(&self) -> StateContext<'_> {
pub(crate) fn state_context<'a>(
&'a self,
load_balancer: Option<LoadBalancerContext<'a>>,
) -> StateContext<'a> {
StateContext {
fuzz_factor: get_fuzz_factor(self.fuzz_seed),
steps: self.learn_steps(),
@ -89,6 +94,8 @@ impl CardStateUpdater {
interval_multiplier: self.config.inner.interval_multiplier,
maximum_review_interval: self.config.inner.maximum_review_interval,
leech_threshold: self.config.inner.leech_threshold,
load_balancer: load_balancer
.map(|load_balancer| load_balancer.set_fuzz_seed(self.fuzz_seed)),
relearn_steps: self.relearn_steps(),
lapse_multiplier: self.config.inner.lapse_multiplier,
minimum_lapse_interval: self.config.inner.minimum_lapse_interval,
@ -215,9 +222,36 @@ impl Collection {
/// Return the next states that will be applied for each answer button.
pub fn get_scheduling_states(&mut self, cid: CardId) -> Result<SchedulingStates> {
let card = self.storage.get_card(cid)?.or_not_found(cid)?;
let deck = self.get_deck(card.deck_id)?.or_not_found(card.deck_id)?;
let note_id = deck
.config_id()
.map(|deck_config_id| self.get_deck_config(deck_config_id, false))
.transpose()?
.flatten()
.map(|deck_config| deck_config.inner.bury_reviews)
.unwrap_or(false)
.then_some(card.note_id);
let ctx = self.card_state_updater(card)?;
let current = ctx.current_card_state();
let state_ctx = ctx.state_context();
let load_balancer = self
.get_config_bool(BoolKey::LoadBalancerEnabled)
.then(|| {
let deckconfig_id = deck.config_id();
self.state.card_queues.as_ref().and_then(|card_queues| {
Some(
card_queues
.load_balancer
.review_context(note_id, deckconfig_id?),
)
})
})
.flatten();
let state_ctx = ctx.state_context(load_balancer);
Ok(current.next_states(&state_ctx))
}
@ -305,11 +339,26 @@ impl Collection {
card.custom_data = data;
card.validate_custom_data()?;
}
self.update_card_inner(&mut card, original, usn)?;
if answer.new_state.leeched() {
self.add_leech_tag(card.note_id)?;
}
if card.queue == CardQueue::Review {
let deck = self.get_deck(card.deck_id)?;
if let Some(card_queues) = self.state.card_queues.as_mut() {
if let Some(deckconfig_id) = deck.and_then(|deck| deck.config_id()) {
card_queues.load_balancer.add_card(
card.id,
card.note_id,
deckconfig_id,
card.interval,
)
}
}
}
self.update_queues_after_answering_card(
&card,
timing,

View file

@ -25,6 +25,7 @@ use crate::deckconfig::ReviewCardOrder;
use crate::deckconfig::ReviewMix;
use crate::decks::limits::LimitTreeMap;
use crate::prelude::*;
use crate::scheduler::states::load_balancer::LoadBalancer;
use crate::scheduler::timing::SchedTimingToday;
/// Temporary holder for review cards that will be built into a queue.
@ -99,13 +100,14 @@ pub(super) struct QueueSortOptions {
pub(super) new_review_mix: ReviewMix,
}
#[derive(Debug, Clone)]
#[derive(Debug)]
pub(super) struct QueueBuilder {
pub(super) new: Vec<NewCard>,
pub(super) review: Vec<DueCard>,
pub(super) learning: Vec<DueCard>,
pub(super) day_learning: Vec<DueCard>,
limits: LimitTreeMap,
load_balancer: LoadBalancer,
context: Context,
}
@ -144,12 +146,19 @@ impl QueueBuilder {
let sort_options = sort_options(&root_deck, &config_map);
let deck_map = col.storage.get_decks_map()?;
let did_to_dcid = deck_map
.values()
.filter_map(|deck| Some((deck.id, deck.config_id()?)))
.collect::<HashMap<_, _>>();
let load_balancer = LoadBalancer::new(timing.days_elapsed, did_to_dcid, &col.storage)?;
Ok(QueueBuilder {
new: Vec::new(),
review: Vec::new(),
learning: Vec::new(),
day_learning: Vec::new(),
limits,
load_balancer,
context: Context {
timing,
config_map,
@ -201,6 +210,7 @@ impl QueueBuilder {
learn_ahead_secs,
current_day: self.context.timing.days_elapsed,
build_time: TimestampMillis::now(),
load_balancer: self.load_balancer,
current_learning_cutoff: now,
}
}

View file

@ -23,6 +23,7 @@ use self::undo::QueueUpdate;
use super::states::SchedulingStates;
use super::timing::SchedTimingToday;
use crate::prelude::*;
use crate::scheduler::states::load_balancer::LoadBalancer;
use crate::timestamp::TimestampSecs;
#[derive(Debug)]
@ -37,6 +38,7 @@ pub(crate) struct CardQueues {
/// counts are zero. Ensures we don't show a newly-due learning card after a
/// user returns from editing a review card.
current_learning_cutoff: TimestampSecs,
pub(crate) load_balancer: LoadBalancer,
}
#[derive(Debug, Copy, Clone)]

View file

@ -38,6 +38,18 @@ impl Collection {
}
queues.push_undo_entry(update.entry);
}
if let Some(card_queues) = self.state.card_queues.as_mut() {
match &update.entry {
QueueEntry::IntradayLearning(entry) => {
card_queues.load_balancer.remove_card(entry.id);
}
QueueEntry::Main(entry) => {
card_queues.load_balancer.remove_card(entry.id);
}
}
}
self.save_undo(UndoableQueueChange::CardAnswerUndone(update));
Ok(())

View file

@ -34,7 +34,10 @@ static FUZZ_RANGES: [FuzzRange; 3] = [
impl<'a> StateContext<'a> {
/// Apply fuzz, respecting the passed bounds.
pub(crate) fn with_review_fuzz(&self, interval: f32, minimum: u32, maximum: u32) -> u32 {
with_review_fuzz(self.fuzz_factor, interval, minimum, maximum)
self.load_balancer
.as_ref()
.and_then(|load_balancer| load_balancer.find_interval(interval, minimum, maximum))
.unwrap_or_else(|| with_review_fuzz(self.fuzz_factor, interval, minimum, maximum))
}
}
@ -74,7 +77,7 @@ pub(crate) fn with_review_fuzz(
/// Return the bounds of the fuzz range, respecting `minimum` and `maximum`.
/// Ensure the upper bound is larger than the lower bound, if `maximum` allows
/// it and it is larger than 1.
fn constrained_fuzz_bounds(interval: f32, minimum: u32, maximum: u32) -> (u32, u32) {
pub(crate) fn constrained_fuzz_bounds(interval: f32, minimum: u32, maximum: u32) -> (u32, u32) {
let minimum = minimum.min(maximum);
let interval = interval.clamp(minimum as f32, maximum as f32);
let (mut lower, mut upper) = fuzz_bounds(interval);

View file

@ -0,0 +1,239 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashMap;
use std::collections::HashSet;
use rand::distributions::Distribution;
use rand::distributions::WeightedIndex;
use rand::rngs::StdRng;
use rand::SeedableRng;
use super::fuzz::constrained_fuzz_bounds;
use crate::card::CardId;
use crate::deckconfig::DeckConfigId;
use crate::notes::NoteId;
use crate::prelude::*;
use crate::storage::SqliteStorage;
/// Longest interval, in days, that the load balancer will handle;
/// `LoadBalancer::find_interval` declines to balance when the requested
/// interval or the minimum exceeds this.
const MAX_LOAD_BALANCE_INTERVAL: usize = 90;
// due to the nature of load balancing, we may schedule things in the future and
// so need to keep more than just the `MAX_LOAD_BALANCE_INTERVAL` days in our
// cache. a flat 10% increase over the max interval should be enough to not have
// problems
const LOAD_BALANCE_DAYS: usize = (MAX_LOAD_BALANCE_INTERVAL as f32 * 1.1) as usize;
/// Weight multiplier applied to a day that already holds a card of the same
/// note as the card being scheduled.
const SIBLING_PENALTY: f32 = 0.001;
/// The cards the load balancer is tracking for a single day.
#[derive(Debug, Default)]
struct LoadBalancerDay {
/// `(card, note)` pairs, one entry per card added to this day.
cards: Vec<(CardId, NoteId)>,
/// Notes that have at least one card in `cards`; backs the sibling lookup.
notes: HashSet<NoteId>,
}
impl LoadBalancerDay {
    /// Track `cid`, a card of note `nid`, on this day.
    fn add(&mut self, cid: CardId, nid: NoteId) {
        self.cards.push((cid, nid));
        self.notes.insert(nid);
    }

    /// Stop tracking `cid` on this day. Does nothing if the card is not
    /// present. The card's note is dropped from `notes` once none of its
    /// cards remain on this day.
    fn remove(&mut self, cid: CardId) {
        let position = self.cards.iter().position(|(card_id, _)| *card_id == cid);
        if let Some(index) = position {
            // order of `cards` carries no meaning, so the O(1) removal is fine
            let (_, removed_nid) = self.cards.swap_remove(index);
            let note_still_present = self
                .cards
                .iter()
                .any(|(_, remaining_nid)| *remaining_nid == removed_nid);
            if !note_still_present {
                self.notes.remove(&removed_nid);
            }
        }
    }

    /// Whether any card of note `nid` is tracked on this day.
    fn has_sibling(&self, nid: &NoteId) -> bool {
        self.notes.contains(nid)
    }
}
/// A borrowed [`LoadBalancer`] bundled with the per-card values that
/// `find_interval` forwards to it.
pub struct LoadBalancerContext<'a> {
load_balancer: &'a LoadBalancer,
/// When set, days that already hold a card of this note are weighted down.
note_id: Option<NoteId>,
/// Preset whose per-day buckets are consulted.
deckconfig_id: DeckConfigId,
/// Seed for the weighted random pick; while this is `None`, the load
/// balancer returns no interval.
fuzz_seed: Option<u64>,
}
impl<'a> LoadBalancerContext<'a> {
    /// Ask the underlying load balancer for an interval, supplying the
    /// preset, seed and note stored in this context. Returns `None` when the
    /// load balancer does not pick one.
    pub fn find_interval(&self, interval: f32, minimum: u32, maximum: u32) -> Option<u32> {
        let Self {
            load_balancer,
            note_id,
            deckconfig_id,
            fuzz_seed,
        } = self;
        load_balancer.find_interval(
            interval,
            minimum,
            maximum,
            *deckconfig_id,
            *fuzz_seed,
            *note_id,
        )
    }

    /// Return this context with its fuzz seed replaced by `fuzz_seed`.
    pub fn set_fuzz_seed(self, fuzz_seed: Option<u64>) -> Self {
        Self { fuzz_seed, ..self }
    }
}
/// Cache of cards due over the coming days, bucketed per preset and per day,
/// used to steer newly scheduled cards towards less crowded days.
#[derive(Debug)]
pub struct LoadBalancer {
/// Load balancer operates at the preset level, it only counts
/// cards in the same preset as the card being balanced.
///
/// Each array slot is one day; `new` fills slot `i` with the cards due `i`
/// days after the `today` it was given.
days_by_preset: HashMap<DeckConfigId, [LoadBalancerDay; LOAD_BALANCE_DAYS]>,
}
impl LoadBalancer {
pub fn new(
today: u32,
did_to_dcid: HashMap<DeckId, DeckConfigId>,
storage: &SqliteStorage,
) -> Result<LoadBalancer> {
let cards_on_each_day =
storage.get_all_cards_due_in_range(today, today + LOAD_BALANCE_DAYS as u32)?;
let days_by_preset = cards_on_each_day
.into_iter()
// for each day, group all cards on each day by their deck config id
.map(|cards_on_day| {
cards_on_day
.into_iter()
.filter_map(|(cid, nid, did)| Some((cid, nid, did_to_dcid.get(&did)?)))
.fold(
HashMap::<_, Vec<_>>::new(),
|mut day_group_by_dcid, (cid, nid, dcid)| {
day_group_by_dcid.entry(dcid).or_default().push((cid, nid));
day_group_by_dcid
},
)
})
.enumerate()
// consolidate card by day groups into groups of [LoadBalancerDay; LOAD_BALANCE_DAYS]s
.fold(
HashMap::new(),
|mut deckconfig_group, (day_index, days_grouped_by_dcid)| {
for (group, cards) in days_grouped_by_dcid.into_iter() {
let day = deckconfig_group
.entry(*group)
.or_insert_with(|| std::array::from_fn(|_| LoadBalancerDay::default()));
for (cid, nid) in cards {
day[day_index].add(cid, nid);
}
}
deckconfig_group
},
);
Ok(LoadBalancer { days_by_preset })
}
pub fn review_context(
&self,
note_id: Option<NoteId>,
deckconfig_id: DeckConfigId,
) -> LoadBalancerContext {
LoadBalancerContext {
load_balancer: self,
note_id,
deckconfig_id,
fuzz_seed: None,
}
}
/// The main load balancing function
/// Given an interval and min/max range it does its best to find the best
/// day within the standard fuzz range to schedule a card that leads to
/// a consistent workload.
///
/// It works by using a weighted random, assigning a weight between 0.0 and
/// 1.0 to each day in the fuzz range for an interval.
/// the weight takes into account the number of cards due on a day as well
/// as the interval itself.
/// `weight = (1 / (cards_due))**2 * (1 / target_interval)`
///
/// By including the target_interval in the calculation, the interval is
/// slightly biased to be due earlier. Without this, the load balancer
/// ends up being very biased towards later days, especially around
/// graduating intervals.
///
/// if a note_id is provided, it attempts to avoid placing a card on a day
/// that already has that note_id (aka avoid siblings)
fn find_interval(
&self,
interval: f32,
minimum: u32,
maximum: u32,
deckconfig_id: DeckConfigId,
fuzz_seed: Option<u64>,
note_id: Option<NoteId>,
) -> Option<u32> {
// if we're sending a card far out into the future, the need to balance is low
if interval as usize > MAX_LOAD_BALANCE_INTERVAL
|| minimum as usize > MAX_LOAD_BALANCE_INTERVAL
{
return None;
}
let (before_days, after_days) = constrained_fuzz_bounds(interval, minimum, maximum);
let days = self.days_by_preset.get(&deckconfig_id)?;
let interval_days = &days[before_days as usize..=after_days as usize];
// calculate weights for each day
let intervals_and_weights = interval_days
.iter()
.enumerate()
.map(|(interval_index, interval_day)| {
let target_interval = interval_index as u32 + before_days;
// if there is a sibling on this day, give it a very low weight
let sibling_multiplier = note_id
.and_then(|note_id| {
interval_day
.has_sibling(&note_id)
.then_some(SIBLING_PENALTY)
})
.unwrap_or(1.0);
let weight = match interval_day.cards.len() {
0 => 1.0, // if theres no cards due on this day, give it the full 1.0 weight
card_count => {
let card_count_weight = (1.0 / card_count as f32).powi(2);
let card_interval_weight = 1.0 / target_interval as f32;
card_count_weight * card_interval_weight * sibling_multiplier
}
};
(target_interval, weight)
})
.collect::<Vec<_>>();
let mut rng = StdRng::seed_from_u64(fuzz_seed?);
let weighted_intervals =
WeightedIndex::new(intervals_and_weights.iter().map(|k| k.1)).ok()?;
let selected_interval_index = weighted_intervals.sample(&mut rng);
Some(intervals_and_weights[selected_interval_index].0)
}
pub fn add_card(&mut self, cid: CardId, nid: NoteId, dcid: DeckConfigId, interval: u32) {
if let Some(days) = self.days_by_preset.get_mut(&dcid) {
if let Some(day) = days.get_mut(interval as usize) {
day.add(cid, nid);
}
}
}
pub fn remove_card(&mut self, cid: CardId) {
for (_, days) in self.days_by_preset.iter_mut() {
for day in days.iter_mut() {
day.remove(cid);
}
}
}
}

View file

@ -5,6 +5,7 @@ pub(crate) mod filtered;
pub(crate) mod fuzz;
pub(crate) mod interval_kind;
pub(crate) mod learning;
pub(crate) mod load_balancer;
pub(crate) mod new;
pub(crate) mod normal;
pub(crate) mod preview_filter;
@ -17,6 +18,7 @@ pub use filtered::FilteredState;
use fsrs::NextStates;
pub(crate) use interval_kind::IntervalKind;
pub use learning::LearnState;
use load_balancer::LoadBalancerContext;
pub use new::NewState;
pub use normal::NormalState;
pub use preview_filter::PreviewState;
@ -99,6 +101,7 @@ pub(crate) struct StateContext<'a> {
pub interval_multiplier: f32,
pub maximum_review_interval: u32,
pub leech_threshold: u32,
pub load_balancer: Option<LoadBalancerContext<'a>>,
// relearning
pub relearn_steps: LearningSteps<'a>,
@ -133,6 +136,7 @@ impl<'a> StateContext<'a> {
interval_multiplier: 1.0,
maximum_review_interval: 36500,
leech_threshold: 8,
load_balancer: None,
relearn_steps: LearningSteps::new(&[10.0]),
lapse_multiplier: 0.0,
minimum_lapse_interval: 1,

View file

@ -581,6 +581,32 @@ impl super::SqliteStorage {
Ok(())
}
/// Return the cards whose `due` value lies in `[min_day, max_day)`, grouped
/// by day.
///
/// The outer `Vec` has one entry per day in the range; entry `i` holds the
/// `(card id, note id, deck id)` triples of the cards with
/// `due == min_day + i`. An empty or inverted range yields an empty `Vec`.
///
/// # Errors
/// Returns an error if the statement cannot be prepared or executed, or if a
/// row cannot be read.
// NOTE(review): the query filters on `due` alone, so every card whose `due`
// value lands in the range is returned whatever its other columns hold —
// confirm that is what callers expect.
pub(crate) fn get_all_cards_due_in_range(
    &self,
    min_day: u32,
    max_day: u32,
) -> Result<Vec<Vec<(CardId, NoteId, DeckId)>>> {
    // saturate so an inverted range cannot underflow
    let day_count = max_day.saturating_sub(min_day) as usize;
    let mut cards_by_day = vec![Vec::new(); day_count];

    let mut stmt = self
        .db
        .prepare_cached("select id, nid, did, due from cards where due >= ?1 and due < ?2 ")?;
    let rows = stmt.query_and_then([min_day, max_day], |row: &Row| {
        Ok::<_, rusqlite::Error>((
            row.get::<_, CardId>(0)?,
            row.get::<_, NoteId>(1)?,
            row.get::<_, DeckId>(2)?,
            row.get::<_, i32>(3)?,
        ))
    })?;

    for row in rows {
        // propagate read failures rather than silently dropping the row
        let (card_id, note_id, deck_id, due) = row?;
        // the query restricts `due` to `[min_day, max_day)`, so the index is in range
        cards_by_day[due as usize - min_day as usize].push((card_id, note_id, deck_id));
    }

    Ok(cards_by_day)
}
pub(crate) fn congrats_info(&self, current: &Deck, today: u32) -> Result<CongratsInfo> {
// NOTE: this line is obsolete in v3 as it's run on queue build, but kept to
// prevent errors for v1/v2 users before they upgrade