From 0f9216c12741f255f563112f63c786b201cee0aa Mon Sep 17 00:00:00 2001 From: user1823 <92206575+user1823@users.noreply.github.com> Date: Fri, 11 Apr 2025 14:59:23 +0530 Subject: [PATCH] Replace some means in Stats with medians (#3900) * Display median interval in Stats instead of mean Median is better suited than mean for reporting skewed data. * Display median ease in Stats instead of mean * Update difficulty.ts * Update ease.ts * Update statistics.ftl * Format eases.rs * Remove unused import * Change Median back to Average in UI * Revert "Change Median back to Average in UI" This reverts commit e0c1e3f8e4c79459686b0f820e70e3fc68381b4a. * Preserve the old translations for now (dae) --- ftl/core/statistics.ftl | 16 ++++++++++++---- rslib/src/stats/graphs/eases.rs | 33 +++++++++++++++++++++------------ ts/routes/graphs/difficulty.ts | 2 +- ts/routes/graphs/ease.ts | 4 ++-- ts/routes/graphs/intervals.ts | 10 +++++----- 5 files changed, 41 insertions(+), 24 deletions(-) diff --git a/ftl/core/statistics.ftl b/ftl/core/statistics.ftl index 48a9afb48..bff394bff 100644 --- a/ftl/core/statistics.ftl +++ b/ftl/core/statistics.ftl @@ -148,7 +148,7 @@ statistics-card-ease-title = Card Ease statistics-card-difficulty-title = Card Difficulty statistics-card-stability-title = Card Stability statistics-card-stability-subtitle = The delay at which retrievability falls to 90%. -statistics-average-stability = Average stability +statistics-median-stability = Median stability statistics-card-retrievability-title = Card Retrievability statistics-card-ease-subtitle = The lower the ease, the more frequently a card will appear. statistics-card-difficulty-subtitle2 = The higher the difficulty, the slower stability will increase. @@ -261,7 +261,7 @@ statistics-total = Total statistics-days-studied = Days studied statistics-average-answer-time-label = Average answer time statistics-average = Average -statistics-average-interval = Average interval +statistics-median-interval = Median interval statistics-due-tomorrow = Due tomorrow # This string, ‘Daily load,’ appears in the ‘Future due’ table and represents a # forecasted estimate of the number of cards expected to be reviewed daily in @@ -287,11 +287,19 @@ statistics-cards-per-day = [one] { $count } card/day *[other] { $count } cards/day } -statistics-average-ease = Average ease -statistics-average-difficulty = Average difficulty +statistics-median-ease = Median ease +statistics-median-difficulty = Median difficulty statistics-average-retrievability = Average retrievability statistics-estimated-total-knowledge = Estimated total knowledge statistics-save-pdf = Save PDF statistics-saved = Saved. statistics-stats = stats statistics-title = Statistics + +## These strings are no longer used - you do not need to translate them if they +## are not already translated. + +statistics-average-stability = Average stability +statistics-average-interval = Average interval +statistics-average-ease = Average ease +statistics-average-difficulty = Average difficulty diff --git a/rslib/src/stats/graphs/eases.rs b/rslib/src/stats/graphs/eases.rs index d70be4e7c..9b8e703e6 100644 --- a/rslib/src/stats/graphs/eases.rs +++ b/rslib/src/stats/graphs/eases.rs @@ -10,37 +10,46 @@ impl GraphsContext { /// (SM-2, FSRS) pub(super) fn eases(&self) -> (Eases, Eases) { let mut eases = Eases::default(); - let mut card_with_ease_count: usize = 0; + let mut ease_values = Vec::new(); let mut difficulty = Eases::default(); - let mut card_with_difficulty_count: usize = 0; + let mut difficulty_values = Vec::new(); for card in &self.cards { if let Some(state) = card.memory_state { *difficulty .eases .entry(percent_to_bin(state.difficulty() * 100.0)) .or_insert_with(Default::default) += 1; - difficulty.average += state.difficulty(); - card_with_difficulty_count += 1; + difficulty_values.push(state.difficulty()); } else if matches!(card.ctype, CardType::Review | CardType::Relearn) { *eases .eases .entry((card.ease_factor / 10) as u32) .or_insert_with(Default::default) += 1; - eases.average += card.ease_factor as f32; - card_with_ease_count += 1; + ease_values.push(card.ease_factor as f32); } } - if card_with_ease_count != 0 { - eases.average = eases.average / 10.0 / card_with_ease_count as f32; - } - if card_with_difficulty_count != 0 { - difficulty.average = difficulty.average * 100.0 / card_with_difficulty_count as f32; - } + + eases.average = median(&mut ease_values) / 10.0; + difficulty.average = median(&mut difficulty_values) * 100.0; (eases, difficulty) } } +/// Helper function to calculate the median of a vector +fn median(data: &mut [f32]) -> f32 { + if data.is_empty() { + return 0.0; + } + data.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mid = data.len() / 2; + if data.len() % 2 == 0 { + (data[mid - 1] + data[mid]) / 2.0 + } else { + data[mid] + } +} + /// Bins the number into a bin of 0, 5, .. 95 pub(super) fn percent_to_bin(x: f32) -> u32 { if x == 100.0 { diff --git a/ts/routes/graphs/difficulty.ts b/ts/routes/graphs/difficulty.ts index 91f8aa634..1778e2513 100644 --- a/ts/routes/graphs/difficulty.ts +++ b/ts/routes/graphs/difficulty.ts @@ -101,7 +101,7 @@ export function prepareData( const xTickFormat = (num: number): string => localizedNumber(num, 0) + "%"; const tableData = [ { - label: tr.statisticsAverageDifficulty(), + label: tr.statisticsMedianDifficulty(), value: xTickFormat(data.average), }, ]; diff --git a/ts/routes/graphs/ease.ts b/ts/routes/graphs/ease.ts index c0cf06252..26de02096 100644 --- a/ts/routes/graphs/ease.ts +++ b/ts/routes/graphs/ease.ts @@ -106,8 +106,8 @@ export function prepareData( const xTickFormat = (num: number): string => localizedNumber(num, 0) + "%"; const tableData = [ { - label: tr.statisticsAverageEase(), - value: xTickFormat(sum(Array.from(allEases.entries()).map(([k, v]) => (k + 2.5) * v)) / total), + label: tr.statisticsMedianEase(), + value: xTickFormat(data.average), }, ]; diff --git a/ts/routes/graphs/intervals.ts b/ts/routes/graphs/intervals.ts index 5c53e1378..ffd41b238 100644 --- a/ts/routes/graphs/intervals.ts +++ b/ts/routes/graphs/intervals.ts @@ -10,7 +10,7 @@ import * as tr from "@generated/ftl"; import { localizedNumber } from "@tslib/i18n"; import { timeSpan } from "@tslib/time"; import type { Bin } from "d3"; -import { bin, extent, interpolateBlues, mean, quantile, scaleLinear, scaleSequential, sum } from "d3"; +import { bin, extent, interpolateBlues, quantile, scaleLinear, scaleSequential, sum } from "d3"; import type { SearchDispatch, TableDatum } from "./graph-helpers"; import { numericMap } from "./graph-helpers"; @@ -168,12 +168,12 @@ export function prepareIntervalData( dispatch("search", { query }); } - const meanInterval = Math.round(mean(allIntervals) ?? 0); - const meanIntervalString = timeSpan(meanInterval * 86400, false); + const medianInterval = Math.round(quantile(allIntervals, 0.5) ?? 0); + const medianIntervalString = timeSpan(medianInterval * 86400, false); const tableData = [ { - label: fsrs ? tr.statisticsAverageStability() : tr.statisticsAverageInterval(), - value: meanIntervalString, + label: fsrs ? tr.statisticsMedianStability() : tr.statisticsMedianInterval(), + value: medianIntervalString, }, ];