From 31b7464c67343905e5a46001835b9f18c98af90b Mon Sep 17 00:00:00 2001
From: RumovZ <gp5glkw78@relay.firefox.com>
Date: Fri, 2 Sep 2022 03:22:49 +0200
Subject: [PATCH] Add card meta for persisting custom scheduling state (#2040)

* Add card meta for persisting custom scheduling state

* Rename meta -> custom_data

* Enforce limits on size of custom data

Large values will slow down table scans of the cards table, and it's
easier to be strict now and possibly relax things in the future than
the opposite.

* Pack card states and customData into a single message

+ default customData to empty if it can't be parsed

Co-authored-by: Damien Elmes <gpg@ankiweb.net>
---
 proto/anki/cards.proto                   |  1 +
 proto/anki/scheduler.proto               |  7 +++
 pylib/anki/cards.py                      |  6 +--
 pylib/anki/scheduler/v3.py               | 13 ++++-
 qt/aqt/mediasrv.py                       | 18 +++----
 qt/aqt/reviewer.py                       | 23 ++++++---
 rslib/src/backend/card.rs                |  5 ++
 rslib/src/backend/scheduler/answering.rs |  1 +
 rslib/src/card/mod.rs                    |  3 ++
 rslib/src/scheduler/answering/mod.rs     |  4 ++
 rslib/src/scheduler/answering/preview.rs |  4 ++
 rslib/src/storage/card/data.rs           | 62 +++++++++++++++++++++---
 rslib/src/storage/card/mod.rs            |  1 +
 rslib/src/sync/mod.rs                    |  9 +++-
 ts/reviewer/answering.ts                 | 35 ++++++++-----
 15 files changed, 151 insertions(+), 41 deletions(-)

diff --git a/proto/anki/cards.proto b/proto/anki/cards.proto
index d33767238..afb196b38 100644
--- a/proto/anki/cards.proto
+++ b/proto/anki/cards.proto
@@ -44,6 +44,7 @@ message Card {
   int64 original_deck_id = 16;
   uint32 flags = 17;
   optional uint32 original_position = 18;
+  string custom_data = 19;
 }
 
 message UpdateCardsRequest {
diff --git a/proto/anki/scheduler.proto b/proto/anki/scheduler.proto
index 6fd2cda57..a68402271 100644
--- a/proto/anki/scheduler.proto
+++ b/proto/anki/scheduler.proto
@@ -240,6 +240,7 @@ message CardAnswer {
   Rating rating = 4;
   int64 answered_at_millis = 5;
   uint32 milliseconds_taken = 6;
+  string custom_data = 7;
 }
 
 message CustomStudyRequest {
@@ -303,3 +304,9 @@ message RepositionDefaultsResponse {
   bool random = 1;
   bool shift = 2;
 }
+
+// Data required to support the v3 scheduler's custom scheduling feature
+message CustomScheduling {
+  NextCardStates states = 1;
+  string custom_data = 2;
+}
diff --git a/pylib/anki/cards.py b/pylib/anki/cards.py
index 607792514..56b83e788 100644
--- a/pylib/anki/cards.py
+++ b/pylib/anki/cards.py
@@ -92,6 +92,7 @@ class Card(DeprecatedNamesMixin):
         self.original_position = (
             card.original_position if card.HasField("original_position") else None
         )
+        self.custom_data = card.custom_data
 
     def _to_backend_card(self) -> cards_pb2.Card:
         # mtime & usn are set by backend
@@ -111,9 +112,8 @@ class Card(DeprecatedNamesMixin):
             original_due=self.odue,
             original_deck_id=self.odid,
             flags=self.flags,
-            original_position=self.original_position
-            if self.original_position is not None
-            else None,
+            original_position=self.original_position,
+            custom_data=self.custom_data,
         )
 
     def flush(self) -> None:
diff --git a/pylib/anki/scheduler/v3.py b/pylib/anki/scheduler/v3.py
index c23d26922..faf5c1ccf 100644
--- a/pylib/anki/scheduler/v3.py
+++ b/pylib/anki/scheduler/v3.py
@@ -31,6 +31,7 @@ QueuedCards = scheduler_pb2.QueuedCards
 SchedulingState = scheduler_pb2.SchedulingState
 NextStates = scheduler_pb2.NextCardStates
 CardAnswer = scheduler_pb2.CardAnswer
+CustomScheduling = scheduler_pb2.CustomScheduling
 
 
 class Scheduler(SchedulerBaseWithLegacy):
@@ -61,7 +62,12 @@ class Scheduler(SchedulerBaseWithLegacy):
     ##########################################################################
 
     def build_answer(
-        self, *, card: Card, states: NextStates, rating: CardAnswer.Rating.V
+        self,
+        *,
+        card: Card,
+        states: NextStates,
+        custom_data: str,
+        rating: CardAnswer.Rating.V,
     ) -> CardAnswer:
         "Build input for answer_card()."
         if rating == CardAnswer.AGAIN:
@@ -79,6 +85,7 @@ class Scheduler(SchedulerBaseWithLegacy):
             card_id=card.id,
             current_state=states.current,
             new_state=new_state,
+            custom_data=custom_data,
             rating=rating,
             answered_at_millis=int_time(1000),
             milliseconds_taken=card.time_taken(capped=False),
@@ -163,7 +170,9 @@ class Scheduler(SchedulerBaseWithLegacy):
 
         states = self.col._backend.get_next_card_states(card.id)
         changes = self.answer_card(
-            self.build_answer(card=card, states=states, rating=rating)
+            self.build_answer(
+                card=card, states=states, custom_data=card.custom_data, rating=rating
+            )
         )
 
         # tests assume card will be mutated, so we need to reload it
diff --git a/qt/aqt/mediasrv.py b/qt/aqt/mediasrv.py
index dba07a44a..f28236dd1 100644
--- a/qt/aqt/mediasrv.py
+++ b/qt/aqt/mediasrv.py
@@ -27,7 +27,7 @@ from anki import hooks
 from anki._vendor import stringcase
 from anki.collection import OpChanges
 from anki.decks import DeckConfigsForUpdate, UpdateDeckConfigs
-from anki.scheduler.v3 import NextStates
+from anki.scheduler.v3 import CustomScheduling
 from anki.utils import dev_mode
 from aqt.changenotetype import ChangeNotetypeDialog
 from aqt.deckoptions import DeckOptionsDialog
@@ -412,18 +412,18 @@ def update_deck_configs() -> bytes:
     return b""
 
 
-def next_card_states() -> bytes:
-    if states := aqt.mw.reviewer.get_next_states():
-        return states.SerializeToString()
+def get_custom_scheduling() -> bytes:
+    if scheduling := aqt.mw.reviewer.get_custom_scheduling():
+        return scheduling.SerializeToString()
     else:
         return b""
 
 
-def set_next_card_states() -> bytes:
+def set_custom_scheduling() -> bytes:
     key = request.headers.get("key", "")
-    input = NextStates()
+    input = CustomScheduling()
     input.ParseFromString(request.data)
-    aqt.mw.reviewer.set_next_states(key, input)
+    aqt.mw.reviewer.set_custom_scheduling(key, input)
     return b""
 
 
@@ -455,8 +455,8 @@ post_handler_list = [
     congrats_info,
     get_deck_configs_for_update,
     update_deck_configs,
-    next_card_states,
-    set_next_card_states,
+    get_custom_scheduling,
+    set_custom_scheduling,
     change_notetype,
     import_csv,
 ]
diff --git a/qt/aqt/reviewer.py b/qt/aqt/reviewer.py
index ca984121a..09b3e32cb 100644
--- a/qt/aqt/reviewer.py
+++ b/qt/aqt/reviewer.py
@@ -17,7 +17,7 @@ from anki import hooks
 from anki.cards import Card, CardId
 from anki.collection import Config, OpChanges, OpChangesWithCount
 from anki.scheduler.base import ScheduleCardsAsNew
-from anki.scheduler.v3 import CardAnswer, NextStates, QueuedCards
+from anki.scheduler.v3 import CardAnswer, CustomScheduling, NextStates, QueuedCards
 from anki.scheduler.v3 import Scheduler as V3Scheduler
 from anki.tags import MARKED_TAG
 from anki.types import assert_exhaustive
@@ -82,11 +82,14 @@ class V3CardInfo:
 
     queued_cards: QueuedCards
     next_states: NextStates
+    custom_data: str
 
     @staticmethod
     def from_queue(queued_cards: QueuedCards) -> V3CardInfo:
         return V3CardInfo(
-            queued_cards=queued_cards, next_states=queued_cards.cards[0].next_states
+            queued_cards=queued_cards,
+            next_states=queued_cards.cards[0].next_states,
+            custom_data=queued_cards.cards[0].card.custom_data,
         )
 
     def top_card(self) -> QueuedCards.QueuedCard:
@@ -259,23 +262,24 @@ class Reviewer:
         self.card = Card(self.mw.col, backend_card=self._v3.top_card().card)
         self.card.start_timer()
 
-    def get_next_states(self) -> NextStates | None:
+    def get_custom_scheduling(self) -> CustomScheduling | None:
         if v3 := self._v3:
-            return v3.next_states
+            return CustomScheduling(states=v3.next_states, custom_data=v3.custom_data)
         else:
             return None
 
-    def set_next_states(self, key: str, states: NextStates) -> None:
+    def set_custom_scheduling(self, key: str, scheduling: CustomScheduling) -> None:
         if key != self._state_mutation_key:
             return
 
         if v3 := self._v3:
-            v3.next_states = states
+            v3.next_states = scheduling.states
+            v3.custom_data = scheduling.custom_data
 
     def _run_state_mutation_hook(self) -> None:
         if self._v3 and (js := self._state_mutation_js):
             self.web.eval(
-                f"anki.mutateNextCardStates('{self._state_mutation_key}', (states) => {{ {js} }})"
+                f"anki.mutateNextCardStates('{self._state_mutation_key}', (states, customData) => {{ {js} }})"
             )
 
     # Audio
@@ -431,7 +435,10 @@ class Reviewer:
 
         if (v3 := self._v3) and (sched := cast(V3Scheduler, self.mw.col.sched)):
             answer = sched.build_answer(
-                card=self.card, states=v3.next_states, rating=v3.rating_from_ease(ease)
+                card=self.card,
+                states=v3.next_states,
+                custom_data=v3.custom_data,
+                rating=v3.rating_from_ease(ease),
             )
 
             def after_answer(changes: OpChanges) -> None:
diff --git a/rslib/src/backend/card.rs b/rslib/src/backend/card.rs
index fad64614d..0cafc0f8c 100644
--- a/rslib/src/backend/card.rs
+++ b/rslib/src/backend/card.rs
@@ -26,6 +26,9 @@ impl CardsService for Backend {
                 .into_iter()
                 .map(TryInto::try_into)
                 .collect::<Result<Vec<Card>, AnkiError>>()?;
+            for card in &cards {
+                card.validate_custom_data()?;
+            }
             col.update_cards_maybe_undoable(cards, !input.skip_undo_entry)
         })
         .map(Into::into)
@@ -87,6 +90,7 @@ impl TryFrom<pb::Card> for Card {
             original_deck_id: DeckId(c.original_deck_id),
             flags: c.flags as u8,
             original_position: c.original_position,
+            custom_data: c.custom_data,
         })
     }
 }
@@ -112,6 +116,7 @@ impl From<Card> for pb::Card {
             original_deck_id: c.original_deck_id.0,
             flags: c.flags as u32,
             original_position: c.original_position.map(Into::into),
+            custom_data: c.custom_data,
         }
     }
 }
diff --git a/rslib/src/backend/scheduler/answering.rs b/rslib/src/backend/scheduler/answering.rs
index 3d3447408..7e69010cc 100644
--- a/rslib/src/backend/scheduler/answering.rs
+++ b/rslib/src/backend/scheduler/answering.rs
@@ -19,6 +19,7 @@ impl From<pb::CardAnswer> for CardAnswer {
             new_state: answer.new_state.unwrap_or_default().into(),
             answered_at: TimestampMillis(answer.answered_at_millis),
             milliseconds_taken: answer.milliseconds_taken,
+            custom_data: answer.custom_data,
         }
     }
 }
diff --git a/rslib/src/card/mod.rs b/rslib/src/card/mod.rs
index 41c11ec0d..6fd1a4bda 100644
--- a/rslib/src/card/mod.rs
+++ b/rslib/src/card/mod.rs
@@ -79,6 +79,8 @@ pub struct Card {
     pub(crate) flags: u8,
     /// The position in the new queue before leaving it.
     pub(crate) original_position: Option<u32>,
+    /// JSON object or empty; exposed through the reviewer for persisting custom state
+    pub(crate) custom_data: String,
 }
 
 impl Default for Card {
@@ -102,6 +104,7 @@ impl Default for Card {
             original_deck_id: DeckId(0),
             flags: 0,
             original_position: None,
+            custom_data: String::new(),
         }
     }
 }
diff --git a/rslib/src/scheduler/answering/mod.rs b/rslib/src/scheduler/answering/mod.rs
index 861fcba1d..003f64dbc 100644
--- a/rslib/src/scheduler/answering/mod.rs
+++ b/rslib/src/scheduler/answering/mod.rs
@@ -41,6 +41,7 @@ pub struct CardAnswer {
     pub rating: Rating,
     pub answered_at: TimestampMillis,
     pub milliseconds_taken: u32,
+    pub custom_data: String,
 }
 
 impl CardAnswer {
@@ -273,6 +274,8 @@ impl Collection {
         self.maybe_bury_siblings(&original, &updater.config)?;
         let timing = updater.timing;
         let mut card = updater.into_card();
+        card.custom_data = answer.custom_data.clone();
+        card.validate_custom_data()?;
         self.update_card_inner(&mut card, original, usn)?;
         if answer.new_state.leeched() {
             self.add_leech_tag(card.note_id)?;
@@ -419,6 +422,7 @@ pub mod test_helpers {
                 rating,
                 answered_at: TimestampMillis::now(),
                 milliseconds_taken: 0,
+                custom_data: String::new(),
             })?;
             Ok(PostAnswerState {
                 card_id: queued.card.id,
diff --git a/rslib/src/scheduler/answering/preview.rs b/rslib/src/scheduler/answering/preview.rs
index 1ef3b6cf5..70c50850b 100644
--- a/rslib/src/scheduler/answering/preview.rs
+++ b/rslib/src/scheduler/answering/preview.rs
@@ -92,6 +92,7 @@ mod test {
             rating: Rating::Again,
             answered_at: TimestampMillis::now(),
             milliseconds_taken: 0,
+            custom_data: String::new(),
         })?;
 
         c = col.storage.get_card(c.id)?.unwrap();
@@ -106,6 +107,7 @@ mod test {
             rating: Rating::Hard,
             answered_at: TimestampMillis::now(),
             milliseconds_taken: 0,
+            custom_data: String::new(),
         })?;
         c = col.storage.get_card(c.id)?.unwrap();
         assert_eq!(c.queue, CardQueue::PreviewRepeat);
@@ -119,6 +121,7 @@ mod test {
             rating: Rating::Good,
             answered_at: TimestampMillis::now(),
             milliseconds_taken: 0,
+            custom_data: String::new(),
         })?;
         c = col.storage.get_card(c.id)?.unwrap();
         assert_eq!(c.queue, CardQueue::PreviewRepeat);
@@ -132,6 +135,7 @@ mod test {
             rating: Rating::Easy,
             answered_at: TimestampMillis::now(),
             milliseconds_taken: 0,
+            custom_data: String::new(),
         })?;
         c = col.storage.get_card(c.id)?.unwrap();
         assert_eq!(c.queue, CardQueue::DayLearn);
diff --git a/rslib/src/storage/card/data.rs b/rslib/src/storage/card/data.rs
index 5f7338497..4c303c70a 100644
--- a/rslib/src/storage/card/data.rs
+++ b/rslib/src/storage/card/data.rs
@@ -1,32 +1,45 @@
 // Copyright: Ankitects Pty Ltd and contributors
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
+use std::collections::HashMap;
+
 use rusqlite::{
     types::{FromSql, FromSqlError, ToSqlOutput, ValueRef},
     ToSql,
 };
 use serde_derive::{Deserialize, Serialize};
+use serde_json::Value;
 
 use crate::{prelude::*, serde::default_on_invalid};
 
 /// Helper for serdeing the card data column.
 #[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
 #[serde(default)]
-pub(super) struct CardData {
+pub(crate) struct CardData {
     #[serde(
         skip_serializing_if = "Option::is_none",
         rename = "pos",
         deserialize_with = "default_on_invalid"
     )]
     pub(crate) original_position: Option<u32>,
+    /// A string representation of a JSON object storing optional data
+    /// associated with the card, so v3 custom scheduling code can persist
+    /// state.
+    #[serde(default, rename = "cd", skip_serializing_if = "meta_is_empty")]
+    pub(crate) custom_data: String,
 }
 
 impl CardData {
-    pub(super) fn from_card(card: &Card) -> Self {
+    pub(crate) fn from_card(card: &Card) -> Self {
         Self {
             original_position: card.original_position,
+            custom_data: card.custom_data.clone(),
         }
     }
+
+    pub(crate) fn from_str(s: &str) -> Self {
+        serde_json::from_str(s).unwrap_or_default()
+    }
 }
 
 impl FromSql for CardData {
@@ -53,8 +66,45 @@ pub(crate) fn card_data_string(card: &Card) -> String {
     serde_json::to_string(&CardData::from_card(card)).unwrap()
 }
 
-/// Extract original position from JSON `data`.
-pub(crate) fn original_position_from_card_data(card_data: &str) -> Option<u32> {
-    let data: CardData = serde_json::from_str(card_data).unwrap_or_default();
-    data.original_position
+fn meta_is_empty(s: &str) -> bool {
+    matches!(s, "" | "{}")
+}
+
+fn validate_custom_data(json_str: &str) -> Result<()> {
+    if !meta_is_empty(json_str) {
+        let object: HashMap<String, Value> = serde_json::from_str(json_str)
+            .map_err(|e| AnkiError::invalid_input(format!("custom data not an object: {e}")))?;
+        if object.keys().any(|k| k.len() > 8) {
+            return Err(AnkiError::invalid_input(
+                "custom data keys must be <= 8 bytes",
+            ));
+        }
+        if json_str.len() > 100 {
+            return Err(AnkiError::invalid_input(
+                "serialized custom data must be <= 100 bytes",
+            ));
+        }
+    }
+    Ok(())
+}
+
+impl Card {
+    pub(crate) fn validate_custom_data(&self) -> Result<()> {
+        validate_custom_data(&self.custom_data)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    #[test]
+    fn validation() {
+        assert!(validate_custom_data("").is_ok());
+        assert!(validate_custom_data("{}").is_ok());
+        assert!(validate_custom_data(r#"{"foo": 5}"#).is_ok());
+        assert!(validate_custom_data(r#"["foo"]"#).is_err());
+        assert!(validate_custom_data(r#"{"日": 5}"#).is_ok());
+        assert!(validate_custom_data(r#"{"日本語": 5}"#).is_err());
+        assert!(validate_custom_data(&format!(r#"{{"foo": "{}"}}"#, "x".repeat(100))).is_err());
+    }
 }
diff --git a/rslib/src/storage/card/mod.rs b/rslib/src/storage/card/mod.rs
index 2e0549d06..adc81731b 100644
--- a/rslib/src/storage/card/mod.rs
+++ b/rslib/src/storage/card/mod.rs
@@ -69,6 +69,7 @@ fn row_to_card(row: &Row) -> result::Result<Card, rusqlite::Error> {
         original_deck_id: row.get(15)?,
         flags: row.get(16)?,
         original_position: data.original_position,
+        custom_data: data.custom_data,
     })
 }
 
diff --git a/rslib/src/sync/mod.rs b/rslib/src/sync/mod.rs
index eb3c43d6c..3f60686a4 100644
--- a/rslib/src/sync/mod.rs
+++ b/rslib/src/sync/mod.rs
@@ -28,7 +28,7 @@ use crate::{
     revlog::RevlogEntry,
     serde::{default_on_invalid, deserialize_int_from_number},
     storage::{
-        card::data::{card_data_string, original_position_from_card_data},
+        card::data::{card_data_string, CardData},
         open_and_check_sqlite_file, SchemaVersion,
     },
     tags::{join_tags, split_tags, Tag},
@@ -1081,6 +1081,10 @@ impl Collection {
 
 impl From<CardEntry> for Card {
     fn from(e: CardEntry) -> Self {
+        let CardData {
+            original_position,
+            custom_data,
+        } = CardData::from_str(&e.data);
         Card {
             id: e.id,
             note_id: e.nid,
@@ -1099,7 +1103,8 @@ impl From<CardEntry> for Card {
             original_due: e.odue,
             original_deck_id: e.odid,
             flags: e.flags,
-            original_position: original_position_from_card_data(&e.data),
+            original_position,
+            custom_data,
         }
     }
 }
diff --git a/ts/reviewer/answering.ts b/ts/reviewer/answering.ts
index 5aea76d4e..f3a633763 100644
--- a/ts/reviewer/answering.ts
+++ b/ts/reviewer/answering.ts
@@ -4,25 +4,38 @@
 import { postRequest } from "../lib/postrequest";
 import { Scheduler } from "../lib/proto";
 
-async function getNextStates(): Promise<Scheduler.NextCardStates> {
-    return Scheduler.NextCardStates.decode(
-        await postRequest("/_anki/nextCardStates", ""),
+async function getCustomScheduling(): Promise<Scheduler.CustomScheduling> {
+    return Scheduler.CustomScheduling.decode(
+        await postRequest("/_anki/getCustomScheduling", ""),
     );
 }
 
-async function setNextStates(
+async function setCustomScheduling(
     key: string,
-    states: Scheduler.NextCardStates,
+    scheduling: Scheduler.CustomScheduling,
 ): Promise<void> {
-    const data: Uint8Array = Scheduler.NextCardStates.encode(states).finish();
-    await postRequest("/_anki/setNextCardStates", data, { key });
+    const bytes = Scheduler.CustomScheduling.encode(scheduling).finish();
+    await postRequest("/_anki/setCustomScheduling", bytes, { key });
 }
 
 export async function mutateNextCardStates(
     key: string,
-    mutator: (states: Scheduler.NextCardStates) => void,
+    mutator: (
+        states: Scheduler.NextCardStates,
+        customData: Record<string, unknown>,
+    ) => void,
 ): Promise<void> {
-    const states = await getNextStates();
-    mutator(states);
-    await setNextStates(key, states);
+    const scheduling = await getCustomScheduling();
+    let customData: Record<string, unknown> = {};
+    try {
+        const parsed: unknown = JSON.parse(scheduling.customData);
+        if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
+            customData = parsed as Record<string, unknown>;
+        }
+    } catch {
+        // unparsable (e.g. empty string): keep the empty-object default
+    }
+    mutator(scheduling.states!, customData);
+    scheduling.customData = JSON.stringify(customData);
+    await setCustomScheduling(key, scheduling);
 }