diff --git a/proto/backend.proto b/proto/backend.proto
index 2522639eb..33010736c 100644
--- a/proto/backend.proto
+++ b/proto/backend.proto
@@ -168,5 +168,6 @@ message TTSTag {
     string field_text = 1;
     string lang = 2;
     repeated string voices = 3;
-    repeated string other_args = 4;
+    float speed = 4;
+    repeated string other_args = 5;
 }
diff --git a/pylib/anki/rsbackend.py b/pylib/anki/rsbackend.py
index 00738588b..65fd89494 100644
--- a/pylib/anki/rsbackend.py
+++ b/pylib/anki/rsbackend.py
@@ -56,6 +56,7 @@ def av_tag_to_native(tag: pb.AVTag) -> AVTag:
             lang=tag.tts.lang,
             voices=list(tag.tts.voices),
             other_args=list(tag.tts.other_args),
+            speed=tag.tts.speed,
         )
 
 
diff --git a/pylib/anki/sound.py b/pylib/anki/sound.py
index ca05a6c21..a3bcfe99d 100644
--- a/pylib/anki/sound.py
+++ b/pylib/anki/sound.py
@@ -24,6 +24,7 @@ class TTSTag:
     field_text: str
     lang: str
     voices: List[str]
+    speed: float
     # each arg should be in the form 'foo=bar'
     other_args: List[str]
 
diff --git a/qt/aqt/tts.py b/qt/aqt/tts.py
index 62adee8a3..e06e67a81 100644
--- a/qt/aqt/tts.py
+++ b/qt/aqt/tts.py
@@ -126,8 +126,11 @@ class MacTTSPlayer(TTSProcessPlayer):
         voice = match.voice
         assert isinstance(voice, MacVoice)
 
+        default_wpm = 170
+        words_per_min = str(int(default_wpm * tag.speed))
+
         self._process = subprocess.Popen(
-            ["say", "-v", voice.original_name, "-f", "-"],
+            ["say", "-v", voice.original_name, "-r", words_per_min, "-f", "-"],
             stdin=subprocess.PIPE,
             stdout=subprocess.DEVNULL,
             stderr=subprocess.DEVNULL,
@@ -171,8 +174,21 @@ class MacTTSFilePlayer(MacTTSPlayer):
         voice = match.voice
         assert isinstance(voice, MacVoice)
 
+        default_wpm = 170
+        words_per_min = str(int(default_wpm * tag.speed))
+
         self._process = subprocess.Popen(
-            ["say", "-v", voice.original_name, "-f", "-", "-o", self.tmppath],
+            [
+                "say",
+                "-v",
+                voice.original_name,
+                "-r",
+                words_per_min,
+                "-f",
+                "-",
+                "-o",
+                self.tmppath,
+            ],
             stdin=subprocess.PIPE,
             stdout=subprocess.DEVNULL,
             stderr=subprocess.DEVNULL,
@@ -441,6 +457,7 @@ if isWin:
             try:
                 native_voice = voice.handle
                 self.speaker.Voice = native_voice
+                self.speaker.Rate = self._rate_for_speed(tag.speed)
                 self.speaker.Speak(tag.field_text, 1)
                 gui_hooks.av_player_did_begin_playing(self, tag)
 
@@ -454,7 +471,12 @@ if isWin:
                 self._terminate_flag = False
 
         def _tidy_name(self, name: str) -> str:
-            "eg. Microsoft Haruka Desktop -> MS-Haruka."
+            "eg. Microsoft Haruka Desktop -> Microsoft_Haruka."
             return re.sub(r"^Microsoft (.+) Desktop$", "Microsoft_\\1", name).replace(
                 " ", "_"
             )
+
+        def _rate_for_speed(self, speed: float) -> int:
+            "Map a speed multiplier to a rate in -10..10, eg. 1.5 -> 5, 0.5 -> -5."
+            # clamp before rounding so inf/nan are safe; f32 1.3 is really 1.2999...
+            return round(max(-10, min(10, speed * 10 - 10)))
diff --git a/rslib/src/backend.rs b/rslib/src/backend.rs
index 2df96bd9f..a14ef4d14 100644
--- a/rslib/src/backend.rs
+++ b/rslib/src/backend.rs
@@ -195,12 +195,14 @@ impl Backend {
                     lang,
                     voices,
                     other_args,
+                    speed,
                 } => pt::AvTag {
                     value: Some(pt::av_tag::Value::Tts(pt::TtsTag {
                         field_text,
                         lang,
                         voices,
                         other_args,
+                        speed,
                     })),
                 },
             })
diff --git a/rslib/src/text.rs b/rslib/src/text.rs
index 154638c25..f0a97a192 100644
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@@ -15,6 +15,7 @@ pub enum AVTag {
         field_text: String,
         lang: String,
         voices: Vec<String>,
+        speed: f32,
         other_args: Vec<String>,
     },
 }
@@ -102,9 +103,10 @@ pub fn extract_av_tags<'a>(text: &'a str, question_side: bool) -> (Cow<'a, str>,
 
 fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
     let mut other_args = vec![];
-    let mut split_args = args.split(' ');
+    let mut split_args = args.split_ascii_whitespace();
     let lang = split_args.next().unwrap_or("");
     let mut voices = None;
+    let mut speed = 1.0;
 
     for remaining_arg in split_args {
         if remaining_arg.starts_with("voices=") {
@@ -112,6 +114,13 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
                 .split('=')
                 .nth(1)
                 .map(|voices| voices.split(',').map(ToOwned::to_owned).collect());
+        } else if remaining_arg.starts_with("speed=") {
+            speed = remaining_arg
+                .split('=')
+                .nth(1)
+                .unwrap()
+                .parse()
+                .unwrap_or(1.0);
         } else {
             other_args.push(remaining_arg.to_owned());
         }
@@ -121,6 +130,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
         field_text: strip_html_for_tts(field_text).into(),
         lang: lang.into(),
         voices: voices.unwrap_or_else(Vec::new),
+        speed,
         other_args,
     }
 }
@@ -188,7 +198,7 @@ mod test {
     #[test]
     fn test_audio() {
         let s =
-            "abc[sound:fo&amp;o.mp3]def[anki:tts][en_US voices=Bob,Jane]foo<br>1&gt;2[/anki:tts]gh";
+            "abc[sound:fo&amp;o.mp3]def[anki:tts][en_US voices=Bob,Jane speed=1.2]foo<br>1&gt;2[/anki:tts]gh";
         assert_eq!(strip_av_tags(s), "abcdefgh");
 
         let (text, tags) = extract_av_tags(s, true);
@@ -202,7 +212,8 @@ mod test {
                     field_text: "foo 1>2".into(),
                     lang: "en_US".into(),
                     voices: vec!["Bob".into(), "Jane".into()],
-                    other_args: vec![]
+                    other_args: vec![],
+                    speed: 1.2
                 },
             ]
         );