diff --git a/proto/backend.proto b/proto/backend.proto
index 38e070ceb..1c8b0a1cb 100644
--- a/proto/backend.proto
+++ b/proto/backend.proto
@@ -16,6 +16,8 @@ message BackendInput {
         BrowserRowsIn browser_rows = 20;
         RenderCardIn render_card = 21;
         int64 local_minutes_west = 22;
+        string strip_av_tags = 23;
+        string get_av_tags = 24;
     }
 }
 
@@ -28,6 +30,8 @@ message BackendOutput {
         BrowserRowsOut browser_rows = 20;
         RenderCardOut render_card = 21;
         sint32 local_minutes_west = 22;
+        string strip_av_tags = 23;
+        GetAVTagsOut get_av_tags = 24;
 
         BackendError error = 2047;
     }
 
@@ -44,14 +48,6 @@ message InvalidInputError {
     string info = 1;
 }
 
-message PlusOneIn {
-    int32 num = 1;
-}
-
-message PlusOneOut {
-    int32 num = 1;
-}
-
 message TemplateParseError {
     string info = 1;
 }
@@ -150,3 +146,19 @@ message RenderedTemplateReplacement {
     string current_text = 2;
     repeated string filters = 3;
 }
+
+message GetAVTagsOut {
+    repeated AVTag av_tags = 1;
+}
+
+message AVTag {
+    oneof value {
+        string sound_or_video = 1;
+        TTSTag tts = 2;
+    }
+}
+
+message TTSTag {
+    repeated string args = 1;
+    string text = 2;
+}
diff --git a/pylib/anki/rsbackend.py b/pylib/anki/rsbackend.py
index 8cf741a3b..e342e1fc9 100644
--- a/pylib/anki/rsbackend.py
+++ b/pylib/anki/rsbackend.py
@@ -9,6 +9,7 @@ import ankirspy  # pytype: disable=import-error
 import anki.backend_pb2 as pb
 import anki.buildinfo
 from anki.models import AllTemplateReqs
+from anki.sound import AVTag, SoundOrVideoTag, TTSTag
 
 assert ankirspy.buildhash() == anki.buildinfo.buildhash
 
@@ -45,6 +46,14 @@ def proto_template_reqs_to_legacy(
     return legacy_reqs
 
 
+def av_tag_to_native(tag: pb.AVTag) -> AVTag:
+    val = tag.WhichOneof("value")
+    if val == "sound_or_video":
+        return SoundOrVideoTag(filename=tag.sound_or_video)
+    else:
+        return TTSTag(args=list(tag.tts.args), text=tag.tts.text)
+
+
 @dataclass
 class TemplateReplacement:
     field_name: str
@@ -143,3 +152,16 @@ class RustBackend:
         return self._run_command(
             pb.BackendInput(local_minutes_west=stamp)
         ).local_minutes_west
+
+    def strip_av_tags(self, text: str) -> str:
+        return self._run_command(pb.BackendInput(strip_av_tags=text)).strip_av_tags
+
+    def get_av_tags(self, text: str) -> List[AVTag]:
+        return list(
+            map(
+                av_tag_to_native,
+                self._run_command(
+                    pb.BackendInput(get_av_tags=text)
+                ).get_av_tags.av_tags,
+            )
+        )
diff --git a/pylib/anki/sound.py b/pylib/anki/sound.py
index 74de2e911..a5092333d 100644
--- a/pylib/anki/sound.py
+++ b/pylib/anki/sound.py
@@ -1,18 +1,61 @@
 # Copyright: Ankitects Pty Ltd and contributors
 # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
-import re
-from typing import List
+"""
+Sound/TTS references extracted from card text.
 
-# Shared utils
+Use collection.backend.strip_av_tags(string) to remove all tags,
+and collection.backend.get_av_tags(string) to get a list of AVTags.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import List, Union
+
+
+@dataclass
+class TTSTag:
+    """Records information about a text to speech tag.
+
+    See tts.py for more information.
+    """
+
+    args: List[str]
+    text: str
+
+
+@dataclass
+class SoundOrVideoTag:
+    """Contains the filename inside a [sound:...] tag.
+
+    Video files also use [sound:...].
+    """
+
+    filename: str
+
+
+# note this does not include image tags, which are handled with HTML.
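+# AVTag is what collection.backend.get_av_tags() returns: each entry is either
+# a SoundOrVideoTag or a TTSTag, so callers check the concrete type with
+# isinstance() before acting on it.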
+AVTag = Union[SoundOrVideoTag, TTSTag]
+
+# Legacy utils
 ##########################################################################
+# these will be removed in the future
 
 _soundReg = r"\[sound:(.*?)\]"
 
 
 def allSounds(text) -> List:
-    return re.findall(_soundReg, text)
+    from aqt import mw
+
+    return [
+        x.filename
+        for x in mw.col.backend.get_av_tags(text)
+        if isinstance(x, SoundOrVideoTag)
+    ]
 
 
 def stripSounds(text) -> str:
-    return re.sub(_soundReg, "", text)
+    from aqt import mw
+
+    return mw.col.backend.strip_av_tags(text)
diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml
index 04c7a766d..222b677d1 100644
--- a/rslib/Cargo.toml
+++ b/rslib/Cargo.toml
@@ -15,6 +15,7 @@ lazy_static = "1.4.0"
 regex = "1.3.3"
 hex = "0.4.0"
 blake3 = "0.1.0"
+htmlescape = "0.3.1"
 
 [build-dependencies]
 prost-build = "0.5.0"
diff --git a/rslib/src/backend.rs b/rslib/src/backend.rs
index 337e9c8ee..470ca4316 100644
--- a/rslib/src/backend.rs
+++ b/rslib/src/backend.rs
@@ -10,6 +10,7 @@ use crate::template::{
     render_card, without_legacy_template_directives, FieldMap, FieldRequirements, ParsedTemplate,
     RenderedNode,
 };
+use crate::text::{av_tags_in_string, strip_av_tags, AVTag};
 use prost::Message;
 use std::collections::{HashMap, HashSet};
 use std::path::PathBuf;
@@ -98,6 +99,8 @@ impl Backend {
             Value::LocalMinutesWest(stamp) => {
                 OValue::LocalMinutesWest(local_minutes_west_for_stamp(stamp))
             }
+            Value::StripAvTags(text) => OValue::StripAvTags(strip_av_tags(&text).into()),
+            Value::GetAvTags(text) => OValue::GetAvTags(self.get_av_tags(&text)),
         })
     }
 
@@ -178,6 +181,24 @@ impl Backend {
             answer_nodes: rendered_nodes_to_proto(anodes),
         })
     }
+
+    fn get_av_tags(&self, text: &str) -> pt::GetAvTagsOut {
+        let tags = av_tags_in_string(text)
+            .map(|avtag| match avtag {
+                AVTag::SoundOrVideo(file) => pt::AvTag {
+                    value: Some(pt::av_tag::Value::SoundOrVideo(file.to_string())),
+                },
+                AVTag::TextToSpeech { args, field_text } => pt::AvTag {
+                    value: Some(pt::av_tag::Value::Tts(pt::TtsTag {
+                        args: args.iter().map(|&s| s.to_string()).collect(),
+                        text: field_text.to_string(),
+                    })),
+                },
+            })
+            .collect();
+
+        pt::GetAvTagsOut { av_tags: tags }
+    }
 }
 
 fn ords_hash_to_set(ords: HashSet<u16>) -> Vec<u32> {
diff --git a/rslib/src/template.rs b/rslib/src/template.rs
index 0858c29a1..d58b7d733 100644
--- a/rslib/src/template.rs
+++ b/rslib/src/template.rs
@@ -3,7 +3,7 @@
 
 use crate::err::{Result, TemplateError};
 use crate::template_filters::apply_filters;
-use crate::text::strip_sounds;
+use crate::text::strip_av_tags;
 use lazy_static::lazy_static;
 use nom;
 use nom::branch::alt;
@@ -443,7 +443,7 @@ pub fn render_card(
     // if the question side didn't have any unknown filters, we can pass
     // FrontSide in now
     if let [RenderedNode::Text { ref text }] = *qnodes.as_slice() {
-        context.front_text = Some(strip_sounds(text));
+        context.front_text = Some(strip_av_tags(text));
     }
 
     // answer side
diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs
index 5d7207030..e224d1ce3 100644
--- a/rslib/src/template_filters.rs
+++ b/rslib/src/template_filters.rs
@@ -75,8 +75,12 @@ fn apply_filter<'a>(
         // an empty filter name (caused by using two colons) is ignored
         "" => text.into(),
         _ => {
-            // unrecognized filter
-            return (false, None);
+            if filter_name.starts_with("tts ") {
+                tts_filter(filter_name, text)
+            } else {
+                // unrecognized filter
+                return (false, None);
+            }
         }
     };
 
@@ -285,6 +289,11 @@ return false;">
     .into()
 }
 
+fn tts_filter(filter_name: &str, text: &str) -> Cow<'static, str> {
+    let args = filter_name.splitn(2, ' ').nth(1).unwrap_or("");
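+    // splitn(2, ' ') drops the leading "tts" token and keeps the rest of the
+    // filter name as a single argument string, e.g. "tts lang=en_US voices=Bob,Jane"
+    // yields "lang=en_US voices=Bob,Jane"; the AV tag extractor splits it further.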
+
+    format!("[anki:tts][{}]{}[/anki:tts]", args, text).into()
+}
+
 // Tests
 //----------------------------------------
 
@@ -293,7 +302,7 @@ mod test {
     use crate::template::RenderContext;
     use crate::template_filters::{
         apply_filters, cloze_filter, furigana_filter, hint_filter, kana_filter, kanji_filter,
-        type_cloze_filter, type_filter,
+        tts_filter, type_cloze_filter, type_filter,
     };
     use crate::text::strip_html;
 
@@ -368,4 +377,12 @@ field
         ctx.card_ord = 2;
         assert_eq!(cloze_filter(text, &ctx).as_ref(), "");
     }
+
+    #[test]
+    fn test_tts() {
+        assert_eq!(
+            tts_filter("tts lang=en_US", "foo"),
+            "[anki:tts][lang=en_US]foo[/anki:tts]"
+        );
+    }
 }
diff --git a/rslib/src/text.rs b/rslib/src/text.rs
index ef81be90c..f07fe1f48 100644
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@@ -1,12 +1,22 @@
 // Copyright: Ankitects Pty Ltd and contributors
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
+use htmlescape;
 use lazy_static::lazy_static;
 use regex::Regex;
 use std::borrow::Cow;
 use std::collections::HashSet;
 use std::ptr;
 
+#[derive(Debug, PartialEq)]
+pub enum AVTag<'a> {
+    SoundOrVideo(Cow<'a, str>),
+    TextToSpeech {
+        args: Vec<&'a str>,
+        field_text: Cow<'a, str>,
+    },
+}
+
 lazy_static! {
     static ref HTML: Regex = Regex::new(concat!(
         "(?si)",
@@ -22,9 +32,16 @@ lazy_static! {
         r#"(?i)<img[^>]+src=["']?([^"'>]+)["']?[^>]*>"#
     ).unwrap();
 
-    static ref SOUND_TAG: Regex = Regex::new(
-        r"\[sound:(.*?)\]"
-    ).unwrap();
+    // videos are also in sound tags
+    static ref AV_TAGS: Regex = Regex::new(
+        r#"(?xs)
+            \[sound:(.*?)\]     # 1 - the filename in a sound tag
+            |
+            \[anki:tts\]
+                \[(.*?)\]       # 2 - arguments to tts call
+                (.*?)           # 3 - field text
+            \[/anki:tts\]
+            "#).unwrap();
 
     static ref CLOZED_TEXT: Regex = Regex::new(
         r"(?s)\{\{c(\d+)::.+?\}\}"
     ).unwrap();
@@ -35,8 +52,43 @@ pub fn strip_html(html: &str) -> Cow<str> {
     HTML.replace_all(html, "")
 }
 
-pub fn strip_sounds(html: &str) -> Cow<str> {
-    SOUND_TAG.replace_all(html, "")
+pub fn decode_entities(html: &str) -> Cow<str> {
+    if html.contains('&') {
+        match htmlescape::decode_html(html) {
+            Ok(text) => text,
+            Err(e) => format!("{:?}", e),
+        }
+        .into()
+    } else {
+        // nothing to do
+        html.into()
+    }
+}
+
+pub fn strip_html_for_tts(html: &str) -> Cow<str> {
+    match HTML.replace_all(html, " ") {
+        Cow::Borrowed(_) => decode_entities(html),
+        Cow::Owned(s) => decode_entities(&s).to_string().into(),
+    }
+}
+
+pub fn strip_av_tags(text: &str) -> Cow<str> {
+    AV_TAGS.replace_all(text, "")
+}
+
+pub fn av_tags_in_string(text: &str) -> impl Iterator<Item = AVTag> {
+    AV_TAGS.captures_iter(text).map(|caps| {
+        if let Some(av_file) = caps.get(1) {
+            AVTag::SoundOrVideo(decode_entities(av_file.as_str()))
+        } else {
+            let args = caps.get(2).unwrap();
+            let field_text = caps.get(3).unwrap();
+            AVTag::TextToSpeech {
+                args: args.as_str().split(' ').collect(),
+                field_text: strip_html_for_tts(field_text.as_str()),
+            }
+        }
+    })
 }
 
 pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
@@ -64,7 +116,10 @@ pub fn cloze_numbers_in_string(html: &str) -> HashSet<u16> {
 
 #[cfg(test)]
 mod test {
-    use crate::text::{cloze_numbers_in_string, strip_html, strip_html_preserving_image_filenames};
+    use crate::text::{
+        av_tags_in_string, cloze_numbers_in_string, strip_av_tags, strip_html,
+        strip_html_preserving_image_filenames, AVTag,
+    };
     use std::collections::HashSet;
 
     #[test]
@@ -95,4 +150,20 @@ mod test {
             vec![1, 2].into_iter().collect::<HashSet<u16>>()
         );
     }
+
+    #[test]
+    fn test_audio() {
+        let s = "abc[sound:fo&amp;o.mp3]def[anki:tts][lang=en_US voices=Bob,Jane]foo<br>1&gt;2[/anki:tts]gh";
+        assert_eq!(strip_av_tags(s), "abcdefgh");
+        assert_eq!(
+            av_tags_in_string(s).collect::<Vec<_>>(),
+            vec![
+                AVTag::SoundOrVideo("fo&o.mp3".into()),
+                AVTag::TextToSpeech {
+                    args: vec!["lang=en_US", "voices=Bob,Jane"],
+                    field_text: "foo 1>2".into()
+                },
+            ]
+        );
+    }
 }
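
For reviewers, a rough sketch of how the new calls are meant to be driven from the
Python side once this lands. The card text below is made up, and mw.col.backend
assumes the usual GUI context where a collection is already open; the example itself
is not part of the change.

    from aqt import mw
    from anki.sound import SoundOrVideoTag, TTSTag

    # Hypothetical rendered card text: a filter such as {{tts lang=en_US:Front}}
    # has already been expanded by the Rust template code into an [anki:tts] marker.
    text = "Hi[sound:hi.mp3][anki:tts][lang=en_US]Hi there[/anki:tts]"

    # strip_av_tags() removes both [sound:...] and [anki:tts]...[/anki:tts]
    # markers, leaving only the text that should be displayed.
    assert mw.col.backend.strip_av_tags(text) == "Hi"

    # get_av_tags() returns the anki.sound dataclasses in the order the tags
    # appear in the text, so the caller can queue playback accordingly.
    for tag in mw.col.backend.get_av_tags(text):
        if isinstance(tag, SoundOrVideoTag):
            print("play file:", tag.filename)
        elif isinstance(tag, TTSTag):
            print("speak:", tag.text, "args:", tag.args)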