From 7d7656d86fd38650e71edaf9623fbd9daf8c3594 Mon Sep 17 00:00:00 2001
From: Damien Elmes
Date: Fri, 10 Jan 2020 21:04:52 +1000
Subject: [PATCH] add ruby filters

---
 rslib/src/lib.rs              |   1 +
 rslib/src/template.rs         |  43 +---------
 rslib/src/template_filters.rs | 145 ++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+), 42 deletions(-)
 create mode 100644 rslib/src/template_filters.rs

diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs
index 5f63b11b6..2d33dff60 100644
--- a/rslib/src/lib.rs
+++ b/rslib/src/lib.rs
@@ -7,4 +7,5 @@ pub mod backend;
 pub mod err;
 pub mod sched;
 pub mod template;
+pub mod template_filters;
 pub mod text;
diff --git a/rslib/src/template.rs b/rslib/src/template.rs
index ae5e59e25..06cc7d554 100644
--- a/rslib/src/template.rs
+++ b/rslib/src/template.rs
@@ -2,6 +2,7 @@
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
 use crate::err::{AnkiError, Result};
+use crate::template_filters::apply_filters;
 use lazy_static::lazy_static;
 use nom;
 use nom::branch::alt;
@@ -377,48 +378,6 @@ fn unknown_field_message(field_name: &str, filters: &[&str]) -> String {
     )
 }
 
-// Filtering
-//----------------------------------------
-
-/// Applies built in filters, returning the resulting text and remaining filters.
-///
-/// The first non-standard filter that is encountered will terminate processing,
-/// so non-standard filters must come at the end.
-fn apply_filters<'a>(text: &'a str, filters: &[&str]) -> (Cow<'a, str>, Vec<String>) {
-    let mut text: Cow<str> = text.into();
-
-    for (idx, &filter_name) in filters.iter().enumerate() {
-        match apply_filter(filter_name, text.as_ref()) {
-            Some(output) => {
-                text = output.into();
-            }
-            None => {
-                // unrecognized filter, return current text and remaining filters
-                return (
-                    text,
-                    filters.iter().skip(idx).map(ToString::to_string).collect(),
-                );
-            }
-        }
-    }
-
-    // all filters processed
-    (text, vec![])
-}
-
-fn apply_filter(filter_name: &str, text: &str) -> Option<String> {
-    let output_text = match filter_name {
-        "text" => text_filter(text),
-        _ => return None,
-    };
-    output_text.into()
-}
-
-fn text_filter(text: &str) -> String {
-    // fixme: implement properly
-    Regex::new(r"<.+?>").unwrap().replace_all(text, "").into()
-}
-
 // Field requirements
 //----------------------------------------
 
diff --git a/rslib/src/template_filters.rs b/rslib/src/template_filters.rs
new file mode 100644
index 000000000..15e39088a
--- /dev/null
+++ b/rslib/src/template_filters.rs
@@ -0,0 +1,145 @@
+// Copyright: Ankitects Pty Ltd and contributors
+// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
+
+use crate::text::strip_html;
+use lazy_static::lazy_static;
+use regex::{Captures, Regex};
+use std::borrow::Cow;
+
+// Filtering
+//----------------------------------------
+
+/// Applies built in filters, returning the resulting text and remaining filters.
+///
+/// The first non-standard filter that is encountered will terminate processing,
+/// so non-standard filters must come at the end.
+pub(crate) fn apply_filters<'a>(text: &'a str, filters: &[&str]) -> (Cow<'a, str>, Vec<String>) {
+    let mut text: Cow<str> = text.into();
+
+    for (idx, &filter_name) in filters.iter().enumerate() {
+        match apply_filter(filter_name, text.as_ref()) {
+            (true, None) => {
+                // filter did not change text
+            }
+            (true, Some(output)) => {
+                // text updated
+                text = output.into();
+            }
+            (false, _) => {
+                // unrecognized filter, return current text and remaining filters
+                return (
+                    text,
+                    filters.iter().skip(idx).map(ToString::to_string).collect(),
+                );
+            }
+        }
+    }
+
+    // all filters processed
+    (text, vec![])
+}
+
+/// Apply one filter.
+///
+/// Returns true if filter was valid.
+/// Returns string if input text changed.
+fn apply_filter<'a>(filter_name: &str, text: &'a str) -> (bool, Option<String>) {
+    let output_text = match filter_name {
+        "text" => strip_html(text),
+        "furigana" => furigana_filter(text),
+        "kanji" => kanji_filter(text),
+        "kana" => kana_filter(text),
+        _ => return (false, None),
+    };
+
+    (
+        true,
+        match output_text {
+            Cow::Owned(o) => Some(o),
+            _ => None,
+        },
+    )
+}
+
+// Cloze filter
+//----------------------------------------
+
+// Ruby filters
+//----------------------------------------
+
+lazy_static! {
+    static ref FURIGANA: Regex = Regex::new(r" ?([^ >]+?)\[(.+?)\]").unwrap();
+}
+
+/// Did furigana regex match a sound tag?
+fn captured_sound(caps: &Captures) -> bool {
+    caps.get(2).unwrap().as_str().starts_with("sound:")
+}
+
+fn kana_filter(text: &str) -> Cow<str> {
+    FURIGANA
+        .replace_all(&text.replace("&nbsp;", " "), |caps: &Captures| {
+            if captured_sound(caps) {
+                caps.get(0).unwrap().as_str().to_owned()
+            } else {
+                caps.get(2).unwrap().as_str().to_owned()
+            }
+        })
+        .into_owned()
+        .into()
+}
+
+fn kanji_filter(text: &str) -> Cow<str> {
+    FURIGANA
+        .replace_all(&text.replace("&nbsp;", " "), |caps: &Captures| {
+            if captured_sound(caps) {
+                caps.get(0).unwrap().as_str().to_owned()
+            } else {
+                caps.get(1).unwrap().as_str().to_owned()
+            }
+        })
+        .into_owned()
+        .into()
+}
+
+fn furigana_filter(text: &str) -> Cow<str> {
+    FURIGANA
+        .replace_all(&text.replace("&nbsp;", " "), |caps: &Captures| {
+            if captured_sound(caps) {
+                caps.get(0).unwrap().as_str().to_owned()
+            } else {
+                format!(
+                    "<ruby><rb>{}</rb><rt>{}</rt></ruby>",
+                    caps.get(1).unwrap().as_str(),
+                    caps.get(2).unwrap().as_str()
+                )
+            }
+        })
+        .into_owned()
+        .into()
+}
+
+// Other filters
+//----------------------------------------
+
+// - type
+// - hint
+
+// Tests
+//----------------------------------------
+
+#[cfg(test)]
+mod test {
+    use crate::template_filters::{furigana_filter, kana_filter, kanji_filter};
+
+    #[test]
+    fn test_furigana() {
+        let text = "test first[second] third[fourth]";
+        assert_eq!(kana_filter(text).as_ref(), "testsecondfourth");
+        assert_eq!(kanji_filter(text).as_ref(), "testfirstthird");
+        assert_eq!(
+            furigana_filter("first[second]").as_ref(),
+            "<ruby><rb>first</rb><rt>second</rt></ruby>"
+        );
+    }
+}