diff --git a/rslib/src/import_export/package/apkg/import/notes.rs b/rslib/src/import_export/package/apkg/import/notes.rs index 5c68af29c..b7f1abd3f 100644 --- a/rslib/src/import_export/package/apkg/import/notes.rs +++ b/rslib/src/import_export/package/apkg/import/notes.rs @@ -17,7 +17,10 @@ use crate::{ ImportProgress, }, prelude::*, - text::{replace_media_refs, strip_html_preserving_media_filenames, CowMapping}, + text::{ + replace_media_refs, strip_html_preserving_media_filenames, truncate_to_char_boundary, + CowMapping, + }, }; struct NoteContext<'a> { @@ -73,7 +76,7 @@ impl Note { let mut reduced = strip_html_preserving_media_filenames(&field) .get_owned() .unwrap_or(field); - reduced.truncate(80); + truncate_to_char_boundary(&mut reduced, 80); reduced }) .collect(), diff --git a/rslib/src/media/files.rs b/rslib/src/media/files.rs index ab720b635..077e4ce93 100644 --- a/rslib/src/media/files.rs +++ b/rslib/src/media/files.rs @@ -241,18 +241,18 @@ fn split_and_truncate_filename(fname: &str, max_bytes: usize) -> (&str, &str) { }; // cap extension to 10 bytes so stem_len can't be negative - ext = truncate_to_char_boundary(ext, 10); + ext = truncated_to_char_boundary(ext, 10); // cap stem, allowing for the . and a trailing _ let stem_len = max_bytes - ext.len() - 2; - stem = truncate_to_char_boundary(stem, stem_len); + stem = truncated_to_char_boundary(stem, stem_len); (stem, ext) } -/// Trim a string on a valid UTF8 boundary. +/// Return a substring on a valid UTF8 boundary. /// Based on a funtion in the Rust stdlib. -fn truncate_to_char_boundary(s: &str, mut max: usize) -> &str { +fn truncated_to_char_boundary(s: &str, mut max: usize) -> &str { if max >= s.len() { s } else { diff --git a/rslib/src/text.rs b/rslib/src/text.rs index 9b58c8137..e423510fd 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -206,6 +206,17 @@ pub fn strip_html_for_tts(html: &str) -> Cow { .map_cow(strip_html) } +/// Truncate a String on a valid UTF8 boundary. +pub(crate) fn truncate_to_char_boundary(s: &mut String, mut max: usize) { + if max >= s.len() { + return; + } + while !s.is_char_boundary(max) { + max -= 1; + } + s.truncate(max); +} + #[derive(Debug)] pub(crate) struct MediaRef<'a> { pub full_ref: &'a str, @@ -589,4 +600,14 @@ mod test { "[sound:spam]", ); } + + #[test] + fn truncate() { + let mut s = "日本語".to_string(); + truncate_to_char_boundary(&mut s, 6); + assert_eq!(&s, "日本"); + let mut s = "日本語".to_string(); + truncate_to_char_boundary(&mut s, 1); + assert_eq!(&s, ""); + } }