Fix a crash when truncating note text

String::truncate() is a bit of a footgun: it panics if the new length
does not fall on a char boundary. I've hit this before too :-)
This commit is contained in:
Damien Elmes 2022-04-28 20:51:36 +10:00
parent 24231214c5
commit cf9d490576
3 changed files with 30 additions and 6 deletions

View file

@ -17,7 +17,10 @@ use crate::{
ImportProgress,
},
prelude::*,
text::{replace_media_refs, strip_html_preserving_media_filenames, CowMapping},
text::{
replace_media_refs, strip_html_preserving_media_filenames, truncate_to_char_boundary,
CowMapping,
},
};
struct NoteContext<'a> {
@ -73,7 +76,7 @@ impl Note {
let mut reduced = strip_html_preserving_media_filenames(&field)
.get_owned()
.unwrap_or(field);
reduced.truncate(80);
truncate_to_char_boundary(&mut reduced, 80);
reduced
})
.collect(),

View file

@ -241,18 +241,18 @@ fn split_and_truncate_filename(fname: &str, max_bytes: usize) -> (&str, &str) {
};
// cap extension to 10 bytes so stem_len can't be negative
ext = truncate_to_char_boundary(ext, 10);
ext = truncated_to_char_boundary(ext, 10);
// cap stem, allowing for the . and a trailing _
let stem_len = max_bytes - ext.len() - 2;
stem = truncate_to_char_boundary(stem, stem_len);
stem = truncated_to_char_boundary(stem, stem_len);
(stem, ext)
}
/// Trim a string on a valid UTF8 boundary.
/// Return a substring on a valid UTF8 boundary.
/// Based on a function in the Rust stdlib.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> &str {
fn truncated_to_char_boundary(s: &str, mut max: usize) -> &str {
if max >= s.len() {
s
} else {

View file

@ -206,6 +206,17 @@ pub fn strip_html_for_tts(html: &str) -> Cow<str> {
.map_cow(strip_html)
}
/// Shorten `s` in place to at most `max` bytes without splitting a UTF-8
/// character.
///
/// A `String` that is already `max` bytes or shorter is left untouched.
/// Otherwise the cut point is moved back from `max` to the nearest char
/// boundary, so the result may end up shorter than `max` bytes.
pub(crate) fn truncate_to_char_boundary(s: &mut String, mut max: usize) {
    if max < s.len() {
        // Offset 0 is always a char boundary, so the search cannot come up empty.
        max = (0..=max)
            .rev()
            .find(|&offset| s.is_char_boundary(offset))
            .unwrap_or(0);
        s.truncate(max);
    }
}
#[derive(Debug)]
pub(crate) struct MediaRef<'a> {
pub full_ref: &'a str,
@ -589,4 +600,14 @@ mod test {
"<img src=spam>[sound:spam]<img src=baz.jpg>",
);
}
#[test]
fn truncate() {
    // (byte limit, expected remainder) for a string of three 3-byte characters:
    // a limit on a boundary keeps whole characters, a limit inside the first
    // character leaves nothing.
    for (limit, expected) in [(6, "日本"), (1, "")] {
        let mut text = String::from("日本語");
        truncate_to_char_boundary(&mut text, limit);
        assert_eq!(text, expected);
    }
}
}