latex: remove blank lines inside math blocks (fixes #3466)

This commit is contained in:
Matilda Bergström 2025-09-11 09:49:34 +02:00
parent 539054c34d
commit 91fad11528

View file

@ -64,18 +64,24 @@ pub fn extract_latex(text: &str, svg: bool) -> (Cow<'_, str>, Vec<ExtractedLatex
let mut extracted = vec![]; let mut extracted = vec![];
let new_text = LATEX.replace_all(text, |caps: &Captures| { let new_text = LATEX.replace_all(text, |caps: &Captures| {
// [latex] blocks preserve newlines between <br>/<div>
// [$] and [$$] blocks remove <br>/<div> completely so that
// no blank lines are introduced inside math environments.
let latex = match (caps.get(1), caps.get(2), caps.get(3)) { let latex = match (caps.get(1), caps.get(2), caps.get(3)) {
(Some(m), _, _) => m.as_str().into(), (Some(m), _, _) => strip_html_for_latex(m.as_str()).into(),
(_, Some(m), _) => format!("${}$", m.as_str()), (_, Some(m), _) => format!("${}$", strip_html_for_latex_math(m.as_str())),
(_, _, Some(m)) => format!(r"\begin{{displaymath}}{}\end{{displaymath}}", m.as_str()), (_, _, Some(m)) => format!(
r"\begin{{displaymath}}{}\end{{displaymath}}",
strip_html_for_latex_math(m.as_str())
),
_ => unreachable!(), _ => unreachable!(),
}; };
let latex_text = strip_html_for_latex(&latex);
let fname = fname_for_latex(&latex_text, svg); let fname = fname_for_latex(&latex, svg);
let img_link = image_link_for_fname(&latex_text, &fname); let img_link = image_link_for_fname(&latex, &fname);
extracted.push(ExtractedLatex { extracted.push(ExtractedLatex {
fname, fname,
latex: latex_text.into(), latex: latex.into(),
}); });
img_link img_link
@ -96,6 +102,20 @@ fn strip_html_for_latex(html: &str) -> Cow<'_, str> {
out out
} }
/// Strips HTML from the contents of a math block (`[$]` / `[$$]`).
///
/// Every match of `LATEX_NEWLINES` is replaced with a single space rather
/// than a newline, so that consecutive breaks can never produce a blank
/// line inside the math environment. A space is used instead of deleting
/// the match outright so that the tokens on either side of a break stay
/// separated: with plain deletion, `\alpha<br>x` would collapse into the
/// undefined control sequence `\alphax`.
///
/// The result is then passed through `strip_html`. The input is returned
/// borrowed when neither step had to change anything.
fn strip_html_for_latex_math(html: &str) -> Cow<'_, str> {
    let mut out: Cow<str> = html.into();
    // A space separates tokens without ever starting a new paragraph,
    // however many breaks appear in a row.
    if let Cow::Owned(o) = LATEX_NEWLINES.replace_all(html, " ") {
        out = o.into();
    }
    if let Cow::Owned(o) = strip_html(out.as_ref()) {
        out = o.into();
    }
    out
}
fn fname_for_latex(latex: &str, svg: bool) -> String { fn fname_for_latex(latex: &str, svg: bool) -> String {
let ext = if svg { "svg" } else { "png" }; let ext = if svg { "svg" } else { "png" };
let csum = hex::encode(sha1_of_data(latex.as_bytes())); let csum = hex::encode(sha1_of_data(latex.as_bytes()));
@ -146,4 +166,18 @@ mod test {
}] }]
); );
} }
/// Ensures that math blocks do not contain unintended blank lines
/// when <br> or <div> elements are present in the HTML.
#[test]
fn no_blank_lines_in_math() {
    // The input has to contain consecutive HTML breaks: if they were each
    // converted to a newline they would form a blank line inside the math
    // environment. Without them this test would pass trivially.
    let input = "[$$]\\begin{tikzcd}<br><br>x & y<div><br></div>\\end{tikzcd}[/$$]";
    let (_, extracts) = extract_latex(input, false);
    assert_eq!(extracts.len(), 1, "Expected exactly one extracted block");
    let extracted = &extracts[0].latex;
    assert!(
        !extracted.contains("\n\n"),
        "Should not contain blank lines inside math"
    );
}
} }