From 91fad115285b802b095cb240b5706754d102562f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matilda=20Bergstr=C3=B6m?= Date: Thu, 11 Sep 2025 09:49:34 +0200 Subject: [PATCH] latex: remove blank lines inside math blocks (fixes #3466) --- rslib/src/latex.rs | 48 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/rslib/src/latex.rs b/rslib/src/latex.rs index 02056b721..a72a376bf 100644 --- a/rslib/src/latex.rs +++ b/rslib/src/latex.rs @@ -64,18 +64,24 @@ pub fn extract_latex(text: &str, svg: bool) -> (Cow<'_, str>, Vec/
+ // [$] and [$$] blocks remove <br>/<div>
completely so that + // no blank lines are introduced inside math environments. let latex = match (caps.get(1), caps.get(2), caps.get(3)) { - (Some(m), _, _) => m.as_str().into(), - (_, Some(m), _) => format!("${}$", m.as_str()), - (_, _, Some(m)) => format!(r"\begin{{displaymath}}{}\end{{displaymath}}", m.as_str()), + (Some(m), _, _) => strip_html_for_latex(m.as_str()).into(), + (_, Some(m), _) => format!("${}$", strip_html_for_latex_math(m.as_str())), + (_, _, Some(m)) => format!( + r"\begin{{displaymath}}{}\end{{displaymath}}", + strip_html_for_latex_math(m.as_str()) + ), _ => unreachable!(), }; - let latex_text = strip_html_for_latex(&latex); - let fname = fname_for_latex(&latex_text, svg); - let img_link = image_link_for_fname(&latex_text, &fname); + + let fname = fname_for_latex(&latex, svg); + let img_link = image_link_for_fname(&latex, &fname); extracted.push(ExtractedLatex { fname, - latex: latex_text.into(), + latex: latex.into(), }); img_link @@ -96,6 +102,20 @@ fn strip_html_for_latex(html: &str) -> Cow<'_, str> { out } +/// Removes HTML breaks (
<br>, <div>
) from math blocks instead of +/// converting them to newlines. This prevents LaTeX environments +/// from being broken by unintended blank lines. +fn strip_html_for_latex_math(html: &str) -> Cow<'_, str> { + let mut out: Cow = html.into(); + if let Cow::Owned(o) = LATEX_NEWLINES.replace_all(html, "") { + out = o.into(); + } + if let Cow::Owned(o) = strip_html(out.as_ref()) { + out = o.into(); + } + out +} + fn fname_for_latex(latex: &str, svg: bool) -> String { let ext = if svg { "svg" } else { "png" }; let csum = hex::encode(sha1_of_data(latex.as_bytes())); @@ -146,4 +166,18 @@ mod test { }] ); } + + /// Ensures that math blocks do not contain unintended blank lines + /// when
<br> or <div>
elements are present in the HTML. + #[test] + fn no_blank_lines_in_math() { + let input = "[$$]\\begin{tikzcd}x & y\\end{tikzcd}[/$$]"; + let (_, extracts) = extract_latex(input, false); + let extracted = &extracts[0].latex; + + assert!( + !extracted.contains("\n\n"), + "Should not contain blank lines inside math" + ); + } }