Time out after 60s when checking URLs (#2785)

This commit is contained in:
RumovZ 2023-10-31 22:46:49 +01:00 committed by GitHub
parent 252b4e7f00
commit feaaaa230a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -6,6 +6,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::env; use std::env;
use std::iter; use std::iter;
use std::time::Duration;
use anki::links::help_page_link_suffix; use anki::links::help_page_link_suffix;
use anki::links::help_page_to_link; use anki::links::help_page_to_link;
@@ -21,11 +22,14 @@ use regex::Regex;
use reqwest::Url; use reqwest::Url;
use strum::IntoEnumIterator; use strum::IntoEnumIterator;
const WEB_TIMEOUT: Duration = Duration::from_secs(60);
/// Aggregates [`Outcome`]s by collecting the error messages of the invalid /// Aggregates [`Outcome`]s by collecting the error messages of the invalid
/// ones. /// ones.
#[derive(Default)] #[derive(Default)]
struct Outcomes(Vec<String>); struct Outcomes(Vec<String>);
#[derive(Debug)]
enum Outcome { enum Outcome {
Valid, Valid,
Invalid(String), Invalid(String),
@@ -98,23 +102,23 @@ async fn check_links() {
async fn check_url(page: CheckableUrl, ctx: &BasicContext) -> Outcome { async fn check_url(page: CheckableUrl, ctx: &BasicContext) -> Outcome {
let link = page.url(); let link = page.url();
match Url::parse(&link) { match Url::parse(&link) {
Ok(url) => { Ok(url) if url.as_str() == link => {
if url.as_str() == link { let future = check_web(&url, ctx);
match check_web(&url, ctx).await { let timeout = tokio::time::timeout(WEB_TIMEOUT, future);
Ok(()) => Outcome::Valid, match timeout.await {
Err(Reason::Dom) => Outcome::Invalid(format!( Err(_) => Outcome::Invalid(format!("Timed out: {link}")),
"'#{}' not found on '{}{}'", Ok(Ok(())) => Outcome::Valid,
url.fragment().unwrap(), Ok(Err(Reason::Dom)) => Outcome::Invalid(format!(
url.domain().unwrap(), "'#{}' not found on '{}{}'",
url.path(), url.fragment().unwrap(),
)), url.domain().unwrap(),
Err(Reason::Web(err)) => Outcome::Invalid(err.to_string()), url.path(),
_ => unreachable!(), )),
} Ok(Err(Reason::Web(err))) => Outcome::Invalid(err.to_string()),
} else { _ => unreachable!(),
Outcome::Invalid(format!("'{}' is not a valid URL part", page.anchor(),))
} }
} }
Ok(_) => Outcome::Invalid(format!("'{}' is not a valid URL part", page.anchor(),)),
Err(err) => Outcome::Invalid(err.to_string()), Err(err) => Outcome::Invalid(err.to_string()),
} }
} }