Created UNFINISHED scraper

This commit is contained in:
Maddox Werts 2025-08-02 16:23:51 -04:00
parent 5d5df73b7c
commit 48b69828c5

View file

@ -0,0 +1,107 @@
use regex::bytes::Regex;
// Libraries
use reqwest;
use scraper::{Html, Selector};
use std::io::Result;
// Macros
/// Converts the error side of a `Result` into a `std::io::Error`.
///
/// `Ok` values pass through untouched. An `Err` value becomes the payload of a
/// new `std::io::Error` created with `std::io::ErrorKind::Other`, so the
/// original error message is preserved.
macro_rules! map_err {
    ($result:expr) => {
        $result.map_err(|source| std::io::Error::new(std::io::ErrorKind::Other, source))
    };
}
// Functions
/// Extracts membership plans and their monthly prices from a block of text.
///
/// Every match of the pattern contributes one `(name, price)` pair: the name
/// comes from the `name` capture group (trimmed), and the price is assembled
/// from the `price` and `decimals` capture groups. Pairs are sorted from the
/// highest price to the lowest and rendered as `"<name> ($<price>)"` with two
/// decimal places.
///
/// # Arguments
/// * `content` - Text to search for membership plans
///
/// # Errors
/// Returns a `std::io::Error` of kind `Other` if the pattern fails to compile.
fn extract_memberships(content: &str) -> Result<Vec<String>> {
    // Creating a Regex Pattern
    let re = map_err!(Regex::new(
        r"(?P<name>[A-Z][a-zA-Z ]+?)\s+\$ ?(\d+)\.?(\d{0,2})\s+Single Wash.+?\$ ?(?P<price>\d{1,3})\. ?(?P<decimals>\d{2})\s+Monthly\s+Membership"
    ))?;

    // Collecting a (name, price) pair for every match. The pattern works on
    // bytes, so each capture is converted back to text first; a match whose
    // pieces are not valid UTF-8 or whose price does not parse is skipped.
    let mut memberships: Vec<(String, f32)> = re
        .captures_iter(content.as_bytes())
        .filter_map(|caps| {
            let name = std::str::from_utf8(&caps["name"]).ok()?.trim();
            let dollars = std::str::from_utf8(&caps["price"]).ok()?;
            let cents = std::str::from_utf8(&caps["decimals"]).ok()?;
            let price = format!("{}.{}", dollars, cents).parse::<f32>().ok()?;
            Some((name.to_string(), price))
        })
        .collect();

    // Sorting our memberships from High to Low. `total_cmp` is a total order
    // on floats, so the comparator has no panicking `unwrap`.
    memberships.sort_by(|a, b| b.1.total_cmp(&a.1));

    // Formatting our memberships and returning it
    Ok(memberships
        .into_iter()
        .map(|(name, price)| format!("{} (${:0.2})", name, price))
        .collect())
}
// Structures
/// Scrapes membership plans from a web page.
///
/// Holds the URL to fetch; the page is only requested when the scraper is
/// started.
#[derive(Debug, Clone)]
pub struct Scraper {
    /// URL of the page the memberships are scraped from
    url: String,
}
// Implementations
impl Scraper {
    // Constructors
    /// Creates a new Scraper
    ///
    /// # Arguments
    /// * `url` - A string slice with the URL you're scraping from
    ///
    /// # Examples
    /// ```rs
    /// let url = "https://google.com";
    /// let scraper = Scraper::new(url);
    /// ```
    pub fn new(url: &str) -> Self {
        Self {
            url: url.to_owned(),
        }
    }

    // Functions
    /// Scrapes our URL for Membership Plans
    ///
    /// Downloads the page, takes the text of the first element matching the
    /// `#monthly` selector and extracts the membership plans from it. If the
    /// page has no such element, the result is an empty list.
    ///
    /// # Errors
    /// Returns a `std::io::Error` of kind `Other` if the request fails, the
    /// response body cannot be read, or the memberships cannot be extracted.
    ///
    /// # Examples
    /// ```rs
    /// let memberships = scraper.start().await?;
    /// println!("{:?}", memberships);
    /// ```
    pub async fn start(&self) -> Result<Vec<String>> {
        // Using Reqwest to pull data from our URL
        let response = map_err!(reqwest::get(&self.url).await)?;
        let body = map_err!(response.text().await)?;

        // Parsing the HTML
        let document = Html::parse_document(&body);

        // Selecting the Membership section of the page
        let selector = Selector::parse("#monthly").expect("`#monthly` is a valid CSS selector");

        // Getting the text out of the document. A missing section yields an
        // empty string, which simply produces no memberships below.
        let content = document
            .select(&selector)
            .next()
            .map(|element| element.text().collect::<Vec<_>>().join(" "))
            .unwrap_or_default();

        // Extracting our Memberships and sending it out
        extract_memberships(&content)
    }
}