diff --git a/project/src/backend/scraper.rs b/project/src/backend/scraper.rs
new file mode 100644
index 0000000..c83f762
--- /dev/null
+++ b/project/src/backend/scraper.rs
@@ -0,0 +1,107 @@
+use regex::bytes::Regex;
+// Libraries
+use reqwest;
+use scraper::{Html, Selector};
+use std::io::Result;
+
+// Macros
+macro_rules! map_err {
+    ($expr:expr) => {
+        $expr.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
+    };
+}
+
+// Functions
+fn extract_memberships(content: &str) -> Result<Vec<String>> {
+    // Creating a Regex Pattern
+    let re = map_err!(Regex::new(
+        r"(?P<name>[A-Z][a-zA-Z ]+?)\s+\$ ?(\d+)\.?(\d{0,2})\s+Single Wash.+?\$ ?(?P<price>\d{1,3})\. ?(?P<decimals>\d{2})\s+Monthly\s+Membership"
+    ))?;
+
+    // Creating a Membership holder
+    let mut memberships: Vec<(String, f32)> = Vec::new();
+
+    // Iterate through RegEx with Content
+    for caps in re.captures_iter(content.as_bytes()) {
+        // Getting our components
+        let name_bytes = &caps["name"];
+        let price_bytes = &caps["price"];
+        let decimals_bytes = &caps["decimals"];
+
+        let name = match std::str::from_utf8(name_bytes) {
+            Ok(n) => n.trim(),
+            Err(_) => continue,
+        };
+        let price_str = match (std::str::from_utf8(price_bytes), std::str::from_utf8(decimals_bytes)) {
+            (Ok(p), Ok(d)) => format!("{}.{}", p, d),
+            _ => continue,
+        };
+
+        if let Ok(price) = price_str.parse::<f32>() {
+            memberships.push((name.to_string(), price));
+        }
+    }
+
+    // Sorting our memberships from High to Low
+    memberships.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
+
+    // Formatting our memberships and returning it
+    Ok(memberships
+        .into_iter()
+        .map(|(name, price)| format!("{} (${:.2})", name, price))
+        .collect())
+}
+
+// Structures
+pub struct Scraper {
+    url: String,
+}
+
+// Implementations
+impl Scraper {
+    // Constructors
+    /// Creates a new Scraper
+    ///
+    /// # Arguments
+    /// * `url` - A String for the URL you're scraping from
+    ///
+    /// # Examples
+    /// ```rs
+    /// let url = "https://google.com";
+    /// let scraper = Scraper::new(url);
+    /// ```
+    pub fn new(url: &str) -> Self {
+        // Returning ourself
+        Self {
+            url: String::from(url),
+        }
+    }
+
+    // Functions
+    /// Scrapes our URL for Membership Plans
+    ///
+    /// # Examples
+    /// ```rs
+    /// println!("{:?}", scraper.start().await?);
+    /// ```
+    pub async fn start(&self) -> Result<Vec<String>> {
+        // Using Reqwest to pull data from our URL
+        let response = map_err!(map_err!(reqwest::get(&self.url).await)?.text().await)?;
+
+        // Parsing the HTML
+        let document = Html::parse_document(&response);
+
+        // Selecting the Membership section of the page
+        let selector = Selector::parse("#monthly").unwrap();
+
+        // Getting the text out of the document
+        let content = document
+            .select(&selector)
+            .next()
+            .map(|element| element.text().collect::<Vec<_>>().join(" "))
+            .unwrap_or_else(|| String::from("Cannot find Content."));
+
+        // Extracting our Memberships and sending it out
+        Ok(extract_memberships(&content)?)
+    }
+}
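For reviewers, a minimal sketch of how the new `Scraper` might be driven end to end from an async entry point. This is not part of the diff: it assumes the project runs on the tokio runtime, and the URL and import path are placeholders for whatever the crate actually uses.

```rust
// Hypothetical caller, not part of this change.
// use crate::backend::scraper::Scraper;  // import path depends on the module layout
use std::io::Result;

#[tokio::main] // assumes tokio; swap in the project's actual executor
async fn main() -> Result<()> {
    // Placeholder URL; the real pricing page is configured elsewhere.
    let scraper = Scraper::new("https://example.com/pricing");

    // `start` fetches the page, pulls the text out of the `#monthly` section,
    // and returns one "Name ($price)" line per membership, sorted high to low.
    for plan in scraper.start().await? {
        println!("{}", plan);
    }
    Ok(())
}
```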