generated from OBJNULL/Dockerized-Rust
Created UNFINISHED scraper
This commit is contained in:
parent
5d5df73b7c
commit
48b69828c5
1 changed files with 107 additions and 0 deletions
107
project/src/backend/scraper.rs
Normal file
107
project/src/backend/scraper.rs
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
use regex::bytes::Regex;
|
||||||
|
// Libraries
|
||||||
|
use reqwest;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
use std::io::Result;
|
||||||
|
|
||||||
|
// Macros
|
||||||
|
/// Converts the error side of a `Result` expression into a `std::io::Error`.
///
/// An `Ok` value is passed through untouched; an `Err` value is wrapped in a
/// `std::io::Error` of kind `std::io::ErrorKind::Other`.
macro_rules! map_err {
    ($result:expr) => {
        match $result {
            Ok(value) => Ok(value),
            Err(source) => Err(std::io::Error::new(std::io::ErrorKind::Other, source)),
        }
    };
}
|
||||||
|
|
||||||
|
// Functions
|
||||||
|
fn extract_memberships(content: &str) -> Result<Vec<String>> {
|
||||||
|
// Creating a Regex Pattern
|
||||||
|
let re = map_err!(Regex::new(
|
||||||
|
r"(?P<name>[A-Z][a-zA-Z ]+?)\s+\$ ?(\d+)\.?(\d{0,2})\s+Single Wash.+?\$ ?(?P<price>\d{1,3})\. ?(?P<decimals>\d{2})\s+Monthly\s+Membership"
|
||||||
|
))?;
|
||||||
|
|
||||||
|
// Creating a Membership holder
|
||||||
|
let mut memberships: Vec<(String, f32)> = Vec::new();
|
||||||
|
|
||||||
|
// Iterate through RegEx with Content
|
||||||
|
for caps in re.captures_iter(content.as_bytes()) {
|
||||||
|
// Getting our components
|
||||||
|
let name_bytes = &caps["name"];
|
||||||
|
let price_bytes = &caps["price"];
|
||||||
|
let decimals_bytes = &caps["decimals"];
|
||||||
|
|
||||||
|
let name = match str::from_utf8(name_bytes) {
|
||||||
|
Ok(n) => n.trim(),
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let price_str = match (str::from_utf8(price_bytes), str::from_utf8(decimals_bytes)) {
|
||||||
|
(Ok(p), Ok(d)) => format!("{}.{}", p, d),
|
||||||
|
_ => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Ok(price) = price_str.parse::<f32>() {
|
||||||
|
memberships.push((name.to_string(), price));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sorting our memberships from High to Low
|
||||||
|
memberships.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||||
|
|
||||||
|
// Formatting our memberships and returning it
|
||||||
|
Ok(memberships
|
||||||
|
.into_iter()
|
||||||
|
.map(|(name, price)| format!("{} (${:0.2})", name, price))
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Structures
|
||||||
|
/// A scraper bound to a single URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Scraper {
    /// The URL this scraper was created with.
    url: String,
}
|
||||||
|
|
||||||
|
// Implementations
|
||||||
|
impl Scraper {
|
||||||
|
// Constructors
|
||||||
|
/// Creates a new Scraper
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `url` - A String for the URL you're scraping from
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```rs
|
||||||
|
/// let url = "https://google.com";
|
||||||
|
/// let scraper = Scraper::new(url);
|
||||||
|
/// ```
|
||||||
|
pub fn new(url: &str) -> Self {
|
||||||
|
// Returning ourself
|
||||||
|
Self {
|
||||||
|
url: String::from(url),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Functions
|
||||||
|
/// Scrapes our URL for Membership Plans
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```rs
|
||||||
|
/// println!("{}", scraper.start()?);
|
||||||
|
/// ```
|
||||||
|
pub async fn start(&self) -> Result<Vec<String>> {
|
||||||
|
// Using Reqwest to pull data from our URL
|
||||||
|
let response = map_err!(map_err!(reqwest::get(&self.url).await)?.text().await)?;
|
||||||
|
|
||||||
|
// Parsing the HTML
|
||||||
|
let document = Html::parse_document(&response);
|
||||||
|
|
||||||
|
// Selecting the Membership section of the page
|
||||||
|
let selector = Selector::parse("#monthly").unwrap();
|
||||||
|
|
||||||
|
// Getting the text out of the document
|
||||||
|
let content = document
|
||||||
|
.select(&selector)
|
||||||
|
.next()
|
||||||
|
.map(|element| element.text().collect::<Vec<_>>().join(" "))
|
||||||
|
.unwrap_or_else(|| String::from("Cannot find Content."));
|
||||||
|
|
||||||
|
// Extracting our Memberships and sending it out
|
||||||
|
Ok(extract_memberships(&content)?)
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue