generated from OBJNULL/Dockerized-Rust
Created UNFINISHED scraper
This commit is contained in:
parent
5d5df73b7c
commit
48b69828c5
1 changed files with 107 additions and 0 deletions
107
project/src/backend/scraper.rs
Normal file
107
project/src/backend/scraper.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use regex::bytes::Regex;
|
||||
// Libraries
|
||||
use reqwest;
|
||||
use scraper::{Html, Selector};
|
||||
use std::io::Result;
|
||||
|
||||
// Macros
|
||||
/// Converts the error side of a `Result` into a `std::io::Error`.
///
/// The success value is passed through untouched; any error is wrapped in an
/// `io::Error` of kind `Other`, so the outcome can be propagated with `?` from
/// functions that return `std::io::Result`.
macro_rules! map_err {
    ($result:expr) => {
        $result.map_err(|source| ::std::io::Error::new(::std::io::ErrorKind::Other, source))
    };
}
|
||||
|
||||
// Functions
|
||||
fn extract_memberships(content: &str) -> Result<Vec<String>> {
|
||||
// Creating a Regex Pattern
|
||||
let re = map_err!(Regex::new(
|
||||
r"(?P<name>[A-Z][a-zA-Z ]+?)\s+\$ ?(\d+)\.?(\d{0,2})\s+Single Wash.+?\$ ?(?P<price>\d{1,3})\. ?(?P<decimals>\d{2})\s+Monthly\s+Membership"
|
||||
))?;
|
||||
|
||||
// Creating a Membership holder
|
||||
let mut memberships: Vec<(String, f32)> = Vec::new();
|
||||
|
||||
// Iterate through RegEx with Content
|
||||
for caps in re.captures_iter(content.as_bytes()) {
|
||||
// Getting our components
|
||||
let name_bytes = &caps["name"];
|
||||
let price_bytes = &caps["price"];
|
||||
let decimals_bytes = &caps["decimals"];
|
||||
|
||||
let name = match str::from_utf8(name_bytes) {
|
||||
Ok(n) => n.trim(),
|
||||
Err(_) => continue,
|
||||
};
|
||||
let price_str = match (str::from_utf8(price_bytes), str::from_utf8(decimals_bytes)) {
|
||||
(Ok(p), Ok(d)) => format!("{}.{}", p, d),
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
if let Ok(price) = price_str.parse::<f32>() {
|
||||
memberships.push((name.to_string(), price));
|
||||
}
|
||||
}
|
||||
|
||||
// Sorting our memberships from High to Low
|
||||
memberships.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
|
||||
// Formatting our memberships and returning it
|
||||
Ok(memberships
|
||||
.into_iter()
|
||||
.map(|(name, price)| format!("{} (${:0.2})", name, price))
|
||||
.collect())
|
||||
}
|
||||
|
||||
// Structures
|
||||
/// Scrapes membership plans from a single web page.
///
/// Built with `Scraper::new` and run with `Scraper::start`.
#[derive(Debug, Clone)]
pub struct Scraper {
    /// Address of the page that is fetched and scraped.
    url: String,
}
|
||||
|
||||
// Implementations
|
||||
impl Scraper {
|
||||
// Constructors
|
||||
/// Creates a new Scraper
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `url` - A String for the URL you're scraping from
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rs
|
||||
/// let url = "https://google.com";
|
||||
/// let scraper = Scraper::new(url);
|
||||
/// ```
|
||||
pub fn new(url: &str) -> Self {
|
||||
// Returning ourself
|
||||
Self {
|
||||
url: String::from(url),
|
||||
}
|
||||
}
|
||||
|
||||
// Functions
|
||||
/// Scrapes our URL for Membership Plans
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rs
|
||||
/// println!("{}", scraper.start()?);
|
||||
/// ```
|
||||
pub async fn start(&self) -> Result<Vec<String>> {
|
||||
// Using Reqwest to pull data from our URL
|
||||
let response = map_err!(map_err!(reqwest::get(&self.url).await)?.text().await)?;
|
||||
|
||||
// Parsing the HTML
|
||||
let document = Html::parse_document(&response);
|
||||
|
||||
// Selecting the Membership section of the page
|
||||
let selector = Selector::parse("#monthly").unwrap();
|
||||
|
||||
// Getting the text out of the document
|
||||
let content = document
|
||||
.select(&selector)
|
||||
.next()
|
||||
.map(|element| element.text().collect::<Vec<_>>().join(" "))
|
||||
.unwrap_or_else(|| String::from("Cannot find Content."));
|
||||
|
||||
// Extracting our Memberships and sending it out
|
||||
Ok(extract_memberships(&content)?)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue