# Tuan_H
# (Tuan H)
# 1
# import libraries
from bs4 import BeautifulSoup
import requests
import time
import datetime
import csv
import pandas as pd
import smtplib
# Connect to the website and pull in the page data
URL = 'https://www.bookdepository.com/bestbooksever'
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}
# Without a timeout, requests waits indefinitely on a stalled connection.
page = requests.get(URL, headers=headers, timeout=30)
soup1 = BeautifulSoup(page.content, "html.parser")
# Re-parse the prettified markup; the lookups below run against this second tree.
soup2 = BeautifulSoup(soup1.prettify(), "html.parser")


def _text_or_blank(item, tag, css_class):
    """Return the stripped text of the first matching child tag, or '' if absent."""
    node = item.find(tag, class_=css_class)
    return node.get_text().strip() if node is not None else ''


## Loop over the books and print the details of each one
product = soup2.find_all('div', 'book-item')
for x in product:
    title = _text_or_blank(x, 'h3', 'title')
    author = _text_or_blank(x, 'p', 'author')
    date_published = _text_or_blank(x, 'p', 'published')
    # Only the first whitespace-separated token of the price text is shown;
    # an item with no price text prints an empty price instead of crashing.
    price_tokens = _text_or_blank(x, 'p', 'price').split()
    price = price_tokens[0] if price_tokens else ''
    print('Title :', title)
    print('Author :', author)
    print('Published :', date_published)
    print('Price:', price, '\n')
def _parse_price(price_text):
    """Return the first amount in *price_text* truncated to an int, or None.

    Thousands separators (``,``) are dropped and any currency prefix is
    ignored, so ``'£1,234.56'`` and ``'US$1,234.56'`` both yield ``1234``.
    None is returned when the text contains no digits.
    """
    import re  # local import: the file's import block does not include re

    match = re.search(r'\d[\d,]*(?:\.\d+)?', price_text)
    if match is None:
        return None
    return int(float(match.group().replace(',', '')))


def _node_text(parent, tag, css_class):
    """Return the stripped text of the first matching child tag, or '' if absent."""
    node = parent.find(tag, class_=css_class)
    return node.get_text().strip() if node is not None else ''


def check_book():
    """Scrape the best-books page and append one row per book to BestBook.csv.

    Every ``div`` with class ``book-item`` becomes a CSV row holding the
    title, author, published text, whole-number price and today's date.
    The column header is written only when the file is missing or empty, so
    repeated runs do not scatter header rows through the data.  Fields whose
    tag is missing are written as empty cells.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the 30-second timeout expires).
    """
    import os  # local import: the file's import block does not include os

    ## Connect to the website
    URL = 'https://www.bookdepository.com/bestbooksever'
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    page = requests.get(URL, headers=headers, timeout=30)
    soup1 = BeautifulSoup(page.content, "html.parser")
    soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

    ## Create a column header
    header = ['Title', 'Author', 'Published', 'Price', 'Scraped Date']
    csv_path = 'BestBook.csv'
    # The file is opened in append mode, so the header belongs only at the
    # very top: write it when the file does not exist yet or is empty.
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    # Same value for every row of this run, so compute it once.
    scraped_date = datetime.date.today()

    ## Write the data into csv
    # newline='' lets the csv module control line endings (otherwise blank
    # rows appear on Windows); utf-8 keeps non-ASCII titles writable.
    with open(csv_path, 'a', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        if needs_header:
            writer.writerow(header)
        for div in soup2.find_all('div', 'book-item'):
            title = _node_text(div, 'h3', 'title')
            author = _node_text(div, 'p', 'author')
            date_published = _node_text(div, 'p', 'published')
            # None (no parsable amount) is written by csv as an empty cell.
            price = _parse_price(_node_text(div, 'p', 'price'))
            writer.writerow([title, author, date_published, price, scraped_date])
## Automate the check: run check_book() once a day, indefinitely
while True:
    try:
        check_book()
    except requests.RequestException as exc:
        # A failed request should not end the daily schedule; report it and
        # try again on the next cycle.
        print('check_book failed:', exc)
    time.sleep(86400)  # 86400 seconds = 24 hours