import urllib.error
import urllib.request

import bs4
def map_site(website):
    """Yield the ``href`` of every ``<a>`` tag on the page at *website*.

    If the URL is malformed (ValueError) or the request fails (URLError),
    prints an error message and yields nothing. Note: because this is a
    generator, nothing runs (including the fetch) until it is iterated.
    """
    try:
        # Pass the raw bytes straight to bs4, which sniffs the encoding
        # itself. The original wrapped them in str(), producing a
        # "b'...'"-escaped string that corrupted the markup being parsed.
        starting_page = urllib.request.urlopen(website).read()
        soup = bs4.BeautifulSoup(starting_page, 'html.parser')
    except (ValueError, urllib.error.URLError):
        # The original `except ValueError or urllib.error.URLError` only
        # caught ValueError: `or` returns its first truthy operand, so
        # URLError was silently never handled. A tuple catches both.
        print('no such website exists, try inputting the exact url.')
        return
    for link in soup.find_all('a'):
        yield link.get('href')
def extract_html(urls):
    """Download each URL in *urls*, print its HTML, and return the pages.

    Returns a list with one ``str(bytes)`` entry per successfully fetched
    URL (the original's return format is preserved). URLs that cannot be
    fetched are reported and skipped.
    """
    pages = []  # renamed from `map`, which shadowed the builtin
    for url in urls:
        try:
            print('extracting html from:', url)
            # Fetch once and reuse the result; the original called
            # urlopen(...).read() twice per URL, downloading every page
            # a second time just to print it.
            content = str(urllib.request.urlopen(url).read())
            print(content)
            pages.append(content)
        except (ValueError, urllib.error.URLError):
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and hid real bugs.
            print('oops, an error occured')
    return pages
if __name__ == '__main__':
    # Prompt for a start URL, collect the links on that page, and print
    # the HTML of every linked page.
    target = input('input full url --> ')
    pages = extract_html(map_site(target))
    print(pages)