Under Indigo the environment cannot find BeautifulSoup ("embedded script, line 57, at top level ImportError: No module named bs4"), despite trying to include it using the Python invocation: sys.path.append ("/anaconda2/lib/python2.7/site-packages"). Can someone please help me get to BeautifulSoup? Line 57 is: from bs4 import BeautifulSoup # to parse the HTML on a web page
Thank you,
Stan Krasnow
My python code is:
- Code: Select all
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 29 14:46:27 2019
@author: XXX
"""
# Scrape selected data from CeleriusVI_URL
#
# Two key websites:
# https://teamtreehouse.com/community/help-with-pythons-beautiful-soup-html-question
# https://martechwithme.com/introduction-to-web-scraping-with-python-extracting-data-from-a-page/
#
import sys
# sys.path.append ("/Library/Python/2.7/site-packages")  # earlier path attempt, kept for reference
# NOTE(review): presumably log_or_print writes to the Indigo event log when
# running under Indigo and falls back to print otherwise — confirm against
# indigo_attachments.  Only the short alias `lop` is used below.
from indigo_attachments import log_or_print as lop
lop ("Starting Celerius VI Scraping")
#
from random import randrange

# Pool of desktop-browser User-Agent strings.  One is picked at random per
# run so successive scrapes look less uniform to the target site's
# anti-scraping defences.
USER_AGENTS = [
    ('Mozilla/5.0 (X11; Linux x86_64) '
     'AppleWebKit/537.36 (KHTML, like Gecko) '
     'Chrome/57.0.2987.110 '
     'Safari/537.36'),  # chrome
    ('Mozilla/5.0 (X11; Linux x86_64) '
     'AppleWebKit/537.36 (KHTML, like Gecko) '
     'Chrome/61.0.3163.79 '
     'Safari/537.36'),  # chrome
    ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:55.0) '
     'Gecko/20100101 '
     'Firefox/55.0'),  # firefox
    ('Mozilla/5.0 (X11; Linux x86_64) '
     'AppleWebKit/537.36 (KHTML, like Gecko) '
     'Chrome/61.0.3163.91 '
     'Safari/537.36'),  # chrome
    ('Mozilla/5.0 (X11; Linux x86_64) '
     'AppleWebKit/537.36 (KHTML, like Gecko) '
     'Chrome/62.0.3202.89 '
     'Safari/537.36'),  # chrome
    ('Mozilla/5.0 (X11; Linux x86_64) '
     'AppleWebKit/537.36 (KHTML, like Gecko) '
     'Chrome/63.0.3239.108 '
     'Safari/537.36'),  # chrome
]
# Index of the agent chosen for this run; randrange(n) is the idiomatic form
# of randint(0, n - 1).
random_agent_count = randrange(len(USER_AGENTS))
import os

import requests # to fetch a web page
from datetime import datetime # to get today's date for email
# ###########################################################################
# BeautifulSoup lives in the Anaconda site-packages directory, which is not
# on the embedded interpreter's default search path.  Append it only when the
# directory actually exists and is not already on sys.path, and report a
# missing install with actionable guidance instead of a bare ImportError.
# NOTE(review): bs4 must be installed for the *same* interpreter (and major
# version) that runs this script — a py2.7 build under /anaconda2 will not
# satisfy a different embedded Python.
# ###########################################################################
_BS4_SITE_PACKAGES = "/anaconda2/lib/python2.7/site-packages"
if os.path.isdir(_BS4_SITE_PACKAGES) and _BS4_SITE_PACKAGES not in sys.path:
    sys.path.append(_BS4_SITE_PACKAGES)
try:
    from bs4 import BeautifulSoup # to parse the HTML on a web page
except ImportError:
    lop ("bs4 not found on sys.path; install BeautifulSoup for the Python "
         "that runs this script, e.g.: <that python> -m pip install beautifulsoup4")
    raise
import smtplib # to send an email
#
# Use the randomly selected agent chosen above for this run's request.
headers = {'User-Agent': USER_AGENTS[random_agent_count]} # try to avoid scraping defences
CeleriusVI_URL = "https://www.bloomberg.com/quote/ACELVIP:LX"
# NOTE(review): no HTTP status check — a 4xx/5xx response or a CAPTCHA page
# falls straight through, and the hard-coded index lookups below will then
# raise IndexError.  Consider request.raise_for_status().
request = requests.get(CeleriusVI_URL, headers=headers) # First "fetch" the URL using requests
content = request.content # Then retrieve the content
# Give the web content to BeautifulSoup
soup = BeautifulSoup(content, 'lxml')
# Get the as of date
# <span class="fieldLabel__9f45bef7"><span>Total Assets (M EUR) (On 06/28/2019)</span></span>
# <span>Total Assets (M EUR) (On 06/28/2019)</span>
# NOTE(review): bs4 expects `attrs` to be a dict; a bare string is treated as
# a class filter and may conflict with class_= — verify this argument actually
# narrows the match as intended (the index below was tuned with it in place).
find_CeleriusVI_as_of_date = soup.find_all("span", class_="fieldLabel__9f45bef7", attrs= "Total Assets (M EUR)")
CeleriusVI_HTML_index = 6 # trial and error index of SPANs; breaks if Bloomberg reorders the page
find_text = find_CeleriusVI_as_of_date[CeleriusVI_HTML_index].text
# lop (str(find_CeleriusVI_as_of_date) + "XXX")
CeleriusVI_as_of_date = "o" + find_text[6:] # clean-up return from BS; add lower case o in the word On
CeleriusVI_as_of_date = CeleriusVI_as_of_date[:-1] # clean-up return from BS; drop trailing ")"
# Get the Celerius Current Asset Value
# <span class="fieldValue__2d582aa7">82.348</span>
find_CeleriusVI_Value = soup.find_all("span", class_ = "fieldValue__2d582aa7")
CeleriusVI_HTML_index = 7 # trial and error index of SPANs; same fragility as above
CeleriusVI_Value = find_CeleriusVI_Value[CeleriusVI_HTML_index].text
euro = u"€" # unicode character for printing
lop (CeleriusVI_as_of_date + " the value of Celerius VI is " + euro + CeleriusVI_Value + "m")
lop ("End of Celerius VI Scraping")
#