Issue invoking “onclick” event using PyQt & JavaScript

matt0121

New Member
I am trying to scrape data from a website using beautiful soup. By default, this webpage shows 18 items and after clicking on a javascript button "showAlldevices" all 41 items are visible. Beautiful soup scrapes data only for items visible by default, to get data for all items I used PyQt module and invoked the click event using the javascript code. Below is the referred code:\[code\]import csvimport urllib2import sysimport timefrom bs4 import BeautifulSoupfrom PyQt4.QtGui import * from PyQt4.QtCore import * from PyQt4.QtWebKit import * class Render(QWebPage): def __init__(self, url): self.app = QApplication(sys.argv) QWebPage.__init__(self) self.loadFinished.connect(self._loadFinished) self.mainFrame().load(QUrl(url)) self.app.exec_() def _loadFinished(self, result): self.frame = self.mainFrame() self.app.quit() url = 'http://www.att.com/shop/wireless/devices/smartphones.html' r = Render(url)jsClick = """var evObj = document.createEvent('MouseEvents'); evObj.initEvent('click', true, true ); this.dispatchEvent(evObj); """allSelector = "a#deviceShowAllLink" allButton = r.frame.documentElement().findFirst(allSelector)allButton.evaluateJavaScript(jsClick) html = allButton.webFrame().toHtml()page = htmlsoup = BeautifulSoup(page)soup.prettify()with open('Smartphones_26decv2.0.csv', 'wb') as csvfile: spamwriter = csv.writer(csvfile, delimiter=',') spamwriter.writerow(["Date","Day of Week","Device Name","Price"]) items = soup.findAll('a', {"class": "clickStreamSingleItem"},text=True) prices = soup.findAll('div', {"class": "listGrid-price"}) for item, price in zip(items, prices): textcontent = u' '.join(price.stripped_strings) if textcontent: spamwriter.writerow([time.strftime("%Y-%m-%d"),time.strftime("%A") ,unicode(item.string).encode('utf8').strip(),textcontent])\[/code\]I am feeding the html to beautiful soup by using this line of code \[code\]html = allButton.webFrame().toHtml()\[/code\] This code is running without any errors but I am still not getting data for all 
41 items in the output CSV. Please pardon my ignorance if I am asking anything fundamental here, as I am new to programming, and please help me solve this issue.
 
Back
Top