Script Scraping
Script Scraping
Script Scraping
# Scrape the reviews shown by the already-open `driver` page, one batch per
# loop iteration, and save them to "<Ptitle>_reviews_kaiaccess.csv".
#
# Relies on names set up earlier in the script: `driver` (Selenium WebDriver),
# `sleep`, `BeautifulSoup`, `pd` (pandas) and `Ptitle`.
#
# NOTE(review): the find_element_by_* helpers used below were removed in
# Selenium 4.3; this script presumably targets an older Selenium -- confirm.

# The same button is clicked twice before sorting and once after every batch.
# NOTE(review): presumably the "next reviews" paging button -- confirm.
REVIEWS_BUTTON_XPATH = (
    '//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]'
    '/button[2]/div[2]/div/div'
)

# Column order of the resulting DataFrame / CSV.
REVIEW_COLUMNS = ['Date', 'Rating', 'Review Title', 'Review Text']


def _parse_review(review_html):
    """Return one review's fields, parsed from the element's outer HTML."""
    soup = BeautifulSoup(review_html, "html.parser")
    date = soup.find('span', class_='review-date').get_text()
    # Only the character at index 6 of the aria-label is kept.
    # NOTE(review): presumably the star digit of the label -- confirm.
    rating = soup.find('div', class_='tiny-star')['aria-label'][6:7]
    title = soup.find('span', class_='review-title').get_text()
    # Remove the 'Full Review' text, then skip the first len(title) + 1
    # characters of the body.
    # NOTE(review): presumably the body text starts with the title -- confirm.
    body = soup.find('div', class_='review-body').get_text()
    text = body.replace('Full Review', '')[len(title) + 1:]
    print(soup.get_text())
    return {
        'Date': date,
        'Rating': rating,
        'Review Title': title,
        'Review Text': text,
    }


sleep(1)
driver.find_element_by_xpath(REVIEWS_BUTTON_XPATH).click()
driver.find_element_by_xpath(REVIEWS_BUTTON_XPATH).click()
sleep(2)

# Click the displayed drop-down entry, then the first 'button.dropdown-child'.
# NOTE(review): presumably this selects the "Newest" sort order -- confirm.
driver.find_element_by_css_selector('.displayed-child').click()
driver.execute_script(
    "document.querySelectorAll('button.dropdown-child')[0].click()"
)

review_rows = []
for page in range(1, 100):
    try:
        for elem in driver.find_elements_by_class_name('single-review'):
            print(page)
            review_rows.append(_parse_review(elem.get_attribute('outerHTML')))
            print('-' * 10)
    except Exception as exc:
        # Best effort: a review that cannot be parsed ends this batch, but
        # the cause is reported instead of being silently swallowed.
        print('s', 'page', page, 'failed:', repr(exc))

    try:
        driver.find_element_by_xpath(REVIEWS_BUTTON_XPATH).click()
    except Exception as exc:
        # Without the button there is nothing more to load; stop here so the
        # reviews collected so far are still written out below.
        print('stopping at page', page, '- button click failed:', repr(exc))
        break

# Build the frame in one go; unlike pd.concat on an empty list, this also
# works (yielding an empty frame with the right columns) when nothing was
# scraped.
reviews_df = pd.DataFrame(review_rows, columns=REVIEW_COLUMNS)
reviews_df.to_csv(Ptitle + '_reviews_kaiaccess.csv', encoding='utf-8')
# NOTE: the browser is left open; driver.close() was commented out in the
# original script.