File tree Expand file tree Collapse file tree 2 files changed +45
-0
lines changed Expand file tree Collapse file tree 2 files changed +45
-0
lines changed Original file line number Diff line number Diff line change
1
+ # @Time : 2019/11/25 16:55
2
+ # @Author : Libuda
3
+ # @FileName: spider.py
4
+ # @Software: PyCharm
5
+
6
+ import pandas
7
+ import time
8
+ from selenium import webdriver
9
+
10
# Scrape the phone-number and date columns from the paginated table on the
# Douyin advertiser back office and checkpoint them to an Excel file.
#
# Flow: open the site, wait for the operator to log in and apply filters by
# hand, read the total row count from the pagination bar, then walk the pages
# row by row, rewriting the Excel file after every page.

# NOTE(review): not referenced anywhere in the visible script; kept so that
# any external reference to the name still resolves.
phone_num = 13628398278

# Number of table rows the page shows at once (the original loop assumed 10).
ROWS_PER_PAGE = 10


def _click_next_page(browser):
    """Click the "next page" control, trying three locators in order.

    The CSS class is tried first; the two absolute XPaths are fallbacks for
    pagination bars of different lengths. If every locator fails, the last
    error is re-raised.
    """
    locators = (
        ("css selector", '.ant-pagination-next'),
        ("xpath",
         '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[12]'),
        ("xpath",
         '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[10]'),
    )
    last_error = None
    for by, value in locators:
        try:
            browser.find_element(by, value).click()
            return
        except Exception as err:  # any failure means: try the next locator
            last_error = err
    raise last_error


# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases expect a Service object - confirm the
# installed version.
driver = webdriver.Chrome(r'C:\Users\lenovo\PycharmProjects\Spider\chromedriver.exe')
df = pandas.DataFrame()
# NOTE(review): writing ".xls" needs a legacy Excel writer engine; confirm it
# is available in the target environment.
file_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\抖音后台爬虫\res.xls"

try:
    driver.get("https://e.douyin.com/site/")
    print("请您进行登录及手动进行所有的筛选")
    yes = input("您是否已确认进行爬取")
    if yes == "y":
        # The first pagination item holds text of the form "共<N>条";
        # keep what lies between the two markers.
        all_data_len = driver.find_element(
            "xpath",
            '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[1]'
        ).text.split("条")[0].split("共")[1]
        print("总共 {} 条数据".format(all_data_len))
        total_rows = int(all_data_len)

        # XPath templates for column 4 (phone number) and column 6 (date) of
        # the 1-based table row given by "{}".
        num_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[4]'
        date_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[6]'

        all_rows = []
        while len(all_rows) < total_rows:
            time.sleep(3)  # pause between pages so the table can refresh

            # The last page may hold fewer rows than a full page; asking for
            # a row that does not exist would raise and lose the page.
            rows_on_page = min(ROWS_PER_PAGE, total_rows - len(all_rows))
            for i in range(1, rows_on_page + 1):
                all_rows.append({
                    'phone_number': driver.find_element("xpath", num_tem.format(i)).text,
                    'date': driver.find_element("xpath", date_tem.format(i)).text,
                })

            # Rebuild the frame from the accumulated rows (DataFrame.append
            # no longer exists in current pandas) and checkpoint after every
            # page so a later failure keeps what was already scraped.
            df = pandas.DataFrame(all_rows, columns=['phone_number', 'date'])
            df.to_excel(file_path, index=False)

            # Everything collected: do not click "next" on the final page.
            if len(all_rows) >= total_rows:
                break

            # Go to the next page.
            _click_next_page(driver)
finally:
    # Always shut the browser down so the chromedriver process is not leaked.
    driver.quit()
You can’t perform that action at this time.
0 commit comments