1
+ import xlrd
2
+ from xlutils .copy import copy
3
+ from selenium import webdriver
4
+ import time
5
+ import schedule
6
+ import pandas
7
+ import datetime
8
+
9
# Module-level configuration and shared state.
# NOTE: everything below runs when the module is executed, including
# launching Chrome and opening both workbooks.

wait_time = 3  # seconds passed to time.sleep() between page interactions
time_jiange = 60  # interval in seconds, subtracted from "now" for follow-up crawls
driver = webdriver.Chrome(r"C:\Users\qi\Desktop\all\chromedriver.exe")
link_file_path = r"C:\Users\qi\Desktop\all\link.xls"
phone_file_path = r"C:\Users\qi\Desktop\all\phone_number.xls"

# Link workbook: first sheet, one card link per row.
link_ecel = xlrd.open_workbook(link_file_path)
link_tables = link_ecel.sheet_by_index(0)
link_get_col = 2  # column the links are read from
link_write_col = 3  # column the per-link status is written to

# Phone workbook: first sheet, one phone number per row.
phone_excel = xlrd.open_workbook(phone_file_path)
phoe_tables = phone_excel.sheet_by_index(0)
phone_get_col = 1  # column the phone numbers are read from
phone_write_col = 2  # column the per-phone status is written to


# Cursor into the phone list; register() declares it global and advances it.
phone_can_use_index = 0
27
+
28
+
29
def get_keywords_data(tables, row, col):
    """Return the value stored at cell (row, col) of the sheet ``tables``.

    ``tables`` is any object exposing ``cell_value(row, col)``; the result
    of that call is returned unchanged.
    """
    return tables.cell_value(row, col)
32
+
33
+
34
def write_to_excel(file_path, row, col, value):
    """Store ``str(value)`` in cell (row, col) of the first sheet and save.

    The workbook at ``file_path`` is re-read, duplicated into a writable
    copy, modified, and then saved back over the same path.
    """
    source_book = xlrd.open_workbook(file_path, formatting_info=False)
    writable_book = copy(source_book)
    first_sheet = writable_book.get_sheet(0)
    first_sheet.write(row, col, str(value))
    writable_book.save(file_path)
40
+
41
def get_phone_number(end_date):
    """Scrape phone numbers and timestamps from the backend table into Excel.

    Opens the site, waits for the operator to log in and apply filters by
    hand, and — only if the operator answers ``y`` — walks the result
    table page by page.  After every page the rows collected so far are
    written to ``phone_file_path``.

    Args:
        end_date: ``datetime.datetime`` compared against each row's parsed
            timestamp.  Scraping stops at the first row whose timestamp is
            later than ``end_date``; that row is not stored.

    Returns:
        None.  The result is the workbook written to ``phone_file_path``.
    """
    driver.get("https://e.douyin.com/site/")
    print("请您进行登录及手动进行所有的筛选")
    yes = input("您是否已确认进行爬取")
    if yes != "y":
        return

    # The pager's first item contains the total row count between "共" and "条".
    all_data_len = driver.find_element_by_xpath('//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[1]').text.split("条")[0].split("共")[1]
    print("总共 {} 条数据".format(all_data_len))
    num_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[4]'
    date_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[6]'

    collected_rows = []
    reached_cutoff = False
    while not reached_cutoff:
        time.sleep(wait_time)  # give the page time to render
        # Table rows are addressed as tr[1]..tr[10].
        for i in range(1, 11):
            row = {
                'phone_number': driver.find_element_by_xpath(num_tem.format(i)).text,
                'date': datetime.datetime.strptime(
                    driver.find_element_by_xpath(date_tem.format(i)).text,
                    "%Y.%m.%d %H-%M-%S",
                ),
            }
            # NOTE(review): this stops on the first row *newer* than end_date.
            # If the table is sorted newest-first the comparison looks
            # inverted — confirm the intended direction.
            if row['date'] > end_date:
                reached_cutoff = True
                break
            collected_rows.append(row)

        # Rebuild the frame from all rows so far (DataFrame.append no longer
        # exists in pandas >= 2.0) and persist progress after every page.
        df = pandas.DataFrame(collected_rows)
        df.to_excel(phone_file_path, index=False)
        print("已写入{}数据".format(len(df)))

        if reached_cutoff:
            # Nothing further to read, so do not page forward.
            break

        # Go to the next page: try the pager's CSS class first, then fall
        # back to fixed pager positions.
        try:
            driver.find_element_by_css_selector('.ant-pagination-next').click()
        except Exception:
            try:
                driver.find_element_by_xpath('//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[12]').click()
            except Exception:
                driver.find_element_by_xpath('//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[10]').click()
79
+
80
def register():
    """Activate each unused card link with the next untried phone number.

    For every link read from ``link_tables``:

    * If the page already shows a result banner, the link is skipped; when
      the banner reads "开卡失败" the link is also marked "已使用".
    * Otherwise phone numbers are submitted one at a time, starting at the
      module-level cursor ``phone_can_use_index``, until one is accepted.
      Each outcome is written back through ``write_to_excel``.

    The cursor is advanced past every number that has been submitted, so
    status rows always refer to the number that was actually tried.  The
    function returns as soon as the phone list is exhausted.

    Returns:
        None.
    """
    global phone_can_use_index
    link_data = [
        get_keywords_data(link_tables, i, link_get_col)
        for i in range(1, link_tables.nrows)
    ]
    # Cell values are passed through int() first, then turned into strings.
    phone_data = [
        str(int(get_keywords_data(phoe_tables, i, phone_get_col)))
        for i in range(1, phoe_tables.nrows)
    ]

    for index, link in enumerate(link_data):
        link_row = index + 1  # reading started at sheet row 1, so offset by one
        driver.get(link)
        time.sleep(wait_time)

        # A result banner on first load means the link cannot be used.
        try:
            banner_text = driver.find_element_by_xpath("/html/body/div[1]/div[1]/p[1]").text
        except Exception:
            banner_text = None
        if banner_text is not None:
            if banner_text == "开卡失败":
                write_to_excel(link_file_path, link_row, link_write_col, "已使用")
                print("该卡已经被使用..{}".format(link))
            continue

        time.sleep(wait_time)
        try:
            print("该卡可以使用:{},正在查询可用手机号。。".format(link))
            text = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[1]/p')
            if text.text != "欢迎加入樊登读书,即刻获得":
                continue

            for ph_number_index in range(phone_can_use_index, len(phone_data)):
                phone = phone_data[ph_number_index]
                phone_row = ph_number_index + 1

                driver.get(link)
                print("当前查询手机号为{}".format(phone))
                time.sleep(wait_time)
                driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[1]/input').send_keys(phone)
                driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[3]/input').send_keys(phone)
                driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[3]').click()
                time.sleep(wait_time)
                # Click the "activate card" button; the page responds slowly
                # afterwards, hence the extra wait.
                driver.find_element_by_xpath('//*[@id="join-btn"]').click()
                time.sleep(wait_time)

                # The number has been submitted: the next link must start
                # after it, whatever the outcome turns out to be.
                phone_can_use_index = ph_number_index + 1

                try:
                    result_text = driver.find_element_by_xpath('/html/body/div[1]/div[1]/p[1]').text
                except Exception:
                    result_text = None
                if result_text is not None:
                    if result_text == "开卡失败":
                        print("开卡失败,您已经是樊登读书好友")
                        write_to_excel(phone_file_path, phone_row, phone_write_col, "开卡失败您已经是樊登读书书友")
                    # Rejected or unrecognised banner: try the next number.
                    continue

                time.sleep(wait_time)
                try:
                    succeeded = driver.find_element_by_xpath('/html/body/div[1]/div/h1').text == "领取成功!"
                except Exception:
                    print("此电话号码有问题")
                    write_to_excel(phone_file_path, phone_row, phone_write_col, "此电话号码有问题")
                    continue
                if succeeded:
                    print("开卡成功")
                    write_to_excel(link_file_path, link_row, link_write_col, "领取成功")
                    write_to_excel(phone_file_path, phone_row, phone_write_col, "领取成功")
                    # This link is consumed; move on to the next link instead
                    # of submitting further numbers to it.
                    break
            else:
                # Ran out of phone numbers without a success: nothing more
                # can be activated, so stop processing links.
                print("当前手机号已全被使用")
                return
        except Exception as e:
            # Best effort: a page that does not match the expected layout
            # skips this link, but the reason is reported rather than hidden.
            print("处理链接 {} 时出错,已跳过:{}".format(link, e))
162
+
163
+
164
def previous_midnight(now):
    """Return 00:00:00 of the calendar day before ``now``.

    Uses ``timedelta`` arithmetic, so the result is valid on the first day
    of a month or year as well.

    Args:
        now: a ``datetime.datetime``.

    Returns:
        A ``datetime.datetime`` at midnight of the previous day.
    """
    return (now - datetime.timedelta(days=1)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )


if __name__ == '__main__':
    # Number of completed rounds.  It lives outside the loop so that only
    # the very first round uses the wide "since yesterday" window.
    crawl_count = 0
    while True:
        now = datetime.datetime.fromtimestamp(time.time())
        print(now)
        if crawl_count == 0:
            # First crawl: collect everything since the start of yesterday.
            cutoff = previous_midnight(now)
        else:
            # Later crawls: only look back one interval.
            cutoff = now - datetime.timedelta(seconds=time_jiange)
        print(cutoff)
        get_phone_number(cutoff)
        register()
        crawl_count += 1
188
+
0 commit comments