Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 01cc10c

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents 520f85b + 2ce1136 commit 01cc10c

File tree

1 file changed

+188
-0
lines changed

1 file changed

+188
-0
lines changed

register.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import xlrd
2+
from xlutils.copy import copy
3+
from selenium import webdriver
4+
import time
5+
import schedule
6+
import pandas
7+
import datetime
8+
9+
# ---------------------------------------------------------------------------
# Timing
# ---------------------------------------------------------------------------
# Pause, in seconds, handed to time.sleep() between browser steps.
wait_time = 3
# Interval, in seconds; the main loop subtracts it from the current time to
# build the cutoff for repeat crawls.
time_jiange = 60

# ---------------------------------------------------------------------------
# Browser session (started as soon as this module is executed)
# ---------------------------------------------------------------------------
driver = webdriver.Chrome(r"C:\Users\qi\Desktop\all\chromedriver.exe")

# ---------------------------------------------------------------------------
# Workbooks
# ---------------------------------------------------------------------------
link_file_path = r"C:\Users\qi\Desktop\all\link.xls"
phone_file_path = r"C:\Users\qi\Desktop\all\phone_number.xls"

# Link workbook: first sheet; links are read from column 2 and the outcome
# is written to column 3 (both zero-based).
link_ecel = xlrd.open_workbook(link_file_path)
link_tables = link_ecel.sheet_by_index(0)
link_get_col, link_write_col = 2, 3

# Phone workbook: first sheet; numbers are read from column 1 and the outcome
# is written to column 2 (both zero-based).
phone_excel = xlrd.open_workbook(phone_file_path)
phoe_tables = phone_excel.sheet_by_index(0)
phone_get_col, phone_write_col = 1, 2

# Zero-based position in the phone list of the next number to try; register()
# advances it through a `global` statement.
phone_can_use_index = 0
27+
28+
29+
def get_keywords_data(tables, row, col):
    """Look up a single cell on a worksheet.

    Args:
        tables: Object exposing ``cell_value(row, col)``; callers in this
            file pass sheets obtained from ``xlrd``.
        row: Row index forwarded to ``cell_value``.
        col: Column index forwarded to ``cell_value``.

    Returns:
        Whatever ``tables.cell_value(row, col)`` returns.
    """
    return tables.cell_value(row, col)
32+
33+
34+
def write_to_excel(file_path, row, col, value):
    """Store ``str(value)`` in one cell of the first sheet and save in place.

    The workbook is re-read from disk on every call, copied into a writable
    workbook with ``xlutils.copy.copy``, modified, and saved back over the
    same path.

    Args:
        file_path: Path of the workbook to read and overwrite.
        row: Row index of the target cell.
        col: Column index of the target cell.
        value: Value to store; it is converted with ``str`` before writing.
    """
    source_book = xlrd.open_workbook(file_path, formatting_info=False)
    editable_book = copy(source_book)
    editable_book.get_sheet(0).write(row, col, str(value))
    editable_book.save(file_path)
40+
41+
def get_phone_number(end_date):
    """Scrape phone numbers and dates from the result table into the phone workbook.

    Opens the site, waits for the operator to log in and apply filters by
    hand, then walks the paginated table (up to 10 rows per page). After each
    page the rows collected so far are written to ``phone_file_path``.

    The workbook is written without an index column and with the columns in
    the fixed order ``date``, ``phone_number``. ``register`` reads the phone
    number from column index ``phone_get_col`` (1), so the order must not
    depend on the installed pandas version.

    Scraping stops when a row's date is later than ``end_date`` (that row is
    not stored), or when a page holds fewer than 10 rows.

    Args:
        end_date: ``datetime.datetime`` cutoff compared against each row.

    Returns:
        None. Nothing is scraped unless the operator answers ``y``.
    """
    # Imported here so the block does not depend on an extra file-level import.
    from selenium.common.exceptions import NoSuchElementException

    driver.get("https://e.douyin.com/site/")
    print("请您进行登录及手动进行所有的筛选")
    yes = input("您是否已确认进行爬取")
    if yes != "y":
        return

    # The first pager item carries the total row count, wrapped in "共…条".
    all_data_len = driver.find_element_by_xpath('//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[1]').text.split("条")[0].split("共")[1]
    print("总共 {} 条数据".format(all_data_len))
    num_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[4]'
    date_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[6]'

    collected = []
    more_pages = True
    while more_pages:
        time.sleep(wait_time)  # give the table time to render
        for i in range(1, 11):
            try:
                phone_text = driver.find_element_by_xpath(num_tem.format(i)).text
                date_text = driver.find_element_by_xpath(date_tem.format(i)).text
            except NoSuchElementException:
                # Fewer than 10 rows on this page: treat it as the last page.
                more_pages = False
                break
            row_date = datetime.datetime.strptime(date_text, "%Y.%m.%d %H-%M-%S")
            # NOTE(review): stops on the first row *newer* than end_date; that
            # is only useful if the table is sorted oldest-first - confirm.
            if row_date > end_date:
                more_pages = False
                break
            collected.append({"date": row_date, "phone_number": phone_text})

        # Rewrite the whole workbook after every page so progress survives a
        # later failure.
        df = pandas.DataFrame(collected, columns=["date", "phone_number"])
        df.to_excel(phone_file_path, index=False)
        print("已写入{}数据".format(len(df)))

        if not more_pages:
            break

        # Go to the next page; fall back to fixed pager positions when the
        # "next" control cannot be clicked.
        try:
            driver.find_element_by_css_selector('.ant-pagination-next').click()
        except Exception:
            try:
                driver.find_element_by_xpath('//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[12]').click()
            except Exception:
                driver.find_element_by_xpath('//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[10]').click()
79+
80+
def register():
    """Redeem each card link with the next usable phone number.

    Links come from column ``link_get_col`` of ``link_tables`` and phone
    numbers from column ``phone_get_col`` of ``phoe_tables``; row 0 of each
    sheet is skipped. For every link:

    * If the status paragraph is present, the link is skipped; when its text
      is "开卡失败" the link is first marked "已使用" in the link workbook.
    * Otherwise, if the welcome paragraph is shown, phone numbers are tried
      starting at ``phone_can_use_index``. The result of each attempt is
      written to the workbook row of the number that was actually tried.
    * A successful redemption ends the work on that link.

    Processing stops once every phone number has been consumed.

    NOTE(review): ``link_tables`` and ``phoe_tables`` are module-level sheets
    opened when the module is loaded, so rows written to disk afterwards are
    not seen here - confirm that is intended.

    Side effects:
        Drives the shared ``driver``, prints progress, writes both workbooks
        through ``write_to_excel`` and advances the module-level
        ``phone_can_use_index``.
    """
    global phone_can_use_index
    link_data = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows)]
    phone_data = [str(int(get_keywords_data(phoe_tables, i, phone_get_col))) for i in range(1, phoe_tables.nrows)]

    has_phone = True
    for index, link in enumerate(link_data):
        if not has_phone:
            # No numbers left; the remaining links cannot be redeemed.
            break

        driver.get(link)
        time.sleep(wait_time)
        try:
            text = driver.find_element_by_xpath("/html/body/div[1]/div[1]/p[1]")
            if text.text == "开卡失败":
                write_to_excel(link_file_path, index + 1, link_write_col, "已使用")
                print("该卡已经被使用..{}".format(link))
            # A status paragraph of any kind means this link is not redeemable.
            continue
        except Exception:
            # No status paragraph (or the lookup/write failed): try to redeem.
            time.sleep(wait_time)
            try:
                print("该卡可以使用:{},正在查询可用手机号。。".format(link))
                text = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[1]/p')
                if text.text != "欢迎加入樊登读书,即刻获得":
                    continue

                flag = True
                # NOTE(review): the loop repeats from the current cursor when a
                # full pass ends without success, failure or exhaustion -
                # confirm that a retry is intended.
                while flag:
                    for ph_number_index in range(phone_can_use_index, len(phone_data)):
                        phone = phone_data[ph_number_index]
                        # phone_data[k] was read from sheet row k + 1.
                        phone_row = ph_number_index + 1

                        driver.get(link)
                        print("当前查询手机号为{}".format(phone))
                        time.sleep(wait_time)
                        driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[1]/input').send_keys(phone)
                        driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[3]/input').send_keys(phone)
                        driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[3]').click()
                        time.sleep(wait_time)
                        # Click the redeem button.
                        driver.find_element_by_xpath('//*[@id="join-btn"]').click()
                        # The page is slow to respond after the click.
                        time.sleep(wait_time)
                        try:
                            tem = driver.find_element_by_xpath('/html/body/div[1]/div[1]/p[1]')
                            if tem.text == "开卡失败":
                                phone_can_use_index += 1
                                print("开卡失败,您已经是樊登读书好友")
                                write_to_excel(phone_file_path, phone_row, phone_write_col, "开卡失败您已经是樊登读书书友")
                        except Exception:
                            time.sleep(wait_time)
                            try:
                                if driver.find_element_by_xpath('/html/body/div[1]/div/h1').text == "领取成功!":
                                    print("开卡成功")
                                    write_to_excel(link_file_path, index + 1, link_write_col, "领取成功")
                                    write_to_excel(phone_file_path, phone_row, phone_write_col, "领取成功")
                                    phone_can_use_index += 1
                                    flag = False
                                    # The link is redeemed; stop trying numbers on it.
                                    break
                            except Exception:
                                print("此电话号码有问题")
                                write_to_excel(phone_file_path, phone_row, phone_write_col, "此电话号码有问题")
                                phone_can_use_index += 1
                                flag = False
                                continue
                    else:
                        # Reached only when the pass ran out of numbers.
                        print("当前手机号已全被使用")

                    if phone_can_use_index == len(phone_data):
                        has_phone = False
                        flag = False
            except Exception as e:
                # Best effort: report the problem and move on to the next link.
                print("处理链接 {} 时出错: {}".format(link, e))
162+
163+
164+
def previous_midnight(now):
    """Return 00:00:00 on the calendar day before ``now``.

    Args:
        now: A ``datetime.datetime``.

    Returns:
        A ``datetime.datetime`` one calendar day earlier with the time part
        zeroed. ``timedelta`` arithmetic keeps this valid on the first day of
        a month or year.
    """
    day_before = now - datetime.timedelta(days=1)
    return day_before.replace(hour=0, minute=0, second=0, microsecond=0)


if __name__ == '__main__':
    # Initialised once, outside the loop, so the repeat-crawl branch is reachable.
    crawl_count = 0
    while True:
        times = datetime.datetime.fromtimestamp(time.time())
        print(times)
        if crawl_count == 0:
            # First crawl: the cutoff is the start of yesterday.
            cutoff = previous_midnight(times)
        else:
            # Repeat crawls: the cutoff is `time_jiange` seconds before now.
            cutoff = datetime.datetime.fromtimestamp(time.time() - time_jiange)
        print(cutoff)
        get_phone_number(cutoff)
        register()
        crawl_count += 1
184+
#schedule.every().day.at('17:49').do(job4)
185+
#schedule.every(180).seconds.do(get_phone_number)
186+
#while True:
187+
# schedule.run_pending()
188+

0 commit comments

Comments
 (0)