Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 520f85b

Browse files
committed
抖音后台手机号导出+注册脚本 12-2
1 parent 3b0fbd1 commit 520f85b

File tree

8 files changed

+365
-5
lines changed

8 files changed

+365
-5
lines changed

01矩阵.py

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,47 @@ def updateMatrix(self, matrix):
2929
matrix[i][j]=min(tem)+1
3030
return matrix
3131

32-
S=Solution()
33-
res=S.updateMatrix([[1, 0, 1, 1, 0, 0, 1, 0, 0, 1], [0, 1, 1, 0, 1, 0, 1, 0, 1, 1], [0, 0, 1, 0, 1, 0, 0, 1, 0, 0], [1, 0, 1, 0, 1, 1, 1, 1, 1, 1], [0, 1, 0, 1, 1, 0, 0, 0, 0, 1], [0, 0, 1, 0, 1, 1, 1, 0, 1, 0], [0, 1, 0, 1, 0, 1, 0, 0, 1, 1], [1, 0, 0, 0, 1, 1, 1, 1, 0, 1], [1, 1, 1, 1, 1, 1, 1, 0, 1, 0], [1, 1, 1, 1, 0, 1, 0, 0, 1, 1]])
34-
for one in res:
35-
print(one)
32+
33+
# S=Solution()
34+
# res=S.updateMatrix([[1, 0, 1, 1, 0, 0, 1, 0, 0, 1], [0, 1, 1, 0, 1, 0, 1, 0, 1, 1], [0, 0, 1, 0, 1, 0, 0, 1, 0, 0], [1, 0, 1, 0, 1, 1, 1, 1, 1, 1], [0, 1, 0, 1, 1, 0, 0, 0, 0, 1], [0, 0, 1, 0, 1, 1, 1, 0, 1, 0], [0, 1, 0, 1, 0, 1, 0, 0, 1, 1], [1, 0, 0, 0, 1, 1, 1, 1, 0, 1], [1, 1, 1, 1, 1, 1, 1, 0, 1, 0], [1, 1, 1, 1, 0, 1, 0, 0, 1, 1]])
35+
# for one in res:
36+
# print(one)
37+
38+
import json

# Sample API response: a "state" object plus a one-element "data" list whose
# fields are all placeholder strings.
s = """{
    "state":{
        "code":0,
        "errorMessage":"string"
    },
    "data":[
        {
            "Guid":"string",
            "Name":"string",
            "ApeType":"string",
            "Model":"string",
            "Server":"string",
            "Place":"string",
            "MonitorDirection":"string",
            "Longitude":"string",
            "Latitude":"string",
            "SpaceZ":"string",
            "PitchAngle":"string",
            "DeflectionAngle":"string",
            "MapState":"string",
            "NearestSightDistance":"string",
            "FarthestSightDistance":"string",
            "ViewAngleY":"string",
            "WidthSpanRate":"string",
            "Description":"string",
            "ProductionNumber":"string",
            "Manufacturer":"string",
            "Remark":"string",
            "Functionary":"string"
        }
    ]
}"""

# Parse with json.loads rather than eval: the text is JSON, eval would execute
# arbitrary expressions, and it fails on JSON literals such as true/false/null.
s = json.loads(s)
tem = s['state']

# Print every key of the "state" object.
for key, value in tem.items():
    print(key)

图片过滤脚本/test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,10 @@
4949
'cs_1106_20191107_095117330_1.jpg', 'xs-1_1015_20191016_141324816_3.jpg',
5050
'yf_1010_20191011_092612429_1.jpg', 'yf_1010_20191010_150334441_1.jpg']
5151
delete_path = r'C:\Users\lenovo\Desktop\ssh'
52-
res_path = "image.txt"
52+
res_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\图片过滤脚本\image.txt"
5353
with open(res_path, 'w') as f:
5454
for root, dirs, files in os.walk(delete_path):
55+
print(files)
5556
for file in files:
5657
f.write(os.path.join(root, file) + '\n')
5758
# for one in dirs:

安居客小区爬虫/新房.xls

-40 KB
Binary file not shown.

樊登读书脚本/all.py

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
# @Time : 2019/12/2 11:17
2+
# @Author : Libuda
3+
# @FileName: all.py
4+
# @Software: PyCharm
5+
6+
import xlrd
7+
from xlutils.copy import copy
8+
from selenium import webdriver
9+
import time
10+
import schedule
11+
import pandas
12+
import datetime
13+
14+
phone_num = 13628398278  # hard-coded number; not referenced by any function visible in this file
15+
wait_time = 3 # seconds passed to time.sleep between page actions
16+
time_jiange = 60 # interval in seconds; main() subtracts it from time.time() to build the cutoff for repeat crawls
17+
driver = webdriver.Chrome(r"C:\Users\lenovo\PycharmProjects\Spider\chromedriver.exe")  # shared WebDriver, created at import time
18+
link_file_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\樊登读书脚本\link.xls"  # workbook holding the card links
19+
phone_file_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\樊登读书脚本\phone_number.xls"  # workbook receiving phone results
20+
21+
link_ecel = xlrd.open_workbook(link_file_path)
22+
link_tables = link_ecel.sheet_by_index(0)  # first sheet of the link workbook
23+
link_get_col = 2  # column read for each link by get_links() / register()
24+
link_write_col = 3  # column where the status of a link is written
25+
26+
phone_excel = xlrd.open_workbook(phone_file_path)
27+
phoe_tables = phone_excel.sheet_by_index(0)  # first sheet of the phone workbook; only used in commented-out code here
28+
phone_get_col = 1  # only used in commented-out code in this file
29+
phone_write_col = 2  # register() writes to this column and to the two columns before it
30+
31+
phone_can_use_index = 0  # index into phone_data of the next number to try; advanced by register()
32+
33+
34+
def get_keywords_data(tables, row, col):
    """Return the value stored in one cell of a worksheet.

    Args:
        tables: Sheet-like object exposing ``cell_value(row, col)``.
        row: Row index passed through to ``cell_value``.
        col: Column index passed through to ``cell_value``.

    Returns:
        Whatever ``tables.cell_value(row, col)`` returns.
    """
    return tables.cell_value(row, col)
37+
38+
39+
def write_to_excel(file_path, row, col, value):
    """Write ``str(value)`` into one cell of the first sheet and save in place.

    The workbook at ``file_path`` is read with ``xlrd``, copied into a writable
    workbook with ``xlutils.copy.copy``, modified, and saved back to the same
    path. This happens on every call, so each call rewrites the whole file.

    Args:
        file_path: Path of the ``.xls`` workbook to update.
        row: Row index of the target cell.
        col: Column index of the target cell.
        value: Value to store; it is converted with ``str`` before writing.
    """
    source_book = xlrd.open_workbook(file_path, formatting_info=False)
    writable_book = copy(source_book)
    writable_book.get_sheet(0).write(row, col, str(value))
    writable_book.save(file_path)
45+
46+
47+
def get_links():
    """Visit every card link in the link sheet and flag the ones already used.

    For each link (sheet rows 1..nrows-1, column ``link_get_col``) the page is
    opened and the element at ``/html/body/div[1]/div[1]/p[1]`` is inspected:

    * text equals the failure banner -> the row is marked as used in column
      ``link_write_col`` of the link workbook;
    * element missing -> the card is reported as usable;
    * any other error -> the error is reported and the next link is checked.
      (The original reported every error as "usable".)
    """
    # Local import keeps this function self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import NoSuchElementException

    link_data = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows)]
    # The list starts at sheet row 1, so list position N is sheet row N + 1.
    for index, link in enumerate(link_data):
        driver.get(link)
        time.sleep(wait_time)
        try:
            text = driver.find_element_by_xpath("/html/body/div[1]/div[1]/p[1]")
            if text.text == "开卡失败":
                write_to_excel(link_file_path, index + 1, link_write_col, "已使用")
                print("该卡已经被使用..{}".format(link))
        except NoSuchElementException:
            # No failure banner on the page: the card has not been consumed.
            print("该卡可以使用:{}".format(link))
        except Exception as e:
            # Driver or workbook failure: say so instead of claiming the card
            # is usable, then keep going with the remaining links.
            print("检查该卡时出错:{},原因:{}".format(link, e))
60+
# time.sleep(wait_time)
61+
62+
63+
def get_phone_number(end_date):
    """Scrape phone numbers and timestamps from the open table, page by page.

    Up to ten table rows (``tr[1]``..``tr[10]``) are read per page. Scraping
    stops when a row cannot be read or parsed, when a row's timestamp is later
    than ``end_date``, or when no "next page" control can be clicked.

    Unlike the original, the pager is not clicked once scraping has decided to
    stop, and a failing pager click ends the crawl instead of raising and
    discarding the rows already collected.

    Args:
        end_date: ``datetime.datetime`` cutoff compared against each row's
            timestamp.

    Returns:
        List of ``[phone_number_text, datetime]`` pairs in the order read.
    """
    result = []

    # Search button: best effort, the table may already be showing results.
    try:
        driver.find_element_by_xpath(
            '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[2]/div[4]/span[2]/span/span').click()
    except Exception:
        pass

    num_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[4]'
    date_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[6]'

    keep_going = True
    while keep_going:
        time.sleep(wait_time)  # give the table time to render
        for i in range(1, 11):
            try:
                phone_number = driver.find_element_by_xpath(num_tem.format(i)).text
                row_date = datetime.datetime.strptime(
                    driver.find_element_by_xpath(date_tem.format(i)).text, "%Y.%m.%d %H:%M:%S")

                # NOTE(review): scraping stops at the first row NEWER than
                # end_date; confirm this direction matches the table's sort
                # order and the caller's intent.
                if row_date > end_date:
                    keep_going = False
                    break

                result.append([phone_number, row_date])
            except Exception:
                # Missing row or unparsable timestamp: treated as end of data.
                keep_going = False
                break

        # Only page forward when more rows are wanted and a pager exists.
        if keep_going and not _click_next_page():
            keep_going = False

    print("已爬取到新手机号:{}个".format(len(result)))
    return result


def _click_next_page():
    """Click the "next page" control; return False when nothing could be clicked."""
    pager_item = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[{}]'
    try:
        driver.find_element_by_css_selector('.ant-pagination-next').click()
        return True
    except Exception:
        pass
    # Fall back to the positional pager items, tried in the original order.
    for li_index in (12, 10):
        try:
            driver.find_element_by_xpath(pager_item.format(li_index)).click()
            return True
        except Exception:
            continue
    return False
120+
121+
122+
def register(phone_data):
# For every link in the link sheet: open it, skip cards whose page shows the
# failure banner (marking them as used), otherwise submit phone numbers from
# phone_data until one outcome is recorded, writing results to both workbooks.
# phone_data: sequence whose items are indexed with [0] for the phone number
# (get_phone_number in this file builds [phone_number, date] pairs).
# Advances the module-level phone_can_use_index as numbers are consumed.
# NOTE(review): indentation was stripped from this capture, so the nesting of
# the loops / try blocks below cannot be confirmed from this text.
123+
global phone_can_use_index
124+
link_data = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows)]
125+
# phone_data = [str(int(get_keywords_data(phoe_tables, i, phone_get_col))) for i in range(1, phoe_tables.nrows)]
126+
# phone_data = [phone[0] for phone in phone_datas]
127+
128+
has_phone = True
129+
for index, link in enumerate(link_data):
130+
131+
if has_phone:
132+
driver.get(link)
133+
time.sleep(wait_time)
134+
try:
135+
text = driver.find_element_by_xpath("/html/body/div[1]/div[1]/p[1]")
136+
if text.text == "开卡失败":
137+
write_to_excel(link_file_path, index + 1, link_write_col, "已使用")
138+
print("该卡已经被使用..{}".format(link))
139+
continue
140+
else:
141+
# print(text.text)
142+
continue
143+
except Exception as e:
144+
time.sleep(wait_time)
145+
# print(e)
146+
try:
147+
print("该卡可以使用:{},正在查询可用手机号。。".format(link))
148+
text = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[1]/p')
149+
if text.text == "欢迎加入樊登读书,即刻获得":
150+
flag = True
151+
while flag:
152+
q = phone_can_use_index
153+
for ph_number_index in range(q, len(phone_data)):
154+
driver.get(link)
155+
print("当前查询手机号索引为{},号码为{}".format(ph_number_index, phone_data[ph_number_index][0]))
156+
time.sleep(wait_time)
157+
driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[1]/input').send_keys(
158+
phone_data[ph_number_index][0])
159+
driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[3]/input').send_keys(
160+
phone_data[ph_number_index][0])
161+
driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[3]').click()
162+
time.sleep(wait_time)
163+
# Click the "open card" button
164+
driver.find_element_by_xpath('//*[@id="join-btn"]').click()
165+
# The page responds slowly after the click, so wait again
166+
time.sleep(wait_time)
167+
try:
168+
tem = driver.find_element_by_xpath('/html/body/div[1]/div[1]/p[1]')
169+
if tem.text == "开卡失败":
170+
phone_can_use_index += 1
171+
print("开卡失败,您已经是樊登读书好友")
172+
# Date
# NOTE(review): the value written is phone_data[...][0], the same value as
# the phone-number column below; get_phone_number stores the date at index 1
# - confirm which was intended.
173+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col - 2,
174+
phone_data[ph_number_index][0])
175+
# Phone number
176+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col - 1,
177+
phone_data[ph_number_index][0])
178+
# Usage status
179+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col,
180+
"开卡失败您已经是樊登读书书友")
181+
except Exception as e:
182+
# print(e)
183+
time.sleep(wait_time)
184+
try:
185+
if driver.find_element_by_xpath('/html/body/div[1]/div/h1').text == "领取成功!":
186+
print("开卡成功")
187+
write_to_excel(link_file_path, index + 1, link_write_col, "领取成功")
188+
# Date (NOTE(review): writes index [0], see the note on the first date write)
189+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col - 2,
190+
phone_data[ph_number_index][0])
191+
# Phone number
192+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col - 1,
193+
phone_data[ph_number_index][0])
194+
# Usage status
195+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col,
196+
"领取成功")
197+
phone_can_use_index += 1
198+
has_phone = True
199+
flag = False
200+
continue
201+
except Exception as e:
202+
print("此电话号码有问题")
203+
# write_to_excel(link_file_path, index + 1, link_write_col, "此电话号码有问题")
# NOTE(review): this branch reports a problem with the phone number but the
# next line still writes the success status to the link sheet - confirm.
204+
write_to_excel(link_file_path, index + 1, link_write_col, "领取成功")
205+
# Date (NOTE(review): writes index [0], see the note on the first date write)
206+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col - 2,
207+
phone_data[ph_number_index][0])
208+
# Phone number
209+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col - 1,
210+
phone_data[ph_number_index][0])
211+
# Usage status
212+
write_to_excel(phone_file_path, ph_number_index + 1, phone_write_col,
213+
"此电话号码有问题")
214+
# write_to_excel(phone_file_path, phone_can_use_index + 1, phone_write_col, "此电话号码有问题")
215+
phone_can_use_index += 1
216+
has_phone = True
217+
flag = False
218+
continue
219+
# print(e)
220+
print("当前手机号已全被使用")
221+
if phone_can_use_index == len(phone_data):
222+
has_phone = False
223+
flag = False
224+
225+
else:
226+
# print(text.text)
227+
continue
228+
except Exception as e:
229+
pass
# print(e)
231+
232+
233+
def main():
    """Run the crawl-and-register loop forever.

    First pass: open the Douyin site, wait for the operator to log in and set
    filters (confirmed by pressing Enter), then scrape with the loop start
    time as cutoff. Later passes: scrape with ``now - time_jiange`` as cutoff.
    After every scrape the numbers are handed to ``register`` in a scratch
    tab, and the loop sleeps 60 seconds.

    Changes from the original:
      * The unused "yesterday midnight" value was built from ``times.day - 1``,
        which is day 0 on the first of a month and made ``strptime`` raise;
        that dead computation is removed.
        NOTE(review): if that value was meant to be the first-pass cutoff,
        derive it with ``datetime.timedelta(days=1)`` instead.
      * The first scraped entry is only printed when something was scraped
        (the original raised ``IndexError`` on an empty result).
      * ``crawl_count`` now advances on every pass, not only the first.
      * The duplicated tab handling lives in ``_register_in_new_tab``.
    """
    crawl_count = 1
    while True:
        if crawl_count == 1:
            # Taken before the login prompt, as in the original.
            started_at = datetime.datetime.fromtimestamp(time.time())
            print("第1次爬取")
            driver.get("https://e.douyin.com/site/")

            print("请您进行登录及手动进行所有的筛选")
            input("您是否已确认进行爬取")  # blocks until the operator confirms
            phone_data = get_phone_number(started_at)
            if phone_data:
                print(list(phone_data[0]))
        else:
            print("第{}次爬取".format(crawl_count))
            cutoff = datetime.datetime.fromtimestamp(time.time() - time_jiange)
            phone_data = get_phone_number(cutoff)
            print(phone_data)

        _register_in_new_tab(phone_data)
        crawl_count += 1

        # NOTE(review): indentation was lost in this capture; the sleep is
        # assumed to run after every pass.
        time.sleep(60)


def _register_in_new_tab(phone_data):
    """Open a scratch tab, run ``register`` in it, close it and switch back."""
    original_handle = driver.current_window_handle
    driver.execute_script('window.open("https://www.baidu.com");')
    for handle in driver.window_handles:
        if handle != original_handle:
            driver.switch_to.window(handle)
            try:
                register(phone_data)
            finally:
                # Always close the scratch tab and restore focus, even if
                # register() raises.
                driver.close()
                driver.switch_to.window(original_handle)
283+
284+
285+
if __name__ == '__main__':
286+
main()
287+
# get_links()
288+
# schedule.every().day.at('17:49').do(job4)
289+
# schedule.every(180).seconds.do(get_phone_number)
290+
# while True:
291+
# schedule.run_pending()

樊登读书脚本/link.xls

13.5 KB
Binary file not shown.

樊登读书脚本/link.xlsx

-11.5 KB
Binary file not shown.

樊登读书脚本/phone_number.xls

-109 KB
Binary file not shown.

樊登读书脚本/tet.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# @Time : 2019/12/2 17:00
2+
# @Author : Libuda
3+
# @FileName: tet.py
4+
# @Software: PyCharm
5+
6+
import xlrd
7+
from xlutils.copy import copy
8+
from selenium import webdriver
9+
import time
10+
11+
wait_time = 3 # wait time between stages; not referenced elsewhere in this script
12+
13+
driver = webdriver.Chrome(r'C:\Users\lenovo\PycharmProjects\Spider\chromedriver.exe')  # WebDriver created at import time
14+
15+
driver.get("https://www.baidu.com")
16+
windows = driver.current_window_handle  # handle of the first window, used to switch back at the end
17+
18+
js = 'window.open("https://www.sogou.com");'  # JavaScript that opens a second window
19+
driver.execute_script(js)
20+
# Switch to the window that is not the original one and load another page in it.
# NOTE(review): indentation was stripped from this capture; which of the
# following statements sit inside the loop / if cannot be confirmed.
for wins in driver.window_handles:
21+
if wins != windows:
22+
driver.switch_to.window(wins)
23+
driver.get("http://weibo.com")
24+
25+
time.sleep(5)
26+
driver.close()  # closes the window the driver is currently focused on
27+
28+
driver.switch_to.window(windows)  # return focus to the original window

0 commit comments

Comments
 (0)