Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit fbe74a2

Browse files
committed
断网重启功能
1 parent efd8dff commit fbe74a2

File tree

2 files changed

+41
-16
lines changed

2 files changed

+41
-16
lines changed

飞鱼脚本/无密spider.py renamed to 飞鱼脚本/new_spiders.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -105,24 +105,36 @@ def write_to_excel(file_path, row, col, value):
105105

106106

107107
def get_new_phone(start, end):
108+
108109
res = []
109110
headers = {
110111
"cookie": "ccid=ac428488c168899d07df951f7354ba55; msh=GqsdyEcveB1HjZLIZKT5ALDFoAE; sso_auth_status=26a7e62720484fd24d45830a4b543edb; sso_uid_tt=89b572982452ca2533fc5c49e4a3540e; toutiao_sso_user=4cd8bb9233af1784dbf3f269d15233d8; passport_auth_status=9f2216029d9ce53808046ea02135feff%2C7f9ddb5f3555a4e4db4cae3b62ed1213; sid_guard=3c3144f57c28219795bc821cf887fc79%7C1576146759%7C5184000%7CMon%2C+10-Feb-2020+10%3A32%3A39+GMT; uid_tt=e239ea11351745eb4404675817d217c5; sid_tt=3c3144f57c28219795bc821cf887fc79; sessionid=3c3144f57c28219795bc821cf887fc79; toutiao-crm-session=s%3Ab88ca4f2-1cca-11ea-adad-ac1f6b0ad100b88ca4f2-1cca-11ea-adad-ac1f6b0ad100sD3tpStsTyYsYE2aa56BtD22.jjnP%2F%2FLSX4oqXo%2FC15QML%2FFEvTN9OYGUoBHcVGkmgz0; gr_user_id=6892c2d6-d651-4a12-adc2-6c3b37e7c414; gr_session_id_9952092a9d995794=05a5816c-4d44-4447-8e5d-a813f5bd7f61; gr_cs1_05a5816c-4d44-4447-8e5d-a813f5bd7f61=advertiser_id%3A1645790969889795; gr_session_id_9952092a9d995794_05a5816c-4d44-4447-8e5d-a813f5bd7f61=true"}
111112
base_url = "https://feiyu.oceanengine.com/crm/v2/api/clue/public/?_t=1576147755&page={}&page_size=20&clue_public_status=0&start_time={}&end_time={}"
112113
i = 1
113114
while True:
114-
response = requests.get(base_url.format(i, start, end), headers=headers).json()
115-
if response['data']:
116-
i += 1
117-
for one in response['data']:
118-
# print(one['telphone'])
119-
if one['telphone'] not in totle_break_set:
115+
try:
116+
response = requests.get(base_url.format(i, start, end), headers=headers).json()
117+
if response['data']:
118+
i += 1
119+
for one in response['data']:
120+
# print(one['telphone'])
121+
# if one['telphone'] not in totle_break_set:
120122
timeStamp = int(one['create_time'])
121123
timeArray = time.localtime(timeStamp)
122124
otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
123125
res.append([one['telphone'], otherStyleTime])
124-
else:
125-
break
126+
else:
127+
break
128+
except Exception as e:
129+
print(e)
130+
starts = time.localtime(start)
131+
starts = time.strftime("%Y-%m-%d %H:%M:%S", starts)
132+
ends = time.localtime(end)
133+
ends = time.strftime("%Y-%m-%d %H:%M:%S", ends)
134+
print("网络可能异常...将在{}秒后对本时间段重新进行爬取 当前时间段为:{}----{}".format(time_jiange, starts, ends))
135+
time.sleep(time_jiange)
136+
return get_new_phone(start, end)
137+
126138
start = time.localtime(start)
127139
start = time.strftime("%Y-%m-%d %H:%M:%S", start)
128140
end = time.localtime(end)

飞鱼脚本/加密spider.py

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -104,24 +104,37 @@ def write_to_excel(file_path, row, col, value):
104104

105105

106106
def get_new_phone(start, end):
107+
107108
res = []
108109
headers = {
109110
"cookie": "ccid=ac428488c168899d07df951f7354ba55; msh=GqsdyEcveB1HjZLIZKT5ALDFoAE; sso_auth_status=26a7e62720484fd24d45830a4b543edb; sso_uid_tt=89b572982452ca2533fc5c49e4a3540e; toutiao_sso_user=4cd8bb9233af1784dbf3f269d15233d8; passport_auth_status=9f2216029d9ce53808046ea02135feff%2C7f9ddb5f3555a4e4db4cae3b62ed1213; sid_guard=3c3144f57c28219795bc821cf887fc79%7C1576146759%7C5184000%7CMon%2C+10-Feb-2020+10%3A32%3A39+GMT; uid_tt=e239ea11351745eb4404675817d217c5; sid_tt=3c3144f57c28219795bc821cf887fc79; sessionid=3c3144f57c28219795bc821cf887fc79; toutiao-crm-session=s%3Ab88ca4f2-1cca-11ea-adad-ac1f6b0ad100b88ca4f2-1cca-11ea-adad-ac1f6b0ad100sD3tpStsTyYsYE2aa56BtD22.jjnP%2F%2FLSX4oqXo%2FC15QML%2FFEvTN9OYGUoBHcVGkmgz0; gr_user_id=6892c2d6-d651-4a12-adc2-6c3b37e7c414; gr_session_id_9952092a9d995794=05a5816c-4d44-4447-8e5d-a813f5bd7f61; gr_cs1_05a5816c-4d44-4447-8e5d-a813f5bd7f61=advertiser_id%3A1645790969889795; gr_session_id_9952092a9d995794_05a5816c-4d44-4447-8e5d-a813f5bd7f61=true"}
110111
base_url = "https://feiyu.oceanengine.com/crm/v2/api/clue/public/?_t=1576147755&page={}&page_size=20&clue_public_status=0&start_time={}&end_time={}"
111112
i = 1
112113
while True:
113-
response = requests.get(base_url.format(i, start, end), headers=headers).json()
114-
if response['data']:
115-
i += 1
116-
for one in response['data']:
117-
# print(one['telphone'])
118-
if one['telphone'] not in totle_break_set:
114+
try:
115+
response = requests.get(base_url.format(i, start, end), headers=headers).json()
116+
if response['data']:
117+
i += 1
118+
for one in response['data']:
119+
# print(one['telphone'])
120+
# if one['telphone'] not in totle_break_set:
119121
timeStamp = int(one['create_time'])
120122
timeArray = time.localtime(timeStamp)
121123
otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
122124
res.append([one['telphone'], otherStyleTime])
123-
else:
124-
break
125+
else:
126+
break
127+
except Exception as e:
128+
print(e)
129+
starts = time.localtime(start)
130+
starts = time.strftime("%Y-%m-%d %H:%M:%S", starts)
131+
ends = time.localtime(end)
132+
ends = time.strftime("%Y-%m-%d %H:%M:%S", ends)
133+
print("网络可能异常...将在{}秒后对本时间段重新进行爬取 当前时间段为:{}----{}".format(time_jiange, starts, ends))
134+
time.sleep(time_jiange)
135+
return get_new_phone(start, end)
136+
137+
125138
start = time.localtime(start)
126139
start = time.strftime("%Y-%m-%d %H:%M:%S", start)
127140
end = time.localtime(end)

0 commit comments

Comments
 (0)