|
| 1 | +# @Time : 2019/12/2 11:17 |
| 2 | +# @Author : Libuda |
| 3 | +# @FileName: all.py |
| 4 | +# @Software: PyCharm |
| 5 | + |
| 6 | +import xlrd |
| 7 | +from xlutils.copy import copy |
| 8 | +from selenium import webdriver |
| 9 | +import time |
| 10 | +import schedule |
| 11 | +import pandas |
| 12 | +import datetime |
| 13 | + |
# --- Tunables -----------------------------------------------------------------
phone_num = 13628398278  # not referenced by any function visible in this file
wait_time = 3  # seconds to pause after each page action
time_jiange = 60  # look-back interval in seconds used for later crawl rounds

# --- Browser session and workbook locations -----------------------------------
# NOTE: the browser is launched and both workbooks are opened at import time.
driver = webdriver.Chrome(r"C:\Users\lenovo\PycharmProjects\Spider\chromedriver.exe")
link_file_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\樊登读书脚本\link.xls"
phone_file_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\樊登读书脚本\phone_number.xls"

# --- Link workbook: first sheet, links read from one column, status written to another
link_ecel = xlrd.open_workbook(link_file_path)
link_tables = link_ecel.sheet_by_index(0)
link_get_col, link_write_col = 2, 3

# --- Phone workbook: first sheet; the status column sits right after the number column
phone_excel = xlrd.open_workbook(phone_file_path)
phoe_tables = phone_excel.sheet_by_index(0)
phone_get_col, phone_write_col = 1, 2

# Index into the crawled phone list of the next number to try; advanced by register().
phone_can_use_index = 0
| 32 | + |
| 33 | + |
def get_keywords_data(tables, row, col):
    """Return the value of the cell at (row, col) in the given sheet.

    ``tables`` only needs to expose ``cell_value(row, col)``; the call is
    forwarded unchanged and its result returned as-is.
    """
    return tables.cell_value(row, col)
| 37 | + |
| 38 | + |
def write_to_excel(file_path, row, col, value):
    """Write ``str(value)`` into one cell of the workbook's first sheet.

    The workbook at ``file_path`` is re-read from disk, turned into a
    writable copy, updated at (row, col) and saved back over the same path,
    so every call rewrites the whole file.
    """
    source_book = xlrd.open_workbook(file_path, formatting_info=False)
    editable_book = copy(source_book)
    editable_book.get_sheet(0).write(row, col, str(value))
    editable_book.save(file_path)
| 45 | + |
| 46 | + |
def get_links():
    """Visit every link in the link sheet and flag the ones already used.

    Each link from column ``link_get_col`` (rows 1..nrows-1; row 0 is
    skipped, presumably a header row) is opened in the shared ``driver``.
    When the page shows the failure banner, the row's status column is set
    to "已使用". A link counts as usable only when the banner element is
    absent; any other error is reported instead of being mistaken for a
    usable card, and the scan moves on to the next link.
    """
    from selenium.common.exceptions import NoSuchElementException

    links = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows)]
    for index, link in enumerate(links):
        driver.get(link)
        time.sleep(wait_time)
        try:
            banner = driver.find_element_by_xpath("/html/body/div[1]/div[1]/p[1]")
            if banner.text == "开卡失败":
                # Sheet rows are offset by one relative to the list index.
                write_to_excel(link_file_path, index + 1, link_write_col, "已使用")
                print("该卡已经被使用..{}".format(link))
        except NoSuchElementException:
            # No failure banner on the page: the card has not been redeemed yet.
            print("该卡可以使用:{}".format(link))
        except Exception as exc:
            # Browser or workbook failure: report it, keep scanning the other links.
            print("检查该卡时出错:{} ({})".format(link, exc))
| 61 | + |
| 62 | + |
def _collect_page_rows(num_tem, date_tem, end_date, result):
    """Append the current page's rows to ``result``; return False when crawling should stop."""
    for i in range(1, 11):  # the table is read as at most 10 rows per page
        try:
            phone_number = driver.find_element_by_xpath(num_tem.format(i)).text
            row_date = datetime.datetime.strptime(
                driver.find_element_by_xpath(date_tem.format(i)).text,
                "%Y.%m.%d %H:%M:%S")
            if row_date > end_date:
                return False
        except Exception:
            # Missing row (short or empty page) or unparsable date: stop crawling.
            return False
        result.append([phone_number, row_date])
    return True


def _go_to_next_page():
    """Click the pagination "next" control; return False when none can be clicked."""
    pager_item = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/ul/li[{}]'
    locators = (
        lambda: driver.find_element_by_css_selector('.ant-pagination-next'),
        lambda: driver.find_element_by_xpath(pager_item.format(12)),
        lambda: driver.find_element_by_xpath(pager_item.format(10)),
    )
    for locate in locators:
        try:
            button = locate()
            # Ant Design marks an unusable prev/next item with this class; clicking
            # it would leave the same page showing and the rows would be re-read.
            if "ant-pagination-disabled" in (button.get_attribute("class") or ""):
                return False
            button.click()
            return True
        except Exception:
            continue
    return False


def get_phone_number(end_date):
    """Collect [phone_number, datetime] rows from the table on the current page.

    Rows are read page by page in table order. Crawling stops at the first
    row whose timestamp is later than ``end_date``, at the first row that
    cannot be read or parsed, or when there is no further page to open.

    Args:
        end_date: ``datetime.datetime`` compared against each row's timestamp.

    Returns:
        List of ``[phone_number_text, datetime]`` pairs collected so far.
    """
    result = []

    # Click the search button; best-effort, the crawl continues if it is missing.
    try:
        driver.find_element_by_xpath(
            '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[2]/div[4]/span[2]/span/span').click()
    except Exception:
        pass

    num_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[4]'
    date_tem = '//*[@id="root"]/div[2]/div[1]/div/div/div[3]/div[1]/div[2]/div[3]/div/div/div/div/div/div/div/div[1]/div/table/tbody/tr[{}]/td[6]'

    while True:
        time.sleep(wait_time)  # give the table time to render
        if not _collect_page_rows(num_tem, date_tem, end_date, result):
            break
        # Only page forward while rows are still being accepted.
        if not _go_to_next_page():
            break

    print("已爬取到新手机号:{}个".format(len(result)))
    return result
| 120 | + |
| 121 | + |
def _record_phone_result(row, phone_entry, status):
    """Write one phone record (date, number, status) into the phone workbook row."""
    phone_number = phone_entry[0]
    # Entries are [phone_number, date]; tolerate entries that carry no date.
    crawl_date = phone_entry[1] if len(phone_entry) > 1 else ""
    write_to_excel(phone_file_path, row, phone_write_col - 2, crawl_date)
    write_to_excel(phone_file_path, row, phone_write_col - 1, phone_number)
    write_to_excel(phone_file_path, row, phone_write_col, status)


def register(phone_data):
    """Try to redeem each card link with the crawled phone numbers.

    For every link in the link sheet the page is opened. Links showing the
    failure banner are marked "已使用". For a link showing the welcome text,
    phone numbers are tried starting at the module-level
    ``phone_can_use_index`` and each outcome is recorded in the link and
    phone workbooks.

    Args:
        phone_data: sequence of ``[phone_number, date]`` entries, as built by
            ``get_phone_number``.

    Side effects:
        Drives the shared ``driver``, rewrites both workbooks and advances the
        global ``phone_can_use_index``.

    NOTE(review): ``phone_can_use_index`` is never reset between calls even
    though callers pass a fresh list each time; confirm that is intended.
    """
    global phone_can_use_index
    link_data = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows)]

    has_phone = True
    for index, link in enumerate(link_data):
        if not has_phone:
            # Every phone number has been consumed; remaining links cannot be used.
            break

        driver.get(link)
        time.sleep(wait_time)
        try:
            text = driver.find_element_by_xpath("/html/body/div[1]/div[1]/p[1]")
            if text.text == "开卡失败":
                write_to_excel(link_file_path, index + 1, link_write_col, "已使用")
                print("该卡已经被使用..{}".format(link))
            # Banner element present (whatever its text): nothing to redeem here.
            continue
        except Exception:
            # Banner missing (or the status write failed): treat the card as
            # a candidate and give the page more time to load.
            time.sleep(wait_time)

        try:
            print("该卡可以使用:{},正在查询可用手机号。。".format(link))
            text = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[1]/p')
            if text.text != "欢迎加入樊登读书,即刻获得":
                continue

            flag = True
            while flag:
                q = phone_can_use_index
                for ph_number_index in range(q, len(phone_data)):
                    driver.get(link)
                    phone_entry = phone_data[ph_number_index]
                    print("当前查询手机号索引为{},号码为{}".format(ph_number_index, phone_entry[0]))
                    time.sleep(wait_time)
                    # The same number is typed into both input fields of the form.
                    driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[1]/input').send_keys(
                        phone_entry[0])
                    driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[2]/div[3]/input').send_keys(
                        phone_entry[0])
                    driver.find_element_by_xpath('//*[@id="app"]/div[1]/div[3]').click()
                    time.sleep(wait_time)
                    # Click the "open card" button.
                    driver.find_element_by_xpath('//*[@id="join-btn"]').click()
                    # The page responds slowly after the click.
                    time.sleep(wait_time)
                    try:
                        tem = driver.find_element_by_xpath('/html/body/div[1]/div[1]/p[1]')
                        if tem.text == "开卡失败":
                            # This number is already a member: record it, move to the next one.
                            phone_can_use_index += 1
                            print("开卡失败,您已经是樊登读书好友")
                            _record_phone_result(ph_number_index + 1, phone_entry,
                                                 "开卡失败您已经是樊登读书书友")
                    except Exception:
                        time.sleep(wait_time)
                        try:
                            if driver.find_element_by_xpath('/html/body/div[1]/div/h1').text == "领取成功!":
                                print("开卡成功")
                                write_to_excel(link_file_path, index + 1, link_write_col, "领取成功")
                                _record_phone_result(ph_number_index + 1, phone_entry, "领取成功")
                                phone_can_use_index += 1
                                flag = False
                        except Exception:
                            # Neither the failure banner nor the success heading was found.
                            print("此电话号码有问题")
                            write_to_excel(link_file_path, index + 1, link_write_col, "领取成功")
                            _record_phone_result(ph_number_index + 1, phone_entry, "此电话号码有问题")
                            phone_can_use_index += 1
                            flag = False
                    # NOTE(review): the phone loop is not left explicitly after a
                    # success; the next iteration reloads the same link and, it seems,
                    # relies on a lookup error there to move on to the next link.
                print("当前手机号已全被使用")
                # ">=" rather than "==": the global index may already be past the end
                # of a shorter list, which must still count as exhausted.
                if phone_can_use_index >= len(phone_data):
                    has_phone = False
                    flag = False
        except Exception:
            # Best-effort: any failure while handling this link moves on to the next one.
            pass
| 231 | + |
| 232 | + |
def _register_in_new_tab(phone_data):
    """Open a scratch tab, run register() in it, then close it and return to the crawl tab."""
    windows = driver.current_window_handle
    driver.execute_script('window.open("https://www.baidu.com");')
    for wins in driver.window_handles:
        if wins != windows:
            driver.switch_to.window(wins)
            register(phone_data)
            driver.close()
            driver.switch_to.window(windows)


def main():
    """Run the crawl-and-register loop forever.

    Round 1 opens the site, waits for the operator to log in and apply the
    filters by hand, then crawls with "now" (taken at the start of the round)
    as the cutoff. Every later round crawls with a cutoff ``time_jiange``
    seconds in the past. After each crawl the numbers are handed to
    ``register`` in a separate tab, and the loop sleeps for ``time_jiange``
    seconds.
    """
    crawl_count = 1
    while True:
        print("第{}次爬取".format(crawl_count))
        if crawl_count == 1:
            # Captured before the login prompt so the cutoff is the round's start time.
            times = datetime.datetime.fromtimestamp(time.time())
            driver.get("https://e.douyin.com/site/")
            print("请您进行登录及手动进行所有的筛选")
            input("您是否已确认进行爬取")
            phone_data = get_phone_number(times)
            # Show the first crawled record as a sanity check; there may be none.
            if phone_data:
                print(list(phone_data[0]))
        else:
            times = datetime.datetime.fromtimestamp(time.time() - time_jiange)
            phone_data = get_phone_number(times)
            print(phone_data)

        _register_in_new_tab(phone_data)
        crawl_count += 1
        # Sleep for the same interval the look-back cutoff is based on.
        time.sleep(time_jiange)
| 283 | + |
| 284 | + |
if __name__ == "__main__":
    # Entry point: run the endless crawl-and-register loop.
    # Disabled alternatives kept for reference: calling get_links() directly,
    # or driving the crawl through the ``schedule`` package
    # (schedule.every(...).do(...) followed by a schedule.run_pending() loop).
    main()
0 commit comments