stormMayo
diff --git a/.idea/leetcode-python-.iml b/.idea/leetcode-python-.iml
Lines changed: 1 addition & 3 deletions
diff --git a/微信公众号/check_link.py b/微信公众号/check_link.py
Lines changed: 42 additions & 18 deletions
diff --git a/微信公众号/chromedriver.exe b/微信公众号/chromedriver.exe
8.3 MB
diff --git a/微信公众号/link.xls b/微信公众号/link.xls
-19.5 KB
diff --git a/微信公众号/new_check_link.py b/微信公众号/new_check_link.py
Lines changed: 17 additions & 9 deletions
diff --git a/微信公众号/phantomjs.exe b/微信公众号/phantomjs.exe
17.7 MB
diff --git a/樊登读书脚本/all.py b/樊登读书脚本/all.py
Lines changed: 6 additions & 17 deletions
diff --git a/樊登读书脚本/link.xls b/樊登读书脚本/link.xls
0 Bytes
diff --git a/樊登读书脚本/phone_number.xls b/樊登读书脚本/phone_number.xls
-18.5 KB
@@ -6,7 +6,17 @@
 from selenium import webdriver
 import xlrd
 from xlutils.copy import copy
+from queue import Queue
+import threading
 
+wait_time = 0
+
+link_file_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\微信公众号\link.xls"
+link_ecel = xlrd.open_workbook(link_file_path)
+link_tables = link_ecel.sheet_by_index(0)
+link_get_col = 2
+link_write_col = 3
+link_can_use_index = 1
 
 def get_keywords_data(tables, row, col):
     actual_data = tables.cell_value(row, col)
@@ -22,13 +32,20 @@ def write_to_excel(file_path, row, col, value):
 
 
 def get_links():
-    res = []
-    count = 0
-    link_data = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows)]
-    for index, link in enumerate(link_data):
+    """Thread worker: open a private Chrome instance and check [index, link]
+    pairs taken from the shared link_queue until the queue is drained."""
+    from queue import Empty
+    global count
+    driver = webdriver.Chrome(r"C:\Users\lenovo\PycharmProjects\leetcode-python-\微信公众号\chromedriver.exe")
+    while True:
+        try:
+            # empty()-then-get() can block forever if another worker takes the last item.
+            index, link = link_queue.get_nowait()
+        except Empty:
+            break
         driver.get(link)
-        time.sleep(wait_time)
         try:
+            time.sleep(wait_time)
             text = driver.find_element_by_xpath("/html/body/div[1]/div[1]/p[1]")
             if text.text == "开卡失败":
                 write_to_excel(link_file_path, index + 1, link_write_col, "已使用")
@@ -37,19 +54,34 @@ def get_links():
             count += 1
             res.append(link)
             print("该卡可以使用:{}".format(link))
-    print("当前可使用链接个数为：{}".format(count))
-    return res
+            # print("当前可使用链接个数为：{}".format(count))
+    driver.quit()
 
 
 if __name__ == '__main__':
-    wait_time = 3
-    driver = webdriver.Chrome(r"C:\Users\lenovo\PycharmProjects\Spider\chromedriver.exe")
-    link_file_path = r"C:\Users\lenovo\PycharmProjects\leetcode-python-\微信公众号\link.xls"
-    link_ecel = xlrd.open_workbook(link_file_path)
-    link_tables = link_ecel.sheet_by_index(0)
-    link_get_col = 2
-    link_write_col = 3
-    link_can_use_index = 1
-
-    res = get_links()
-    print(res)
+    import time
+
+    start_time = time.time()
+    link_queue = Queue()
+    print("正在初始化链接队列。。。")
+    for i in range(1, link_tables.nrows - 1):
+        # get_links() writes to row index + 1, so queue the 0-based data index (sheet row - 1).
+        link_queue.put([i - 1, get_keywords_data(link_tables, i, link_get_col)])
+    print("初始化完成")
+    res = []
+    count = 0
+
+    threads_lis = []
+    for i in range(3):
+        thread = threading.Thread(target=get_links)
+        threads_lis.append(thread)
+
+    for one in threads_lis:
+        one.start()
+
+    for one in threads_lis:
+        one.join()
+
+    print("当前可用链接数：{}".format(count))
+    end_time = time.time()
+    print("用时：{}".format(end_time - start_time))
@@ -2,7 +2,7 @@
 # @Author  : Libuda
 # @FileName: new_check_link.py
 # @Software: PyCharm
-
+import time
 from bs4 import BeautifulSoup
 import requests
 import xlrd
@@ -15,6 +15,8 @@
 link_write_col = 3
 link_can_use_index = 1
 
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
 
 def get_keywords_data(tables, row, col):
     actual_data = tables.cell_value(row, col)
@@ -32,23 +34,32 @@ def write_to_excel(file_path, row, col, value):
 def get_links():
+    """Request each link read from the sheet; return those whose page has an
+    <input class="mobile">, and mark the others as "已使用" in the sheet."""
     res = []
     count = 0
-    link_data = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows)]
+    link_data = [get_keywords_data(link_tables, i, link_get_col) for i in range(1, link_tables.nrows - 1)]
     for index, link in enumerate(link_data):
+        response = requests.get(link, headers=headers).content
+
+        # print(response)
+        soup = BeautifulSoup(response.decode('utf-8'), "html.parser")
 
-        try:
-            print(link)
-            response = requests.get(link).content
-            soup = BeautifulSoup(response.decode('utf-8'), "html.parser")
-            print(soup.find(class_='mobile').get_text())
+        # time.sleep(1)
+        if soup.find("input", class_='mobile'):
 
-        except Exception as e:
-            print(e)
+            # print(soup.find("input",class_='mobile'))
             count += 1
             res.append(link)
             print("该卡可以使用:{}".format(link))
+        else:
+            print("该链接已使用:{}".format(link))
+            # link_data starts at sheet row 1, so the sheet row is index + 1.
+            write_to_excel(link_file_path, index + 1, link_write_col, "已使用")
     print("当前可使用链接个数为：{}".format(count))
     return res
 
 
 if __name__ == '__main__':
+    start_time = time.time()
     get_links()
+    end_time = time.time()
+    print("时间：{}".format(end_time - start_time))
@@ -273,22 +273,20 @@ def register(phone_data):
 
 def main():
     crawl_count = 1
+    windows = ""
+    second_window = ''
     while 1:
 
         # time_str = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S")
         # print(times)
         # # s="2019.12.02 13:56:20"
         # # print(datetime.datetime.strptime(s,"%Y.%m.%d %H:%M:%S")<times)
         if crawl_count == 1:
-            now_time = time.time()
-            # end_date = now_time- time_jiange
-            times = datetime.datetime.fromtimestamp(now_time)
-            #time_str = "{}-{}-{} {}:{}:{}".format(times.year, times.month, times.day - 1, 0, 0, 0)
             print("第1次爬取")
             # driver.get("https://e.douyin.com/site/manage-center/user-manage")
             driver.get("https://e.douyin.com/site/")
 
-            print("请您进行登录及手动进行所有的筛选并更改浏览器设置！")
+            print("请您进行登录及手动进行所有的筛选")
             yes = input("您是否已确认进行爬取")
             # cookie= driver.get_cookies()
             # driver.get("https://e.douyin.com/site/manage-center/user-manage")
@@ -300,28 +298,19 @@ def main():
             for wins in driver.window_handles:
                 if wins != windows:
                     driver.switch_to.window(wins)
+            second_window = driver.current_window_handle
+            y = input("是否设置完毕")
             # 测试
             # phone_data = [[phone, 0] for phone in [13945868092, 15169722520]]
             register(phone_data)
-            driver.close()
             driver.switch_to.window(windows)
 
             crawl_count += 1
         else:
             print("第{}次爬取".format(crawl_count))
-            now_time = time.time() - time_jiange
-            times = datetime.datetime.fromtimestamp(now_time)
             phone_data = get_phone_number(start_date, end_date)
-            windows = driver.current_window_handle
-            js = 'window.open("https://www.baidu.com");'
-            driver.execute_script(js)
-            for wins in driver.window_handles:
-                if wins != windows:
-                    driver.switch_to.window(wins)
-            # 测试
-            # phone_data = [[phone,0] for phone in [15099123201,17621790591]]
+            driver.switch_to.window(second_window)
             register(phone_data)
-            driver.close()
             driver.switch_to.window(windows)
             crawl_count += 1