From b4803fa33e5dae2dd7b5026ae4bf7fb0971f0a16 Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 13:05:21 +0800 Subject: [PATCH 01/22] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cd34115..c6fb28f 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,5 @@ 就是一些~~有用的~~Python脚本 1. hostloc:hostloc访问空间获取金币,每天运行一次,每次20金币 -2. zhihu:知乎图片下载器,下载某个问答下面所有的图片 \ No newline at end of file +2. zhihu:知乎图片下载器,下载某个问答下面所有的图片 +3. tumblr:Tumblr爬虫 \ No newline at end of file From b6bf2451599b857cf98c13eceeb10fd6a32bbd45 Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 13:05:41 +0800 Subject: [PATCH 02/22] =?UTF-8?q?Tumblr=E7=88=AC=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tumblr/README.md | 12 +++++++ tumblr/tumblr.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 tumblr/README.md create mode 100644 tumblr/tumblr.py diff --git a/tumblr/README.md b/tumblr/README.md new file mode 100644 index 0000000..e7579db --- /dev/null +++ b/tumblr/README.md @@ -0,0 +1,12 @@ +# Tumblr博客解析 + + +1. 直接运行**python tumblr.py** + +2. 提示输入tumblr博客的ID,比如解析该博客:http://sample.tumblr.com,则输入sample + +3. 然后就开始解析啦...... + +4. 解析完毕可在脚本同目录下找到sample_pictures.txt和sample_videos.txt文件 + +> notice:因国内屏蔽tumblr,请全局翻墙 diff --git a/tumblr/tumblr.py b/tumblr/tumblr.py new file mode 100644 index 0000000..d2b2683 --- /dev/null +++ b/tumblr/tumblr.py @@ -0,0 +1,91 @@ +# -*- coding=utf-8 -*- +from threading import Thread +import Queue +import requests +import re +import os +import sys +import time + + +api_url='http://%s.tumblr.com/api/read?&num=50&start=' +UQueue=Queue.Queue() +def getpost(uid,queue): + url='http://%s.tumblr.com/api/read?&num=50'%uid + page=requests.get(url).content + total=re.findall('',page)[0] + total=int(total) + a=[i*50 for i in range(1000) if i*50-total<0] + ul=api_url%uid + for i in a: + queue.put(ul+str(i)) + + +extractpicre = re.compile(r'(?<=).+?(?=)',flags=re.S) #search for url of maxium size of a picture, which starts with '' and ends with '' +extractvideore=re.compile('/tumblr_(.*?)" type="video/mp4"') + +video_links = [] +pic_links = [] +vhead = 'https://vt.tumblr.com/tumblr_%s.mp4' + +class Consumer(Thread): + + def __init__(self, l_queue): + super(Consumer,self).__init__() + self.queue = l_queue + + def run(self): + session = requests.Session() + while 1: + link = self.queue.get() + print 'start parse post: ' + link + try: + content = session.get(link).content + videos = extractvideore.findall(content) + video_links.extend([vhead % v for v in videos]) + pic_links.extend(extractpicre.findall(content)) + except: + print 'url: %s parse failed\n' % link + if self.queue.empty(): + break + + +def main(): + task=[] + for i in range(min(10,UQueue.qsize())): + t=Consumer(UQueue) + task.append(t) + for t in task: + t.start() + for t in task: + t.join + while 1: + for t in task: + if t.is_alive(): + continue + else: + task.remove(t) + if len(task)==0: + break + + +def write(name): + videos=[i.replace('/480','') for i in video_links] + pictures=pic_links + with open('%s_pictures.txt'%name,'w') as f: + for i in pictures: + f.write('%s\n'%i) + with open('%s_videos.txt'%name,'w') as f: + for i in videos: + f.write('%s\n'%i) + + +if __name__=='__main__': + #name=sys.argv[1] + #name=name.strip() + print u"请输入tumblr博客ID:" + name=raw_input() + getpost(name,UQueue) + main() + write(name) + print u"解析完毕,请查看同目录下的文件" \ No newline at end of file From f3693d8b8427717c67a06c6bbb5b70951e9f505e Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 13:19:48 +0800 Subject: [PATCH 03/22] add scripts --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c6fb28f..67a7b88 100644 --- a/README.md +++ b/README.md @@ -3,4 +3,9 @@ 1. hostloc:hostloc访问空间获取金币,每天运行一次,每次20金币 2. zhihu:知乎图片下载器,下载某个问答下面所有的图片 -3. tumblr:Tumblr爬虫 \ No newline at end of file +3. tumblr:Tumblr爬虫 +4. v2ex:v2ex签到脚本 +5. fuliba:福利吧签到脚本 + + +> 注:签到脚本都可以在vps上部署一个crontab定时任务 \ No newline at end of file From 0a94b8b72f2dd441fe08b49ddd472c17f8cc92b7 Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 13:20:10 +0800 Subject: [PATCH 04/22] =?UTF-8?q?=E7=A6=8F=E5=88=A9=E5=90=A7=E7=AD=BE?= =?UTF-8?q?=E5=88=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fuliba/README.md | 7 +++++++ fuliba/fuliba.py | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 fuliba/README.md create mode 100644 fuliba/fuliba.py diff --git a/fuliba/README.md b/fuliba/README.md new file mode 100644 index 0000000..607a3da --- /dev/null +++ b/fuliba/README.md @@ -0,0 +1,7 @@ +# 福利吧签到脚本 + +1. 登录福利吧,获取cookies + +2. 获取的cookies填入脚本的**raw_cookies** + +3. 运行`python fuliba.net`即可 diff --git a/fuliba/fuliba.py b/fuliba/fuliba.py new file mode 100644 index 0000000..694edb2 --- /dev/null +++ b/fuliba/fuliba.py @@ -0,0 +1,22 @@ +# coding:utf-8 +import re +import requests as req + +raw_cookies='' #cookies请自助获取 +cookies={} +for line in raw_cookies.split(';'): + key,value=line.split('=',1) + cookies[key]=value + +url='http://www.wndflb.com' +checkIn='http://www.wndflb.com/plugin.php?id=fx_checkin:checkin&formhash=' + +def qiandao(cookies): + s=req.get(url,cookies=cookies) + formhash=re.findall('checkin&formhash=(.*?)&',s.content)[0] + urls=checkIn+formhash + ss=req.get(urls,cookies=cookies) + return ss + +if __name__=='__main__': + qiandao(cookies) From 94b82d017d831ebb3cf91b4874ad098b1b23de01 Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 13:20:40 +0800 Subject: [PATCH 05/22] =?UTF-8?q?v2ex=E7=AD=BE=E5=88=B0=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- v2ex/README.md | 8 ++++++++ v2ex/v2ex.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 v2ex/README.md create mode 100644 v2ex/v2ex.py diff --git a/v2ex/README.md b/v2ex/README.md new file mode 100644 index 0000000..d73b636 --- /dev/null +++ b/v2ex/README.md @@ -0,0 +1,8 @@ +# v2ex签到脚本 + + +1. 修改脚本的username和passwd为你的v2ex用户名和密码 + +2. 直接运行**python v2ex.py** + +3. 签到成功 diff --git a/v2ex/v2ex.py b/v2ex/v2ex.py new file mode 100644 index 0000000..3a321ac --- /dev/null +++ b/v2ex/v2ex.py @@ -0,0 +1,52 @@ +#-*- coding=utf-8 -*- +import requests +import re +import sys +reload(sys) +sys.setdefaultencoding('utf8') + + +signin='https://v2ex.com/signin' +home='https://v2ex.com' +url='https://v2ex.com/mission/daily' +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36', + 'Origin': 'https://www.v2ex.com', + 'Referer': 'https://www.v2ex.com/signin', + 'Host': 'www.v2ex.com', +} +data={} + +def sign(username,passwd): + try: + session=requests.Session() + session.headers=headers + loginhtm=session.get(signin,verify=False).content + usernameform=re.findall('',loginhtm)[0] + print usernameform + print passwdform + print onceform + data[usernameform]=username + data[passwdform]=passwd + data['once']=onceform + data['next']='/' + loginp=session.post(signin,data=data,verify=False) + sign=session.get(url).content + try: + qiandao=re.findall("location.href = '(.*?)'",sign)[0] + session.get(home+qiandao,verify=False) + print u'签到成功' + except: + print "fail" + except Exception,e: + print e + + + +if __name__=='__main__': + username='' + passwd='' + requests.packages.urllib3.disable_warnings() + sign(username,passwd) \ No newline at end of file From 3b675869516e56aa3d562881e7fde266c3ddc3c8 Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 13:22:52 +0800 Subject: [PATCH 06/22] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 67a7b88..00ba956 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,7 @@ 5. fuliba:福利吧签到脚本 -> 注:签到脚本都可以在vps上部署一个crontab定时任务 \ No newline at end of file +> 注: +> 1. 签到脚本都可以在vps上部署一个crontab定时任务 +> 2. 脚本很随意,没有备注、不规范 + \ No newline at end of file From 77c6d5afa75cf81d150d7aa95126391649c5b2ee Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 13:25:45 +0800 Subject: [PATCH 07/22] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 00ba956..1dd7ce9 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ > 注: + > 1. 签到脚本都可以在vps上部署一个crontab定时任务 + > 2. 脚本很随意,没有备注、不规范 - \ No newline at end of file + From a5ac06e8acd6242af5f9a0d588514228adac82a4 Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 16:25:35 +0800 Subject: [PATCH 08/22] =?UTF-8?q?=E8=B7=B3=E8=BF=87=E9=87=91=E7=9B=BE?= =?UTF-8?q?=E9=AA=8C=E8=AF=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hostloc/hostloc.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hostloc/hostloc.py b/hostloc/hostloc.py index 55d9677..c835d9e 100644 --- a/hostloc/hostloc.py +++ b/hostloc/hostloc.py @@ -25,12 +25,24 @@ def __init__(self,username,passwd): self.session.cookies = cookielib.LWPCookieJar(filename='cookies') try: self.session.cookies.load(ignore_discard=True) + self.pass_jdkey() if self.isLogin(): self.login() except: + self.pass_jdkey() self.login() + + def pass_jdkey(self): + html=self.session.get(index).content + try: + jdkey=re.findall('jdfwkey=(.*?)"')[0] + except: + jdkey='' + url=index+'?jdfwkey='+jdkey + self.session.get(index) def login(self): + jdurl=self.get_jdkey(url) self.session.post(login_url,data=login_data) self.session.cookies.save() From f39553e0df2ca004a6577f08628636215e1f4bdd Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 17:50:06 +0800 Subject: [PATCH 09/22] update README.d --- hostloc/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/hostloc/README.md b/hostloc/README.md index 757671a..9406398 100644 --- a/hostloc/README.md +++ b/hostloc/README.md @@ -7,3 +7,4 @@ 每天运行一次,每次获取20金币,升元老指日可待2333 +**目前脚本有问题,需修改后才能用** \ No newline at end of file From f01dd7e6e9b13f22740c24529b94e0780f10bb9c Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Thu, 22 Dec 2016 19:15:00 +0800 Subject: [PATCH 10/22] =?UTF-8?q?=E5=BA=94=E8=AF=A5=E5=8F=AF=E7=94=A8?= =?UTF-8?q?=E4=BA=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hostloc/hostloc.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/hostloc/hostloc.py b/hostloc/hostloc.py index c835d9e..1ebcbea 100644 --- a/hostloc/hostloc.py +++ b/hostloc/hostloc.py @@ -14,6 +14,15 @@ ,'quickforward':'yes' ,'handlekey':'ls' } +headers={ + 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + ,'Accept-Encoding':'gzip, deflate, sdch' + ,'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6' + ,'Host':'www.hostloc.com' + ,'Referer':'http://www.hostloc.com/forum.php' + ,'Upgrade-Insecure-Requests':'1' + ,'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36' +} class HostLoc(): def __init__(self,username,passwd): @@ -22,15 +31,9 @@ def __init__(self,username,passwd): login_data['username']=username login_data['password']=passwd self.session=requests.Session() - self.session.cookies = cookielib.LWPCookieJar(filename='cookies') - try: - self.session.cookies.load(ignore_discard=True) - self.pass_jdkey() - if self.isLogin(): - self.login() - except: - self.pass_jdkey() - self.login() + self.session.headers=headers + self.pass_jdkey() + self.login() def pass_jdkey(self): html=self.session.get(index).content @@ -42,9 +45,7 @@ def pass_jdkey(self): self.session.get(index) def login(self): - jdurl=self.get_jdkey(url) self.session.post(login_url,data=login_data) - self.session.cookies.save() def isLogin(self): url='http://www.hostloc.com/home.php?mod=spacecp' @@ -57,7 +58,7 @@ def isLogin(self): def get_user(self): print('parse '+page_url) - self.html=self.session.get(page_url).text + self.html=self.session.get(page_url).content user_pattern=re.compile('space-uid-\d+?.html') users=list(set(user_pattern.findall(self.html))) self.users=[index+i for i in users] From 96e199d8c8354e7138828b097e8bfc58d67e8fe8 Mon Sep 17 00:00:00 2001 From: mzcyx <920082975@qq.com> Date: Fri, 23 Dec 2016 12:44:29 +0800 Subject: [PATCH 11/22] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hostloc/hostloc.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/hostloc/hostloc.py b/hostloc/hostloc.py index 1ebcbea..971b865 100644 --- a/hostloc/hostloc.py +++ b/hostloc/hostloc.py @@ -2,9 +2,11 @@ import requests import re import cookielib +import sys index='http://www.hostloc.com/' page_url='http://www.hostloc.com/forum-45-1.html' +credit_url='http://www.hostloc.com/home.php?mod=spacecp&ac=credit&showcredit=1' login_url='http://www.hostloc.com/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1' login_data={ 'fastloginfield':'username' @@ -23,7 +25,8 @@ ,'Upgrade-Insecure-Requests':'1' ,'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36' } - + + class HostLoc(): def __init__(self,username,passwd): self.username=username @@ -55,6 +58,18 @@ def isLogin(self): return False else: return True + + def get_credit(self): + html=self.session.get(credit_url).content + credit_pattern=re.compile(r'