diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9a05e2d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,91 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
diff --git a/ASCII-ART/README.md b/ASCII-ART/README.md
new file mode 100644
index 0000000..027fb30
--- /dev/null
+++ b/ASCII-ART/README.md
@@ -0,0 +1,7 @@
+# 图片转字符画
+
+1. 修改image_file_path为图片目录地址
+
+2. 直接运行脚本
+
+3. 打开image_ascii.txt缩小看看
diff --git a/ASCII-ART/baby.jpg b/ASCII-ART/baby.jpg
new file mode 100644
index 0000000..60c62cf
Binary files /dev/null and b/ASCII-ART/baby.jpg differ
diff --git a/ASCII-ART/char.png b/ASCII-ART/char.png
new file mode 100644
index 0000000..a428c67
Binary files /dev/null and b/ASCII-ART/char.png differ
diff --git a/ASCII-ART/image_ascii.txt b/ASCII-ART/image_ascii.txt
new file mode 100644
index 0000000..48e7be2
--- /dev/null
+++ b/ASCII-ART/image_ascii.txt
@@ -0,0 +1,56 @@
+ ...:&@@@$S; . .
+ ...+@#$S;::;S@@=: ..
+ . .=@$::... ....:$@:.. ..::
+ +@;..... . . .+@; .. ..$@@+..
+ . .;#:. .=#:.. +&.:@...
+ . ..:@= .. ..;#:.. .$:..$&..
+ .@= . . ..;@.. S&..;@:
+ . .+$..... ... +&. :#:.:@&...
+ . #: ..&.. . . . ..:$@..@:.. :... .#&:.:#:.. .
+ SS...=#: ..... . .@=.&:SS.. .#&. . .S#...$$...;=. .
+ :@:. .=#.. . &@&+ . S#$:;.:@.. .#@. ...... .. ;@+. :@..@+.
+ =@. ..=#.. .;#$::;. $#=.;..@; . .#$. .;@@=.. ..@$...&$&$.:@: .
+ ..$+.. =#....@::.. =#@:...S=. ..#$. .$;:#. . . .S@:...$;. .$+..
+ .:@. . .=#....=##& . .::....+&. .#$.. ....@;. . . ..@$:.......S& .
+ . .&=.. .S@.. ...:............=@. ..#S .;:.+& . . ..+@&......S@@:..
+ . ;@... ..$@. .. .. .&&S: .....:@. .;#=. .+&.:@.. ...=@#=... .+$#$:..
+ .$S. . ..#S. . .:...:... .@. :S@: .:@.:@:... ..;@@S.....;$##=..
+ ..@:. ...;@:. ..;+:.:$:.....#...$@. ..@:.@; . =@#&:.. ..#+...
+ S&... .S@.. ..+S$@@;.... .@:.;#=. ..$=.SS. :@@@;.. ..S#@$:.. .
+ .#: ....@S. .:.+@&+: ... .@;.&$.. .+$.+$.$;.. ..S###:...
+ ..S$......+@... ....... . .$;;#;. :@::#.=: ... S###+... .
+ .@;.. ..@; . .. . @+@+ . . .@:.&. . ...:@#+....
+ .:@.. .:S:.. .@@S . .. ..@:. . . ;$@+... .
+ .S+. .. .. .@; . . ..:$$... .;&@+ .. .
+ .@.. .@:. .. S#S......+@S:...
+ .=&. .@:. .@:.. ..=@$:....
+ ..@:... .@. ...$;...+#$=...
+ ;@. .#. .+&.=#$:....
+ .&S.. .@. ..@#$;.... .
+ .@: .@. ...... ..
+ .:@. :@.. . ...
+ .&+. .@ . ...
+ ..#:. :@..
+ ;$.. +&.
+ =;. .&=.
+ ... .@:..
+ .@:..
+ .#..
+ ..:$.
+ ;&..
+ ...
+ ..
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ASCII-ART/picture_to_pixles.py b/ASCII-ART/picture_to_pixles.py
new file mode 100644
index 0000000..ce4902e
--- /dev/null
+++ b/ASCII-ART/picture_to_pixles.py
@@ -0,0 +1,69 @@
+# -*- coding=utf-8 -*-
+from PIL import Image
+import sys
+import requests
+import StringIO
+
+
+ASCII_CHARS = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft|()1{}[]?-_+~i!lI;:,^ "
+max_width=100 #调节此项可调节字符画大小,数值越大保留的图片细节越多。
+range_width=float(255)/(len(ASCII_CHARS)-1)
+
+def scale_image(image, new_width=max_width):
+ """Resizes an image preserving the aspect ratio.
+ """
+ (original_width, original_height) = image.size
+ aspect_ratio = original_height/float(original_width)*0.5
+ new_height = int(aspect_ratio * new_width)
+ new_image = image.resize((new_width, new_height))
+ return new_image
+
+def convert_to_grayscale(image):
+ return image.convert('L')
+
+def map_pixels_to_ascii_chars(image, range_width=range_width):
+ """Maps each pixel to an ascii char based on the range
+ in which it lies.
+    0-255 is divided evenly among the characters in ASCII_CHARS.
+ """
+ pixels_in_image = list(image.getdata())
+ pixels_to_chars = [ASCII_CHARS[int(pixel_value/range_width)] for pixel_value in pixels_in_image]
+ return "".join(pixels_to_chars)
+
+def convert_image_to_ascii(image, new_width=max_width):
+ image = scale_image(image)
+ image = convert_to_grayscale(image)
+ pixels_to_chars = map_pixels_to_ascii_chars(image)
+ len_pixels_to_chars = len(pixels_to_chars)
+ image_ascii = [pixels_to_chars[index: index + new_width] for index in xrange(0, len_pixels_to_chars, new_width)]
+ f=open('image_ascii.txt','w')
+ for line in image_ascii:
+ f.write('%s\n'%line)
+ return "\n".join(image_ascii)
+
+def handle_image_conversion(image_filepath):
+ image = None
+ session=requests.Session()
+ session.headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
+ try:
+ if image_filepath.startswith('http://') or image_filepath.startswith('https://'):
+ resp=session.get(image_filepath)
+ imagebuf=StringIO.StringIO(resp.content)
+ image=Image.open(imagebuf)
+ else:
+ image = Image.open(image_filepath)
+ except Exception, e:
+ print "Unable to open image file {image_filepath}.".format(image_filepath=image_filepath)
+ print e
+ return
+ image_ascii = convert_image_to_ascii(image)
+ print image_ascii
+
+if __name__=='__main__':
+ while 1:
+ print u"请输入图片地址:"
+ try:
+ image_file_path = sys.argv[1]
+ except:
+ image_file_path=raw_input()
+ handle_image_conversion(image_file_path)
diff --git a/ASCII-ART/xzz.jpg b/ASCII-ART/xzz.jpg
new file mode 100644
index 0000000..da3efde
Binary files /dev/null and b/ASCII-ART/xzz.jpg differ
diff --git a/README.md b/README.md
index cd34115..647b8c8 100644
--- a/README.md
+++ b/README.md
@@ -2,4 +2,18 @@
就是一些~~有用的~~Python脚本
1. hostloc:hostloc访问空间获取金币,每天运行一次,每次20金币
-2. zhihu:知乎图片下载器,下载某个问答下面所有的图片
\ No newline at end of file
+2. zhihu:知乎图片下载器,下载某个问答下面所有的图片(接口更新,目前已经失效)
+3. tumblr:Tumblr爬虫
+4. v2ex:v2ex签到脚本
+5. fuliba:福利吧签到脚本
+6. ASCII-ART:图片转字符画
+7. baidu:百度图床
+8. alipay: 支付宝登录&获取订单信息
+
+
+> 注:
+
+> 1. 签到脚本都可以在vps上部署一个crontab定时任务
+
+> 2. 脚本很随意,没有备注、不规范
+
diff --git a/alipay/README.md b/alipay/README.md
new file mode 100644
index 0000000..604313d
--- /dev/null
+++ b/alipay/README.md
@@ -0,0 +1,11 @@
+# 支付宝登录 & 获取订单信息脚本
+
+使用前准备:
+1. 安装selenium:`pip install selenium`
+2. 下载webdriver。phantomjs无界面,适合linux;chromedriver方便调试。自行百度下载
+3. 修改**USERNMAE**和**PASSWD**
+
+运行:
+`python alipay_login.py`
+
+pps. 脚本非原创。在v2ex一名v友的基础上修改的
diff --git a/alipay/alipay.py b/alipay/alipay.py
new file mode 100644
index 0000000..78b9fa2
--- /dev/null
+++ b/alipay/alipay.py
@@ -0,0 +1,181 @@
+#-*- coding=utf-8 -*-
+"""
+支付宝登录获取订单信息脚本
+ps.没啥卵用,使用selenium不稳定,经常出现问题。
+使用前准备:
+1.安装selenium:pip install selenium
+2.下载webdriver。phantomjs无界面,适合linux;chromedriver方便调试。自行百度下载
+3.修改USERNMAE和PASSWD
+
+运行:
+python alipay_login.py
+
+pps. 脚本非原创。在v2ex一名v友的基础上修改的
+"""
+import requests
+from selenium import webdriver
+import time
+import pickle
+import re
+requests.packages.urllib3.disable_warnings()
+
+# 登录 url
+Login_Url = 'https://auth.alipay.com/login/index.htm?goto=https://consumeprod.alipay.com/record/advanced.htm'
+# 账单 url
+Bill_Url = 'https://consumeprod.alipay.com/record/advanced.htm'
+# 登录用户名和密码
+USERNMAE = ''
+PASSWD = ''
+# 自定义 headers
+HEADERS = {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
+ 'Referer': 'https://consumeprod.alipay.com/record/advanced.htm',
+ 'Host': 'consumeprod.alipay.com',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+ 'Connection': 'keep-alive'
+}
+
+
+class Alipay_Bill_Info(object):
+ '''支付宝账单信息'''
+
+ def __init__(self, headers, user, passwd):
+ '''
+ 类的初始化
+ headers:请求头
+ cookies: 持久化访问
+ info_list: 存储账单信息的列表
+ '''
+ self.headers = headers
+ # 初始化用户名和密码
+ self.user = user
+ self.passwd = passwd
+ # 利用 requests 库构造持久化请求
+ self.session = requests.Session()
+ # 将请求头添加到缓存之中
+ self.session.headers = self.headers
+ try:
+ cookies = pickle.load(open("cookies", "rb"))
+ for cookie in cookies:
+ self.session.cookies.set(cookie['name'], cookie['value'])
+ print u"获取cookies成功!"
+ except:
+ print u"未登陆过,需先登录"
+ self.get_cookies()
+ if not self.login_status():
+ print u"cookies失效,重新登录"
+ self.get_cookies()
+ # 初始化存储列表
+ self.info_list = []
+
+ def wait_input(self, ele, str):
+ '''减慢账号密码的输入速度'''
+ for i in str:
+ ele.send_keys(i)
+ time.sleep(0.5)
+
+ def get_cookies(self):
+ '''获取 cookies'''
+ # 初始化浏览器对象
+ # sel = webdriver.PhantomJS(
+ # executable_path='C:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe')
+ # sel = webdriver.PhantomJS(
+ # executable_path='/root/phantomjs/bin/phantomjs')
+ sel = webdriver.Chrome(executable_path='C:/chromedriver.exe')
+ sel.maximize_window()
+ sel.get(Login_Url)
+ sel.implicitly_wait(3)
+ # 找到用户名字输入框
+ uname = sel.find_element_by_id('J-input-user')
+ uname.clear()
+ print u"正在输入账号....."
+ self.wait_input(uname, self.user)
+ time.sleep(1)
+ # 找到密码输入框
+ upass = sel.find_element_by_id('password_rsainput')
+ upass.clear()
+ print u"正在输入密码...."
+ self.wait_input(upass, self.passwd)
+ # 截图查看
+ sel.save_screenshot('1.png')
+ # 找到登录按钮
+ button = sel.find_element_by_id('J-login-btn')
+ time.sleep(1)
+ print 1
+ button.click()
+ print 2
+ sel.save_screenshot('2.png')
+ if len(re.findall('checkSecurity', sel.current_url)) > 0:
+ riskackcode = sel.find_element_by_id('riskackcode')
+ riskackcode.clear()
+ print u"等待输入验证码:"
+ msgcode = raw_input()
+ self.wait_input(riskackcode, msgcode)
+ button = sel.find_element_by_xpath(
+ '//*[@id="J-submit"]/input') # ui-button
+ time.sleep(1)
+ button.click()
+ sel.save_screenshot('2.1.png')
+ print(sel.current_url)
+ # 跳转到账单页面
+ print u"正在跳转页面...."
+ sel.get(Bill_Url)
+ sel.implicitly_wait(3)
+ sel.save_screenshot('3.png')
+ # 获取 cookies 并转换为字典类型
+ cookies = sel.get_cookies()
+ pickle.dump(cookies, open("cookies", "wb"))
+ for cookie in cookies:
+ self.session.cookies.set(cookie['name'], cookie['value'])
+ # 关闭浏览器
+ sel.close()
+
+ def set_cookies(self):
+ '''将获取到的 cookies 加入 session'''
+ self.get_cookies()
+
+ def login_status(self):
+ '''判断登录状态'''
+ # 添加 cookies
+ status = self.session.get(
+ Bill_Url, timeout=5, allow_redirects=False, verify=False).status_code
+ print(status)
+ if status == 200:
+ return True
+ else:
+ return False
+
+ def get_data(self):
+ '''
+ 利用 正则表达式解析 html
+ 并抓取数据,
+ 数据以字典格式保存在列表里
+ '''
+ status = self.login_status()
+ if status:
+ html = self.session.get(Bill_Url, verify=False).text
+ # 抓取前五个交易记录
+ trades = re.findall('
', html)
+ for trade in trades:
+ # 做一个 try except 避免异常中断
+ try:
+ # 分别找到账单的 时间 金额 以及流水号
+ day = re.findall(
+ '.*?(\d{4}\.\d{2}\.\d{2})', trade)[0]
+ time = re.findall(
+ '
.*?(\d{2}:\d{2})', trade)[0]
+ amount = re.findall(
+ '(.*?) ', trade)[0]
+ ddh = re.findall(
+ '
.*?(\d{20})', trade)[0] #
+ print day, time, amount, ddh
+ except Exception, e:
+ print e
+ else:
+ print u"登录失败"
+
+
+# test:
+test = Alipay_Bill_Info(HEADERS, USERNMAE, PASSWD)
+test.get_data()
+
diff --git a/baidu/smms.py b/baidu/smms.py
new file mode 100644
index 0000000..a76a238
--- /dev/null
+++ b/baidu/smms.py
@@ -0,0 +1,26 @@
+#-*- coding=utf-8 -*-
+import requests
+import json
+import sys
+
+tc_api='https://sm.ms/api/upload'
+files={
+ 'smfile':''
+ }
+data={'ssl':'false'
+ ,'format':'json'}
+
+def smms(filepath):
+ try:
+ img=open(filepath,'rb')
+ except Exception,e:
+ print e
+ sys.exit(0)
+ files['smfile']=img
+ c=requests.post(tc_api,files=files,data=data)
+ dat=json.loads(c.content)
+ if dat['code']=='error':
+ img=dat['msg']
+ if dat['code']=='success':
+ img=dat['data']['url']
+ return img
\ No newline at end of file
diff --git a/baidu/tuchuang.py b/baidu/tuchuang.py
new file mode 100644
index 0000000..328ecec
--- /dev/null
+++ b/baidu/tuchuang.py
@@ -0,0 +1,59 @@
+# -*- coding=utf8 -*-
+import sys
+reload(sys)
+sys.setdefaultencoding('utf-8')
+from PyQt4 import QtCore, QtGui, uic
+import requests
+import re
+from smms import smms
+
+qtCreatorFile = "tuchuang_ui.ui" # Enter file here.
+
+Ui_MainWindow, QtBaseClass = uic.loadUiType(qtCreatorFile)
+
+class MyApp(QtGui.QMainWindow, Ui_MainWindow):
+ def __init__(self):
+ QtGui.QMainWindow.__init__(self)
+ Ui_MainWindow.__init__(self)
+ self.setupUi(self)
+ self.thread=Worker()
+ self.fileSelect.clicked.connect(self.selectFile)
+ self.thread.sinOut2.connect(self.show_result)
+
+ def selectFile(self):
+ filepath=self.to_utf8(QtGui.QFileDialog.getOpenFileName(self,u'选择图片','',r'Image Files(*.png *.jpg *.bmp *.jpeg *.gif)'))
+ self.thread.getPath((filepath,))
+ self.fileSelect.setEnabled(False)
+
+
+ def show_result(self,result):
+ img,isTrue=result[0],result[1]
+ self.markdown_show.setText('')
+ self.realurl.setText(img)
+ self.fileSelect.setEnabled(isTrue)
+
+ def to_utf8(self,input):
+ return unicode(input,'utf8','ignore')
+
+
+class Worker(QtCore.QThread):
+ sinOut2 = QtCore.pyqtSignal(tuple)
+
+ def __init__(self,parent=None):
+ super(Worker,self).__init__(parent)
+
+ def getPath(self,filepath):
+ self.filepath=filepath[0]
+ self.start()
+
+
+ def run(self):
+ img=smms(self.filepath)
+ self.sinOut2.emit((img,True))
+
+
+if __name__ == "__main__":
+ app = QtGui.QApplication(sys.argv)
+ window = MyApp()
+ window.show()
+ sys.exit(app.exec_())
diff --git a/baidu/tuchuang_ui.ui b/baidu/tuchuang_ui.ui
new file mode 100644
index 0000000..aa7e9a3
--- /dev/null
+++ b/baidu/tuchuang_ui.ui
@@ -0,0 +1,98 @@
+
+
+ Form
+
+
+
+ 0
+ 0
+ 395
+ 154
+
+
+
+ 百度图床
+
+
+
+
+ 120
+ 20
+ 251
+ 31
+
+
+
+
+
+
+ 120
+ 60
+ 251
+ 31
+
+
+
+
+
+
+ 30
+ 20
+ 91
+ 21
+
+
+
+
+ 微软雅黑
+ 75
+ true
+
+
+
+ MarkDown
+
+
+ Qt::RichText
+
+
+
+
+
+ 50
+ 60
+ 61
+ 21
+
+
+
+
+ 微软雅黑
+ 75
+ true
+
+
+
+ 图片链接
+
+
+ Qt::RichText
+
+
+
+
+
+ 120
+ 110
+ 93
+ 28
+
+
+
+ 选择图片
+
+
+
+
+
+
diff --git a/fuliba/README.md b/fuliba/README.md
new file mode 100644
index 0000000..607a3da
--- /dev/null
+++ b/fuliba/README.md
@@ -0,0 +1,7 @@
+# 福利吧签到脚本
+
+1. 登录福利吧,获取cookies
+
+2. 获取的cookies填入脚本的**raw_cookies**
+
+3. 运行`python fuliba.py`即可
diff --git a/fuliba/fuliba.py b/fuliba/fuliba.py
new file mode 100644
index 0000000..694edb2
--- /dev/null
+++ b/fuliba/fuliba.py
@@ -0,0 +1,22 @@
+# coding:utf-8
+import re
+import requests as req
+
+raw_cookies='' #cookies请自助获取
+cookies={}
+for line in raw_cookies.split(';'):
+ key,value=line.split('=',1)
+ cookies[key]=value
+
+url='http://www.wndflb.com'
+checkIn='http://www.wndflb.com/plugin.php?id=fx_checkin:checkin&formhash='
+
+def qiandao(cookies):
+ s=req.get(url,cookies=cookies)
+ formhash=re.findall('checkin&formhash=(.*?)&',s.content)[0]
+ urls=checkIn+formhash
+ ss=req.get(urls,cookies=cookies)
+ return ss
+
+if __name__=='__main__':
+ qiandao(cookies)
diff --git a/hostloc/README.md b/hostloc/README.md
index 757671a..da75747 100644
--- a/hostloc/README.md
+++ b/hostloc/README.md
@@ -7,3 +7,5 @@
每天运行一次,每次获取20金币,升元老指日可待2333
+**目前脚本有问题,需修改后才能用**
+**因为crontab定时任务print中文会出错,将脚本里面的print的地方注释**
diff --git a/hostloc/hostloc.py b/hostloc/hostloc.py
index 55d9677..177f820 100644
--- a/hostloc/hostloc.py
+++ b/hostloc/hostloc.py
@@ -2,9 +2,11 @@
import requests
import re
import cookielib
+import sys
index='http://www.hostloc.com/'
page_url='http://www.hostloc.com/forum-45-1.html'
+credit_url='http://www.hostloc.com/home.php?mod=spacecp&ac=credit&showcredit=1'
login_url='http://www.hostloc.com/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1'
login_data={
'fastloginfield':'username'
@@ -14,7 +16,17 @@
,'quickforward':'yes'
,'handlekey':'ls'
}
-
+headers={
+ 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+ ,'Accept-Encoding':'gzip, deflate, sdch'
+ ,'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6'
+ ,'Host':'www.hostloc.com'
+ ,'Referer':'http://www.hostloc.com/forum.php'
+ ,'Upgrade-Insecure-Requests':'1'
+ ,'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
+}
+
+
class HostLoc():
def __init__(self,username,passwd):
self.username=username
@@ -22,17 +34,22 @@ def __init__(self,username,passwd):
login_data['username']=username
login_data['password']=passwd
self.session=requests.Session()
- self.session.cookies = cookielib.LWPCookieJar(filename='cookies')
- try:
- self.session.cookies.load(ignore_discard=True)
- if self.isLogin():
- self.login()
+ self.session.headers=headers
+ self.login()
+ self.pass_jdkey()
+
+
+ def pass_jdkey(self):
+ html=self.session.get(index).content
+ try:
+ jdkey=re.findall('jdfwkey=(.*?)"')[0]
except:
- self.login()
+ jdkey=''
+ url=index+'?jdfwkey='+jdkey
+ self.session.get(index)
def login(self):
self.session.post(login_url,data=login_data)
- self.session.cookies.save()
def isLogin(self):
url='http://www.hostloc.com/home.php?mod=spacecp'
@@ -42,10 +59,22 @@ def isLogin(self):
return False
else:
return True
+
+ def get_credit(self):
+ html=self.session.get(credit_url).content
+ credit_pattern=re.compile(r'- 金钱: (.*?) .*?
[\w\W]*?- 威望: (.*?)
[\w\W]*?- 积分: (.*?) ')
+ try:
+ credit=credit_pattern.findall(html)
+ coin,wh,jf=credit[0]
+ print u"金币:%s,威望:%s,积分:%s"%(coin,wh,jf)
+ return True
+ except:
+ print u"获取数据失败,请稍后再试"
+ return False
def get_user(self):
print('parse '+page_url)
- self.html=self.session.get(page_url).text
+ self.html=self.session.get(page_url).content
user_pattern=re.compile('space-uid-\d+?.html')
users=list(set(user_pattern.findall(self.html)))
self.users=[index+i for i in users]
@@ -56,9 +85,14 @@ def visit_user(self):
self.session.get(user)
+
if __name__=='__main__':
username='' #用户名
passwd='' #密码
hostloc=HostLoc(username,passwd)
- hostloc.get_user()
- hostloc.visit_user()
+ if hostloc.get_credit():
+ hostloc.get_user()
+ hostloc.visit_user()
+ hostloc.get_credit()
+ else:
+ sys.exit(0)
diff --git a/tumblr/README.md b/tumblr/README.md
new file mode 100644
index 0000000..e7579db
--- /dev/null
+++ b/tumblr/README.md
@@ -0,0 +1,12 @@
+# Tumblr博客解析
+
+
+1. 直接运行**python tumblr.py**
+
+2. 提示输入tumblr博客的ID,比如解析该博客:http://sample.tumblr.com,则输入sample
+
+3. 然后就开始解析啦......
+
+4. 解析完毕可在脚本同目录下找到sample_pictures.txt和sample_videos.txt文件
+
+> notice:因国内屏蔽tumblr,请全局翻墙
diff --git a/tumblr/tumblr.py b/tumblr/tumblr.py
new file mode 100644
index 0000000..d2b2683
--- /dev/null
+++ b/tumblr/tumblr.py
@@ -0,0 +1,91 @@
+# -*- coding=utf-8 -*-
+from threading import Thread
+import Queue
+import requests
+import re
+import os
+import sys
+import time
+
+
+api_url='http://%s.tumblr.com/api/read?&num=50&start='
+UQueue=Queue.Queue()
+def getpost(uid,queue):
+ url='http://%s.tumblr.com/api/read?&num=50'%uid
+ page=requests.get(url).content
+ total=re.findall('',page)[0]
+ total=int(total)
+ a=[i*50 for i in range(1000) if i*50-total<0]
+ ul=api_url%uid
+ for i in a:
+ queue.put(ul+str(i))
+
+
+extractpicre = re.compile(r'(?<=).+?(?=)',flags=re.S) #search for url of maxium size of a picture, which starts with '' and ends with ''
+extractvideore=re.compile('/tumblr_(.*?)" type="video/mp4"')
+
+video_links = []
+pic_links = []
+vhead = 'https://vt.tumblr.com/tumblr_%s.mp4'
+
+class Consumer(Thread):
+
+ def __init__(self, l_queue):
+ super(Consumer,self).__init__()
+ self.queue = l_queue
+
+ def run(self):
+ session = requests.Session()
+ while 1:
+ link = self.queue.get()
+ print 'start parse post: ' + link
+ try:
+ content = session.get(link).content
+ videos = extractvideore.findall(content)
+ video_links.extend([vhead % v for v in videos])
+ pic_links.extend(extractpicre.findall(content))
+ except:
+ print 'url: %s parse failed\n' % link
+ if self.queue.empty():
+ break
+
+
+def main():
+ task=[]
+ for i in range(min(10,UQueue.qsize())):
+ t=Consumer(UQueue)
+ task.append(t)
+ for t in task:
+ t.start()
+ for t in task:
+ t.join
+ while 1:
+ for t in task:
+ if t.is_alive():
+ continue
+ else:
+ task.remove(t)
+ if len(task)==0:
+ break
+
+
+def write(name):
+ videos=[i.replace('/480','') for i in video_links]
+ pictures=pic_links
+ with open('%s_pictures.txt'%name,'w') as f:
+ for i in pictures:
+ f.write('%s\n'%i)
+ with open('%s_videos.txt'%name,'w') as f:
+ for i in videos:
+ f.write('%s\n'%i)
+
+
+if __name__=='__main__':
+ #name=sys.argv[1]
+ #name=name.strip()
+ print u"请输入tumblr博客ID:"
+ name=raw_input()
+ getpost(name,UQueue)
+ main()
+ write(name)
+ print u"解析完毕,请查看同目录下的文件"
\ No newline at end of file
diff --git a/v2ex/README.md b/v2ex/README.md
new file mode 100644
index 0000000..d73b636
--- /dev/null
+++ b/v2ex/README.md
@@ -0,0 +1,8 @@
+# v2ex签到脚本
+
+
+1. 修改脚本的username和passwd为你的v2ex用户名和密码
+
+2. 直接运行**python v2ex.py**
+
+3. 签到成功
diff --git a/v2ex/v2ex.py b/v2ex/v2ex.py
new file mode 100644
index 0000000..3a321ac
--- /dev/null
+++ b/v2ex/v2ex.py
@@ -0,0 +1,52 @@
+#-*- coding=utf-8 -*-
+import requests
+import re
+import sys
+reload(sys)
+sys.setdefaultencoding('utf8')
+
+
+signin='https://v2ex.com/signin'
+home='https://v2ex.com'
+url='https://v2ex.com/mission/daily'
+headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
+ 'Origin': 'https://www.v2ex.com',
+ 'Referer': 'https://www.v2ex.com/signin',
+ 'Host': 'www.v2ex.com',
+}
+data={}
+
+def sign(username,passwd):
+ try:
+ session=requests.Session()
+ session.headers=headers
+ loginhtm=session.get(signin,verify=False).content
+ usernameform=re.findall('',loginhtm)[0]
+ print usernameform
+ print passwdform
+ print onceform
+ data[usernameform]=username
+ data[passwdform]=passwd
+ data['once']=onceform
+ data['next']='/'
+ loginp=session.post(signin,data=data,verify=False)
+ sign=session.get(url).content
+ try:
+ qiandao=re.findall("location.href = '(.*?)'",sign)[0]
+ session.get(home+qiandao,verify=False)
+ print u'签到成功'
+ except:
+ print "fail"
+ except Exception,e:
+ print e
+
+
+
+if __name__=='__main__':
+ username=''
+ passwd=''
+ requests.packages.urllib3.disable_warnings()
+ sign(username,passwd)
\ No newline at end of file
diff --git a/zhihu/zhihu_picture_downloader.py b/zhihu/zhihu_picture_downloader.py
index 6730ef9..2c1a0a1 100644
--- a/zhihu/zhihu_picture_downloader.py
+++ b/zhihu/zhihu_picture_downloader.py
@@ -99,7 +99,7 @@ def login(email,passwd):
def get_pic_from_topic(id,offset):
global session
topicurl=topic_url+str(id)
- _xsrf=get_xsrf(topicurl)
+ #_xsrf=get_xsrf(topicurl)
pic_re=re.compile('data-actualsrc="(.*?)"')
inner_data={"url_token":id
,"pagesize":10
@@ -111,7 +111,7 @@ def get_pic_from_topic(id,offset):
session.headers['Referer']=topicurl
session.headers['Host']='www.zhihu.com'
session.headers['Origin']='https://www.zhihu.com'
- session.headers['X-Xsrftoken']=_xsrf
+ #session.headers['X-Xsrftoken']=_xsrf
js_data=session.post(api_url,data=data)
dat=json.loads(js_data.content)['msg']
pictures=[]
@@ -160,4 +160,4 @@ def downloader(url,path):
for pic in pictures:
downloader(pic,savepath)
print u"=====下载完毕====="
-
\ No newline at end of file
+
|