diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e74cf51 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +# idea +.idea/ + +# cache +**/__pycache__ + +# cookies +**/cookies** \ No newline at end of file diff --git a/.idea/Mybook.iml b/.idea/Mybook.iml deleted file mode 100755 index dbfdecd..0000000 --- a/.idea/Mybook.iml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100755 index c23ecac..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100755 index ef11d87..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100755 index aee895c..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/preferred-vcs.xml b/.idea/preferred-vcs.xml deleted file mode 100644 index 848cfc4..0000000 --- a/.idea/preferred-vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - ApexVCS - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100755 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/Asset/cover.jpeg b/Asset/cover.jpeg new file mode 100644 index 0000000..aad8357 Binary files /dev/null and b/Asset/cover.jpeg differ diff --git a/Ch1Spider/cookie/douban_login.py b/Ch1Spider/cookie/douban_login.py index 29ecbb7..c53b9c7 100755 --- a/Ch1Spider/cookie/douban_login.py +++ b/Ch1Spider/cookie/douban_login.py @@ -3,15 +3,14 @@ from fake_useragent import UserAgent from bs4 import BeautifulSoup - # 提交表单登录并获取cookie def get_cookie_from_net(): url = 'https://accounts.douban.com/login' # 构建表单 payload = {'source': 'None', 'redir': 'https://www.douban.com/', - 'form_email': '1021550072@qq.com', - 'form_password': 'pwd', + 'form_email': 'your email', + 'form_password': 'your pwd', 'login': '登录'} data = s.post(url, headers=headers, data=payload, verify=True) # 绕过了SSL验证 diff --git a/Ch1Spider/cookie/douban_login_new.py b/Ch1Spider/cookie/douban_login_new.py new file mode 100644 index 0000000..785818a --- /dev/null +++ b/Ch1Spider/cookie/douban_login_new.py @@ -0,0 +1,70 @@ +import requests +import pickle +from bs4 import BeautifulSoup + + +# 提交表单登录并获取cookie +def get_cookie_from_net(): + url = "https://accounts.douban.com/j/mobile/login/basic" + # 构建表单 + payload = { + "ck": "", + "name": "your email", + "password": "your password", + "remember": "true", + "ticket": "" + } + + data = s.post(url, headers=headers, data=payload).json() + # 检测登录是否成功 + if data["status"] == "success": + print("登陆成功!") + + with open('cookies.douban', 'wb') as f: + cookiedict = requests.utils.dict_from_cookiejar(s.cookies) + pickle.dump(cookiedict, f) + print("成功获取cookies!") + + return s.cookies + + +# 从cookie文件获取cookie +def get_cookie_from_file(): + with open('cookies.douban', 'rb') as f: + cookiedict = pickle.load(f) + cookies = requests.utils.cookiejar_from_dict(cookiedict) + print("解析文件,成功提取cookis...") + return cookies + + +# 假设这里我要获取自己的签名数据 +def getdata(html): + soup = BeautifulSoup(html.text, 'lxml') + mydata = soup.select('#display')[0].get_text() + ''' + 这里进行登录后其他数据的获取及存储,这里仅仅获取了自己的签名数据。 + ''' + return mydata + + +def login_and_getdata(): + print('获取cookis...') + try: + s.cookies = get_cookie_from_file() + except: + print("从文件获取cookies失败...\n正在尝试提交表单登录以获取...") + s.cookies = get_cookie_from_net() + + html = s.get('https://www.douban.com/people/146448257/', headers=headers) + # print(html.text) + data = getdata(html) + print(data) + + +if __name__ == '__main__': + # 一些全局变量 + s = requests.session() + # 这里务必更换 + headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6"} + # 登录并获取数据 + login_and_getdata() diff --git a/Ch1Spider/muti-threads/mutithreadspool.py b/Ch1Spider/muti-threads/mutithreadspool.py index 192cd64..004cfe3 100755 --- a/Ch1Spider/muti-threads/mutithreadspool.py +++ b/Ch1Spider/muti-threads/mutithreadspool.py @@ -123,8 +123,8 @@ def Myfutures(num_of_max_works=10): urls = get_urls_from_file(100) Mynormal() # 串行 MyprocessPool(10) # 进程池 - Mymultithread(10) # 多线程 Myfutures(10) # 线程池 + Mymultithread(10) # 多线程 ''' diff --git a/Ch4Data-Life/News/NewsReport.py b/Ch4Data-Life/News/NewsReport.py index f7c2135..3d1e545 100755 --- a/Ch4Data-Life/News/NewsReport.py +++ b/Ch4Data-Life/News/NewsReport.py @@ -69,11 +69,11 @@ def send_report(roi): s2 += title s2 += roi[title] s2 += '\n' - send_ms(s1+s2) + #send_ms(s1+s2) if __name__=='__main__': - web_data = get_web_data("http://tech.baidu.com/") + web_data = get_web_data("https://news.baidu.com/tech") titles = get_titles(web_data) key_words = 'iPhone' roi = get_roi(titles, key_words) diff --git a/Ch4Data-Life/News/NewsReportLog.txt b/Ch4Data-Life/News/NewsReportLog.txt index 9713aea..56b0709 100755 --- a/Ch4Data-Life/News/NewsReportLog.txt +++ b/Ch4Data-Life/News/NewsReportLog.txt @@ -2,3 +2,4 @@ iPhone相关新闻抓取程序日志Mon Jun 26 08:49:05 2017 ==========Mon Jun 26 08:49:05 2017==========6500元买吗?iPhone 8又有黑科技:3D传..http://tech.ifeng.com/a/20170625/44642859_0.shtml6500元买吗?iPhone 8不仅颜值高还搭载3D传感器http://news.pconline.com.cn/944/9444285.htmliPhone 8最新高清细节图曝光:无后置指纹http://mobile.yesky.com/182/237633182.shtmliPhone有什么录屏软件?iOS10如何不越狱实现录屏?http://news.86wan.com/xinwen/804619.html加拿大将迎来32GB iPhone 6:深空灰色http://iphone.tgbus.com/news/class/201706/20170625100446.shtml又一次iPhone 8爆料:壁纸和贴膜都有了http://iphone.tgbus.com/news/class/201706/20170625100212.shtml爆料大神拿到十多张iPhone8工程机图 快来看别声张http://digi.hsw.cn/system/2017/0625/85413.shtmliPhone到底是怎么诞生的?是乔布斯拿iPad改..http://www.citmt.cn/news/201706/7936.htmliPhone 8无线充电设计背后五大绝招是什么?http://tech.sina.com.cn/roll/2017-06-24/doc-ifyhmpew3268026.shtml ==========Fri Jun 30 15:24:55 2017==========iPhone8能用WiFi充电吗 iPhone8会..http://baijiahao.baidu.com/s?id=1571557098087634时光倒流十年 回顾初代苹果iPhone发售场景 http://www.cb.com.cn/shishiretu/2017_0630/1001871.htmliPhone10周年之际 设计师分享两款iPhone罕见原型机http://mobile.it168.com/a2017/0630/3138/000003138081.shtml一款电子墨水屏幕兼iPhone 7保护壳正在众筹http://news.pconline.com.cn/947/9474544.html十年:ZEALER 带你回顾历代 iPhonehttp://it.sohu.com/20170629/n499226359.shtml4.7寸经典手机 苹果iPhone 6苏宁售2578元http://mobile.pconline.com.cn/946/9468090.htmliPhone这十年也不易 它可迈过了不少坎儿http://mobile.zol.com.cn/645/6455077.html华强北红色iPhone8曝光:机身正面辣眼睛http://mobile.it168.com/a2017/0630/3138/000003138130.shtml微软或与iPhone对着干:有耳机插孔和可拆电池http://baijiahao.baidu.com/s?id=1571535272162131为实现快充!iPhone 8 有可能附赠10W充电..http://baijiahao.baidu.com/s?id=1571499839340222苹果告别神话十年,不再是身份标签的iPhone逐渐..http://news.sina.com.cn/c/2017-06-30/doc-ifyhrttz1773968.shtml苹果10年总共卖了12亿部iPhone:创收738..http://baijiahao.baidu.com/s?id=1571607401477009 ==========Fri Jun 30 15:49:59 2017==========你的iPhone电量总不够用?这里赶紧关了,让电量..http://baijiahao.baidu.com/s?id=1571610812674132苹果10年总共卖了12亿部iPhone:创收738..http://baijiahao.baidu.com/s?id=1571607401477009安卓是如何击败iPhone成为市占之王?http://baijiahao.baidu.com/s?id=1571607515558253时光倒流十年 回顾初代苹果iPhone发售场景 http://www.cb.com.cn/shishiretu/2017_0630/1001871.html一款电子墨水屏幕兼iPhone 7保护壳正在众筹http://news.pconline.com.cn/947/9474544.html十年:ZEALER 带你回顾历代 iPhonehttp://it.sohu.com/20170629/n499226359.shtml4.7寸经典手机 苹果iPhone 6苏宁售2578元http://mobile.pconline.com.cn/946/9468090.htmlOLED面板缺货 iPhone 8首批备货或短缺http://mobile.zol.com.cn/645/6452259.html长沙买iPhone 7仅4199元支持分期可送货http://mobile.zol.com.cn/645/6455107.html第一代iPhone成收藏界新品 原包装未开封能卖400..http://firm.workercn.cn/497/201706/30/170630102411800.shtml华强北红色iPhone8曝光:机身正面辣眼睛http://mobile.it168.com/a2017/0630/3138/000003138130.shtml微软或与iPhone对着干:有耳机插孔和可拆电池http://baijiahao.baidu.com/s?id=1571535272162131为实现快充!iPhone 8 有可能附赠10W充电..http://baijiahao.baidu.com/s?id=1571499839340222苹果告别神话十年,不再是身份标签的iPhone逐渐..http://news.sina.com.cn/c/2017-06-30/doc-ifyhrttz1773968.shtml +==========Sun Jun 26 23:27:55 2022==========新增“古铜色”,电池加大!苹果新iPhone又有新..http://baijiahao.baidu.com/s?id=1736418600868515753iPhone 14Pro或弃用刘海屏增古铜配色http://baijiahao.baidu.com/s?id=1736404759591336703iPhone销量霸榜,高端苹果也走“薄利多销”路线..http://baijiahao.baidu.com/s?id=1736520051940361766iPhone 14大爆料 值得果粉期待吗?丨财经科..http://baijiahao.baidu.com/s?id=1736671674884272833 diff --git a/Pics/Corr_Mat.png b/Pics/Corr_Mat.png new file mode 100755 index 0000000..c4537d2 Binary files /dev/null and b/Pics/Corr_Mat.png differ diff --git a/Pics/Data_visualization_process_v1.png b/Pics/Data_visualization_process_v1.png new file mode 100755 index 0000000..4c82858 Binary files /dev/null and b/Pics/Data_visualization_process_v1.png differ diff --git a/Pics/Email1.png b/Pics/Email1.png new file mode 100755 index 0000000..f021fea Binary files /dev/null and b/Pics/Email1.png differ diff --git a/Pics/Mxlsx.png b/Pics/Mxlsx.png new file mode 100755 index 0000000..1904c57 Binary files /dev/null and b/Pics/Mxlsx.png differ diff --git a/Pics/QP1.png b/Pics/QP1.png new file mode 100755 index 0000000..7280476 Binary files /dev/null and b/Pics/QP1.png differ diff --git a/Pics/QQ_DA.png b/Pics/QQ_DA.png new file mode 100755 index 0000000..7564ef9 Binary files /dev/null and b/Pics/QQ_DA.png differ diff --git a/Pics/Spider1.png b/Pics/Spider1.png new file mode 100755 index 0000000..87b4955 Binary files /dev/null and b/Pics/Spider1.png differ diff --git a/Pics/Tom.png b/Pics/Tom.png new file mode 100755 index 0000000..8156f75 Binary files /dev/null and b/Pics/Tom.png differ diff --git a/Pics/UA.png b/Pics/UA.png new file mode 100755 index 0000000..720397f Binary files /dev/null and b/Pics/UA.png differ diff --git a/Pics/V4_Chi.png b/Pics/V4_Chi.png new file mode 100755 index 0000000..d7de13c Binary files /dev/null and b/Pics/V4_Chi.png differ diff --git a/Pics/V4_Cos.png b/Pics/V4_Cos.png new file mode 100755 index 0000000..fb795ff Binary files /dev/null and b/Pics/V4_Cos.png differ diff --git a/Pics/V4_cos_sin.png b/Pics/V4_cos_sin.png new file mode 100755 index 0000000..2b4c752 Binary files /dev/null and b/Pics/V4_cos_sin.png differ diff --git a/Pics/V4_fill.png b/Pics/V4_fill.png new file mode 100755 index 0000000..9c7766e Binary files /dev/null and b/Pics/V4_fill.png differ diff --git a/Pics/V4_pandas_bar.png b/Pics/V4_pandas_bar.png new file mode 100755 index 0000000..4c6711a Binary files /dev/null and b/Pics/V4_pandas_bar.png differ diff --git a/Pics/V4_pandas_bar1.png b/Pics/V4_pandas_bar1.png new file mode 100755 index 0000000..cc55e8f Binary files /dev/null and b/Pics/V4_pandas_bar1.png differ diff --git a/Pics/V4_pandas_boxplot.png b/Pics/V4_pandas_boxplot.png new file mode 100755 index 0000000..562b823 Binary files /dev/null and b/Pics/V4_pandas_boxplot.png differ diff --git a/Pics/V4_pandas_hist.png b/Pics/V4_pandas_hist.png new file mode 100755 index 0000000..1c8a018 Binary files /dev/null and b/Pics/V4_pandas_hist.png differ diff --git a/Pics/V4_pandas_plot.png b/Pics/V4_pandas_plot.png new file mode 100755 index 0000000..476a8f6 Binary files /dev/null and b/Pics/V4_pandas_plot.png differ diff --git a/Pics/V4_pandas_scatter.png b/Pics/V4_pandas_scatter.png new file mode 100755 index 0000000..9eb65ef Binary files /dev/null and b/Pics/V4_pandas_scatter.png differ diff --git a/Pics/V4_snscountplot.png b/Pics/V4_snscountplot.png new file mode 100755 index 0000000..6ed860c Binary files /dev/null and b/Pics/V4_snscountplot.png differ diff --git a/Pics/V4_snsfactor.png b/Pics/V4_snsfactor.png new file mode 100755 index 0000000..a087620 Binary files /dev/null and b/Pics/V4_snsfactor.png differ diff --git a/Pics/V4_snskde.png b/Pics/V4_snskde.png new file mode 100755 index 0000000..1a03d95 Binary files /dev/null and b/Pics/V4_snskde.png differ diff --git a/Pics/V4_snslmplot.png b/Pics/V4_snslmplot.png new file mode 100755 index 0000000..43934ac Binary files /dev/null and b/Pics/V4_snslmplot.png differ diff --git a/Pics/V4_snsviolin.png b/Pics/V4_snsviolin.png new file mode 100755 index 0000000..6a916cd Binary files /dev/null and b/Pics/V4_snsviolin.png differ diff --git a/Pics/V4_subplot.png b/Pics/V4_subplot.png new file mode 100755 index 0000000..e9abfae Binary files /dev/null and b/Pics/V4_subplot.png differ diff --git a/Pics/V4_wc.png b/Pics/V4_wc.png new file mode 100755 index 0000000..de3c097 Binary files /dev/null and b/Pics/V4_wc.png differ diff --git a/Pics/all_plot.png b/Pics/all_plot.png new file mode 100755 index 0000000..230240b Binary files /dev/null and b/Pics/all_plot.png differ diff --git a/Pics/boxplot.png b/Pics/boxplot.png new file mode 100755 index 0000000..b8f3684 Binary files /dev/null and b/Pics/boxplot.png differ diff --git a/Pics/chardet.png b/Pics/chardet.png new file mode 100755 index 0000000..3b2a9c6 Binary files /dev/null and b/Pics/chardet.png differ diff --git a/Pics/chars.png b/Pics/chars.png new file mode 100755 index 0000000..3657d68 Binary files /dev/null and b/Pics/chars.png differ diff --git a/Pics/charset2.png b/Pics/charset2.png new file mode 100755 index 0000000..dcfa6db Binary files /dev/null and b/Pics/charset2.png differ diff --git a/Pics/cookie.png b/Pics/cookie.png new file mode 100755 index 0000000..7544d64 Binary files /dev/null and b/Pics/cookie.png differ diff --git a/Pics/json1.png b/Pics/json1.png new file mode 100755 index 0000000..4d4bc91 Binary files /dev/null and b/Pics/json1.png differ diff --git a/Pics/json2.png b/Pics/json2.png new file mode 100755 index 0000000..d8c7400 Binary files /dev/null and b/Pics/json2.png differ diff --git a/Pics/json3.png b/Pics/json3.png new file mode 100755 index 0000000..802d864 Binary files /dev/null and b/Pics/json3.png differ diff --git a/Pics/movie_data.png b/Pics/movie_data.png new file mode 100755 index 0000000..3bb88f3 Binary files /dev/null and b/Pics/movie_data.png differ diff --git a/Pics/numpy.png b/Pics/numpy.png new file mode 100755 index 0000000..2f3499c Binary files /dev/null and b/Pics/numpy.png differ diff --git a/Pics/nums.png b/Pics/nums.png new file mode 100755 index 0000000..a4a296a Binary files /dev/null and b/Pics/nums.png differ diff --git a/Pics/outliers.png b/Pics/outliers.png new file mode 100755 index 0000000..e28fddd Binary files /dev/null and b/Pics/outliers.png differ diff --git a/Pics/pair_plot.png b/Pics/pair_plot.png new file mode 100755 index 0000000..3ac9ca7 Binary files /dev/null and b/Pics/pair_plot.png differ diff --git a/Pics/pca1.png b/Pics/pca1.png new file mode 100755 index 0000000..eaa0e7d Binary files /dev/null and b/Pics/pca1.png differ diff --git a/Pics/pi.png b/Pics/pi.png new file mode 100755 index 0000000..2bbacfb Binary files /dev/null and b/Pics/pi.png differ diff --git a/Pics/sepal_plot.png b/Pics/sepal_plot.png new file mode 100755 index 0000000..aea17dc Binary files /dev/null and b/Pics/sepal_plot.png differ diff --git a/Pics/tree.png b/Pics/tree.png new file mode 100755 index 0000000..6a90da1 Binary files /dev/null and b/Pics/tree.png differ diff --git "a/Pics/\345\244\232\347\272\277\347\250\2131.png" "b/Pics/\345\244\232\347\272\277\347\250\2131.png" new file mode 100755 index 0000000..6bdffbd Binary files /dev/null and "b/Pics/\345\244\232\347\272\277\347\250\2131.png" differ diff --git "a/Pics/\345\244\232\347\272\277\347\250\2132.png" "b/Pics/\345\244\232\347\272\277\347\250\2132.png" new file mode 100755 index 0000000..7273fce Binary files /dev/null and "b/Pics/\345\244\232\347\272\277\347\250\2132.png" differ diff --git "a/Pics/\350\247\243\346\236\2201.png" "b/Pics/\350\247\243\346\236\2201.png" new file mode 100755 index 0000000..ad737cf Binary files /dev/null and "b/Pics/\350\247\243\346\236\2201.png" differ diff --git "a/Pics/\350\247\243\346\236\2202.png" "b/Pics/\350\247\243\346\236\2202.png" new file mode 100755 index 0000000..7896221 Binary files /dev/null and "b/Pics/\350\247\243\346\236\2202.png" differ diff --git "a/Pics/\350\247\243\346\236\2203.png" "b/Pics/\350\247\243\346\236\2203.png" new file mode 100755 index 0000000..0c43f34 Binary files /dev/null and "b/Pics/\350\247\243\346\236\2203.png" differ diff --git "a/Pics/\350\247\243\346\236\2204.png" "b/Pics/\350\247\243\346\236\2204.png" new file mode 100755 index 0000000..68cb12a Binary files /dev/null and "b/Pics/\350\247\243\346\236\2204.png" differ diff --git a/README.md b/README.md index 214def0..91883f4 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,24 @@ -### 《Python数据分析入门————从数据获取到可视化》 +# 《Python数据分析入门——从数据获取到可视化》 -#### Overview +

+ +

-本书源码和使用的数据(Python3)。 +## News +- Coming: 《写于出版6周年之后》 +## 概览 -#### Issue +书籍[《Python数据分析入门——从数据获取到可视化》](http://www.broadview.com.cn/book/5010) +中使用的所有源代码,数据等文件。 +关于本书的一些最新的进展的也会第一时间在这里公布。 +希望本书能对大家有所帮助。 -如果大家有问题和建议,可以直接在本项目提交issue,也可以发邮件给我(datahonor@gmail.com) -我会定期查看并尽快回复 -(也有读者到[出版社](http://www.broadview.com.cn/book/5010) -提交拉勘误的,也是可以的,不过只建议在那里提交typo相关的, -涉及到代码还是建议在Github提issue,方便一些)。 +## 反馈建议 -#### Errata - -[传送门](http://datahonor.com/2018/07/30/Errata-of-PythonDA/) - - - -最后,希望本书能对大家有所帮助。 +- Issue/Discussion(推荐): 对于代码的问题可以提交Issue,对于其他问题可以在Discussion中讨论。 +- Email: 也可以发邮件给我(datahonor@gmail.com),我会定期查看并尽快回复。 +## 勘误 +详见[勘误表](./errata.md)。 diff --git a/Report/.idea/.name b/Report/.idea/.name deleted file mode 100644 index ddf3b8c..0000000 --- a/Report/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -Report \ No newline at end of file diff --git a/Report/.idea/Report.iml b/Report/.idea/Report.iml deleted file mode 100644 index 2299d36..0000000 --- a/Report/.idea/Report.iml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/Report/.idea/misc.xml b/Report/.idea/misc.xml deleted file mode 100644 index 6a13835..0000000 --- a/Report/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - ApexVCS - - - \ No newline at end of file diff --git a/Report/.idea/modules.xml b/Report/.idea/modules.xml deleted file mode 100644 index 68b3ff3..0000000 --- a/Report/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/Report/.idea/workspace.xml b/Report/.idea/workspace.xml deleted file mode 100644 index 89413d5..0000000 --- a/Report/.idea/workspace.xml +++ /dev/null @@ -1,533 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 阿良 - 每个人都有故事每个人都有视角每个人都有选择 - 请善良,因为我们每个人都在和生活苦斗 - - - - - - - - - true - DEFINITION_ORDER - - - - - - - - - - - - - - - - - - - - - - - - - - -