diff --git a/.idea/Mybook.iml b/.idea/Mybook.iml deleted file mode 100755 index 032c3de..0000000 --- a/.idea/Mybook.iml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100755 index c23ecac..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100755 index ef11d87..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100755 index aee895c..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/preferred-vcs.xml b/.idea/preferred-vcs.xml deleted file mode 100644 index 848cfc4..0000000 --- a/.idea/preferred-vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - ApexVCS - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100755 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/Asset/cover.jpeg b/Asset/cover.jpeg new file mode 100644 index 0000000..aad8357 Binary files /dev/null and b/Asset/cover.jpeg differ diff --git a/Ch4Data-Life/News/NewsReport.py b/Ch4Data-Life/News/NewsReport.py index f7c2135..3d1e545 100755 --- a/Ch4Data-Life/News/NewsReport.py +++ b/Ch4Data-Life/News/NewsReport.py @@ -69,11 +69,11 @@ def send_report(roi): s2 += title s2 += roi[title] s2 += '\n' - send_ms(s1+s2) + #send_ms(s1+s2) if __name__=='__main__': - web_data = get_web_data("http://tech.baidu.com/") + web_data = get_web_data("https://news.baidu.com/tech") titles = get_titles(web_data) key_words = 'iPhone' roi = get_roi(titles, key_words) diff --git a/Ch4Data-Life/News/NewsReportLog.txt b/Ch4Data-Life/News/NewsReportLog.txt index 9713aea..56b0709 100755 --- a/Ch4Data-Life/News/NewsReportLog.txt +++ b/Ch4Data-Life/News/NewsReportLog.txt @@ -2,3 +2,4 @@ iPhone相关新闻抓取程序日志Mon Jun 26 08:49:05 2017 ==========Mon Jun 26 08:49:05 2017==========6500元买吗?iPhone 8又有黑科技:3D传..http://tech.ifeng.com/a/20170625/44642859_0.shtml6500元买吗?iPhone 8不仅颜值高还搭载3D传感器http://news.pconline.com.cn/944/9444285.htmliPhone 8最新高清细节图曝光:无后置指纹http://mobile.yesky.com/182/237633182.shtmliPhone有什么录屏软件?iOS10如何不越狱实现录屏?http://news.86wan.com/xinwen/804619.html加拿大将迎来32GB iPhone 6:深空灰色http://iphone.tgbus.com/news/class/201706/20170625100446.shtml又一次iPhone 8爆料:壁纸和贴膜都有了http://iphone.tgbus.com/news/class/201706/20170625100212.shtml爆料大神拿到十多张iPhone8工程机图 快来看别声张http://digi.hsw.cn/system/2017/0625/85413.shtmliPhone到底是怎么诞生的?是乔布斯拿iPad改..http://www.citmt.cn/news/201706/7936.htmliPhone 8无线充电设计背后五大绝招是什么?http://tech.sina.com.cn/roll/2017-06-24/doc-ifyhmpew3268026.shtml ==========Fri Jun 30 15:24:55 2017==========iPhone8能用WiFi充电吗 iPhone8会..http://baijiahao.baidu.com/s?id=1571557098087634时光倒流十年 回顾初代苹果iPhone发售场景 http://www.cb.com.cn/shishiretu/2017_0630/1001871.htmliPhone10周年之际 设计师分享两款iPhone罕见原型机http://mobile.it168.com/a2017/0630/3138/000003138081.shtml一款电子墨水屏幕兼iPhone 7保护壳正在众筹http://news.pconline.com.cn/947/9474544.html十年:ZEALER 带你回顾历代 iPhonehttp://it.sohu.com/20170629/n499226359.shtml4.7寸经典手机 苹果iPhone 6苏宁售2578元http://mobile.pconline.com.cn/946/9468090.htmliPhone这十年也不易 它可迈过了不少坎儿http://mobile.zol.com.cn/645/6455077.html华强北红色iPhone8曝光:机身正面辣眼睛http://mobile.it168.com/a2017/0630/3138/000003138130.shtml微软或与iPhone对着干:有耳机插孔和可拆电池http://baijiahao.baidu.com/s?id=1571535272162131为实现快充!iPhone 8 有可能附赠10W充电..http://baijiahao.baidu.com/s?id=1571499839340222苹果告别神话十年,不再是身份标签的iPhone逐渐..http://news.sina.com.cn/c/2017-06-30/doc-ifyhrttz1773968.shtml苹果10年总共卖了12亿部iPhone:创收738..http://baijiahao.baidu.com/s?id=1571607401477009 ==========Fri Jun 30 15:49:59 2017==========你的iPhone电量总不够用?这里赶紧关了,让电量..http://baijiahao.baidu.com/s?id=1571610812674132苹果10年总共卖了12亿部iPhone:创收738..http://baijiahao.baidu.com/s?id=1571607401477009安卓是如何击败iPhone成为市占之王?http://baijiahao.baidu.com/s?id=1571607515558253时光倒流十年 回顾初代苹果iPhone发售场景 http://www.cb.com.cn/shishiretu/2017_0630/1001871.html一款电子墨水屏幕兼iPhone 7保护壳正在众筹http://news.pconline.com.cn/947/9474544.html十年:ZEALER 带你回顾历代 iPhonehttp://it.sohu.com/20170629/n499226359.shtml4.7寸经典手机 苹果iPhone 6苏宁售2578元http://mobile.pconline.com.cn/946/9468090.htmlOLED面板缺货 iPhone 8首批备货或短缺http://mobile.zol.com.cn/645/6452259.html长沙买iPhone 7仅4199元支持分期可送货http://mobile.zol.com.cn/645/6455107.html第一代iPhone成收藏界新品 原包装未开封能卖400..http://firm.workercn.cn/497/201706/30/170630102411800.shtml华强北红色iPhone8曝光:机身正面辣眼睛http://mobile.it168.com/a2017/0630/3138/000003138130.shtml微软或与iPhone对着干:有耳机插孔和可拆电池http://baijiahao.baidu.com/s?id=1571535272162131为实现快充!iPhone 8 有可能附赠10W充电..http://baijiahao.baidu.com/s?id=1571499839340222苹果告别神话十年,不再是身份标签的iPhone逐渐..http://news.sina.com.cn/c/2017-06-30/doc-ifyhrttz1773968.shtml +==========Sun Jun 26 23:27:55 2022==========新增“古铜色”,电池加大!苹果新iPhone又有新..http://baijiahao.baidu.com/s?id=1736418600868515753iPhone 14Pro或弃用刘海屏增古铜配色http://baijiahao.baidu.com/s?id=1736404759591336703iPhone销量霸榜,高端苹果也走“薄利多销”路线..http://baijiahao.baidu.com/s?id=1736520051940361766iPhone 14大爆料 值得果粉期待吗?丨财经科..http://baijiahao.baidu.com/s?id=1736671674884272833 diff --git a/Pics/Corr_Mat.png b/Pics/Corr_Mat.png new file mode 100755 index 0000000..c4537d2 Binary files /dev/null and b/Pics/Corr_Mat.png differ diff --git a/Pics/Data_visualization_process_v1.png b/Pics/Data_visualization_process_v1.png new file mode 100755 index 0000000..4c82858 Binary files /dev/null and b/Pics/Data_visualization_process_v1.png differ diff --git a/Pics/Email1.png b/Pics/Email1.png new file mode 100755 index 0000000..f021fea Binary files /dev/null and b/Pics/Email1.png differ diff --git a/Pics/Mxlsx.png b/Pics/Mxlsx.png new file mode 100755 index 0000000..1904c57 Binary files /dev/null and b/Pics/Mxlsx.png differ diff --git a/Pics/QP1.png b/Pics/QP1.png new file mode 100755 index 0000000..7280476 Binary files /dev/null and b/Pics/QP1.png differ diff --git a/Pics/QQ_DA.png b/Pics/QQ_DA.png new file mode 100755 index 0000000..7564ef9 Binary files /dev/null and b/Pics/QQ_DA.png differ diff --git a/Pics/Spider1.png b/Pics/Spider1.png new file mode 100755 index 0000000..87b4955 Binary files /dev/null and b/Pics/Spider1.png differ diff --git a/Pics/Tom.png b/Pics/Tom.png new file mode 100755 index 0000000..8156f75 Binary files /dev/null and b/Pics/Tom.png differ diff --git a/Pics/UA.png b/Pics/UA.png new file mode 100755 index 0000000..720397f Binary files /dev/null and b/Pics/UA.png differ diff --git a/Pics/V4_Chi.png b/Pics/V4_Chi.png new file mode 100755 index 0000000..d7de13c Binary files /dev/null and b/Pics/V4_Chi.png differ diff --git a/Pics/V4_Cos.png b/Pics/V4_Cos.png new file mode 100755 index 0000000..fb795ff Binary files /dev/null and b/Pics/V4_Cos.png differ diff --git a/Pics/V4_cos_sin.png b/Pics/V4_cos_sin.png new file mode 100755 index 0000000..2b4c752 Binary files /dev/null and b/Pics/V4_cos_sin.png differ diff --git a/Pics/V4_fill.png b/Pics/V4_fill.png new file mode 100755 index 0000000..9c7766e Binary files /dev/null and b/Pics/V4_fill.png differ diff --git a/Pics/V4_pandas_bar.png b/Pics/V4_pandas_bar.png new file mode 100755 index 0000000..4c6711a Binary files /dev/null and b/Pics/V4_pandas_bar.png differ diff --git a/Pics/V4_pandas_bar1.png b/Pics/V4_pandas_bar1.png new file mode 100755 index 0000000..cc55e8f Binary files /dev/null and b/Pics/V4_pandas_bar1.png differ diff --git a/Pics/V4_pandas_boxplot.png b/Pics/V4_pandas_boxplot.png new file mode 100755 index 0000000..562b823 Binary files /dev/null and b/Pics/V4_pandas_boxplot.png differ diff --git a/Pics/V4_pandas_hist.png b/Pics/V4_pandas_hist.png new file mode 100755 index 0000000..1c8a018 Binary files /dev/null and b/Pics/V4_pandas_hist.png differ diff --git a/Pics/V4_pandas_plot.png b/Pics/V4_pandas_plot.png new file mode 100755 index 0000000..476a8f6 Binary files /dev/null and b/Pics/V4_pandas_plot.png differ diff --git a/Pics/V4_pandas_scatter.png b/Pics/V4_pandas_scatter.png new file mode 100755 index 0000000..9eb65ef Binary files /dev/null and b/Pics/V4_pandas_scatter.png differ diff --git a/Pics/V4_snscountplot.png b/Pics/V4_snscountplot.png new file mode 100755 index 0000000..6ed860c Binary files /dev/null and b/Pics/V4_snscountplot.png differ diff --git a/Pics/V4_snsfactor.png b/Pics/V4_snsfactor.png new file mode 100755 index 0000000..a087620 Binary files /dev/null and b/Pics/V4_snsfactor.png differ diff --git a/Pics/V4_snskde.png b/Pics/V4_snskde.png new file mode 100755 index 0000000..1a03d95 Binary files /dev/null and b/Pics/V4_snskde.png differ diff --git a/Pics/V4_snslmplot.png b/Pics/V4_snslmplot.png new file mode 100755 index 0000000..43934ac Binary files /dev/null and b/Pics/V4_snslmplot.png differ diff --git a/Pics/V4_snsviolin.png b/Pics/V4_snsviolin.png new file mode 100755 index 0000000..6a916cd Binary files /dev/null and b/Pics/V4_snsviolin.png differ diff --git a/Pics/V4_subplot.png b/Pics/V4_subplot.png new file mode 100755 index 0000000..e9abfae Binary files /dev/null and b/Pics/V4_subplot.png differ diff --git a/Pics/V4_wc.png b/Pics/V4_wc.png new file mode 100755 index 0000000..de3c097 Binary files /dev/null and b/Pics/V4_wc.png differ diff --git a/Pics/all_plot.png b/Pics/all_plot.png new file mode 100755 index 0000000..230240b Binary files /dev/null and b/Pics/all_plot.png differ diff --git a/Pics/boxplot.png b/Pics/boxplot.png new file mode 100755 index 0000000..b8f3684 Binary files /dev/null and b/Pics/boxplot.png differ diff --git a/Pics/chardet.png b/Pics/chardet.png new file mode 100755 index 0000000..3b2a9c6 Binary files /dev/null and b/Pics/chardet.png differ diff --git a/Pics/chars.png b/Pics/chars.png new file mode 100755 index 0000000..3657d68 Binary files /dev/null and b/Pics/chars.png differ diff --git a/Pics/charset2.png b/Pics/charset2.png new file mode 100755 index 0000000..dcfa6db Binary files /dev/null and b/Pics/charset2.png differ diff --git a/Pics/cookie.png b/Pics/cookie.png new file mode 100755 index 0000000..7544d64 Binary files /dev/null and b/Pics/cookie.png differ diff --git a/Pics/json1.png b/Pics/json1.png new file mode 100755 index 0000000..4d4bc91 Binary files /dev/null and b/Pics/json1.png differ diff --git a/Pics/json2.png b/Pics/json2.png new file mode 100755 index 0000000..d8c7400 Binary files /dev/null and b/Pics/json2.png differ diff --git a/Pics/json3.png b/Pics/json3.png new file mode 100755 index 0000000..802d864 Binary files /dev/null and b/Pics/json3.png differ diff --git a/Pics/movie_data.png b/Pics/movie_data.png new file mode 100755 index 0000000..3bb88f3 Binary files /dev/null and b/Pics/movie_data.png differ diff --git a/Pics/numpy.png b/Pics/numpy.png new file mode 100755 index 0000000..2f3499c Binary files /dev/null and b/Pics/numpy.png differ diff --git a/Pics/nums.png b/Pics/nums.png new file mode 100755 index 0000000..a4a296a Binary files /dev/null and b/Pics/nums.png differ diff --git a/Pics/outliers.png b/Pics/outliers.png new file mode 100755 index 0000000..e28fddd Binary files /dev/null and b/Pics/outliers.png differ diff --git a/Pics/pair_plot.png b/Pics/pair_plot.png new file mode 100755 index 0000000..3ac9ca7 Binary files /dev/null and b/Pics/pair_plot.png differ diff --git a/Pics/pca1.png b/Pics/pca1.png new file mode 100755 index 0000000..eaa0e7d Binary files /dev/null and b/Pics/pca1.png differ diff --git a/Pics/pi.png b/Pics/pi.png new file mode 100755 index 0000000..2bbacfb Binary files /dev/null and b/Pics/pi.png differ diff --git a/Pics/sepal_plot.png b/Pics/sepal_plot.png new file mode 100755 index 0000000..aea17dc Binary files /dev/null and b/Pics/sepal_plot.png differ diff --git a/Pics/tree.png b/Pics/tree.png new file mode 100755 index 0000000..6a90da1 Binary files /dev/null and b/Pics/tree.png differ diff --git "a/Pics/\345\244\232\347\272\277\347\250\2131.png" "b/Pics/\345\244\232\347\272\277\347\250\2131.png" new file mode 100755 index 0000000..6bdffbd Binary files /dev/null and "b/Pics/\345\244\232\347\272\277\347\250\2131.png" differ diff --git "a/Pics/\345\244\232\347\272\277\347\250\2132.png" "b/Pics/\345\244\232\347\272\277\347\250\2132.png" new file mode 100755 index 0000000..7273fce Binary files /dev/null and "b/Pics/\345\244\232\347\272\277\347\250\2132.png" differ diff --git "a/Pics/\350\247\243\346\236\2201.png" "b/Pics/\350\247\243\346\236\2201.png" new file mode 100755 index 0000000..ad737cf Binary files /dev/null and "b/Pics/\350\247\243\346\236\2201.png" differ diff --git "a/Pics/\350\247\243\346\236\2202.png" "b/Pics/\350\247\243\346\236\2202.png" new file mode 100755 index 0000000..7896221 Binary files /dev/null and "b/Pics/\350\247\243\346\236\2202.png" differ diff --git "a/Pics/\350\247\243\346\236\2203.png" "b/Pics/\350\247\243\346\236\2203.png" new file mode 100755 index 0000000..0c43f34 Binary files /dev/null and "b/Pics/\350\247\243\346\236\2203.png" differ diff --git "a/Pics/\350\247\243\346\236\2204.png" "b/Pics/\350\247\243\346\236\2204.png" new file mode 100755 index 0000000..68cb12a Binary files /dev/null and "b/Pics/\350\247\243\346\236\2204.png" differ diff --git a/README.md b/README.md index cd87748..91883f4 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,24 @@ -### 《Python数据分析入门————从数据获取到可视化》 +# 《Python数据分析入门——从数据获取到可视化》 -#### 概览 +

+ +

-这里是本书中使用的所有源代码,数据等文件。关于本书的一些最新的进展的也会第一时间在这里公布。希望本书能对大家有所帮助。 +## News +- Coming: 《写于出版6周年之后》 +## 概览 -#### 问题提交 +书籍[《Python数据分析入门——从数据获取到可视化》](http://www.broadview.com.cn/book/5010) +中使用的所有源代码,数据等文件。 +关于本书的一些最新的进展的也会第一时间在这里公布。 +希望本书能对大家有所帮助。 -如果大家有问题和建议,可以直接在本项目提交issue(推荐),也可以发邮件给我(datahonor@gmail.com) -我会定期查看并尽快回复。 -(也有读者到[出版社](http://www.broadview.com.cn/book/5010) -提交勘误的,也是可以的,不过只建议在那里提交typo相关的, -涉及到代码还是建议在Github提issue,方便一些)。 +## 反馈建议 -#### 勘误 +- Issue/Discussion(推荐): 对于代码的问题可以提交Issue,对于其他问题可以在Discussion中讨论。 +- Email: 也可以发邮件给我(datahonor@gmail.com),我会定期查看并尽快回复。 -已更正: - -| 页码 | 错误 | 改正 | -|--------|--------|--------| -| 201 | 上方第一个阴影框(训练集数据)“种类”列最后两行将“bumpy”全改为“orange” | 第二次印刷时更正| -| 202 | 第三行,“是橙子还是水果”改为“是橙子还是苹果” |第二次印刷时更正| -| 99 | 代码框最后两行交换位置(因为多线程会把`urls`清空)| 第六次印刷时更正 | -| 115 |正文第三行“运行输出如下。”下面的输出有误,下面的数据需要我们自己手动创建 | 第六次印刷时更正 | -| 245 | 代码框,最上面应加上`import random as rnd`| 第六次印刷时更正 | -| 247,248 | 两个LP问题的目标函数漏掉,改正参考[博客](http://datahonor.com/2017/03/22/%E5%88%A9%E7%94%A8Python%E8%A7%A3%E7%BA%BF%E6%80%A7%E8%A7%84%E5%88%92%E9%97%AE%E9%A2%98-LP/)。 | 第六次印刷时更正 | -| 71-73 | 豆瓣模拟登录报错 | 第六次印刷时更正 | - - - -待更正: - -| 页码 | 错误 | 改正 | -|--------|--------|--------| - - - - - -#### 意见征集 - -个人认为,一本书在出版后绝对不是结束的标志,而是新一轮的开始。本书写作的初衷在于,当时国内很多的书并没有将数据爬取,数据处理,分析以及可视化放到一起来写,我认为这是一件值得去尝试的事情,所以才有了这本书。 - -在本书出版一年多来,根据各方的反馈也在不断进行着完善。于此同时也意识到书中存在的问题,比较核心的就在于知识的深度与广度之间的矛盾,本书是着眼于广度的,所以深度就有所欠缺。后面会考虑对内容进行删减,在顾及广度的同时突出重点(统计学方法,机器学习方法等算法)。 - -此外,如果有机会写第二版,会将文章核心内容以Jupyter notebook的形式呈现,以更好地说明问题。 - -如上所言,是有一些反馈,但是不太多。希望各位作为读者,在阅读完本书后能够写一些建议给我,我也能更好地明确下面修改的方向。 +## 勘误 +详见[勘误表](./errata.md)。 diff --git a/Report/.idea/.name b/Report/.idea/.name deleted file mode 100644 index ddf3b8c..0000000 --- a/Report/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -Report \ No newline at end of file diff --git a/Report/.idea/Report.iml b/Report/.idea/Report.iml deleted file mode 100644 index 2299d36..0000000 --- a/Report/.idea/Report.iml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/Report/.idea/misc.xml b/Report/.idea/misc.xml deleted file mode 100644 index 6a13835..0000000 --- a/Report/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - ApexVCS - - - \ No newline at end of file diff --git a/Report/.idea/modules.xml b/Report/.idea/modules.xml deleted file mode 100644 index 68b3ff3..0000000 --- a/Report/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/Report/.idea/workspace.xml b/Report/.idea/workspace.xml deleted file mode 100644 index 89413d5..0000000 --- a/Report/.idea/workspace.xml +++ /dev/null @@ -1,533 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 阿良 - 每个人都有故事每个人都有视角每个人都有选择 - 请善良,因为我们每个人都在和生活苦斗 - - - - - - - - - true - DEFINITION_ORDER - - - - - - - - - - - - - - - - - - - - - - - - - - -