Fun With Python
Fun With Python
PYTHON
AGENDA
Web Requests
Web Parser
Web Services
Web Requests
Requests Library
pip install requests #install library
import requests
requests.get('http://www.facebook.com').text
Web Requests
Make a Request
#GET Request
import requests
r = requests.get('http://www.facebook.com')
if r.status_code == 200:
    print('Success')
Success
Web Requests
Make a Request
#POST Request
import requests
r = requests.post('http://httpbin.org/post', data = {'key':'value'})
if r.status_code == 200:
    print('Success')
Success
Web Requests
Make a Request
#Other Types of Request
import requests
r = requests.put('http://httpbin.org/put', data = {'key':'value'})
r = requests.delete('http://httpbin.org/delete')
r = requests.head('http://httpbin.org/get')
r = requests.options('http://httpbin.org/get')
Web Requests
Passing Parameters In URLs
#GET Request with parameter
import requests
r = requests.get('https://www.google.co.th/?hl=th')
if r.status_code == 200:
    print('Success')
Success
Web Requests
Passing Parameters In URLs
#GET Request with parameter
import requests
r = requests.get('https://www.google.co.th', params={'hl': 'en'})
if r.status_code == 200:
    print('Success')
Success
Web Requests
Passing Parameters In URLs
#POST Request with parameter
import requests
r = requests.post("https://m.facebook.com",data={"key":"value"})
if r.status_code == 200:
    print('Success')
Success
Web Requests
Response Content
#Text Response
import requests
Web Requests
Response Content
#Response encoding
import requests
r = requests.get('https://www.google.co.th/logos/doodles/2016/kingbhumibol-adulyadej-1927-2016-5148101410029568.2-hp.png')
r.encoding = 'tis-620'
if r.status_code == 200:
    print(r.text)
'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage"
lang="th"><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta
content="/logos/doodles/2016/king-bhumibol-adulyadej-1927-2016-5148101410029568.2hp.png" itemprop="image"><meta content=" ...
Web Requests
Response Content
#Binary Response
import requests
r = requests.get('https://www.google.co.th/logos/doodles/2016/kingbhumibol-adulyadej-1927-2016-5148101410029568.2-hp.png')
if r.status_code == 200:
    open('img.png', 'wb').write(r.content)
Web Requests
Response Status Codes
#200 Response (OK)
import requests
r = requests.get('https://api.github.com/events')
if r.status_code == requests.codes.ok:
    data = r.json()  # decode the JSON response body into Python objects
    print(data[0]['actor'])
Web Requests
Response Status Codes
#200 Response (OK)
import requests
r = requests.get('https://api.github.com/events')
print(r.status_code)
200
Web Requests
Response Status Codes
#404
import requests
r = requests.get('https://api.github.com/events/404')
print(r.status_code)
404
Web Requests
Response Headers
#Response Headers
import requests
r = requests.get('http://www.sanook.com')
print(r.headers)
print(r.headers['Date'])
{'Content-Type': 'text/html; charset=UTF-8', 'Date': 'Tue, 08 Nov 2016 14:38:41 GMT', 'Cache-Control': 'private, max-age=0', 'Age': '16', 'Content-Encoding': 'gzip', 'Content-Length': '38089',
'Connection': 'keep-alive', 'Vary': 'Accept-Encoding', 'Accept-Ranges': 'bytes'}
Web Requests
Timeouts
#Timeout (in seconds)
import requests
r = requests.get('http://www.sanook.com', timeout=0.001)
Web Requests
Authentication
#Basic Authentication
import requests
r = requests.get('https://api.github.com/user', auth=('user', 'pass'))
print(r.status_code)
200
Web Requests
read more : http://docs.python-requests.org/en/master/
Web Requests
Quiz#1 : Tag Monitoring
Web Parser
HTML Parser : beautifulsoup
pip install beautifulsoup4 #install library
Web Parser
Parse a document
from bs4 import BeautifulSoup
soup = BeautifulSoup("<html>data</html>", "html.parser")
print(soup)
<html>data</html>
Web Parser
Parse a document
#Navigating using tag names
from bs4 import BeautifulSoup
Web Parser
<head><title>The Dormouse's story</title></head>
<title>The Dormouse's story</title>
<p class="title"><b>The Dormouse's story</b></p>
Web Parser
Parse a document
#Access string
from bs4 import BeautifulSoup
html_doc = "<h1>hello</h1>"
soup = BeautifulSoup(html_doc,"html.parser")
print(soup.h1.string)
hello
Web Parser
Parse a document
#Access attribute
from bs4 import BeautifulSoup
http://example.com/elsie
Web Parser
Parse a document
#Get all text in the page
from bs4 import BeautifulSoup
Web Parser
Parse a document
# find_all()
from bs4 import BeautifulSoup
Web Parser
Parse a document
<a class="sister" href="http://example.com/elsie"
id="link1">Elsie</a>
<a class="sister" href="http://example.com/lacie"
id="link2">Lacie</a>
Web Parser
Parse a document
#find_all()
soup.find_all(id='link2')
soup.find_all(href=re.compile("elsie"))
soup.find_all(id=True)
data_soup.find_all(attrs={"data-foo": "value"})
soup.find_all("a", class_="sister")
soup.find_all("a", recursive=False)
soup.p.find_all("a", recursive=False)
Web Parser
Parse a document
re.compile(..)
<a href="http://192.x.x.x" class="c1">hello</a>
<a href="https://192.x.x.x" class="c1">hello</a>
<a href="https://www.com" class="c1">hello</a>
find_all(href=re.compile(r"(https|http)://[0-9\.]"))
https://docs.python.org/2/howto/regex.html
Web Parser
Parse a document
read more : https://www.crummy.com/software/BeautifulSoup/bs4/doc/
Web Parser
Quiz#2 : Tag Extraction
Web Parser
JSON Parser : json
built-in function
import json
Web Parser
JSON Parser : json
#JSON string
json_doc = '''{"employees":[
{"firstName":"John", "lastName":"Doe"},
{"firstName":"Anna", "lastName":"Smith"},
{"firstName":"Peter", "lastName":"Jones"}
]}'''
Web Parser
JSON Parser : json
#Parse string to object
import json
json_obj = json.loads(json_doc)
print(json_obj)
Web Parser
JSON Parser : json
#Access json object
import json
json_obj = json.loads(json_doc)
print(json_obj["employees"][0]["firstName"])
print(json_obj["employees"][0]["lastName"])
John
Doe
Web Parser
JSON Parser : json
#Create json doc
import json
json_obj = {"firstName" : "name", "lastName" : "last"} #Dictionary
print(json.dumps(json_obj,indent=1))
{
 "firstName": "name",
 "lastName": "last"
}
Web Parser
Quiz#3 : Post Monitoring
Web Parser
Quiz#3 : Post Monitoring
URL
https://graph.facebook.com/v2.8/<PageID>?fields=posts.limit(100)%7Blikes.limit(1).summary(true)%2Ccreated_time%7D&access_token=
Web Service
Web Service
Web Service Type
Web Parser
SOAP Example
Web Parser
SOAP Request
Web Parser
REST
Web Parser
REST Request
Web Parser
JSON Web Service
Web Parser
Application
Web Parser
JSON
{"employees":[
{"firstName":"John", "lastName":"Doe"},
{"firstName":"Anna", "lastName":"Smith"},
{"firstName":"Peter", "lastName":"Jones"}
]}
list
dict
key
value
Web Service
Create Simple Web Service
pip install Flask-API
from flask_api import FlaskAPI
app = FlaskAPI(__name__)
@app.route('/example/')
def example():
    return {'hello': 'world'}
app.run(debug=False,port=5555)
Web Service
Create Simple Web Service
#receive input
@app.route('/hello/<name>/<lastName>')
def example(name,lastName):
    return {'hello':name}
app.run(debug=False,port=5555)
Web Parser
Quiz#4 : Tag Service
Web Parser
Quiz#4 : Top Tag Service
Databases
Zero configuration
SQLite does not need to be installed, as there is no setup procedure to use it.
Serverless
SQLite is not implemented as a separate server process. With SQLite, the process that wants to access the
database reads and writes directly from the database files on disk as there is no intermediary server process.
Stable Cross-Platform Database File
The SQLite file format is cross-platform. A database file written on one machine can be copied to and used
on a different machine with a different architecture.
Single Database File
An SQLite database is a single ordinary disk file that can be located anywhere in the directory hierarchy.
Compact
When optimized for size, the whole SQLite library with everything enabled is less than 400KB in size
SQLite
built-in library : sqlite3
import sqlite3
conn = sqlite3.connect('my.db')
SQLite
Workflow
1. Connect to db
2. Get cursor
3. Execute command
4. Commit (insert / update/delete) / Fetch result (select)
5. Close database
SQLite
Workflow Example
import sqlite3
conn = sqlite3.connect('example.db') # connect db
c = conn.cursor() # get cursor
# execute1
c.execute('''CREATE TABLE stocks
(date text, trans text, symbol text, qty real, price real)''')
# execute2
c.execute("INSERT INTO stocks VALUES ('2006-01-05','BUY','RHAT',100,35.14)")
conn.commit() # commit
conn.close() # close
SQLite
Data Type
Database Storage
import sqlite3
conn = sqlite3.connect('example.db') #store in disk
conn = sqlite3.connect(':memory:') #store in memory
Execute
#execute
import sqlite3
conn = sqlite3.connect('example.db')
c = conn.cursor()
t = ('RHAT',)
c.execute('SELECT * FROM stocks WHERE symbol=?', t)
Execute
#executemany
import sqlite3
conn = sqlite3.connect('example.db')
c = conn.cursor()
purchases = [('2006-03-28', 'BUY', 'IBM', 1000, 45.00),
('2006-04-05', 'BUY', 'MSFT', 1000, 72.00),
('2006-04-06', 'SELL', 'IBM', 500, 53.00),]
c.executemany('INSERT INTO stocks VALUES (?,?,?,?,?)', purchases)
fetch
#fetchone
import sqlite3
conn = sqlite3.connect('example.db')
c = conn.cursor()
c.execute('SELECT * FROM stocks')
c.fetchone()
fetch
#fetchall
import sqlite3
conn = sqlite3.connect('example.db')
c = conn.cursor()
c.execute('SELECT * FROM stocks')
for d in c.fetchall():
    print(d)
Context manager
import sqlite3
con = sqlite3.connect(":memory:")
con.execute("create table person (id integer primary key, firstname varchar unique)")
#con.commit() is called automatically afterwards
with con:
    con.execute("insert into person(firstname) values (?)", ("Joe",))
Read more :
https://docs.python.org/2/library/sqlite3.html
https://www.tutorialspoint.com/python/python_database_access.htm
Quiz#5 : Post DB
Processing : pandas
pip install pandas
Pandas : Series
#create series with Array-like
import pandas as pd
from numpy.random import rand
s = pd.Series(rand(5), index=['a', 'b', 'c', 'd', 'e'])
print(s)
a    0.690232
b    0.738294
c    0.153817
d    0.619822
e    0.4347
Pandas : Series
#create series with dictionary
import pandas as pd
from numpy.random import rand
Pandas : Series
#create series with Scalar
import pandas as pd
from numpy.random import rand
s = pd.Series(5., index=['a', 'b', 'a', 'd', 'a']) #index can duplicate
print(s['a'])
a    5
a    5
a    5
dtype: float64
Pandas : Series
#access series data
import pandas as pd
from numpy.random import rand
s = pd.Series(5., index=['a', 'b', 'a', 'd', 'a']) #index can duplicate
print(s[0])
print(s[:3])
5.0
a 5
b 5
a 5
dtype: float64
Pandas : Series
#series operations
import pandas as pd
from numpy.random import rand
import numpy as np
s = pd.Series(rand(10)) #index can duplicate
s = s + 2
s = s * s
s = np.exp(s)
print(s)
0     187.735606
1     691.660752
2      60.129741
3     595.438606
4     769.479456
5     397.052123
6    4691.926483
7    1427.593520
8     180.001824
9     410.994395
dtype: float64
Pandas : Series
#series filtering
import pandas as pd
from numpy.random import rand
import numpy as np
s = pd.Series(rand(10)) #index can duplicate
s = s[s > 0.1]
print(s)
1    0.708700
2    0.910090
3    0.380613
6    0.692324
7    0.508440
8    0.763977
9    0.470675
dtype: float64
Pandas : Series
#series incomplete data
import pandas as pd
from numpy.random import rand
import numpy as np
s1 = pd.Series(rand(10))
s2 = pd.Series(rand(8))
s = s1 + s2
print(s)
0    0.813747
1    1.373839
2    1.569716
3    1.624887
4    1.515665
5    0.526779
6    1.544327
7    0.740962
8         NaN
9         NaN
dtype: float64
Pandas : Series
#create series with Array-like
import pandas as pd
from numpy.random import rand
s = pd.Series(rand(5), index=['a', 'b', 'c', 'd', 'e'])
print(s)
a    0.690232
b    0.738294
c    0.153817
d    0.619822
e    0.4347
Pandas : DataFrame
2-dimensional labeled data
structure with columns
of potentially different types
Pandas : DataFrame
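#create dataframe with dict
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
     'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
df = pd.DataFrame(d)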
print(df)
one two
a 1 1
b 2 2
c 3 3
d NaN 4
Pandas : DataFrame
#create dataframe with dict list
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df)
   one  two
0    1    4
1    2    3
2    3    2
3    4    1
Pandas : DataFrame
#access dataframe column
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df['one'])
0    1
1    2
2    3
3    4
Pandas : DataFrame
#access dataframe row
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df.iloc[:3])
   one  two
0    1    4
1    2    3
2    3    2
Pandas : DataFrame
#add new column
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
df['three'] = [1,2,3,2]
print(df)
   one  two  three
0    1    4      1
1    2    3      2
2    3    2      3
3    4    1      2
Pandas : DataFrame
#show data : head() and tail()
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
df['three'] = [1,2,3,2]
print(df.head())
print(df.tail())
   one  two  three
0    1    4      1
1    2    3      2
2    3    2      3
3    4    1      2
Pandas : DataFrame
#dataframe summary
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df.describe())
Pandas : DataFrame
#dataframe function
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df.mean())
one 2.5
two 2.5
dtype: float64
Pandas : DataFrame
#dataframe function
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df.corr()) #calculate correlation
one two
one 1 -1
two -1 1
Pandas : DataFrame
#dataframe filtering
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df[(df['one'] > 1) & (df['one'] < 3)])
   one  two
1    2    3
Pandas : DataFrame
#dataframe filtering with isin
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
print(df[df['one'].isin([2,4])])
   one  two
1    2    3
3    4    1
Pandas : DataFrame
#dataframe with row data
0
1
Pandas : DataFrame
#dataframe sort values
Pandas : DataFrame
#dataframe from csv file
file.csv
df = pd.read_csv('file.csv')
print(df)
one,two,three
1,2,3
1,2,3
1,2,3
1
1
1
2
2
2
3
3
3
Pandas : DataFrame
#dataframe from csv file, without header.
file.csv
df = pd.read_csv('file.csv', header=None)
print(df)
1,2,3
1,2,3
1,2,3
   0  1  2
0  1  2  3
1  1  2  3
2  1  2  3
Pandas : DataFrame
Pandas : DataFrame
#dataframe from html, need to install lxml first (pip install lxml)
df = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
print(df[0])
Abbreviation
State Name
AL
Alabama
AK
Alaska
AZ
Arizona
Capital
Became a State
1. Goto https://archive.ics.uci.edu/ml/datasets/Adult
to read data description
2. Parse data into pandas using read_csv() and set
columns name
3. Explore the data to answer the following questions:
- Find the number of people in each education level.
- Find the correlation and covariance between the continuous
fields.
- Find the average age of the United-States population where income
>50K.
df[3].value_counts()
Visualizing : seaborn
pip install seaborn
Visualizing : seaborn
seaborn : set inline plot for jupyter
%matplotlib inline
import numpy as np
import seaborn as sns
Visualizing : seaborn
seaborn : plot result
Visualizing : seaborn
seaborn : set layout
%matplotlib inline
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
f,ax = plt.subplots(1,1,figsize=(10, 10))
sns.barplot(x=[1,2,3,4,5],y=[3,2,3,4,2])
Visualizing : seaborn
seaborn : set layout
Visualizing : seaborn
seaborn : set layout
%matplotlib inline
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
f,ax = plt.subplots(2,2,figsize=(10, 10))
sns.barplot(x=[1,2,3,4,5],y=[3,2,3,4,2],ax=ax[0,0])
sns.distplot([3,2,3,4,2],ax=ax[0,1])
Visualizing : seaborn
seaborn : set layout
Visualizing : seaborn
seaborn : axis setting
%matplotlib inline
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
f,ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=[1,2,3,4,5],y=[3,2,3,4,2])
ax.set_xlabel("number")
ax.set_ylabel("value")
Visualizing : seaborn
seaborn : axis setting
Visualizing : seaborn
seaborn : with pandas dataframe
%matplotlib inline
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
d = {'x' : [1., 2., 3., 4.], 'y' : [4., 3., 2., 1.]}
df = pd.DataFrame(d)
f,ax = plt.subplots(figsize=(10, 5))
sns.barplot(x='x', y='y', data=df)
Visualizing : seaborn
seaborn : with pandas dataframe
Visualizing : seaborn
seaborn : plot types
http://seaborn.pydata.org/examples/index.html
1. Goto https://archive.ics.uci.edu/ml/datasets/Adult
to read data description
2. Parse data into pandas using read_csv() and set
columns name