Advanced Matplotlib in Python 1695062970
Advanced Matplotlib in Python 1695062970
In [1]:
1 import numpy as np
2 import pandas as pd
3 import matplotlib.pyplot as plt
Colored Scatterplots
In [2]:
1 iris = pd.read_csv('iris.csv')
2 iris.head()
Out[2]:
In [3]:
1 plt.scatter(iris['SepalLengthCm'],iris['PetalLengthCm'])
2
3 plt.xlabel('Sepal Length')
4 plt.ylabel('Petal Length')
5
6 plt.show()
In [4]:
1 iris['Species'] = iris['Species'].replace({'Iris-setosa':0,'Iris-versicolor':1,'Iris
2 iris
Out[4]:
In [5]:
1 plt.scatter(iris['SepalLengthCm'],iris['PetalLengthCm'], c=iris['Species'])
2
3 plt.xlabel('Sepal Length')
4 plt.ylabel('Petal Length')
5
6 plt.show()
In [6]:
1 plt.scatter(iris['SepalLengthCm'],iris['PetalLengthCm'], c=iris['Species'],
2 cmap='winter')
3
4 plt.xlabel('Sepal Length')
5 plt.ylabel('Petal Length')
6
7 plt.show()
In [7]:
1 plt.scatter(iris['SepalLengthCm'],iris['PetalLengthCm'], c=iris['Species'],
2 cmap='winter')
3
4 plt.xlabel('Sepal Length')
5 plt.ylabel('Petal Length')
6
7 plt.colorbar() # adding the colour bar
8 plt.show()
In [8]:
1 plt.scatter(iris['SepalLengthCm'],iris['PetalLengthCm'], c=iris['Species'],
2 cmap='winter', alpha=0.8)
3 # alpha sets the marker opacity (0 = fully transparent, 1 = fully opaque)
4
5 plt.xlabel('Sepal Length')
6 plt.ylabel('Petal Length')
7
8 plt.colorbar()
9 plt.show()
Plot size
In [9]:
Annotations
In [10]:
1 # sample code
2
3 x = [1,2,3,4]
4 y = [5,6,7,8]
5
6 plt.scatter(x,y)
7
8 plt.text(1,5,'Point 1')
9 plt.text(2,6,'Point 2')
10 plt.text(3,7,'Point 3')
11 plt.text(4,8,'Point 4')
12
13 plt.show()
In [11]:
1 x = [1,2,3,4]
2 y = [5,6,7,8]
3
4 plt.scatter(x,y)
5
6 plt.text(1,5,'Point 1', fontdict={'size':10,'color':'green'})
7 plt.text(2,6,'Point 2', fontdict={'size':14,'color':'black'})
8 plt.text(3,7,'Point 3', fontdict={'size':18,'color':'grey'})
9 plt.text(4,8,'Point 4', fontdict={'size':22,'color':'blue'})
10
11 plt.show()
In [12]:
1 batter = pd.read_csv('batter.csv')
2 batter
Out[12]:
In [13]:
1 batter.shape
Out[13]:
(605, 4)
In [14]:
1 sample_df = batter.head(100).sample(25,random_state=5)
2 sample_df
Out[14]:
In [15]:
1 plt.scatter(sample_df['avg'], sample_df['strike_rate'] )
2 plt.show()
In [16]:
1 plt.scatter(sample_df['avg'], sample_df['strike_rate'] )
2
3 for i in range(sample_df.shape[0]):
4 plt.text(sample_df['avg'].values[i],sample_df['strike_rate'].values[i],
5 sample_df['batter'].values[i])
6
7 plt.show()
In [17]:
In [18]:
Subplot
In [20]:
1 batter = pd.read_csv('batter.csv')
2 batter
Out[20]:
In [21]:
Out[21]:
In [22]:
1 plt.subplots()
Out[22]:
In the code above, plt.subplots() returns two objects: a figure and an axes. We will now separate these two
objects by unpacking the result of plt.subplots().
In [23]:
1 fig, ax = plt.subplots()
2
3 ax.scatter(batter['avg'],batter['strike_rate'])
4 plt.show()
In [24]:
1 fig, ax = plt.subplots()
2
3 ax.scatter(batter['avg'],batter['strike_rate'])
4 ax.set_title("comparison")
5 ax.set_xlabel("Avg")
6 ax.set_ylabel("Strike Rate")
7
8 plt.show()
In [25]:
1 fig, ax = plt.subplots(figsize=(12,8))
2
3 ax.scatter(batter['avg'],batter['strike_rate'])
4 ax.set_title("comparison")
5 ax.set_xlabel("Avg")
6 ax.set_ylabel("Strike Rate")
7
8 plt.show()
In [26]:
In [27]:
1 fig, ax = plt.subplots(nrows=2,ncols=1)
2 plt.show()
In [28]:
Out[28]:
In [29]:
Out[29]:
In [30]:
1 fig, ax = plt.subplots(nrows=2,ncols=1)
2
3 ax[0].scatter(batter['avg'],batter['strike_rate'])
4 ax[1].scatter(batter['avg'],batter['runs'])
5
6 plt.show()
In [31]:
1 fig, ax = plt.subplots(nrows=2,ncols=1)
2
3 ax[0].scatter(batter['avg'],batter['strike_rate'])
4 ax[1].scatter(batter['avg'],batter['runs'])
5
6 ax[0].set_title("avg Vs strike rate")
7 ax[0].set_ylabel("strike rate")
8
9 ax[1].set_title("avg Vs runs")
10 ax[1].set_ylabel("runs")
11 ax[1].set_xlabel("avg")
12
13 plt.show()
In [32]:
In [33]:
In [34]:
1 fig = plt.figure(figsize=(9,9))  # one 9 x 9 inch figure that the axes below are added to
2
3 ax1 = fig.add_subplot(2,2,1)  # slot 1 of a 2 x 2 grid
4 ax1.scatter(batter['avg'],batter['strike_rate'],color='red')
5
6 ax2 = fig.add_subplot(2,2,2)  # slot 2
7 ax2.hist(batter['runs'])
8
9 ax3 = fig.add_subplot(2,2,4)  # slot 4; slot 3 is never added in this cell
10 ax3.hist(batter['avg'])
11
12 plt.show()
3D scatter plot
In [35]:
1 batter = pd.read_csv('batter.csv')
2 batter
Out[35]:
In [36]:
1 batter
Out[36]:
In [37]:
1 fig = plt.figure()
2
3 ax = plt.subplot(projection="3d")
4
5 plt.show()
In [38]:
1 fig = plt.figure()
2
3 ax = plt.subplot(projection="3d")
4 ax.scatter3D(batter['runs'],batter['avg'],batter['strike_rate'])
5
6 plt.show()
In [39]:
1 fig = plt.figure()
2
3 ax = plt.subplot(projection="3d")
4 ax.scatter3D(batter['runs'],batter['avg'],batter['strike_rate'])
5
6 ax.set_title('IPL batsman analysis')
7 ax.set_xlabel('Runs')
8 ax.set_ylabel('Avg')
9 ax.set_zlabel('SR')
10
11 plt.show()
3D line plot
In [40]:
1 x = [0,1,5,25]
2 y = [0,10,13,0]
3 z = [0,13,20,9]
4
5 fig = plt.figure()
6
7 ax = plt.subplot(projection="3d")
8
9 ax.scatter3D(x,y,z,s=[100,100,100,100])
10 ax.plot3D(x,y,z, color='red')
11 plt.show()
3D Surface plot
In [41]:
1 x = np.linspace(-10,10,100)
2 y = np.linspace(-10,10,100)
In [42]:
1 xx, yy = np.meshgrid(x,y)
2 xx.shape
Out[42]:
(100, 100)
In [43]:
Out[43]:
(100, 100)
In [44]:
1 fig = plt.figure(figsize=(12,8))
2
3 ax = plt.subplot(projection='3d')
4 ax.plot_surface(xx,yy,z)  # NOTE(review): no visible cell assigns a (100, 100) z before this point; the last visible z is the 4-element list from In [40]. Presumably z was defined in the empty In [43] — confirm
5
6 plt.show()
In [45]:
1 fig = plt.figure(figsize=(12,8))
2
3 ax = plt.subplot(projection='3d')
4 p = ax.plot_surface(xx,yy,z, cmap='viridis')  # NOTE(review): no visible cell assigns a (100, 100) z before this point; presumably it was defined in the empty In [43] — confirm
5
6 fig.colorbar(p)  # the colour bar is built from the surface handle returned above
7 plt.show()
In [46]:
1 x = np.linspace(-10,10,100)
2 y = np.linspace(-10,10,100)
3
4 xx, yy = np.meshgrid(x,y)
5
6 z = np.sin(xx) + np.cos(yy)
7
8 fig = plt.figure(figsize=(12,8))
9
10 ax = plt.subplot(projection='3d')
11 p = ax.plot_surface(xx,yy,z, cmap='viridis')
12
13 fig.colorbar(p)
14 plt.show()
Contour plot
In [47]:
1 x = np.linspace(-10,10,100)
2 y = np.linspace(-10,10,100)
3
4 xx, yy = np.meshgrid(x,y)
5
6 z = xx**2 + yy**2
In [48]:
1 fig = plt.figure(figsize=(12,8))
2
3 ax = plt.subplot()
4 p = ax.contour(xx,yy,z, cmap='viridis')
5
6 fig.colorbar(p)
7 plt.show()
In [49]:
In [50]:
1 x = np.linspace(-10,10,100)
2 y = np.linspace(-10,10,100)
3
4 xx, yy = np.meshgrid(x,y)
5
6 z = np.sin(xx) + np.cos(yy)
7
8 fig = plt.figure(figsize=(12,8))
9
10 ax = plt.subplot()
11 p = ax.contourf(xx,yy,z, cmap='viridis')
12
13 fig.colorbar(p)
14 plt.show()
Heatmap
A heat map is a two-dimensional representation of data in which values are represented by colors. A
simple heat map provides an immediate visual summary of information. More elaborate heat maps
allow the viewer to understand complex data sets.
In [51]:
1 delivery = pd.read_csv("IPL_Ball_by_Ball_2008_2022.csv")
2 delivery.head()
Out[51]:
non-
ID innings overs ballnumber batter bowler extra_type batsman_ru
striker
YBK Mohammed JC
0 1312200 1 0 1 NaN
Jaiswal Shami Buttler
YBK Mohammed JC
1 1312200 1 0 2 legbyes
Jaiswal Shami Buttler
JC Mohammed YBK
2 1312200 1 0 3 NaN
Buttler Shami Jaiswal
YBK Mohammed JC
3 1312200 1 0 4 NaN
Jaiswal Shami Buttler
YBK Mohammed JC
4 1312200 1 0 5 NaN
Jaiswal Shami Buttler
We want the data for all 20 overs of every IPL match played so far, and then we want the number of sixes hit by
batsmen on each ball of each of the 20 overs.
In [52]:
1 delivery['ballnumber'].unique()
Out[52]:
In [53]:
1 temp_df = delivery[(delivery['ballnumber'].isin([1,2,3,4,5,6]))
2 & (delivery['batsman_run']==6)]
In [54]:
1 temp_df.pivot_table(index='overs',columns='ballnumber',
2 values='batsman_run', aggfunc='count')
Out[54]:
ballnumber 1 2 3 4 5 6
overs
0 9 17 31 39 33 27
1 31 40 49 56 58 54
2 75 62 70 72 58 76
3 60 74 74 103 74 71
4 71 76 112 80 81 72
5 77 102 63 86 78 80
6 34 56 49 59 64 38
7 59 62 73 70 69 56
8 86 83 79 81 73 52
In [55]:
1 grid = temp_df.pivot_table(index='overs',columns='ballnumber',
2 values='batsman_run', aggfunc='count')
In [56]:
1 plt.figure(figsize=(20,10))
2 plt.imshow(grid)
3 plt.colorbar()
4 plt.show()
In [57]:
1 plt.figure(figsize=(20,10))
2 plt.imshow(grid)
3 plt.colorbar()
4
5 plt.yticks(delivery['overs'].unique(), list(range(1,21)))
6 plt.xticks(np.arange(0,6), list(range(1,7)))
7 plt.show()
Pandas plot()
In [58]:
1 # on a series
2
3 s = pd.Series([1,2,3,4,5,6,7])
4 s.plot(kind='pie')
Out[58]:
<AxesSubplot:ylabel='None'>
In [59]:
In [60]:
1 tips = sns.load_dataset('tips')  # NOTE(review): `sns` is not imported in any visible cell; presumably `import seaborn as sns` was in the empty In [59] — confirm
2 tips.head()
Out[60]:
Scatter plot -> labels -> markers -> figsize -> color -> cmap
In [61]:
1 tips.plot(kind='scatter',x='total_bill',y='tip')
Out[61]:
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
In [62]:
1 tips.plot(kind='scatter',x='total_bill',y='tip',title='Cost Analysis',color='red',
2 marker='*',figsize=(10,6))
Out[62]:
In [63]:
1 tips.plot(kind='scatter',x='total_bill',y='tip',title='Cost Analysis',
2 marker='*',figsize=(10,6),c='sex',cmap='viridis')
Out[63]:
dataset = 'https://raw.githubusercontent.com/m-mehdi/pandas_tutorials/main/weekly_stocks.csv'
(https://raw.githubusercontent.com/m-mehdi/pandas_tutorials/main/weekly_stocks.csv)
In [64]:
1 stocks = pd.read_csv('weekly_stocks.csv')
2 stocks.head()
Out[64]:
line plot
In [65]:
1 stocks['MSFT'].plot(kind='line')
2 plt.show()
In [66]:
1 stocks.plot(kind='line')
2 plt.show()
In [67]:
1 stocks.plot(kind='line',x='Date')
2
3 plt.xticks(rotation='50')
4 plt.show()
In [68]:
1 stocks[['Date','MSFT','FB']].plot(kind='line',x='Date')
2 # here by using the fancy indexing we will showcase the MSFT and FB only
3
4 plt.xticks(rotation='50')
5 plt.show()
bar chart
In [69]:
1 temp = pd.read_csv('batsman_season_record.csv')
2 temp
Out[69]:
In [70]:
1 temp['2015'].plot(kind='bar')
Out[70]:
<AxesSubplot:>
In [71]:
1 tips
Out[71]:
In [72]:
1 tips.groupby('sex')['total_bill'].mean()
Out[72]:
sex
Male 20.744076
Female 18.056897
Name: total_bill, dtype: float64
In [73]:
1 tips.groupby('sex')['total_bill'].mean().plot(kind='bar')
Out[73]:
<AxesSubplot:xlabel='sex'>
In [74]:
1 temp
Out[74]:
In [75]:
1 temp.plot(kind='bar',x='batsman')
Out[75]:
<AxesSubplot:xlabel='batsman'>
In [76]:
Out[76]:
<AxesSubplot:xlabel='batsman'>
histogram
In [77]:
1 stocks.head()
Out[77]:
In [78]:
1 stocks.plot(kind='hist')
Out[78]:
<AxesSubplot:ylabel='Frequency'>
In [79]:
1 stocks[['MSFT','FB']].plot(kind='hist',bins=40)
Out[79]:
<AxesSubplot:ylabel='Frequency'>
Pie charts
In [80]:
1 df = pd.DataFrame(
2 {
3 'batsman':['Dhawan','Rohit','Kohli','SKY','Pandya','Pant'],
4 'match1':[120,90,35,45,12,10],
5 'match2':[0,1,123,130,34,45],
6 'match3':[50,24,145,45,10,90]
7 }
8 )
9
10 df.head()
Out[80]:
0 Dhawan 120 0 50
1 Rohit 90 1 24
3 SKY 45 130 45
4 Pandya 12 34 10
In [81]:
1 df['match1'].plot(kind='pie', labels=df['batsman'])
Out[81]:
<AxesSubplot:ylabel='match1'>
In [82]:
Out[82]:
<AxesSubplot:ylabel='match1'>
In [83]:
Out[83]:
array([<AxesSubplot:ylabel='match1'>, <AxesSubplot:ylabel='match2'>,
<AxesSubplot:ylabel='match3'>], dtype=object)
In [84]:
1 stocks.head()
Out[84]:
In [85]:
1 stocks.plot(kind='line', subplots=True)
Out[85]:
multiindex dataframe
In [86]:
1 tips.head()
Out[86]:
In [87]:
Out[87]:
<AxesSubplot:xlabel='day'>
In [88]:
Out[88]:
day time
In [89]:
Out[89]:
<AxesSubplot:xlabel='day,time'>
In [90]:
Out[90]:
day time
In [91]:
Out[91]:
<AxesSubplot:xlabel='day,time'>
In [92]:
Out[92]:
<AxesSubplot:xlabel='day,time'>
In [93]:
Out[93]:
array([<AxesSubplot:ylabel='(Male, Yes)'>,
<AxesSubplot:ylabel='(Male, No)'>,
<AxesSubplot:ylabel='(Female, Yes)'>,
<AxesSubplot:ylabel='(Female, No)'>], dtype=object)