Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                

Python Course Cheat Sheet

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 30

1.

Basic
a. While Loop

i=1
while i < 5:
    print('i is: {}'.format(i))
    i = i+1

i is: 1
i is: 2
i is: 3
i is: 4

b. range()

range(5)
range(0, 5)

c. list comprehension

x = [1,2,3,4]
[item**2 for item in x]

[1, 4, 9, 16]

d. map and filter

seq = [1,2,3,4,5]
list(filter(lambda item: item%2 == 0,seq))

2. Numpy

a. zeros and ones - Generate arrays of zeros or ones


np.zeros(3)
array([ 0., 0., 0.])

np.zeros((5,5))

array([[ 0., 0., 0., 0., 0.],


[ 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0.]])

np.ones(3)
array([ 1., 1., 1.])

b. linspace - Return evenly spaced numbers over a specified interval.


np.linspace(0,10,3)
array([ 0., 5., 10.])
np.linspace(0,10,50)
array([ 0. , 0.20408163, 0.40816327, 0.6122449 ,
0.81632653, 1.02040816, 1.2244898 , 1.42857143,
1.63265306, 1.83673469, 2.04081633, 2.24489796,
2.44897959, 2.65306122, 2.85714286, 3.06122449,
3.26530612, 3.46938776, 3.67346939, 3.87755102,
4.08163265, 4.28571429, 4.48979592, 4.69387755,
4.89795918, 5.10204082, 5.30612245, 5.51020408,
5.71428571, 5.91836735, 6.12244898, 6.32653061,
6.53061224, 6.73469388, 6.93877551, 7.14285714,
7.34693878, 7.55102041, 7.75510204, 7.95918367,
8.16326531, 8.36734694, 8.57142857, 8.7755102 ,
8.97959184, 9.18367347, 9.3877551 , 9.59183673,
9.79591837, 10. ])

c. eye - Creates an identity matrix


np.eye(4)
array([[ 1., 0., 0., 0.],
[ 0., 1., 0., 0.],
[ 0., 0., 1., 0.],
[ 0., 0., 0., 1.]])

d. Reshape - Returns an array containing the same data with a new shape.
arr = np.arange(25)
ranarr = np.random.randint(0,50,10)
arr.reshape(5,5)
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]])

e. max,min,argmax,argmin
ranarr = np.random.randint(0,50,10)
array([10, 12, 41, 17, 49, 2, 46, 3, 19, 39])

ranarr.max()
49

ranarr.argmax()
4

ranarr.min()
2
ranarr.argmin()
5
f. dtype - You can also grab the data type of the object in the array:
arr.dtype
dtype('int64')

3. Pandas
a. DataFrames
from numpy.random import randn
np.random.seed(101)

df = pd.DataFrame(randn(5,4),index='A B C D E'.split(),columns='W X Y Z'.split())

newind = 'CA NY WY OR CO'.split()


newind
['CA', 'NY', 'WY', 'OR', 'CO']
df['States'] = newind
df

          W         X         Y         Z States
A  2.706850  0.628133  0.907969  0.503826     CA
B  0.651118 -0.319318 -0.848077  0.605965     NY
C -2.018168  0.740122  0.528813 -0.589001     WY
D  0.188695 -0.758872 -0.933237  0.955057     OR
E  0.190794  1.978757  2.605967  0.683509     CO

b. Multi-Index and Index Hierarchy

# Index Levels
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
hier_index = list(zip(outside,inside))
[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

hier_index = pd.MultiIndex.from_tuples(hier_index)
df = pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])

df.loc['G1']

A B

-
0.01869
1 0.94299
0
9

1.41079 1.00219
2
7 5

-
0.34369
3 2.37165
8
2

c. Missing Data

df.dropna()

Drops every row that contains a null value.

df.dropna(axis=1)

Drops every column that contains a null value.

df.dropna(thresh=2)
Drops the rows that have fewer than 2 non-null values.

df.fillna(value='FILL VALUE')

d. Merging, Joining, and Concatenating

left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],


'key2': ['K0', 'K1', 'K0', 'K1'],
'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3']})

right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],


'key2': ['K0', 'K0', 'K0', 'K0'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']})
pd.merge(left, right, on=['key1', 'key2'])

left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],


'B': ['B0', 'B1', 'B2']},
index=['K0', 'K1', 'K2'])

Output of pd.merge(left, right, how='left', on=['key1', 'key2']):

  key1 key2   A   B    C    D
0   K0   K0  A0  B0   C0   D0
1   K0   K1  A1  B1  NaN  NaN
2   K1   K0  A2  B2   C1   D1
3   K1   K0  A2  B2   C2   D2
4   K2   K1  A3  B3  NaN  NaN

right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],


'D': ['D0', 'D2', 'D3']},
index=['K0', 'K2', 'K3'])
left.join(right)

     A   B    C    D
K0  A0  B0   C0   D0
K1  A1  B1  NaN  NaN
K2  A2  B2   C2   D2

e. Pivot Table
df.pivot_table(values='D',index=['A', 'B'],columns=['C'])
Visualization
import matplotlib.pyplot as plt
%matplotlib inline (you'll also need this line to see plots in the notebook)
plt.show() - to have the figure pop up in another window.

plt.plot(x, y, 'r') # 'r' is the color red


plt.xlabel('X Axis Title Here')
plt.ylabel('Y Axis Title Here')
plt.title('String Title Here')
plt.show()

Creating Multiplots on Same Canvas

# plt.subplot(nrows, ncols, plot_number)


plt.subplot(1,2,1)
plt.plot(x, y, 'r--') # More on color options later
plt.subplot(1,2,2)
plt.plot(y, x, 'g*-');
# Creates blank canvas
fig = plt.figure()

axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # main axes


axes2 = fig.add_axes([0.2, 0.5, 0.4, 0.3]) # inset axes

# Larger Figure Axes 1


axes1.plot(x, y, 'b')
axes1.set_xlabel('X_label_axes1')
axes1.set_ylabel('Y_label_axes1')
axes1.set_title('Axes 1 Title')

# Insert Figure Axes 2


axes2.plot(y, x, 'r')
axes2.set_xlabel('X_label_axes2')
axes2.set_ylabel('Y_label_axes2')
axes2.set_title('Axes 2 Title');
# Empty canvas of 1 by 2 subplots
fig, axes = plt.subplots(nrows=1, ncols=2)

fig = plt.figure(figsize=(8,4), dpi=100)

fig, axes = plt.subplots(figsize=(12,3))

axes.plot(x, y, 'r')
axes.set_xlabel('x')
axes.set_ylabel('y')
axes.set_title('title');

fig = plt.figure()

ax = fig.add_axes([0,0,1,1])

ax.plot(x, x**2, label="x**2")


ax.plot(x, x**3, label="x**3")
ax.legend()
fig, ax = plt.subplots(figsize=(12,6))

ax.plot(x, x+1, color="red", linewidth=0.25)


ax.plot(x, x+2, color="red", linewidth=0.50)
ax.plot(x, x+3, color="red", linewidth=1.00)
ax.plot(x, x+4, color="red", linewidth=2.00)

# possible linestyle options '-', '--', '-.', ':', 'steps'


ax.plot(x, x+5, color="green", lw=3, linestyle='-')
ax.plot(x, x+6, color="green", lw=3, ls='-.')
ax.plot(x, x+7, color="green", lw=3, ls=':')

# custom dash
line, = ax.plot(x, x+8, color="black", lw=1.50)
line.set_dashes([5, 10, 15, 10]) # format: line length, space length, ...

# possible marker symbols: marker = '+', 'o', '*', 's', ',', '.', '1', '2', '3', '4', ...
ax.plot(x, x+ 9, color="blue", lw=3, ls='-', marker='+')
ax.plot(x, x+10, color="blue", lw=3, ls='--', marker='o')
ax.plot(x, x+11, color="blue", lw=3, ls='-', marker='s')
ax.plot(x, x+12, color="blue", lw=3, ls='--', marker='1')

# marker size and color


ax.plot(x, x+13, color="purple", lw=1, ls='-', marker='o', markersize=2)
ax.plot(x, x+14, color="purple", lw=1, ls='-', marker='o', markersize=4)
ax.plot(x, x+15, color="purple", lw=1, ls='-', marker='o', markersize=8,
markerfacecolor="red")
ax.plot(x, x+16, color="purple", lw=1, ls='-', marker='s', markersize=8,
markerfacecolor="yellow", markeredgewidth=3, markeredgecolor="green");
n = np.array([0,1,2,3,4,5])
fig, axes = plt.subplots(1, 4, figsize=(12,3))

axes[0].scatter(xx, xx + 0.25*np.random.randn(len(xx)))
axes[0].set_title("scatter")

axes[1].step(n, n**2, lw=2)


axes[1].set_title("step")

axes[2].bar(n, n**2, align="center", width=0.5, alpha=0.5)


axes[2].set_title("bar")

axes[3].fill_between(x, x**2, x**3, color="green", alpha=0.5);


axes[3].set_title("fill_between");
Seaborn
sns.distplot(tips['total_bill'])

sns.distplot(tips['total_bill'],kde=False,bins=30)

## jointplot

jointplot() allows you to match up two distplots for bivariate data, with your
choice of **kind** parameter to compare with:
* “scatter”
* “reg”
* “resid”
* “kde”
* “hex”

sns.jointplot(x='total_bill',y='tip',data=tips,kind='scatter')
sns.jointplot(x='total_bill',y='tip',data=tips,kind='hex')
sns.jointplot(x='total_bill',y='tip',data=tips,kind='reg')

pairplot
pairplot will plot pairwise relationships across an entire dataframe (for the
numerical columns) and supports a color hue argument (for categorical
columns).

sns.pairplot(tips,hue='sex',palette='coolwarm')
## rugplot

Rugplots are a very simple concept: they just draw a dash mark for every point
on a univariate distribution. They are the building block of a KDE plot:

sns.rugplot(tips['total_bill'])
barplot and countplot
These very similar plots allow you to get aggregate data off a
categorical feature in your data. barplot is a general plot that allows
you to aggregate the categorical data based off some function, by
default the mean:

sns.barplot(x='sex',y='total_bill',data=tips)

sns.barplot(x='sex',y='total_bill',data=tips,estimator=np.std)
sns.countplot(x='sex',data=tips)

sns.boxplot(x="day", y="total_bill", data=tips,palette='rainbow')


sns.boxplot(data=tips,palette='rainbow',orient='h')

boxplot and violinplot


boxplots and violinplots are used to show the distribution of categorical
data. A box plot (or box-and-whisker plot) shows the distribution of
quantitative data in a way that facilitates comparisons between variables or
across levels of a categorical variable. The box shows the quartiles of the
dataset while the whiskers extend to show the rest of the distribution, except
for points that are determined to be “outliers” using a method that is a
function of the inter-quartile range.

sns.boxplot(x="day", y="total_bill", hue="smoker",data=tips, palette="coolwarm")

A violin plot plays a similar role as a box and whisker plot. It shows
the distribution of quantitative data across several levels of one (or
more) categorical variables such that those distributions can be
compared. Unlike a box plot, in which all of the plot components
correspond to actual datapoints, the violin plot features a kernel
density estimation of the underlying distribution.

sns.violinplot(x="day", y="total_bill", data=tips,palette='rainbow')

stripplot and swarmplot


The stripplot will draw a scatterplot where one variable is
categorical. A strip plot can be drawn on its own, but it is also a
good complement to a box or violin plot in cases where you want to
show all observations along with some representation of the
underlying distribution.

The swarmplot is similar to stripplot(), but the points are adjusted
(only along the categorical axis) so that they don’t overlap. This
gives a better representation of the distribution of values, although
it does not scale as well to large numbers of observations (both in
terms of the ability to show all the points and in terms of the
computation needed to arrange them).

sns.stripplot(x="day", y="total_bill", data=tips)


sns.swarmplot(x="day", y="total_bill", data=tips)

sns.violinplot(x="tip", y="day", data=tips,palette='rainbow')


sns.swarmplot(x="tip", y="day", data=tips,color='black',size=3)
factorplot
factorplot is the most general form of a categorical plot. It can take in
a kind parameter to adjust the plot type:

sns.factorplot(x='sex',y='total_bill',data=tips,kind='bar')

Matrix Plots
Matrix plots allow you to plot data as color-encoded matrices and can also be
used to indicate clusters within the data (later in the machine learning
section we will learn how to formally cluster data).

Let's begin by exploring seaborn's heatmap and clustermap:

flights = sns.load_dataset('flights')

sns.heatmap(tips.corr(),cmap='coolwarm',annot=True)
pvflights = flights.pivot_table(values='passengers',index='month',columns='year')

sns.clustermap(pvflights)
# Just the Grid
sns.PairGrid(iris)

# Map to upper,lower, and diagonal


g = sns.PairGrid(iris)
g.map_diag(plt.hist)
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot)

g = sns.FacetGrid(tips, col="time", row="smoker")


g = g.map(plt.hist, "total_bill")
g = sns.JointGrid(x="total_bill", y="tip", data=tips)
g = g.plot(sns.regplot, sns.distplot)
lmplot() - lmplot allows you to display linear models, but it also
conveniently allows you to split up those plots based off of features,
as well as coloring the hue based off of features.

sns.lmplot(x='total_bill',y='tip',data=tips)

sns.lmplot(x='total_bill',y='tip',data=tips,col='sex')

sns.lmplot(x="total_bill", y="tip", row="sex", col="time",data=tips)


## Spine Removal

sns.countplot(x='sex',data=tips)
sns.despine()

_
# Pandas Built-in Data Visualization
import numpy as np
import pandas as pd
%matplotlib inline

df1 = pd.read_csv('df1',index_col=0)
df2 = pd.read_csv('df2')

df1['A'].hist()

import matplotlib.pyplot as plt


plt.style.use('ggplot')

df1['A'].hist()

plt.style.use('bmh')
df1['A'].hist()

plt.style.use('dark_background')
df1['A'].hist()

plt.style.use('fivethirtyeight')
df1['A'].hist()

Area
df2.plot.area(alpha=0.4)

Barplots
df2.head()
df2.plot.bar()
df2.plot.bar(stacked=True)
Histogram
df1['A'].plot.hist(bins=50)

Line Plots
df1.plot.line(x=df1.index,y='B',figsize=(12,3),lw=1)

Scatter Plots
df1.plot.scatter(x='A',y='B')

BoxPlots
df2.plot.box() # Can also pass a by= argument for groupby

Kernel Density Estimation plot (KDE)


df2['a'].plot.kde()

PlotLy

from plotly import __version__


from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

print(__version__) # requires version >= 1.9.0


import cufflinks as cf

df = pd.DataFrame(np.random.randn(100,4),columns='A B C D'.split())

df.head()

A B C D

1.87872 0.68871 1.06673 0.54395


0
5 9 3 6

0.02873 0.10405 0.04817 1.84218


1
4 4 6 8

- - -
0.38792
2 0.15879 0.63537 0.63755
6
3 1 8

3 - 1.39342 - -
A B C D

1.22197 0.29979 1.11362


3
2 4 2

- -
1.25315 0.30291
4 0.53759 2.54608
2 7
8 3

Using Cufflinks and iplot()


* scatter
* bar
* box
* spread
* ratio
* heatmap
* surface
* histogram
* bubble

Scatter

df.iplot(kind='scatter',x='A',y='B',mode='markers',size=10)

df.count().iplot(kind='bar')

Boxplots

df.iplot(kind='box')

3d Surface

df3 = pd.DataFrame({'x':[1,2,3,4,5],'y':[10,20,30,20,10],'z':[5,4,3,2,1]})
df3.iplot(kind='surface',colorscale='rdylbu')

Spread
df[['A','B']].iplot(kind='spread')

histogram
df['A'].iplot(kind='hist',bins=25)

scatter_matrix()

Similar to sns.pairplot()

df.scatter_matrix()
df.idxmin() – returns, for each column, the index label of its lowest value

df.idxmax() – returns, for each column, the index label of its highest value

Use .ta_plot(study='boll') to create a Bollinger Band Plot for Bank of
America for the year 2015.

BAC['Close'].loc['2015-01-01':'2016-01-01'].ta_plot(study='boll')

Use .ta_plot(study='sma') to create a Simple Moving Averages plot of
Morgan Stanley for the year 2015.

MS['Close'].loc['2015-01-01':'2016-01-01'].ta_plot(study='sma',periods=[13,21,55],title='Simple Moving Averages')

You might also like