Day 1 Python Notebook
Day 1 Python Notebook
# Ordered Output
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 1 of 19
Day 1 16/12/19, 17:40
In [140]: mydict2 = {
    'Key1': [1, 2],      # note the capital K, unlike 'key2' below
    'key2': ['G', 'F'],
}
# Show the resulting two-entry dictionary (each value is a list).
print(mydict2)
print(numtupple[3])
2
3
Pete
Pete
25
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 2 of 19
Day 1 16/12/19, 17:40
Out[143]:
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance
In [144]: bankchurn.tail()
Out[144]:
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balan
In [145]: bankchurn.columns
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 3 of 19
Day 1 16/12/19, 17:40
In [146]: bankchurn.dtypes
# DATA TYPES (pandas dtypes)
# Number    - int64 & float64
# Character - object
# Date      - datetime64
# Boolean   - bool (True / False)
In [148]: bankchurn.describe().T
# describe() - basic descriptive statistics (count, mean, std, min, quartiles, ...).
# .T transposes the table so the statistics become the columns.
Out[148]:
count mean std min 25% 50%
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 4 of 19
Day 1 16/12/19, 17:40
In [151]: bankchurn['Balance'].describe()
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 5 of 19
Day 1 16/12/19, 17:40
In [152]: bankchurn.loc[:, ['Balance', 'Tenure', 'Age']].describe()
# Multi-column selection takes a list of labels; this .loc form is
# equivalent to the double-square-bracket form bankchurn[[...]].
Out[152]:
Balance Tenure Age
Out[153]:
Age Tenure Balance
Out[154]:
Geography Gender Age Tenure
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 6 of 19
Day 1 16/12/19, 17:40
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 7 of 19
Day 1 16/12/19, 17:40
0.0
250898.09
76485.88928799961
97198.54000000001
3893046832.3731775
62394.285254125454
0.0
97198.54000000001
127644.24
133710.358
-0.14110871094154384
-1.489411767941925
In [156]: print(np.quantile(bankchurn['Balance'], q=0.50))  # Q2 (median): quantile takes q on a 0-1 scale
print(np.percentile(bankchurn['Balance'], q=50))  # Q2 (median): percentile takes q on a 0-100 scale
97198.54000000001
97198.54000000001
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 8 of 19
Day 1 16/12/19, 17:40
In [157]: bankchurn.dtypes
In [159]: bankchurn.HasCrCard.value_counts()
# Frequency of each HasCrCard value. Series.value_counts() is used because the
# top-level pd.value_counts() is deprecated since pandas 2.1.
Out[159]: 1 7055
0 2945
Name: HasCrCard, dtype: int64
In [160]: bankchurn.Exited.value_counts()
# 1 - customer exited, 0 - customer did not exit.
# Series.value_counts() is used because the top-level pd.value_counts()
# is deprecated since pandas 2.1.
Out[160]: 0 7963
1 2037
Name: Exited, dtype: int64
In [161]: bankchurn.IsActiveMember.value_counts()
# Frequency of each IsActiveMember value. Series.value_counts() is used because
# the top-level pd.value_counts() is deprecated since pandas 2.1.
Out[161]: 1 5151
0 4849
Name: IsActiveMember, dtype: int64
In [162]: bankchurn.columns
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 9 of 19
Day 1 16/12/19, 17:40
In [163]: categorycols=['HasCrCard','IsActiveMember',
'Exited','Geography','Gender']
1 7055
0 2945
Name: HasCrCard, dtype: int64
1 5151
0 4849
Name: IsActiveMember, dtype: int64
0 7963
1 2037
Name: Exited, dtype: int64
France 5014
Germany 2509
Spain 2477
Name: Geography, dtype: int64
Male 5457
Female 4543
Name: Gender, dtype: int64
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 10 of 19
Day 1 16/12/19, 17:40
In [167]: bankchurn.Balance.plot.hist(color="red")
# Histogram of Balance, drawn in red (accessor form of plot(kind='hist')).
In [168]: bankchurn.EstimatedSalary.plot.box(vert=False)
# Horizontal box plot of EstimatedSalary (accessor form of plot(kind='box')).
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 11 of 19
Day 1 16/12/19, 17:40
In [169]: bankchurn.Balance.plot.density()
# Kernel density estimate of Balance (accessor form of plot(kind='density')).
Out[170]:
Balance EstimatedSalary
In [171]: bankchurn.loc[:, ['Balance', 'EstimatedSalary']].corr()
# Correlation matrix of the two columns (corr() uses Pearson by default).
Out[171]:
Balance EstimatedSalary
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 12 of 19
Day 1 16/12/19, 17:40
In [172]: bankchurn.plot.scatter(x='Balance', y='EstimatedSalary')
# Scatter plot with Balance on the x-axis and EstimatedSalary on the y-axis.
In [173]: print(bankchurn.Exited.value_counts())  # group sizes; Series.value_counts() replaces the deprecated pd.value_counts()
bankchurn.Balance.groupby(bankchurn.Exited).mean()  # mean Balance within each Exited group
0 7963
1 2037
Name: Exited, dtype: int64
Out[173]: Exited
0 72745.296779
1 91108.539337
Name: Balance, dtype: float64
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 13 of 19
Day 1 16/12/19, 17:40
In [174]: pd.crosstab(index=bankchurn['Exited'],
                      columns=bankchurn['Gender'],
                      normalize='index')
# Cross-tabulation of two categorical variables.
# normalize='index' converts the counts to row proportions,
# so each Exited row sums to 1.
Out[174]:
Gender Female Male
Exited
0 0.427477 0.572523
1 0.559156 0.440844
In [175]: bankchurn.groupby(['Exited', 'Gender', 'Geography'])['Balance'].mean()
# Mean Balance for every Exited x Gender x Geography combination.
In [176]: bankchurn.groupby('Exited')['Balance'].mean()
# Mean Balance for the not-exited (0) and exited (1) customers.
Out[176]: Exited
0 72745.296779
1 91108.539337
Name: Balance, dtype: float64
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 14 of 19
Day 1 16/12/19, 17:40
# PROCESS IN PYTHON
# 1. GROUPBY - identify the number of groups and confirm
#    that the group means differ
# 2. Split the DataFrame into subsets, one per group
# 3. Conduct the relevant test and draw the inference
#    from the p-value
(2037, 14)
(7963, 14)
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 15 of 19
Day 1 16/12/19, 17:40
In [181]: bankchurn.groupby('Gender')['Balance'].mean()
# Mean Balance for each Gender group.
Out[181]: Gender
Female 75659.369139
Male 77173.974506
Name: Balance, dtype: float64
In [183]: ttest_ind(male.Balance,female.Balance,
equal_var=False)
# Two-sample t-test comparing Balance between the `male` and `female`
# subsets (both are defined outside the visible cells).
# equal_var=False: presumably scipy.stats.ttest_ind, where this selects
# Welch's t-test (no equal-variance assumption) - import not visible, confirm.
# Since the p-value is greater than 0.05, we fail to reject the
# null hypothesis.
Out[185]: Geography
France 62092.636516
Germany 119730.116134
Spain 61818.147763
Name: Balance, dtype: float64
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 16 of 19
Day 1 16/12/19, 17:40
In [191]: # DISTRIBUTIONS
# T TEST - STUDENT T DISTRIBUTION
# ANOVA SINGLE FACTOR - F DISTRIBUTION
# CHI-SQUARE TEST - CHI-SQUARE DISTRIBUTION
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 17 of 19
Day 1 16/12/19, 17:40
Out[194]:
Exited 0 1
Gender
5.328333333333334
2.1033333333333335
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 18 of 19
Day 1 16/12/19, 17:40
1.9116666666666668
3.6216666666666666
In [204]: ttest_rel(beforegst,aftergst)
# Paired (related-samples) t-test on beforegst vs. aftergst - presumably
# scipy.stats.ttest_rel; the import and both samples are defined outside the visible cells.
In [ ]:
http://localhost:8889/nbconvert/html/SBIIT%20B2/Day%201%20.ipynb?download=false Page 19 of 19