6.outlier Code - Jupyter Notebook
6.outlier Code - Jupyter Notebook
In [3]: mba.boxplot()
...
In [4]: mba.boxplot(column='gmat')
...
In [6]: mba.describe()
...
In [7]: q1 = mba['gmat'].quantile(0.25)
In [8]: q1
Out[8]: 690.0
In [9]: q3 = mba['gmat'].quantile(0.75)
#q3 = 730
In [10]: q3
Out[10]: 730.0
Out[13]: 40.0
Out[14]: 630.0
Out[15]: 790.0
In [16]: mba.shape
Out[16]: (773, 3)
In [17]: # any value which are greater than lower boundary and less than Upper boundary ar
mba1 = mba.loc[(mba['gmat'] > low) & (mba['gmat'] < high)]
In [15]: mba.shape
Out[15]: (773, 3)
Out[14]: (758, 3)
In [20]: import seaborn as sns
sns.boxplot(mba1['gmat'])
...
In [24]: out
Out[24]: Int64Index([189, 337, 392, 403, 478, 491, 653, 766, 768, 770, 771, 772], dtype
='int64')
In [26]: mba2
...
In [27]: mba.drop(out,inplace=True)
In [28]: mba
...
Method 3
Replace the Outlier
...
...
Out[33]: 630.0
In [34]: mba[(mba["gmat"]<low)]
...
In [41]: out1 = mba[(mba["gmat"]<low)].values
out1
...
In [42]: mba['gmat'].replace(out1,low,inplace=True)
In [43]: mba
...
In [49]: sns.boxplot(mba['gmat'])
...
In [47]: mean
Out[47]: 711.4230271668823
...
In [55]: mba['gmat'].replace(out1,mean,inplace=True)
In [56]: mba
0 1 21 720.000000
1 2 107 640.000000
2 3 57 740.000000
3 4 99 690.000000
4 5 208 710.000000
In [ ]: