Python Code Library
---------------------------------------------
##Import popular library for EDA/Visualization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
os.chdir('D:\\For Dan\\Learning\\Udemy\\Python')
df = pd.read_csv('P4-Demographic-Data.csv')
df.columns
##Column rename
stats.columns = ['CountryName', 'CountryCode', 'BirthRate',
'InternetUsers','IncomeGroup']
df.info()
df.describe()
##df.describe().transpose()
movies.Genre = movies.Genre.astype('category')
----------------------------------Visualization---------------------------------------------
# Global style presets (each call overrides the previous; only the last
# one in effect when a figure is drawn matters).
sns.set_style('darkgrid')
sns.set_style('whitegrid')
# Two ways to size figures: a session-wide default vs. per-figure.
plt.rcParams['figure.figsize'] = 8,4
plt.figure(figsize=(8,4))
##Histogram/Distribution
sns.set()
# NOTE(review): distplot is deprecated in seaborn >= 0.11 -- prefer
# histplot/displot in new code.
vis1 = sns.distplot(stats['InternetUsers'], hist_kws={"edgecolor":"Black"},
bins=20)
plt.show()
sns.set_style('darkgrid')
fig, ax = plt.subplots()
fig.set_size_inches(11.7,8.27)
# Stacked histogram; 'list1' (sequence of data series) and 'listlabel'
# (legend labels) are defined elsewhere -- TODO confirm.
h2 = plt.hist(list1, bins = 20, stacked = True, rwidth = 1, label = listlabel)
#
plt.title('Movie Budget Distribution', fontsize=30)
plt.ylabel('Number of Movies',fontsize=15)
plt.xlabel('Budget',fontsize=15)
plt.yticks(fontsize=15)
plt.xticks(fontsize=15)
plt.legend(frameon = True, fancybox = True, shadow = True, fontsize=15)
plt.show()
##Subplot: two KDE panels side by side, sharing both axes
f, axes = plt.subplots(1,2,figsize = (12,6), sharex = True, sharey=True)
# Bivariate KDEs: budget vs. audience ratings (left) and budget vs.
# critic ratings (right); 'ax=' routes each plot into its panel.
k3 = sns.kdeplot(movies.BudgetMillion, movies.AudienceRatings,cmap = 'Greens',
ax = axes[0])
k4 = sns.kdeplot(movies.BudgetMillion, movies.CriticRatings,
ax = axes[1])
k3.set(xlim = (-20,160)) #custom x-axis range
plt.show()
##Violin plot: distribution shape of CriticRatings per Genre
w = sns.violinplot(data=movies, x='Genre', y='CriticRatings')
##Boxplot: BirthRate spread per income group
sns.set()
vis2 = sns.boxplot(data=stats, x='IncomeGroup', y='BirthRate')
##Linear model scatter: regression line suppressed, coloured by group
# fixed: 'size' was renamed to 'height' in seaborn 0.9 -- 'size=10'
# raises/warns on current versions.
vis3 = sns.lmplot(data=stats, x='InternetUsers', y='BirthRate',
                  fit_reg=False, hue='IncomeGroup', height=10, aspect=1)
##Jointplot: scatter + marginal histograms, then a KDE variant
j = sns.jointplot(data=movies, x='CriticRatings', y='AudienceRatings')
j = sns.jointplot(data=movies, x='CriticRatings', y='AudienceRatings',
                  kind='kde')
##FacetGrid: one scatter panel per Genre x YearRelease combination
# Controlling axes and adding diagonals
g = sns.FacetGrid(movies, row='Genre', col='YearRelease', hue='Genre')
kws = dict(s=50, edgecolor='black', linewidth=0.5)
# fixed: 'kws' was defined but never passed to map(); forward it so the
# marker size/edge styling actually applies.
g = g.map(plt.scatter, 'CriticRatings', 'AudienceRatings', **kws)
g.set(xlim=(0,100), ylim=(20,100))
# Draw a reference diagonal on every facet (loop body was unindented
# in the original, which is a syntax error).
for ax in g.axes.flat:
    ax.plot((0,100), (20,100), c='grey', ls='--')
g.add_legend()
plt.show()
------------------------------------------------------------------------------------------------
# 2x2 grid of layered KDE plots ('axes' is assumed to be a 2x2 subplot
# array created earlier -- TODO confirm). Each panel stacks a filled KDE
# with a contour-only overlay in a second colormap.
k1 = sns.kdeplot(movies.BudgetMillion, movies.AudienceRatings,
                 shade=True, shade_lowest=True, cmap='inferno',
                 ax=axes[0,0])  # fixed: 'Shade_lowest' -> 'shade_lowest'
k1b = sns.kdeplot(movies.BudgetMillion, movies.AudienceRatings, cmap='PuBu',
                  ax=axes[0,0])
k2 = sns.kdeplot(movies.BudgetMillion, movies.CriticRatings,
                 shade=True, shade_lowest=True, cmap='inferno',
                 ax=axes[0,1])  # fixed: 'Shade_lowest' -> 'shade_lowest'
k2b = sns.kdeplot(movies.BudgetMillion, movies.CriticRatings, cmap='cool',
                  ax=axes[0,1])
k4 = sns.kdeplot(movies.CriticRatings, movies.AudienceRatings,
                 shade=True, shade_lowest=False, cmap='Blues_r',
                 ax=axes[1,1])
k4b = sns.kdeplot(movies.CriticRatings, movies.AudienceRatings,
                  cmap='gist_gray_r',
                  ax=axes[1,1])
# Budget panels share a custom x-range so the top row lines up.
k1.set(xlim=(-20,200))
k2.set(xlim=(-20,200))
plt.show()
# Per-player colour and marker lookup tables for the NBA plotting demos.
_players = ("KobeBryant", "JoeJohnson", "LeBronJames", "CarmeloAnthony",
            "DwightHoward", "ChrisBosh", "ChrisPaul", "KevinDurant",
            "DerrickRose", "DwayneWade")
Col = dict(zip(_players, ('Black', 'green', 'red', 'y', 'k',
                          'm', 'b', 'k', 'c', 'm')))
Mkers = dict(zip(_players, ("o", "D", "^", "*", "v",
                            "", "p", "D", "H", "^")))
##---------------- Linear vs. Logistic Regression ----------------
# Side-by-side snippet: fit a regressor and a classifier on the same split.
from sklearn.linear_model import LinearRegression, LogisticRegression
lm = LinearRegression()  # fixed: original line was missing the closing ')'
logmodel = LogisticRegression()
lm.fit(X_train, y_train)
logmodel.fit(X_train, y_train)
predictions = lm.predict(X_test)
# NOTE(review): this rebinds 'predictions', discarding the linear-model
# output -- only the logistic predictions are evaluated below.
predictions = logmodel.predict(X_test)
# Precision / recall / F1 per class for the classifier.
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))
##--------------------- KNN ---------------------
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report
##Standardize features first -- KNN is distance-based, so scale matters.
scaler = StandardScaler()
scaler.fit(df.drop('TARGET CLASS', axis=1))
scaled_features = scaler.transform(df.drop('TARGET CLASS', axis=1))
# Baseline fit with k=1.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)
# Elbow method: misclassification rate for k = 1..39 (loop body was
# unindented in the original, which is a syntax error).
error_rate = []
for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    predictions = knn.predict(X_test)
    error_rate.append(np.mean(predictions != y_test))
plt.figure(figsize=(10,6))
plt.plot(range(1,40), error_rate, color='blue', linestyle='-', marker='o',
         markerfacecolor='red', markersize=10)
plt.title('Error Rate vs K-value')
plt.xlabel('K')
plt.ylabel('Error Rate')  # fixed: y-axis is the error rate, not 'K-value'
# Refit at the k chosen from the elbow plot and evaluate.
knn = KNeighborsClassifier(n_neighbors=17)
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)
print(confusion_matrix(y_test, predictions))
print('\n')
print(classification_report(y_test, predictions))
--------------------------------------Decision Tree-----------------------------------------
# Features / target split for the loans dataset.
X = final_data.drop('not.fully.paid', axis=1)
y = final_data['not.fully.paid']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=101)
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier()
dtree.fit(X_train, y_train)
predictions = dtree.predict(X_test)
# fixed: the original evaluated 'rfc_pred' from a never-defined random
# forest 'rfc' while the tree's predictions were discarded; evaluate the
# fitted decision tree instead.
print(confusion_matrix(y_test, predictions))
print('\n')
print(classification_report(y_test, predictions))
------------------------------Standardisation vs Max-Min Normalization----------------------------------------------------
Both rescale features to a comparable unit/magnitude.
Standardisation
#Import library
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# fixed: the original rebound 'sc_X' from the scaler object to the
# ndarray returned by fit_transform, losing the fitted scaler; keep them
# as separate names.
scaled = sc.fit_transform(df)
#Convert back to a table - column order must match df's columns
sc_X = pd.DataFrame(data=scaled, columns=["Age",
        "Salary", "Purchased", "Country_France", "Country_Germany",
        "Country_spain"])
sc_X
Max-Min Normalization