EDA Plots Code
EDA Plots Code
~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Branch taken when the target variable is flagged as 'Continuous'.
# NOTE(review): indentation was lost in this listing; presumably everything up
# to the closing `else:` belongs inside this `if` - confirm against the source.
if target_type1 == 'Continuous':
# Select feat_imp1 from first_data (presumably a list of feature column names).
df = first_data[feat_imp1]
# Spellings under which a date column may appear.
date = ['date','Date','DATE']
# Column index of the selected frame, captured before any cleaning step runs.
col = df.columns
def rem_date(x, date_names=('date', 'Date', 'DATE')):
    """Return ``x`` without its date column(s).

    The previous version returned from inside the nested loops on the very
    first comparison, so a date column was only found when it was named
    'date' and was the first column; it also returned the global ``df``
    instead of its argument when nothing matched.

    Args:
        x: pandas DataFrame to clean. It is not modified.
        date_names: column names that identify a date column.

    Returns:
        A new DataFrame with every matching column dropped, or ``x`` itself
        when none of ``date_names`` is a column of ``x``.
    """
    present = [name for name in date_names if name in x.columns]
    if not present:
        print("date column not found")
        return x
    print("date column removed")
    return x.drop(present, axis=1)
# Strip the date column from the working frame and print its first rows.
df = rem_date(df)
print(df.head())
# Cell values that are treated as special-character placeholders.
symbols = ["?","#","%","&","*","+","@","_","-"]
def remove_spchar(x, special_chars=("?", "#", "%", "&", "*", "+", "@", "_", "-")):
    """Replace cells that consist of a special character with 0.

    The previous version walked the rows of ``x.values`` and returned from
    the first comparison, so it only ever tested "?" against the first row
    and replaced at most that one symbol.

    Matching is on whole cell values (as with ``DataFrame.replace`` on a
    scalar), not on substrings inside longer strings.

    Args:
        x: pandas DataFrame to clean. It is not modified.
        special_chars: cell values to treat as placeholders.

    Returns:
        ``x`` itself when no cell equals any of ``special_chars``; otherwise
        a new DataFrame with every such cell replaced by 0.
    """
    markers = list(special_chars)
    if not x.isin(markers).values.any():
        print("Special Character Not detected")
        return x
    print("Special Character Detected and will be Removed: ")
    return x.replace(markers, 0)
# Replace special-character placeholder cells in the working frame.
df = remove_spchar(df)
# Bare expression: the preview is computed and discarded (it is not printed).
df.head()
def validate_null_values(x):
    """Fill missing values in numeric columns with that column's mean.

    The previous version filled every null in the frame with the mean of the
    first column only (and failed when that column was not numeric), and
    returned None for a frame without columns.

    Nulls in non-numeric columns are left as they are, because a mean is not
    defined for them.

    Args:
        x: pandas DataFrame to check. It is not modified.

    Returns:
        ``x`` itself when it contains no nulls; otherwise a new DataFrame in
        which each numeric column's nulls are replaced by that column's mean.
    """
    if not x.isnull().values.any():
        print("There is no null values:")
        return x
    print("There is null values and will be removed:")
    # fillna with a Series aligns on column names, giving per-column means.
    return x.fillna(x.mean(numeric_only=True))
# Handle missing values in the working frame.
df = validate_null_values(df)
# Bare expression: the preview is computed and discarded (it is not printed).
df.head()
# Column names that are treated as record identifiers.
ids_ = ['Id','ID','id','encounter_id']
# Column index of the frame as it stands after the cleaning steps above.
cols = df.columns
def rem_id(x, id_names=('Id', 'ID', 'id', 'encounter_id')):
    """Return ``x`` without its identifier column(s).

    The previous version returned from inside the nested loops on the very
    first comparison, so an id column was only found when it was named 'Id'
    and was the first column.

    Args:
        x: pandas DataFrame to clean. It is not modified.
        id_names: column names that identify a record-id column.

    Returns:
        A new DataFrame with every matching column dropped, or ``x`` itself
        when none of ``id_names`` is a column of ``x``.
    """
    present = [name for name in id_names if name in x.columns]
    if not present:
        print("id column not found")
        return x
    print("id column removed")
    return x.drop(present, axis=1)
# Drop identifier columns from the working frame.
df = rem_id(df)
# Bare expression: the preview is computed and discarded (it is not printed).
df.head()
# Split into features (everything except the target column) and target.
X = df.drop(target_var1,axis = 1)
Y = df[target_var1]
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Encode each feature column as integer labels of its string form. The same
# encoder object is refit for every column, so afterwards `le` only reflects
# the last column it was fitted on.
# NOTE(review): per the pandas docs `result_type` only acts when axis=1, so it
# presumably has no effect with axis=0 - confirm.
X = X.apply(lambda col: le.fit_transform(col.astype(str)), axis = 0,
result_type = 'expand')
# Bare expression: the preview is computed and discarded (it is not printed).
X.head()
# Reference diagonal for the actual-vs-predicted chart: a straight line between
# the largest and the smallest value found in either series.
p1 = max(max(predicted_value), max(true_value))
p2 = min(min(predicted_value), min(true_value))
diagonal_ends = [p1, p2]
plt.plot(diagonal_ends, diagonal_ends, 'b-')
plt.xlabel('True Values', fontsize=15)
plt.ylabel('Predictions', fontsize=15)
plt.title('Actual Vs predictions')
# plt.axis('equal')

# Save the current figure under the buyer's media folder; the file name gets a
# random integer suffix.
rand = random.randint(0, 99999999)
name = 'modelgraph' + str(rand)
buyerid = request.session['buyerid']
modelgraph = '/'.join(['media/files/expert_serv', str(buyerid), str(name) + '.png'])
print(modelgraph)
plt.savefig(modelgraph, dpi=300)
# dtype selector for object (string-like) columns.
obj = ['object']
# Names of the object-dtype columns found in top_5_feature.
obj_features = list(top_5_feature.select_dtypes(include=obj).columns)
# data_obj and obj_col_df are both df[obj_features] selections.
data_obj = df[obj_features]
num_col_df = df[numerical_features]
obj_col_df = df[obj_features]
# data analysis plots
# NOTE(review): data_num is not defined in the visible code - verify it exists
# before this assignment; the target is stored in it as a column.
data_num[target_var1] = Y
# One index per column of num_col_df; drives the numeric plots below.
x_range = list(range(len(num_col_df.columns)))
print(x_range)
def _save_numeric_plot(draw, prefix, out_dir):
    """Draw one chart on a fresh 10x5 figure and save it as a PNG.

    Args:
        draw: zero-argument callable that renders onto the current figure.
        prefix: start of the file name; a random integer is appended.
        out_dir: directory prefix (ending in '/') the file is written under.

    Returns:
        The saved file path, or None when drawing or saving raised.
    """
    fig = plt.figure(figsize=(10, 5))
    try:
        draw()
        rand = random.randint(0, 99999999)
        path = out_dir + prefix + str(rand) + '.png'
        print(path)
        plt.savefig(path, dpi=300)
        return path
    except Exception as exc:
        # Still best effort, but report the reason instead of hiding it.
        print('plot ' + prefix + ' failed: ' + str(exc))
        return None
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


# Numeric-feature plots against the target (last column of data_num):
# a scatter plot for the first numeric feature and a line plot for the second.
# The original looped over x_range and acted only on i == 0 and i == 1; the
# length checks below are equivalent. Its commented-out variants for further
# columns were disabled (and broken by line wrapping) and are not reproduced.
buyerid = request.session['buyerid']
plot_dir = 'media/files/expert_serv/' + str(buyerid) + '/'

# The save step further down reads location_1 / location_2, while this copy
# originally stored the paths only in dataanalysis1 / dataanalysis2; both names
# are bound, and default to None when a plot is skipped or fails.
# NOTE(review): confirm the model fields accept None for a missing plot.
location_1 = dataanalysis1 = None
location_2 = dataanalysis2 = None

if len(x_range) >= 1:
    location_1 = dataanalysis1 = _save_numeric_plot(
        lambda: sns.scatterplot(x=data_num.columns[-2],
                                y=data_num.columns[-1], data=data_num),
        'dataanalysis1_', plot_dir)

if len(x_range) >= 2:
    location_2 = dataanalysis2 = _save_numeric_plot(
        lambda: sns.lineplot(x=data_num.columns[-3],
                             y=data_num.columns[-1], data=data_num),
        'dataanalysis2_', plot_dir)
def _save_categorical_plot(draw, prefix, out_dir):
    """Draw one chart on a fresh 10x5 figure and save it as a PNG.

    The x tick labels are rotated by 90 degrees before ``draw`` runs, in the
    same order as the original code.

    Args:
        draw: zero-argument callable that renders onto the current figure.
        prefix: start of the file name; a random integer is appended.
        out_dir: directory prefix (ending in '/') the file is written under.

    Returns:
        The saved file path, or None when drawing or saving raised.
    """
    fig = plt.figure(figsize=(10, 5))
    try:
        plt.xticks(rotation=90)
        draw()
        rand = random.randint(0, 99999999)
        path = out_dir + prefix + str(rand) + '.png'
        print(path)
        plt.savefig(path, dpi=300)
        return path
    except Exception as exc:
        # Still best effort, but report the reason instead of hiding it.
        print('plot ' + prefix + ' failed: ' + str(exc))
        return None
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


# Object-feature plots against the target (last column of data_obj):
# bar plot for the first feature, count plot for the second, violin plot for
# the third. The original looped over x_range and acted only on i == 0, 1, 2;
# the length checks below are equivalent. Its commented-out variants for
# further columns were disabled (and broken by line wrapping) and are not
# reproduced.
count_col = obj_col_df.columns
data_obj[target_var1] = Y
x_range = list(range(len(count_col)))
print(x_range)

buyerid = request.session['buyerid']
plot_dir = 'media/files/expert_serv/' + str(buyerid) + '/'

# The save step further down reads location_2_1 / location_2_2 / location_2_3,
# while this copy originally stored the paths only in dataanalysis1_2 /
# dataanalysis2_2 / dataanalysis2_3; both sets of names are bound, and default
# to None when a plot is skipped or fails.
# NOTE(review): confirm the model fields accept None for a missing plot.
location_2_1 = dataanalysis1_2 = None
location_2_2 = dataanalysis2_2 = None
location_2_3 = dataanalysis2_3 = None

if len(x_range) >= 1:
    location_2_1 = dataanalysis1_2 = _save_categorical_plot(
        lambda: sns.barplot(x=data_obj.columns[-2],
                            y=data_obj.columns[-1], data=data_obj),
        'dataanalysis1_', plot_dir)

if len(x_range) >= 2:
    location_2_2 = dataanalysis2_2 = _save_categorical_plot(
        lambda: sns.countplot(x=data_obj.columns[-3], data=data_obj),
        'dataanalysis2_2', plot_dir)

if len(x_range) >= 3:
    location_2_3 = dataanalysis2_3 = _save_categorical_plot(
        lambda: sns.violinplot(x=data_obj.columns[-4],
                               y=data_obj.columns[-1], data=data_obj),
        'dataanalysis2_3', plot_dir)
# Store the generated plot paths and run metadata on a new expert_eda record.
# The location_* names must already hold the saved plot paths at this point.
temp = expert_eda()
for attr_name, attr_value in (
        ('expertservice_id', expertservice_id),
        ('buyer_id', buyerid),
        ('dataanalysis1', location_1),
        ('dataanalysis2', location_2),
        ('dataanalysis2_1', location_2_1),
        ('dataanalysis2_2', location_2_2),
        ('dataanalysis2_3', location_2_3),
        ('matrix2', matrix2),
        ('feature', feature),
        ('modelgraph', modelgraph),
        ('target', target_var1),
):
    setattr(temp, attr_name, attr_value)
# temp.algo_type = 'clf'
temp.save()
# Second pass of the same pipeline: strip the date column again and print the
# first rows of the working frame.
df = rem_date(df)
print(df.head())
# Cell values that are treated as special-character placeholders.
symbols = ["?","#","%","&","*","+","@","_","-"]
def remove_spchar(x, special_chars=("?", "#", "%", "&", "*", "+", "@", "_", "-")):
    """Replace cells that consist of a special character with 0.

    The previous version walked the rows of ``x.values`` and returned from
    the first comparison, so it only ever tested "?" against the first row
    and replaced at most that one symbol.

    Matching is on whole cell values (as with ``DataFrame.replace`` on a
    scalar), not on substrings inside longer strings.

    Args:
        x: pandas DataFrame to clean. It is not modified.
        special_chars: cell values to treat as placeholders.

    Returns:
        ``x`` itself when no cell equals any of ``special_chars``; otherwise
        a new DataFrame with every such cell replaced by 0.
    """
    markers = list(special_chars)
    if not x.isin(markers).values.any():
        print("Special Character Not detected")
        return x
    print("Special Character Detected and will be Removed: ")
    return x.replace(markers, 0)
# Replace special-character placeholder cells in the working frame.
df = remove_spchar(df)
# Bare expression: the preview is computed and discarded (it is not printed).
df.head()
def validate_null_values(x):
    """Fill missing values in numeric columns with that column's mean.

    The previous version filled every null in the frame with the mean of the
    first column only (and failed when that column was not numeric), and
    returned None for a frame without columns.

    Nulls in non-numeric columns are left as they are, because a mean is not
    defined for them.

    Args:
        x: pandas DataFrame to check. It is not modified.

    Returns:
        ``x`` itself when it contains no nulls; otherwise a new DataFrame in
        which each numeric column's nulls are replaced by that column's mean.
    """
    if not x.isnull().values.any():
        print("There is no null values:")
        return x
    print("There is null values and will be removed:")
    # fillna with a Series aligns on column names, giving per-column means.
    return x.fillna(x.mean(numeric_only=True))
# Handle missing values in the working frame.
df = validate_null_values(df)
# Bare expression: the preview is computed and discarded (it is not printed).
df.head()
# Column names that are treated as record identifiers.
ids_ = ['Id','ID','id','encounter_id']
# Column index of the frame as it stands after the cleaning steps above.
cols = df.columns
def rem_id(x, id_names=('Id', 'ID', 'id', 'encounter_id')):
    """Return ``x`` without its identifier column(s).

    The previous version returned from inside the nested loops on the very
    first comparison, so an id column was only found when it was named 'Id'
    and was the first column.

    Args:
        x: pandas DataFrame to clean. It is not modified.
        id_names: column names that identify a record-id column.

    Returns:
        A new DataFrame with every matching column dropped, or ``x`` itself
        when none of ``id_names`` is a column of ``x``.
    """
    present = [name for name in id_names if name in x.columns]
    if not present:
        print("id column not found")
        return x
    print("id column removed")
    return x.drop(present, axis=1)
# Drop identifier columns from the working frame.
df = rem_id(df)
# Bare expression: the preview is computed and discarded (it is not printed).
df.head()
# Split into features (everything except the target column) and target.
X = df.drop(target_var1,axis = 1)
Y = df[target_var1]
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Encode each feature column as integer labels of its string form. The same
# encoder object is refit for every column, so afterwards `le` only reflects
# the last column it was fitted on.
# NOTE(review): per the pandas docs `result_type` only acts when axis=1, so it
# presumably has no effect with axis=0 - confirm.
X = X.apply(lambda col: le.fit_transform(col.astype(str)), axis = 0,
result_type = 'expand')
# Bare expression: the preview is computed and discarded (it is not printed).
X.head()
# Reference diagonal for the actual-vs-predicted chart: a straight line between
# the largest and the smallest value found in either series.
p1 = max(max(predicted_value), max(true_value))
p2 = min(min(predicted_value), min(true_value))
diagonal_ends = [p1, p2]
plt.plot(diagonal_ends, diagonal_ends, 'b-')
plt.xlabel('True Values', fontsize=15)
plt.ylabel('Predictions', fontsize=15)
plt.title('Actual Vs predictions')
# plt.axis('equal')

# Save the current figure under the buyer's media folder; the file name gets a
# random integer suffix.
rand = random.randint(0, 99999999)
name = 'modelgraph_new' + str(rand)
buyerid = request.session['buyerid']
modelgraph = '/'.join(['media/files/expert_serv', str(buyerid), str(name) + '.png'])
print(modelgraph)
plt.savefig(modelgraph, dpi=300)
# info = expert_eda.objects.filter(id=esp).all()
# info.update(modelgraph=location)
# dtype selector for object (string-like) columns.
obj = ['object']
# Names of the object-dtype columns found in top_5_feature.
obj_features = list(top_5_feature.select_dtypes(include=obj).columns)
# data_obj and obj_col_df are both df[obj_features] selections.
data_obj = df[obj_features]
num_col_df = df[numerical_features]
obj_col_df = df[obj_features]
# data analysis plots
# NOTE(review): data_num is not defined in the visible code - verify it exists
# before this assignment; the target is stored in it as a column.
data_num[target_var1] = Y
# One index per column of num_col_df; drives the numeric plots below.
x_range = list(range(len(num_col_df.columns)))
print(x_range)
def _save_numeric_plot(draw, prefix, out_dir):
    """Draw one chart on a fresh 10x5 figure and save it as a PNG.

    Args:
        draw: zero-argument callable that renders onto the current figure.
        prefix: start of the file name; a random integer is appended.
        out_dir: directory prefix (ending in '/') the file is written under.

    Returns:
        The saved file path, or None when drawing or saving raised.
    """
    fig = plt.figure(figsize=(10, 5))
    try:
        draw()
        rand = random.randint(0, 99999999)
        path = out_dir + prefix + str(rand) + '.png'
        print(path)
        plt.savefig(path, dpi=300)
        return path
    except Exception as exc:
        # Still best effort, but report the reason instead of hiding it.
        print('plot ' + prefix + ' failed: ' + str(exc))
        return None
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


# Numeric-feature plots against the target (last column of data_num):
# a scatter plot for the first numeric feature and a line plot for the second.
# The original looped over x_range and acted only on i == 0 and i == 1; the
# length checks below are equivalent. Its commented-out variants for further
# columns were disabled (and broken by line wrapping) and are not reproduced.
buyerid = request.session['buyerid']
plot_dir = 'media/files/expert_serv/' + str(buyerid) + '/'

# Bound up front so the save step further down does not hit an unbound name
# when a plot is skipped (too few columns) or fails.
# NOTE(review): confirm the model fields accept None for a missing plot.
location_1 = None
location_2 = None

if len(x_range) >= 1:
    location_1 = _save_numeric_plot(
        lambda: sns.scatterplot(x=data_num.columns[-2],
                                y=data_num.columns[-1], data=data_num),
        'dataanalysis1_new', plot_dir)

if len(x_range) >= 2:
    location_2 = _save_numeric_plot(
        lambda: sns.lineplot(x=data_num.columns[-3],
                             y=data_num.columns[-1], data=data_num),
        'dataanalysis2_new', plot_dir)
def _save_categorical_plot(draw, prefix, out_dir):
    """Draw one chart on a fresh 10x5 figure and save it as a PNG.

    The x tick labels are rotated by 90 degrees before ``draw`` runs, in the
    same order as the original code.

    Args:
        draw: zero-argument callable that renders onto the current figure.
        prefix: start of the file name; a random integer is appended.
        out_dir: directory prefix (ending in '/') the file is written under.

    Returns:
        The saved file path, or None when drawing or saving raised.
    """
    fig = plt.figure(figsize=(10, 5))
    try:
        plt.xticks(rotation=90)
        draw()
        rand = random.randint(0, 99999999)
        path = out_dir + prefix + str(rand) + '.png'
        print(path)
        plt.savefig(path, dpi=300)
        return path
    except Exception as exc:
        # Still best effort, but report the reason instead of hiding it.
        print('plot ' + prefix + ' failed: ' + str(exc))
        return None
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


# Object-feature plots against the target (last column of data_obj):
# bar plot for the first feature, count plot for the second, violin plot for
# the third. The original looped over x_range and acted only on i == 0, 1, 2;
# the length checks below are equivalent. Its commented-out variants for
# further columns were disabled (and broken by line wrapping) and are not
# reproduced.
count_col = obj_col_df.columns
data_obj[target_var1] = Y
x_range = list(range(len(count_col)))
print(x_range)

buyerid = request.session['buyerid']
plot_dir = 'media/files/expert_serv/' + str(buyerid) + '/'

# Bound up front so the save step further down does not hit an unbound name
# when a plot is skipped (too few columns) or fails.
# NOTE(review): confirm the model fields accept None for a missing plot.
location_2_1 = None
location_2_2 = None
location_2_3 = None

if len(x_range) >= 1:
    location_2_1 = _save_categorical_plot(
        lambda: sns.barplot(x=data_obj.columns[-2],
                            y=data_obj.columns[-1], data=data_obj),
        'dataanalysis2_1_new', plot_dir)

if len(x_range) >= 2:
    location_2_2 = _save_categorical_plot(
        lambda: sns.countplot(x=data_obj.columns[-3], data=data_obj),
        'dataanalysis2_2_new', plot_dir)

if len(x_range) >= 3:
    location_2_3 = _save_categorical_plot(
        lambda: sns.violinplot(x=data_obj.columns[-4],
                               y=data_obj.columns[-1], data=data_obj),
        'dataanalysis2_3_new', plot_dir)
# Store the generated plot paths and run metadata on a new expert_eda record.
# The location_* names must already hold the saved plot paths at this point.
temp = expert_eda()
for attr_name, attr_value in (
        ('expertservice_id', expertservice_id),
        ('buyer_id', buyerid),
        ('dataanalysis1', location_1),
        ('dataanalysis2', location_2),
        ('dataanalysis2_1', location_2_1),
        ('dataanalysis2_2', location_2_2),
        ('dataanalysis2_3', location_2_3),
        ('matrix2', matrix2),
        ('feature', feature),
        ('modelgraph', modelgraph),
        ('target', target_var1),
):
    setattr(temp, attr_name, attr_value)
# temp.algo_type = 'clf'
temp.save()
# Fallback branch that does nothing.
# NOTE(review): presumably pairs with `if target_type1 == 'Continuous':` -
# indentation was lost, so confirm which `if` this closes.
else:
pass