0% found this document useful (0 votes)

19 views

Data Visualization

The document provides a comprehensive guide on various data visualization techniques using Python libraries such as Matplotlib and Seaborn. It includes multiple examples of visualizing datasets related to heart disease and Titanic survival, showcasing different plot types like bar plots, pie charts, density plots, and correlation heatmaps. Additionally, it emphasizes the importance of visualizing data distributions and relationships between features for better insights.

Uploaded by

wfarouk

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

19 views

Data Visualization

Uploaded by

wfarouk

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 159

Mastering Data Visualization Techniques

(Part 1)
Prepared by: Syed Afroz Ali
plt.figure(figsize = (8, 6))
plt.ticklabel_format(style = 'plain')
sns.barplot(x = heart["ChestPainType"], y = heart["Cholest
erol"], palette = "tab20");

sns.catplot(data = titanic , x ="Embarked" , y ="Age" , col

="Survived" , kind="bar" , ci =None)
plt.show()

wine.plot(kind='density', subplots=True, layout=(4,3), shar

ex=False, figsize= (14,8))
plt.show()

numeric_feature = titanic.dtypes!=object
final_numeric_feature = titanic.columns[numeric_feature].t
olist()

titanic[final_numeric_feature].plot(kind='density', subplots
=True, layout=(1,7), sharex=False, figsize= (20,4))
plt.show()

heart["ChestPainType"].value_counts()[:3].plot.pie(figsize
= (5, 5),
autopct = '%1.0f%%')
plt.title("Pie Chart")
plt.xticks(rotation = 90)
plt.show()
plt.pie(heart['ChestPainType'].value_counts(),labels=heart[
'ChestPainType'].unique(),autopct = '%1.2f%%');

plt.figure(figsize = (6, 4))

counts = heart["ChestPainType"].value_counts()
explode = (0, 0.1, 0.2, 0.3)
colors = ['#A85CF9', '#FF4949', '#BDF2D5', '#FF06B7', '#4B
7BE5', '#FF5D5D', '#FAC213', '#37E2D5', '#6D8B74', '#E9D5
CA']
counts.plot(kind = 'pie', fontsize = 12, colors = colors, expl
ode = explode, autopct = '%1.1f%%')
plt.axis('equal')
plt.legend(labels = counts.index, loc = "best")
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
my_circle=plt.Circle( (0,0), 0.9, color='white')
plt.pie(titanic['Embarked'].value_counts()[:10].values, label
s = titanic['Embarked'].value_counts()[:10].index)
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

my_circle=plt.Circle( (0,0), 0.9, color='pink')

plt.pie(titanic['Embarked'].value_counts()[:10].values, label
s = titanic['Embarked'].value_counts()[:10].index)
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

•• Join my WhatsApp Channel for the latest updates on AI:

https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
•• Join my LinkedIn group for the latest updates on Machine
Learning: https://www.linkedin.com/groups/7436898/

# Count plot of RestingECG with the count written on top of every bar.
plt.figure(figsize=(7, 4))
ax = sns.countplot(x=heart['RestingECG'])
# Label each group of bars that the count plot created.
for bars in ax.containers:
    ax.bar_label(bars)
plt.title("Count of RestingECG", fontsize=15, color='Blue')
# Visualizing the distribution of every feature, split by heart-disease status
plt.figure(figsize=(20, 8))

# The 2 x 6 grid has room for at most 12 columns of `heart`.
for i, column in enumerate(heart.columns, 1):
    plt.subplot(2, 6, i)
    # Overlay the two groups; the red histogram is more transparent so the
    # blue one stays visible underneath.
    heart[heart["HeartDisease"] == 0][column].hist(
        bins=35, color='blue', label='Heart Disease = NO', alpha=0.9)
    heart[heart["HeartDisease"] == 1][column].hist(
        bins=35, color='red', label='Heart Disease = YES', alpha=0.5)
    plt.legend()
    plt.xlabel(column)

# Count plots of the categorical columns, coloured by ExerciseAngina.
cat = ['Sex', 'RestingECG']

fig, ax = plt.subplots(1, 2, figsize=(10, 4))
flat_axes = ax.flatten()

# One subplot per categorical column.
for column, axes in zip(cat, flat_axes):
    sns.countplot(ax=axes, x=heart[column], hue=heart['ExerciseAngina'],
                  palette='magma', alpha=0.8)

# Hide any axes of the grid that did not receive a plot.
for unused in flat_axes[len(cat):]:
    unused.set_visible(False)

plt.tight_layout()
plt.show()
plt.figure(figsize=(11,5))
plt.gcf().text(.55, .95, "Box Plot", fontsize = 40, color='Red'
,ha='center', va='center')
sns.boxenplot(x=heart['RestingECG'] , y = heart['Cholester
ol'],palette="Set1")
plt.show()

# Facet along the columns to show a categorical variable using the "col" parameter
plt.figure(figsize=(11,5))
sns.catplot(x="ChestPainType" , y = "MaxHR", hue= "Sex",
col="RestingECG", kind="boxen",palette="Set2" , h
eight=5, aspect=1 ,data=heart)
plt.show();
plt.figure(figsize=(12,5))
params = dict(data=titanic ,x = titanic.Pclass ,y = titanic.F
are ,hue=titanic.Sex,dodge=True)
sns.stripplot(**params , size=8,jitter=0.35,palette=['#33FF
66','#FF6600','Blue'],edgecolor='black',linewidth=1)
sns.boxplot(**params ,palette=['#BDBDBD','#E0E0E0'],line
width=6)
plt.show()

# Plot a subset of variables

g = sns.PairGrid(titanic, hue='Pclass' ,x_vars=["Fare" , "Ag
e"],y_vars=["Fare" , "Age"],
height=6, aspect=1)
g = g.map_offdiag(plt.scatter , edgecolor="w", s=130)
g = g.map_diag(plt.hist , edgecolor ='w', linewidth=2)
g = g.add_legend()
plt.show()

•• Join my WhatsApp Channel for the latest updates on AI:

https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
features_mean= list(wine.columns[:6])

num_rows, num_cols = 3,2

fig, axes = plt.subplots(num_rows, num_cols, figsize=(20, 8

))
fig.tight_layout()

for index, column in enumerate(wine[features_mean].colu

mns):
i,j = (index // num_cols, index % num_cols)
g = sns.distplot(wine[column], color="m", label="%.2f"%
(wine[column].skew()), ax=axes[i,j])
g = g.legend(loc="best")

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
y = heart['Sex']

# Explore Age distibution

g = sns.kdeplot(heart["Age"][(y == 'M') & (heart["Age"].not
null())], color="Red", shade=True)
g = sns.kdeplot(heart["Age"][(y == 'F') & (heart["Age"].notn
ull())], ax=g, color="Blue", shade=True)
g.set_xlabel("Age")
g.set_ylabel("Frequency")
g = g.legend(["Male","Female"])
raw_df = heart [['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholester
ol', 'FastingBS',
'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
'HeartDisease']]
# Function to print the width of each bar next to the bar
def barw(ax):
    """Annotate every bar patch of *ax* with its width, rounded to 2 decimals.

    Meant for horizontal bar charts, where the bar width is the plotted value.

    Parameters
    ----------
    ax : object exposing ``patches`` and ``annotate``
        For example the Axes returned by ``sns.countplot``.
    """
    for p in ax.patches:
        val = p.get_width()  # bar length along x, i.e. the plotted value
        x = p.get_x() + p.get_width()  # x-position: end of the bar
        y = p.get_y() + p.get_height() / 2  # y-position: middle of the bar
        ax.annotate(round(val, 2), (x, y))
# Horizontal count plot of ChestPainType, most frequent category first,
# with the count written at the end of every bar.
plt.figure(figsize=(10, 5))
ax0 = sns.countplot(data=heart, y='ChestPainType',
                    order=heart['ChestPainType'].value_counts().index)
barw(ax0)
plt.show()

sns.set_style('white')
wine.plot(kind="scatter", x="fixed acidity", y="total sulfur
dioxide", alpha=.5,
s=wine["alcohol"], label="alcohol", figsize=(10,7),
c="chlorides", cmap=plt.get_cmap("jet"), colorbar=
True,
sharex=False)
plt.legend()
plt.show()
#Correlation with Response Variable class
X = wine.drop(['quality'], axis=1)
y = wine['quality']

X.corrwith(y).plot.bar(figsize=(16, 4), rot=90, grid=True)

plt.title('Correlation with quality',
fontsize=30,
color='Red',
font='Times New Roman')
plt.show()

import matplotlib
matplotlib.rcParams.update({'font.size': 20})
ax=heart['Sex'].value_counts().plot.pie(explode=[0.1, 0.1],autopct='
%1.2f%%',shadow=True);
ax.set_title(label = "Sex", fontsize = 40,color='DarkOrange',font='Luc
ida Calligraphy');
plt.legend(labels=['M','F'])
plt.axis('off');
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G

matplotlib.rcParams.update({'font.size': 10})
corr = wine.corr()
mask = np.triu(np.ones_like(corr, dtype=bool))
plt.title('Correlation Analysis',
fontsize=25,
color='DarkGreen',
font='Times New Roman')
sns.heatmap(corr,
mask=mask,
annot=True,
lw=0,
linecolor='white',
cmap='viridis',
fmt="0.2f")
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()
#set configuration for charts
plt.rcParams["figure.figsize"]=[20 , 5]
plt.rcParams["font.size"]=15
plt.rcParams["legend.fontsize"]="medium"
plt.rcParams["figure.titlesize"]="medium"

def plot_disribution(data, x, color, bins):
    """Show column *x* of *data* as a histogram, a box plot and a swarm plot.

    The three plots are drawn side by side in a 1 x 3 grid on the current
    figure, and the figure is shown at the end.

    Parameters
    ----------
    data : table indexable by column name (e.g. a pandas DataFrame)
    x : name of the column to plot
    color : colour passed to the box plot and the swarm plot
    bins : bin specification passed to the histogram
    """
    mean = data[x].mean()
    std = data[x].std()
    info = dict(data=data, x=x, color=color)

    # --- histogram with the mean marked ---
    plt.subplot(1, 3, 1)
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # consider migrating to sns.histplot(..., kde=True) once the visual
    # differences have been checked.
    sns.distplot(a=data[x], bins=bins)
    plt.xlabel(f"bins of {x}")
    plt.axvline(mean, label="mean", color="red")
    plt.ylabel("frequency")
    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    plt.legend([r"${\sigma}$ = %d" % std, f"mean = {mean:.2f}"])
    plt.title(f"histogram of {x} column")

    # --- box plot ---
    plt.subplot(1, 3, 2)
    sns.boxplot(**info)
    plt.xlabel(f"{x}")
    plt.title(f"box plot of {x} column")

    # --- swarm plot ---
    plt.subplot(1, 3, 3)
    sns.swarmplot(**info)
    plt.xlabel(f"{x}")
    plt.title(f"distribution of points in {x} column")

    plt.suptitle(f"Distribution of {x} column", fontsize=15, color="red")
    plt.show()

# Five-wide bins starting at 29 and extending past 77.
age_bins = np.arange(29, 77 + 5, 5)

base_color = sns.color_palette()[4]
plot_disribution(data=heart, x="Age", color=base_color, bins=age_bins)
sns.set_style("white")
sns.set_context("poster",font_scale = .7)
palette = ["#1d7874","#679289","#f4c095","#ee2e31","#ffb563","#91
8450","#f85e00","#a41623","#9a031e","#d6d6d6","#ffee32","#ffd100
","#333533","#202020"]
# sns.palplot(sns.color_palette(palette))
# plt.show()

plt.subplots(figsize=(20,8))
p = sns.barplot(x=heart["ChestPainType"][:14],y=heart["Age"],palett
e=palette, saturation=1, edgecolor = "#1c1c1c", linewidth = 2)
p.axes.set_title("\n ChestPainType \n", fontsize=25)
plt.ylabel("Total Member" , fontsize = 20)
plt.xlabel("\n Name" , fontsize = 20)
# plt.yscale("log")
plt.xticks(rotation = 90)
for container in p.containers:
p.bar_label(container,label_type = "center",padding = 6,size = 15,c
olor = "black",rotation = 90,
bbox={"boxstyle": "round", "pad": 0.6, "facecolor": "orange", "edg
ecolor": "black", "alpha": 1})

sns.despine(left=True, bottom=True)
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
fig,axis=plt.subplots(ncols=4,nrows=3,figsize=(15,10))
index=0
axis=axis.flatten()

for col,values in wine.items():

sns.boxplot(y=col,data=wine,color='r',ax=axis[index])
index+=1
plt.tight_layout(pad=0.5,w_pad=0.7,h_pad=5.0);

#checking the target variables for distribution

sns.distplot(heart['Cholesterol'],color='Brown')
plt.axvline(x=heart['Cholesterol'].mean(), color='Brown', lin
estyle='--', linewidth=2)
plt.title('Cholesterol');
s = sns.countplot(x = 'quality',data = wine)
sizes=[]
for p in s.patches:
height = p.get_height()
sizes.append(height)
s.text(p.get_x()+p.get_width()/2.,
height + 3,
'{:1.2f}%'.format(height/len(wine)*100),
ha="center", fontsize=14)

countfeature = ["Survived", "Pclass", "Sex", "SibSp", "Parc

h", "Embarked"]
countlist = list(enumerate(countfeature))

plt.figure(figsize = (20,10))
plt.suptitle("Countplot of Categorical Features", fontsize=1
8)
for i in countlist:
plt.subplot(2,3,i[0]+1)
sns.countplot(data = titanic, x = i[1], hue = "Survived", p
alette="rainbow")
plt.ylabel("")
plt.legend(['Not Survived', 'Survived'], loc='upper center'
, prop={'size': 10})
plt.tight_layout()
plt.show()
numfeature = ["Age", "Fare"]
enumfeat = list(enumerate(numfeature))

plt.figure(figsize=(20,7))
plt.suptitle("Distribution and Outliers of Numerical Data", fontsize=2
0)
for i in enumfeat:
plt.subplot(1,4,i[0]+1)
sns.boxplot(data = titanic[i[1]], palette="rainbow")
plt.xlabel(str(i[1]))
for i in enumfeat:
plt.subplot(1,4,i[0]+3)
sns.histplot(data = titanic[i[1]], palette="rainbow", bins=15)
plt.xlabel(str(i[1]))
plt.tight_layout()
plt.show()

•• Join my WhatsApp Channel for the latest updates on AI:

https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
plt.figure(figsize=(20,6))
plt.title("Correlation of quality column with Independent Features", f
ontsize=15)
corr = wine.corr()["quality"].sort_values(ascending=False)[1:]
sns.barplot(x=corr.index, y=corr, color=(0.90,0.30,0.50))
plt.tight_layout()
plt.xticks(rotation = 90)
plt.show()

plt.figure(figsize=(15,5))
plt.suptitle("Probability Distribution of numerical columns
according to number of Survived", fontsize = 20)
for i in enumfeat:
plt.subplot(1,2,i[0]+1)
sns.kdeplot(data=titanic, x=i[1], hue="Survived")
plt.tight_layout()

plt.show()
import missingno as msno
msno.matrix(titanic, color=(0.50,0.30,0.80))
plt.show()
x = titanic.isnull().sum()
for a, b in x.items():
if b > 0:
print(f"There are {b} missing values in column: {a}")
Mastering Data Visualization Techniques
(Part 2)
Prepared by: Syed Afroz Ali

Feature Importance Visualization

import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
plt.style.use('seaborn-white')

# Create train and test splits

target_name = 'quality'
X = wine.drop(target_name, axis=1)
label = wine[target_name]
X_train, X_test, y_train, y_test = train_test_split(
    X, label, test_size=0.2, random_state=42, stratify=label)

# Fit a class-balanced decision tree on the training split.
# The estimator gets its own name so that the imported `sklearn.tree` module
# is not overwritten (re-running this cell would otherwise fail).
clf = DecisionTreeClassifier(
    class_weight='balanced',
    min_weight_fraction_leaf=0.01,
)
clf = clf.fit(X_train, y_train)
importances = clf.feature_importances_
feature_names = X.columns
# Feature positions ordered from most to least important.
indices = np.argsort(importances)[::-1]

# Print the feature ranking

for rank, feature_idx in enumerate(indices, start=1):
    print("%d. feature %d (%f)" % (rank, feature_idx, importances[feature_idx]))
# Plot the feature importances of the fitted model
def feature_importance_graph(indices, importances, feature_names, annotation=None):
    """Draw a horizontal bar chart of feature importances.

    Parameters
    ----------
    indices : sequence of int
        Feature positions in the order they should be drawn.
    importances : array indexable by *indices*
        Importance value of every feature.
    feature_names : array indexable by *indices*
        Name of every feature, used as tick labels.
    annotation : str, optional
        Free text placed in the top-right corner of the plot. The earlier
        version always drew a note about "duration and contacts", which are
        features of a different dataset; a note is now drawn only on request.
    """
    positions = range(len(indices))

    plt.figure(figsize=(12, 6))
    plt.title("Determining Feature importances \n with DecisionTreeClassifier",
              fontsize=18)
    plt.barh(positions, importances[indices], color='#31B173', align="center")
    plt.yticks(positions, feature_names[indices],
               rotation='horizontal', fontsize=14)
    plt.ylim([-1, len(indices)])

    if annotation:
        axes = plt.gca()
        # Axes-fraction coordinates keep the note in place for any data range.
        axes.text(0.95, 0.95, annotation, transform=axes.transAxes,
                  ha='right', va='top', color='Blue', fontsize=15)

feature_importance_graph(indices, importances, feature_names)

plt.show()
# Visulazing the distibution of the data for every feature
plt.figure(figsize=(20, 20))

for i, column in enumerate(df.columns, 1):

plt.subplot(3, 3, i)

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
df[df["Outcome"] == 0] [column].hist(bins=35, color='blue',
label='Have Diabetes = NO', alpha=0.6)
df[df["Outcome"] == 1] [column].hist(bins=35, color='red',
label='Have Diabetes = YES', alpha=0.6)
plt.legend()
plt.xlabel(column)
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.classifier import ClassPredictionError
from yellowbrick.classifier import ROCAUC
from yellowbrick.style import set_palette

from statsmodels.graphics.gofplots import qqplot

# --- Variable, Color & Plot Size ---

var = titanic['Fare']
color = color_mix[2]
fig = plt.figure(figsize = (14, 10))

# --- Skewness & Kurtosis ---

print('\033[35m\033[1m'+'.: Sepal Length Skewness & Kurtosis
:.'+'\033[0m')
print('*' * 40)

•• Join my WhatsApp Channel for the latest updates on AI:

https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
print('Skewness:'+'\033[35m\033[1m {:.3f}'.format(var.skew(axis = 0,
skipna = True)))
print('\033[0m'+'Kurtosis:'+'\033[35m\033[1m {:.3f}'.format(var.kurt(axis
= 0, skipna = True)))
print('\n')

# --- General Title ---

fig.suptitle('Sepal Length Distribution', fontweight = 'bold', fontsize =
16, fontfamily = 'sans-serif',
color = black_grad[0])
fig.subplots_adjust(top = 0.9)

# --- Histogram ---

ax_1=fig.add_subplot(2, 2, 2)
plt.title('Histogram Plot', fontweight = 'bold', fontsize = 14, fontfamily =
'sans-serif', color = black_grad[1])
sns.histplot(data = titanic, x = var, kde = True, color = color)
plt.xlabel('Total', fontweight = 'regular', fontsize = 11, fontfamily =
'sans-serif', color = black_grad[1])
plt.ylabel('Sepal Length', fontweight = 'regular', fontsize = 11,
fontfamily = 'sans-serif', color = black_grad[1])
plt.grid(axis = 'x', alpha = 0)
plt.grid(axis = 'y', alpha = 0.2)

# --- Q-Q Plot ---

ax_2 = fig.add_subplot(2, 2, 4)
plt.title('Q-Q Plot', fontweight = 'bold', fontsize = 14, fontfamily = 'sans-
serif', color = black_grad[1])
qqplot(var, fit = True, line = '45', ax = ax_2, markerfacecolor = color,
markeredgecolor = color, alpha = 0.6)
plt.xlabel('Theoritical Quantiles', fontweight = 'regular', fontsize = 11,
fontfamily = 'sans-serif',
color = black_grad[1])
plt.ylabel('Sample Quantiles', fontweight = 'regular', fontsize = 11,
fontfamily = 'sans-serif', color = black_grad[1])
plt.grid(axis = 'both', alpha = 0.2)

# --- Boxen Plot ---

ax_3 = fig.add_subplot(1, 2, 1)
plt.title('Boxen Plot', fontweight = 'bold', fontsize = 14, fontfamily =
'sans-serif', color = black_grad[1])
sns.boxenplot(y = var, data = titanic, color = color, linewidth = 1.5)
plt.ylabel('Sepal Length', fontweight = 'regular', fontsize = 11,
fontfamily = 'sans-serif', color = black_grad[1])
plt.grid(axis = 'y', alpha = 0.2)
plt.show();

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, classification_report,
                             precision_recall_curve)
from yellowbrick.model_selection import FeatureImportances, LearningCurve

# --- Applying Logistic Regression ---

LRclassifier = LogisticRegression(solver='liblinear')
LRclassifier.fit(X_train, y_train)

y_pred_LR = LRclassifier.predict(X_test)

# --- LR Accuracy ---

# Conventional argument order is (y_true, y_pred); the accuracy value is the
# same either way.
LRAcc = accuracy_score(y_test, y_pred_LR)
print('.:. Logistic Regression Accuracy:'
      + '\033[35m\033[1m {:.2f}%'.format(LRAcc * 100) + ' \033[0m.:.')

# --- LR Classification Report ---
print('\033[35m\033[1m\n.: Classification Report' + '\033[0m')
print('*' * 25)
print(classification_report(y_test, y_pred_LR))

# --- Performance Evaluation ---

print('\033[35m\n\033[1m' + '.: Performance Evaluation' + '\033[0m')
print('*' * 26)
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize = (14, 10))

# --- LR Confusion Matrix ---

logmatrix = ConfusionMatrix(LRclassifier, ax=ax1,
cmap='RdPu', title='Logistic Regression Confusion Matrix')
logmatrix.fit(X_train, y_train)
logmatrix.score(X_test, y_test)
logmatrix.finalize()

# --- LR ROC AUC ---

logrocauc = ROCAUC(LRclassifier, ax = ax2, title = 'Logistic
Regression ROC AUC Plot')
logrocauc.fit(X_train, y_train)
logrocauc.score(X_test, y_test)
logrocauc.finalize()

# --- LR Learning Curve ---

loglc = LearningCurve(LRclassifier, ax = ax3, title = 'Logistic
Regression Learning Curve')
loglc.fit(X_train, y_train)
loglc.finalize()
plt.tight_layout();
cat = ['Sex','Embarked']
sns.set_theme(rc = {'figure.dpi': 100, 'axes.labelsize': 7,
'axes.facecolor': '#f0eee9', 'grid.color': '#fffdfa',
'figure.facecolor': '#e8e6e1'}, font_scale = 0.55)
fig, ax = plt.subplots(5, 2, figsize = (7, 18))
for indx, (column, axes) in list(enumerate(list(zip(cat,
ax.flatten())))):

sns.countplot(ax = axes, x = titanic[column], hue = titanic['Pclass'],

palette = 'magma', alpha = 0.8)

else:
[axes.set_visible(False) for axes in ax.flatten()[indx + 1:]]
plt.tight_layout()
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
num = wine.select_dtypes(include="number")
fig, ax = plt.subplots(14, 1, figsize = (7, 30))
for indx, (column, axes) in list(enumerate(list(zip(num, ax.flatten())))):

sns.scatterplot(ax = axes, y = wine[column].index, x =

wine[column],hue = wine['total sulfur dioxide'],
palette = 'magma', alpha = 0.8)

else:
[axes.set_visible(False) for axes in ax.flatten()[indx + 1:]]
plt.tight_layout()
plt.show()
# Stacked histogram of every numeric wine column, coloured by quality,
# with the minimum, quartiles and maximum marked by red vertical lines.
num = wine.select_dtypes(include="number")
# One row per numeric column; squeeze=False keeps `ax` two-dimensional even
# when there is a single column.
fig, ax = plt.subplots(len(num.columns), 1, figsize=(14, 35), squeeze=False)
for column, axes in zip(num, ax.flatten()):
    # The legend seaborn generates already lists the quality levels, so it is
    # kept as is. The earlier code replaced it with a 'Survived' 0/1 legend
    # left over from the Titanic example.
    sns.histplot(ax=axes, x=wine[column], hue=wine['quality'],
                 palette='magma', alpha=0.8, multiple='stack')
    for q in np.quantile(wine[column], [0, 0.25, 0.50, 0.75, 1]):
        axes.axvline(x=q, linewidth=0.5, color='r')
plt.tight_layout()
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
cat = ['Sex','Embarked']
fig, ax = plt.subplots(5, 2, figsize = (6.5, 10))
for indx, (column, axes) in list(enumerate(list(zip(cat, ax.flatten())))):
sns.violinplot(ax = axes, x = titanic[column],
y = titanic['Fare'],
scale = 'width', linewidth = 0.5,
palette = 'magma', inner = None)
plt.setp(axes.collections, alpha = 0.3)
sns.stripplot(ax = axes, x = titanic[column],
y = titanic['Fare'],
palette = 'magma', alpha = 0.9,
s = 1.5, jitter = 0.07)
sns.pointplot(ax = axes, x = titanic[column],
y = titanic['Fare'],
color = '#ff5736', scale = 0.25,
estimator = np.mean, ci = 'sd',
errwidth = 0.5, capsize = 0.15, join = True)
plt.setp(axes.lines, zorder = 100)
plt.setp(axes.collections, zorder = 100)
else:
[axes.set_visible(False) for axes in ax.flatten()[indx + 1:]]
plt.tight_layout()
plt.show()
sns.set(rc={"axes.facecolor":"#a1c45a" , "axes.grid" : False})
plt.figure(figsize=(11,6))
plt.gcf().text(.51, .84, "Box Plot", fontsize = 40, color='Black' ,ha='center',
va='center')
sns.boxenplot(x=titanic['Embarked'] , y = titanic['Fare'],palette="Set1")
plt.show()
# Facet along the columns to show a categorical variable using "col"
plt.figure(figsize=(11,7))
sns.catplot(x="Embarked" , y = "Fare", hue= "Pclass",
col="Sex", kind="boxen",palette="Set2" , height=8, aspect=1
,data=titanic)
plt.show();
plt.figure(figsize=(16,7))
sns.set(rc={"axes.facecolor":"#b0deff","axes.grid":False,
'xtick.labelsize':15,'ytick.labelsize':15,
'axes.labelsize':20,'figure.figsize':(20.0, 9.0)})
params = dict(data=titanic ,x = titanic.Pclass ,y = titanic.Fare
,hue=titanic.Pclass,dodge=True)
sns.stripplot(**params ,
size=8,jitter=0.35,palette=['#33FF66','#FF6600'],edgecolor='black',linewidth=1)
sns.boxplot(**params ,palette=['#BDBDBD','#E0E0E0'],linewidth=6)
plt.show()
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Plot a subset of variables
g = sns.PairGrid(titanic, hue='Pclass' ,x_vars=["Fare" , "Age"],y_vars=["Fare" ,
"Age"],
height=6, aspect=1)
g = g.map_offdiag(plt.scatter , edgecolor="w", s=130)
g = g.map_diag(plt.hist , edgecolor ='w', linewidth=2)
g = g.add_legend()
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
df = pd.read_csv("winequality-red.csv")
features_mean= list(df.columns[:10])
num_rows, num_cols = 5,2
fig, axes = plt.subplots(num_rows, num_cols, figsize=(25, 12))
fig.tight_layout()
for index, column in enumerate(df[features_mean].columns):
i,j = (index // num_cols, index % num_cols)
g = sns.distplot(df[column], color="m", label="%.2f"%(df[column].skew()),
ax=axes[i,j])
g = g.legend(loc="best")
# Explore Age distribution by sex.
# NOTE(review): `df` holds the wine data at this point and has no 'Sex' or
# 'Age' column. The 'male'/'female' values suggest the Titanic frame was
# meant; confirm before relying on this.
y = titanic['Sex']
has_age = titanic["Age"].notnull()

g = sns.kdeplot(titanic["Age"][(y == 'male') & has_age],
                color="Red", fill=True)
g = sns.kdeplot(titanic["Age"][(y == 'female') & has_age],
                ax=g, color="Blue", fill=True)
g.set_xlabel("Age")
g.set_ylabel("Frequency")
g = g.legend(["Male", "Female"])
# Compare the density of the first ten wine features for quality 5 vs quality 6.
df = pd.read_csv("winequality-red.csv")
features_mean = list(df.columns[:10])
df_b = df[df['quality'] == 5]
df_m = df[df['quality'] == 6]
num_rows, num_cols = 5, 2
fig, axes = plt.subplots(num_rows, num_cols, figsize=(25, 12))
fig.tight_layout()
for index, column in enumerate(features_mean):
    # Fill the grid row by row.
    i, j = divmod(index, num_cols)
    g = sns.kdeplot(df_b[column], color="Red", fill=True, ax=axes[i, j])
    g = sns.kdeplot(df_m[column], ax=g, color="Blue", fill=True)
    g.set_xlabel(column)
    # Legend names the two quality groups drawn above (red first, blue second).
    g = g.legend(["quality = 5", "quality = 6"])
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
raw_df = raw_df [['name', 'year', 'selling_price', 'km_driven',
'fuel', 'seller_type',
'transmission', 'owner']]
# Function to print the width of each bar next to the bar
def barw(ax):
    """Annotate every bar patch of *ax* with its width, rounded to 2 decimals.

    Meant for horizontal bar charts, where the bar width is the plotted value.

    Parameters
    ----------
    ax : object exposing ``patches`` and ``annotate``
        For example the Axes returned by ``sns.countplot``.
    """
    for p in ax.patches:
        val = p.get_width()  # bar length along x, i.e. the plotted value
        x = p.get_x() + p.get_width()  # x-position: end of the bar
        y = p.get_y() + p.get_height() / 2  # y-position: middle of the bar
        ax.annotate(round(val, 2), (x, y))
plt.figure(figsize=(10,5))
ax0 = sns.countplot(data = raw_df, y ='owner', order =
raw_df['owner'].value_counts().index)
barw(ax0)
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Mean selling price per owner category, highest first.
raw_df = pd.read_csv('datasets_33080_1320127_CAR DETAILS FROM CAR DEKHO.csv')
raw_df = raw_df[['name', 'year', 'selling_price', 'km_driven', 'fuel',
                 'seller_type', 'transmission', 'owner']]
# numeric_only=True: the frame also holds text columns (name, fuel, ...),
# which cannot be averaged.
df_gc = raw_df.groupby('owner').mean(numeric_only=True)
df_gc.reset_index(inplace=True)
df_gc[['owner', 'selling_price']].sort_values('selling_price', ascending=False)

# Owner categories ordered by descending mean selling price.
owner_order = df_gc.sort_values('selling_price', ascending=False)['owner']

plt.figure(figsize=(10, 5))
ax1 = sns.barplot(data=raw_df, x='selling_price', y='owner',
                  order=owner_order, ci=None)
barw(ax1)
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
sns.set_style('white')
df.plot(kind="scatter", x="fixed acidity", y="total sulfur dioxide",
alpha=.5,
s=df["alcohol"]/10, label="alcohol", figsize=(10,7),
c="chlorides", cmap=plt.get_cmap("jet"), colorbar=True,
sharex=False)
plt.legend()
plt.show()
# Lower-triangle correlation heatmap of the numeric heart columns.
matplotlib.rcParams.update({'font.size': 20})
# Correlation is only defined for numeric columns; `heart` also holds text
# columns (e.g. 'Sex'), so restrict the frame first.
corr = heart.select_dtypes(include="number").corr()
# Mask the upper triangle (diagonal included) so each pair is shown once.
mask = np.triu(np.ones_like(corr, dtype=bool))
plt.figure(dpi=100)
plt.title('Correlation Analysis',
          fontsize=25,
          color='DarkOrange',
          font='Lucida Calligraphy')
sns.heatmap(corr,
            mask=mask,
            annot=True,
            lw=0,
            linecolor='white',
            cmap='viridis',
            fmt="0.2f")
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
matplotlib.rcParams.update({'font.size': 40})
ax=heart['Sex'].value_counts().plot.pie(explode=[0.1,
0.1],autopct='%1.2f%%',shadow=True);
ax.set_title(label = "Sex", fontsize =
40,color='DarkOrange',font='Lucida Calligraphy');
plt.legend(labels=['M','F'])
plt.axis('off');

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G

heart["age_bins"]= pd.cut(heart["Age"] , bins=[29 , 40 , 50 , 60

, 80] , labels=["adult" , "fortieth" , "old" , "ancient"] )
def count_plot(data, x=None, y=None, figsize=None, title=None, color=None,
               prop=False, rotation_x=0):
    """Draw a count plot of one column, most frequent category first.

    Every bar is labelled with its count, or with its share of the total
    when *prop* is true. Exactly one of *x* and *y* must be given: *x* draws
    vertical bars, *y* draws horizontal bars.

    Parameters
    ----------
    data : table indexable by column name (e.g. a pandas DataFrame)
    x, y : column name to count along the x or the y axis
    figsize : figure size; ``(12, 7)`` is used when omitted
    title : plot title
    color : bar colour passed to ``sns.countplot``
    prop : label bars with percentages instead of raw counts
    rotation_x : rotation of the x tick labels (vertical bars only)

    Raises
    ------
    ValueError
        If neither or both of *x* and *y* are given.
    """
    if x is None and y is None:
        raise ValueError("Expected y or x")
    if x is not None and y is not None:
        raise ValueError("Expected y or x not both")

    count_type = data[y if x is None else x].value_counts(ascending=False)
    total = count_type.sum()
    type_order = count_type.index

    def bar_text(count):
        if prop:
            return "{:0.1f}%".format(100 * count / total)
        return "{}".format(count)

    plt.figure(figsize=figsize if figsize is not None else (12, 7))

    # The bars are drawn in `type_order`, so the i-th count belongs to the
    # bar at position i. Using positions avoids looking counts up by label,
    # which breaks for integer or non-string categories.
    counts = count_type.to_numpy()
    if x is None:
        sns.countplot(data=data, y=y, color=color, order=type_order)
        for position, count in enumerate(counts):
            plt.text(count + 1, position, bar_text(count), va="center")
    else:
        sns.countplot(data=data, x=x, color=color, order=type_order)
        plt.xticks(rotation=rotation_x)
        for position, count in enumerate(counts):
            plt.text(position, count + 2, bar_text(count), ha="center")

    plt.title(title)
    plt.show()

count_plot(data = heart , x ="age_bins")

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
# Has no effect unless the frame has a column literally named "target".
heart.rename(columns={"target": "have disease"}, inplace=True)
# Recode only the target column. A frame-wide replace would also rewrite
# every 0 and 1 found in the other columns.
heart["HeartDisease"] = heart["HeartDisease"].replace({1: "yes", 0: "no"})
count_plot(data=heart, x="HeartDisease")

count_plot(data=heart, x="HeartDisease", prop=True)

from sklearn.metrics import confusion_matrix
# 4697: no's, 4232: yes
conf_matrix = confusion_matrix(y_train, y_train_pred)
f, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", linewidths=.5,
ax=ax)
plt.title("Confusion Matrix", fontsize=20)
plt.subplots_adjust(left=0.15, right=0.99, bottom=0.15,
top=0.99)
ax.set_yticks(np.arange(conf_matrix.shape[0]) + 0.5,
minor=False)
ax.set_xticklabels("")
ax.set_yticklabels(['Refused T. Deposits', 'Accepted T.
Deposits'], fontsize=16, rotation=360)
plt.show()
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
lr = LogisticRegression()
lr.fit(X_train,y_train)
y_pred_lr = lr.predict(X_test)
confusion_matrix(y_test,y_pred_lr)

def make_confusion_matrix(cf,
                          group_names=None,
                          categories='auto',
                          count=True,
                          percent=True,
                          cbar=True,
                          xyticks=True,
                          xyplotlabels=True,
                          sum_stats=True,
                          figsize=None,
                          cmap='Blues',
                          title=None):
    """Plot a confusion matrix as an annotated seaborn heatmap.

    Parameters
    ----------
    cf : 2-D numpy array holding the confusion matrix
    group_names : labels written in each cell, in row-major order; ignored
        unless there is exactly one per cell
    categories : tick labels passed to ``sns.heatmap``
    count : write the raw count in every cell
    percent : write each cell's share of the total in every cell
    cbar : draw the colour bar
    xyticks : show the category tick labels
    xyplotlabels : label the axes 'True label' / 'Predicted label'
    sum_stats : append accuracy (and, for a 2 x 2 matrix, precision, recall
        and F1) below the plot
    figsize : figure size; the matplotlib default is used when omitted
    cmap : colour map of the heatmap
    title : plot title; none is drawn when omitted
    """
    # CODE TO GENERATE TEXT INSIDE EACH SQUARE
    blanks = [''] * cf.size

    if group_names and len(group_names) == cf.size:
        group_labels = ["{}\n".format(value) for value in group_names]
    else:
        group_labels = blanks

    if count:
        group_counts = ["{0:0.0f}\n".format(value) for value in cf.flatten()]
    else:
        group_counts = blanks

    if percent:
        group_percentages = ["{0:.2%}".format(value)
                             for value in cf.flatten() / np.sum(cf)]
    else:
        group_percentages = blanks

    box_labels = [f"{v1}{v2}{v3}".strip()
                  for v1, v2, v3 in zip(group_labels, group_counts, group_percentages)]
    box_labels = np.asarray(box_labels).reshape(cf.shape[0], cf.shape[1])

    # CODE TO GENERATE SUMMARY STATISTICS & TEXT FOR SUMMARY STATS
    if sum_stats:
        # Accuracy is sum of diagonal divided by total observations
        accuracy = np.trace(cf) / float(np.sum(cf))
        # If it is a binary confusion matrix, show some more stats
        if len(cf) == 2:
            # Metrics for binary confusion matrices (row = true, col = predicted)
            precision = cf[1, 1] / sum(cf[:, 1])
            recall = cf[1, 1] / sum(cf[1, :])
            f1_score = 2 * precision * recall / (precision + recall)
            stats_text = (
                "\n\nAccuracy={:0.3f}\nPrecision={:0.3f}\nRecall={:0.3f}"
                "\nF1 Score={:0.3f}"
            ).format(accuracy, precision, recall, f1_score)
        else:
            stats_text = "\n\nAccuracy={:0.3f}".format(accuracy)
    else:
        stats_text = ""

    # SET FIGURE PARAMETERS ACCORDING TO OTHER ARGUMENTS
    if figsize is None:
        # Get default figure size if not set
        figsize = plt.rcParams.get('figure.figsize')

    if not xyticks:
        # Do not show categories if xyticks is False
        categories = False

    # MAKE THE HEATMAP VISUALIZATION
    label_font = {'fontfamily': 'serif', 'size': 12, 'weight': 'bold'}

    fig = plt.figure(figsize=figsize)
    fig.patch.set_facecolor('#f5f6f6')
    sns.heatmap(cf, annot=box_labels, fmt="", linewidths=1, square=True,
                linecolor='#f5f6f6', cmap=cmap, cbar=cbar,
                annot_kws={'fontfamily': 'serif', 'size': 18, 'weight': 'bold'},
                xticklabels=categories,
                yticklabels=categories)

    if xyplotlabels:
        plt.ylabel('True label', **label_font)
        plt.xlabel('Predicted label' + stats_text, **label_font)
    else:
        plt.xlabel(stats_text, **label_font)

    # The title argument was previously accepted but never drawn.
    if title:
        plt.title(title)

vani_cf_matrix = confusion_matrix(y_test,y_pred_lr)
my_cols = [colors[3],colors[2]]

labels = [ 'True Neg','False Pos','False Neg','True Pos']

categories = ['Cat', 'Dog']
make_confusion_matrix(vani_cf_matrix,figsize = (10,5),group_names=labels,cbar =
False,cmap = 'magma',categories=categories,
title = 'Vanila CNN comfusion matrix')

plt.show()

•• Join my WhatsApp Channel for the latest updates on AI:

https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
sns.set_style("white")
sns.set_context("poster",font_scale = .7)
palette =
["#1d7874","#679289","#f4c095","#ee2e31","#ffb563","#918450","#f85e00","#a
41623","#9a031e","#d6d6d6","#ffee32","#ffd100","#333533","#202020"]
# sns.palplot(sns.color_palette(palette))
# plt.show()

plt.subplots(figsize=(20,8))
p = sns.barplot(x=dataset["Pclass"][:14],y=dataset["Age"],palette=palette,
saturation=1, edgecolor = "#1c1c1c", linewidth = 2)
p.axes.set_title("\nTop Anime Community\n", fontsize=25)
plt.ylabel("Total Member" , fontsize = 20)
plt.xlabel("\nAnime Name" , fontsize = 20)
# plt.yscale("log")
plt.xticks(rotation = 90)
for container in p.containers:
p.bar_label(container,label_type = "center",padding = 6,size = 15,color =
"black",rotation = 90,
bbox={"boxstyle": "round", "pad": 0.6, "facecolor": "orange", "edgecolor":
"black", "alpha": 1})

sns.despine(left=True, bottom=True)
plt.show()
numfeature = ["Age", "Fare"]
enumfeat = list(enumerate(numfeature))

plt.figure(figsize=(20,9))
plt.suptitle("Distribution and Outliers of Numerical Data", fontsize=20)
for i in enumfeat:
plt.subplot(1,4,i[0]+1)
sns.boxplot(data = train[i[1]], palette="rainbow")
plt.xlabel(str(i[1]))
for i in enumfeat:
plt.subplot(1,4,i[0]+3)
sns.histplot(data = train[i[1]], palette="rainbow", bins=15)
plt.xlabel(str(i[1]))
plt.tight_layout()
plt.show()
plt.figure(figsize=(15,12))
plt.suptitle("Distribution & Kernel Density Estimation of Numerical
Features", fontsize=20)
for i in enumfeat:
plt.subplot(2,1,i[0]+1)
sns.histplot(x = train[i[1]], kde=True, bins=30,
color=(0.50,0.20,0.70))
plt.tight_layout()
plt.show()
# Correlation of every numeric feature with the Survived column.
plt.figure(figsize=(6, 8))
plt.title("Correlation of Survival column with Independent Features",
          fontsize=15)
# Restrict to numeric columns (correlation is undefined for text columns),
# then drop the target's correlation with itself by name.
corr = (train.select_dtypes(include="number")
        .corr()["Survived"]
        .drop("Survived")
        .sort_values(ascending=False))
sns.barplot(x=corr.index, y=corr, color=(0.90, 0.30, 0.50))
plt.tight_layout()
plt.show()
plt.figure(figsize=(15,5))
plt.suptitle("Probability Distribution of numerical columns according to number
of Survived", fontsize = 20)
for i in enumfeat:
plt.subplot(1,2,i[0]+1)
sns.kdeplot(data=train, x=i[1], hue="Survived")
plt.tight_layout()
plt.show()
import missingno as msno
msno.matrix(train, color=(0.50,0.30,0.80))
plt.show()
x = train.isnull().sum()
for a, b in x.items():
if b > 0:
print(f"There are {b} missing values in column: {a}")
# Global plot styling: yellow figure/axes background, red grid, small labels.
rc = {
    'figure.dpi': 150,
    'axes.labelsize': 4,
    'axes.facecolor': '#F6C90E',
    'grid.color': 'Red',
    'figure.figsize': (12, 5),
    'figure.facecolor': '#F6C90E',
}
sns.set_theme(
    context='notebook',
    style='dark',
    palette='deep',
    font='Comic Sans Ms',
    font_scale=1,
    # color_codes is a boolean switch (remap the "b", "g", "r", ... shorthands
    # to the seaborn palette). The original passed the string 'red', which only
    # worked because a non-empty string is truthy.
    color_codes=True,
    rc=rc,
)

color = ['Green',"Red"]
df.groupby('Sex')['Medal'].count().sort_values(ascending=True).plot(kind="bar",
color=color,alpha=.5);
plt.title("Sex Vs Medalse",fontsize=17,color='Brown',font='Comic Sans
Ms',pad=20);
plt.xlabel("Sex ",fontsize=15,color='#1a4441',font='Comic Sans Ms')
plt.ylabel("Number of Medals",fontsize=15,color='#1a4441',font='Comic Sans
Ms');
plt.legend(loc='best');
plt.savefig('world regions.png');

region_medal=df.groupby('region')['Medal'].count().nlargest(20).reset_index()
region_medal.head()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G

sns.barplot(y='region',x='Medal',data=region_medal)
plt.title('medals by regions')
plt.xlabel('medals')
plt.ylabel('regions')
plt.xticks(rotation=45)
plt.show()
# Count medals per (region, medal type) and chart the 20 regions with the most
# gold medals.
summer_medals = df.groupby(['region', 'Medal']).size().reset_index()
summer_medals.columns = ['region', 'Medal', 'count']

# DataFrame.pivot takes keyword-only arguments in pandas >= 2.0; the keyword
# form also works on older releases. The table is built once and reused.
medal_table = summer_medals.pivot(
    index='region', columns='Medal', values='count'
).fillna(0)
summer_medals_20 = medal_table.sort_values(['Gold'], ascending=False).head(20)

summer_medals_20.plot(kind='bar')
plt.xlabel('Country')
plt.title('Medals by Country- Summer Olympics ')
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
year=df['Year'].value_counts()

plt.figure(figsize=(15,10))
sns.barplot(x=year.index, y=year.values)
plt.xticks(rotation=90)
plt.xlabel("Year")
plt.ylabel("number of athletes")
plt.title("Number of participants by year")
plt.show()
sport=df['Sport'].value_counts()[:5]
print(sport)

labels=sport.index
sizes=sport.values
plt.pie(sizes,labels=labels,autopct='%1.1f%%',
shadow=True,startangle=90)
plt.show()
sport_summer=df[df['Season']=='Summer']['Sport'].value_counts
().sort_values(ascending=False).head(20)
sport_summer

plt.figure(figsize=(15,10))
sns.barplot(y=sport_summer.index, x=sport_summer.values,
palette='magma')
plt.xlabel('Number of events')
plt.ylabel('Sport')
plt.xticks(rotation=90)
plt.title("Number of events in each sport in the summer
Olympics")
plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
sport_winter=df[df['Season']=='Winter']['Sport'].value_counts().sort_values(asce
nding=False)
plt.figure(figsize=(15,10))
sns.barplot(y=sport_winter.head(20).index, x=sport_winter.head(20).values,
palette='magma')
plt.xlabel('Number of events')
plt.ylabel('Sport')
plt.xticks(rotation=90)
plt.title("Number of events in each sport in the winter Olympics")
plt.show()
Mastering Data Visualization Techniques
(Part 3)
Prepared by: Syed Afroz Ali

plot, ax = plt.subplots(1, 3, figsize=(14, 4))

# One stacked age histogram per passenger class, drawn left to right.
for panel, pclass in zip(ax, (1, 2, 3)):
    class_rows = train_data.loc[train_data["Pclass"] == pclass]
    survival_palette = sns.color_palette(["yellow", "green"])
    histogram = sns.histplot(
        data=class_rows,
        x="Age",
        hue="Survived",
        binwidth=5,
        ax=panel,
        palette=survival_palette,
        multiple="stack",
    )
    histogram.set_title(f"{pclass}-Pclass")
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
sex = ["Male", "Female"]
values = data["sex"].value_counts()
color = ["#FF0000", "#000000"]

plt.figure(figsize = (5, 7))

plt.pie(values, labels = sex, colors = color, explode = (0.1, 0),
textprops = {"color":"w"}, autopct = "%.2f%%", shadow = Tru
e, startangle = 90)

plt.legend();

# Plotting Outliers
plt.figure(figsize=(20, 15))
# Box plots for the first 13 columns only, laid out on a 3x5 grid;
# `col` is the 1-based subplot slot.
for col, i in enumerate(data.columns[:13], start=1):
    plt.subplot(3, 5, col)
    plt.boxplot(data[i])
    plt.xlabel(i)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
fig = plt.figure( figsize=(8, 6))
ax = fig.add_axes([0,0,1,1])
sns.boxplot(ax=ax, data=df, x='TARGET', y='LDH')#,fli
erprops=dict(marker='o', markersize=6),fliersize=2)

ax.axhline(y=550,color='b')
ax.axhline(y=650,color='orange')
ax.axhline(y=1200,color='g')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Grid of KDE plots (20 rows x 5 columns) for the first 100 columns of `train`.
fig = plt.figure(figsize=(15, 60))
# Build the column list once instead of three times per iteration.
feature_names = train.columns.tolist()[:100]
for position, name in enumerate(feature_names, start=1):
    plt.subplot(20, 5, position)
    sns.set_style("white")
    plt.title(name, size=12, fontname='monospace')
    # `fill=` replaces the deprecated `shade=`; the vector is passed as `x=`
    # because seaborn >= 0.12 reads the first positional argument as `data`.
    a = sns.kdeplot(x=train[name], fill=True, alpha=0.9, linewidth=1.5,
                    facecolor=(1, 1, 1, 0), edgecolor=".2")
    plt.ylabel('')
    plt.xlabel('')
    plt.xticks(fontname='monospace')
    plt.yticks([])
    # Keep only a slightly thicker bottom spine.
    for j in ['right', 'left', 'top']:
        a.spines[j].set_visible(False)
    a.spines['bottom'].set_linewidth(1.2)

fig.tight_layout(h_pad=3)
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Grid of box plots (20 rows x 5 columns) for the first 100 columns of `train`.
fig = plt.figure(figsize=(15, 60))
# Build the column list once instead of three times per iteration.
feature_names = train.columns.tolist()[:100]
for position, name in enumerate(feature_names, start=1):
    plt.subplot(20, 5, position)
    sns.set_style("white")
    plt.title(name, size=12, fontname='monospace')
    # Pass the vector as `x=`: seaborn >= 0.12 reads a positional argument as
    # `data`, which no longer guarantees the horizontal orientation that the
    # hidden y ticks below rely on.
    a = sns.boxplot(x=train[name], linewidth=2.5, color='white')
    plt.ylabel('')
    plt.xlabel('')
    plt.xticks(fontname='monospace')
    plt.yticks([])
    # Keep only a slightly thicker bottom spine.
    for j in ['right', 'left', 'top']:
        a.spines[j].set_visible(False)
    a.spines['bottom'].set_linewidth(1.2)

fig.tight_layout(h_pad=3)
plt.show()

fig, ax = plt.subplots(nrows = 5,ncols = 1,figsize = (15,30))

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# One count plot per feature, split by the HeartDisease flag, stacked vertically.
colors = ['#F3ED13', '#451FA4']
for position, feature in enumerate(numerical_features, start=1):
    plt.subplot(5, 1, position)
    # The column name must be passed as `x=`: in seaborn >= 0.12 the first
    # positional parameter is `data`, so the original positional call collided
    # with the `data=` keyword and raised a TypeError.
    sns.countplot(x=feature, data=data, hue="HeartDisease", palette=colors)
    title = feature + ' vs Heart Disease'
    plt.title(title);

train.iloc[:, :-1].describe().T.sort_values(by='std' , ascending

= False)\
.style.background_gradient(cmap='GnBu')\
.bar(subset=["max"], color='#BB0000')\
.bar(subset=["mean",], color='green')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.figure(figsize = (15, 12))
ax = plt.axes()
ax.set_facecolor('black')
ax = sns.countplot(x = 'Embarked', data = titanic, palette = [
custom_colors[2], custom_colors[1]], edgecolor = 'white', lin
ewidth = 1.2)
plt.title('Disaster Count', fontsize = 25)
plt.xlabel('Disaster', fontsize = 20)
plt.ylabel('Count', fontsize = 20)
ax.xaxis.set_tick_params(labelsize = 15)
ax.yaxis.set_tick_params(labelsize = 15)
bbox_args = dict(boxstyle = 'round', fc = '0.9')
for p in ax.patches:
ax.annotate('{:.0f} = {:.2f}%'.format(p.get_height(), (p.get
_height() / len(titanic['Embarked'])) * 100), (p.get_x() + 0.25, p
.get_height() + 10),
color = 'black',
bbox = bbox_args,
fontsize = 18)
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
#Plotting the distributions of the numerical variables
color_plot = ['#de972c','#74c91e','#1681de','#e069f5','#f54545','#f0ea46
','#7950cc']

fig,ax = plt.subplots(4,2,figsize=(20,20))
sns.kdeplot(df['HeartDisease'],color=np.random.choice(color_plot), ax=
ax[0][0], shade=True)
sns.kdeplot(df['Oldpeak'],color=np.random.choice(color_plot), ax=ax[0]
[1], shade=True)
sns.kdeplot(df['Age'],color=np.random.choice(color_plot), ax=ax[1][0],
shade=True)
sns.kdeplot(df['FastingBS'],color=np.random.choice(color_plot), ax=ax[
1][1], shade=True)
sns.kdeplot(df['RestingBP'],color=np.random.choice(color_plot), ax=ax[
2][0],shade=True)
sns.kdeplot(df['Cholesterol'],color=np.random.choice(color_plot), ax=a
x[2][1], shade=True)
sns.kdeplot(df['MaxHR'],color=np.random.choice(color_plot), ax=ax[3][
0],shade=True)
fig.delaxes(ax[3][1])

s = sns.countplot(x = 'Survived',data = train)

sizes=[]
for p in s.patches:
height = p.get_height()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
sizes.append(height)
s.text(p.get_x()+p.get_width()/2.,
height + 3,
'{:1.2f}%'.format(height/len(train)*100),
ha="center", fontsize=14)

z=df['job_title'].value_counts().head(10)
fig=px.bar(z,x=z.index,y=z.values,color=z.index,text=
z.values,labels={'index':'job title','y':'count','text':'cou
nt'},template='seaborn',title='<b> Top 10 Popular Rol
es in Data Sceince')
fig.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Lower-triangle correlation heatmap with two annotated arrows.
hm = df.drop('id', axis=1)
# Compute the correlation matrix once and reuse it for the mask and the plot.
corr_matrix = hm.corr()
# `np.bool` was removed in NumPy 1.24; the builtin `bool` is the same dtype.
mask = np.zeros_like(corr_matrix, dtype=bool)
# Hide the upper triangle (including the diagonal) to avoid mirrored cells.
mask[np.triu_indices_from(mask)] = True

plt.suptitle('Correlation', size=20, weight='bold')

ax = sns.heatmap(corr_matrix, linewidths=0.9, linecolor='white',
                 cbar=True, mask=mask, cmap=heatmap)

ax.annotate('Low Correlation',
            fontsize=10, fontweight='bold',
            xy=(1.3, 3.5), xycoords='data',
            xytext=(0.6, 0.95), textcoords='axes fraction',
            arrowprops=dict(
                facecolor=heatmap[0], shrink=0.025,
                connectionstyle='arc3, rad=0.50'),
            horizontalalignment='left', verticalalignment='top'
            )
ax.annotate('High Correlation',
            fontsize=10, fontweight='bold',
            xy=(3.3, 7.5), xycoords='data',
            xytext=(0.8, 0.4), textcoords='axes fraction',
            arrowprops=dict(
                facecolor=heatmap[0], shrink=0.025,
                connectionstyle='arc3, rad=-0.6'),
            horizontalalignment='left', verticalalignment='top'
            )
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.suptitle('Target Variable', size = 20, weight='bold')

song_popularity = df['song_popularity'].map({0:'UnPopular', 1:
'Popular'})

a = sns.countplot(data = df, x =song_popularity,palette=them

e)
plt.tick_params(axis="x", colors=theme[0],labelsize=15)

for p in a.patches:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
a.annotate(f'{height/df.shape[0]*100} %', (x + width/2, y + h
eight*1.02), ha='center')

plt.show()

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625

cont = ['song_duration_ms', 'acousticness', 'danceabi

lity', 'energy',
'instrumentalness', 'liveness', 'loudness',
'speechiness', 'tempo', 'audio_valence']
cat = [ 'key', 'audio_mode', 'time_signature']

a = 4 # number of rows
b = 3 # number of columns
c = 1 # initialize plot counter

plt.figure(figsize= (18,18))

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
for i in cont:
plt.suptitle('Distribution of Features', size = 20,
weight='bold')
plt.subplot(a, b, c)
A=sns.kdeplot(data= df, x=i,hue=song_popularit
y,palette=theme[:-2], linewidth = 1.3,shade=True, alp
ha=0.35)
plt.title(i)
plt.xlabel(" ")
c=c+1

#plotting
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9))
fig.suptitle(' Highest and Lowest Correlation ', size =
20, weight='bold')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
axs = [ax1, ax2]

#kdeplot
sns.kdeplot(data=df, y='energy', x='acousticness', ax
=ax1, color=heatmap[0])
ax1.set_title('Energy vs Acousticness', size = 14, wei
ght='bold', pad=20)

#kdeplot
sns.kdeplot(data=df, y='energy', x='loudness', ax=ax2
, color=heatmap[4])
ax2.set_title('Energy vs Loudness', size = 14, weight=
'bold', pad=20);

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
colors = ["#e9d9c8","#cca383","#070c23","#f82d06",
"#e8c195","#cd7551","#a49995","#a3a49c","#6c7470
"]
sns.palplot(sns.color_palette(colors))

#plot
A = sns.countplot(train_df['case_num'],
color=colors[1],
edgecolor='white',
linewidth=1.5,
saturation=1.5)

#Patch
patch_h = []
for patch in A.patches:
reading = patch.get_height()
patch_h.append(reading)

idx_tallest = np.argmax(patch_h)
A.patches[idx_tallest].set_facecolor(colors[3])

#Lables
plt.ylabel('Count', weight='semibold', fontname = 'Georgia')
plt.xlabel('Cases', weight='semibold', fontname = 'Georgia')
plt.suptitle('Number of Cases', fontname = 'Georgia', weight=
'bold', size = 18, color = colors[2])
A.bar_label(A.containers[0], label_type='edge')

plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625

# Species frequency table: column "index" holds the species name and column
# "common_name" holds its count, sorted by count ascending.
# The columns are named explicitly because pandas 2.0 changed what
# value_counts().reset_index() produces (name column + "count"), which would
# break the later Datas["index"] lookup and sort by name instead of by count.
Datas = (
    india_df["common_name"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="common_name")
    .sort_values(by="common_name")
)

# Creating the bar chart

trace = go.Bar(
y = Datas["index"],
x = Datas["common_name"],
orientation = "h",
marker_color= "#4F7177",
text = Datas["common_name"],
)

layout = dict(
width = 600,
height= 1000,
plot_bgcolor = "#FFFFFF",
font=dict(family='Arial',
size=12,
color='black'),

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
margin = dict(
l=0,
r=0,
b=100,
t=100,
pad=0
),
xaxis = dict(showline=True, linewidth=1.45, linecolor="#4
F7177",gridcolor='#D5D7D8',
#griddash='dot',
title_text='Counts'),
yaxis = dict(showline=True, linewidth=1.45, linecolor="#4F
7177",ticksuffix = " ",title_text='Name'),
bargap = 0.15,hoverlabel_bgcolor="#4F7177",hovermode="
x"
)

fig = go.Figure(data = trace, layout = layout)

fig.layout.xaxis.fixedrange = True
fig.layout.yaxis.fixedrange = True

#text
texter("Indian Birds Species",0.000,1.10,28,"Work Sans")
texter("Birds found in the dataset",0.000,1.06,18,"Source Sa
ns Pro")
texter("heyRobin!",1.00,-0.06,16,"Playfair Display")

fig.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Missing Values:
fig, axes = plt.subplots(1,2, figsize=(20,5))
fig.suptitle('Missing Data', size = 15, weight='bold')

#first plot
sns.heatmap(train.isna().transpose(),

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
cmap="crest",ax=axes[0])

#missing data
missing = round(train.isna().sum()/train.shape[0]* 100
,2)
missing = missing[missing>0].sort_values().to_frame(
)
missing.columns = ['Percentage']
missing.index.names = ['Name']
missing = missing.reset_index()
sns.barplot(data = missing, y= 'Name', x = 'Percentag
e',ax=axes[1],color=pal[0])
plt.show()

from plotly.subplots import make_subplots

import plotly.graph_objects as go
#Data
cnt_srshp =train['HomePlanet'].value_counts()
cnt_srsdes =train['Destination'].value_counts()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
fig = make_subplots(rows=2, cols=2, shared_yaxes=True,
subplot_titles=("Home Planets","Destination Planets","VIP
","CryoSleep"))

#figure1
fig.add_trace(go.Scatter(
x=cnt_srshp.index,
y=cnt_srshp.values,
mode='markers',
marker=dict(
sizemode = 'diameter',
sizeref = 20,
size = cnt_srshp.values,
color = ['#1D7595','#B9B596','#864D29'])), 1, 1)
#figure2
fig.add_trace(go.Scatter(
x=cnt_srsdes.index,
y=cnt_srsdes.values,
mode='markers',
marker=dict(
sizemode = 'diameter',
sizeref = 20,
size = cnt_srsdes.values,
color = ['#048B95','#A1231F','#602F58'])),
1, 2)

#figure3
fig.add_trace(go.Histogram(x=train["VIP"],
marker=dict(color=pal)),
row=2, col=1)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
#figure4
fig.add_trace(go.Histogram(x=train["CryoSleep"],
marker=dict(color=pal)),
row=2, col=2)

fig.update_layout(height=1000,width=1000, coloraxis=dict(c
olorscale='Bluered_r'), showlegend=False,
title_x=0.9,
titlefont=dict(size = 2, color='black', family='Space Mono'),
plot_bgcolor='rgba(0,0,0,0)'
)
fig.show()

fig, axes = plt.subplots(1,3, figsize=(20,6))

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Three side-by-side panels: VIP counts, density of Num, Side counts.
# Vectors are passed as `x=` because seaborn >= 0.12 reads the first
# positional argument as `data`; `fill=` replaces the deprecated `shade=`.
sns.countplot(x=train["VIP"], ax=axes[0], palette=pal)
sns.kdeplot(x=train["Num"], linewidth=1.3, fill=True,
            alpha=0.35, ax=axes[1], color=pal[0])
sns.countplot(x=train["Side"], ax=axes[2], palette=pal)

plt.show()

import matplotlib as mlb

import matplotlib.image as mpimg
from matplotlib.offsetbox import AnnotationBbox, OffsetImage

#plotting
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 11))
fig.suptitle(' Potablity of Water Quality ', size = 26, color = th
eme[3], weight='bold')
axs = [ax1, ax2]

#Count-Plot
sns.countplot(water_df['Potability'], ax=ax1, palette='husl')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
ax1.set_title('Count Plot', size = 14, color = theme[3], weight
='bold', pad=20)

#Data-2
names = ["Not Potable", "Potable"]
values = water_df['Potability'].value_counts()
colors = ["#E68193","#459E97"]
explode = (0.01, 0.01)

#Doughnut-chart
ax2.pie(x= values,labels =names, colors=colors,autopct='%1.
0f%%', pctdistance=0.8,explode=explode)

#draw-circle
centre_circle = plt.Circle((0,0),0.62,fc='white')
ax2.add_artist(centre_circle)
ax2.axis('equal')

ax2.set_title('Pie Chart', size = 14, color = theme[3], weight='

bold', pad=20)

#Image

path = mpimg.imread('../input/water/water bottle.png')

imagebox = OffsetImage(path , zoom=0.3)
xy = (0.5, 0.7)
ab = AnnotationBbox(imagebox, xy, frameon=False, pad=1, x
ybox=(0.02, 0.05))
ax2.add_artist(ab)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.subplots_adjust(left=None, bottom=None, right=None, top
=0.8, wspace=0.4, hspace=None);

#Figure with Image

import matplotlib as mlb
import matplotlib.image as mpimg
from matplotlib.offsetbox import AnnotationBbox, Off
setImage

plt.figure(figsize=(27,15));
ax = sns.barplot(y ='Country',
x='Total',
data=medals[:30],
color=olympics_col[3],
zorder=2,
linewidth=0,
orient='h',
saturation=1,
alpha=1,
)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
#Lables
ax.set_xlabel("Total",fontsize=20, weight='bold')
ax.set_ylabel("Country",fontsize=20, weight='bold')
ax.tick_params(labelsize=10, width=0.5, length=1.5)
plt.title("Top 30 Countries with Medals",size=20,weig
ht='bold')

#Patches
for a in ax.patches:
value = f'{a.get_width():.0f}'
x = a.get_x() + a.get_width() + 0.60
y = a.get_y() + a.get_height() / 1.8
ax.text(x, y, value, ha='left', va='center', fontsize=12,
bbox=dict(facecolor='none', edgecolor='black', box
style='round', linewidth=0.2))

#image
path = mpimg.imread('../input/font-worksans/medal-cr
op.png')
imagebox = OffsetImage(path , zoom=1.6)
xy = (0.5, 0.7)
ab = AnnotationBbox(imagebox, xy, frameon=False, p
ad=1, xybox=(100.5, 16))

ax.add_artist(ab)
ax.text(x = 92.5, y = 22.5, s = 'Best Performance', fon
tsize=22, weight = 'bold',color=olympics_col[1])

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
ax.text(x = 95.5, y = 23.5, s = 'From U.S.A', fontsize=2
2, weight = 'bold',color=olympics_col[1]);

for col in numeric_features[1:]:

fig = plt.figure(figsize=(9, 6))
ax = fig.gca()
feature = data[col]
feature.hist(bins=50, ax = ax)
ax.axvline(feature.mean(), color='magenta', linesty
le='dashed', linewidth=2)
ax.axvline(feature.median(), color='cyan', linestyle
='dashed', linewidth=2)
ax.set_title(col)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.show()

data1=mydata[['Parameter 1']]
for i in data1.columns:
plt.figure(figsize=(15,6))
sns.boxplot(data1[i])
plt.xticks(rotation=90)
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Creating a figure
plt.figure(figsize=(10,6))

#plotting the values for people who have heart disease

plt.scatter(df.age[df.target==1],
df.thalach[df.target==1],
c="tomato")

# plotting the values for people who don't have heart disease
plt.scatter(df.age[df.target==0],
df.thalach[df.target==0],
c="lightgreen")

# Adding info
plt.title("Heart Disease w.r.t Age and Max Heart Rate")
plt.xlabel("Age")
plt.legend(["Disease", "No Disease"])
plt.ylabel("Max Heart Rate");

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
sns.displot(x = df.thalach[df.target==1], data = df, kde = True, color= 'o
live')#
skewness=str(df.thalach[df.target==1].skew())
kurtosis=str(df.thalach[df.target==1].kurt())
plt.legend([skewness,kurtosis],title=("skewness and kurtosis"))
plt.title("Maximum heart achieved of peple with heart disease")
plt.xlabel("Maximum heart rate achieved")
plt.ylabel("Number of people with heart disease");

pd.crosstab(df.sex, df.fbs)

# Grouped bar chart of fasting blood sugar by sex.
# crosstab(sex, fbs) puts sex on the index (the x-axis groups) and fbs on the
# columns (the legend entries). The original labelled them the other way round.
fig = pd.crosstab(df.sex, df.fbs).plot(kind='bar',
                                       color=['lightblue', 'salmon'])
plt.title("Fasting blood sugar w.r.t sex")
# NOTE(review): assumes sex is coded 0 = female, 1 = male (the order the
# original legend used) and fbs is coded 1 when above 120 mg/dl - confirm
# against the dataset's data dictionary.
fig.set_xticklabels(labels=['Female', 'Male'], rotation=0)
plt.legend(['fbs<=120 mg/dl', 'fbs>120 mg/dl']);

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
pred = rf_model.predict(x_test)
plt.rcParams['figure.figsize'] = (12,8)
plt.plot(pred, label='Predicted value')
plt.plot(y_test, label='actual value')
plt.legend(loc ="upper left")
plt.title('Random Forest Model')
plt.xlabel('Test data')
plt.ylabel('mm / Day')
plt.show()

fig, ax = plt.subplots(ncols=3, figsize=(18,6))

colors = [['#ADEFD1FF', '#00203FFF'], ['#97BC62FF', '

#2C5F2D'], ['#F5C7B8FF', '#FFA177FF']]
explode = [0, 0.2]
columns = ['Parking', 'Warehouse', 'Elevator']
for i in range(3):
data = df[columns[i]].value_counts()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
ax[i].pie(data, labels=data.values, explode=expl
ode, colors=colors[i], shadow=True)
ax[i].legend(labels=data.index, fontsize='large')
ax[i].set_title('{} distribution'.format(columns[i]))

def plot_hist(feature):
    """Plot a histogram with KDE and a box plot for one ``titanic`` column.

    The upper panel shows the distribution with vertical lines at the mean,
    median and mode; the lower panel shows a box plot of the same column.

    Args:
        feature: Name of the column of the module-level ``titanic`` frame.
    """
    values = titanic[feature]
    mean_value = values.mean()
    median_value = values.median()
    mode_value = statistics.mode(values)

    fig, (hist_ax, box_ax) = plt.subplots(2, 1, figsize=(17, 12))

    sns.histplot(data=values, kde=True, ax=hist_ax, color="Brown")
    hist_ax.axvline(x=mean_value, color='r', linestyle='--', linewidth=2,
                    label='Mean: {}'.format(round(mean_value, 3)))
    hist_ax.axvline(x=median_value, color='orange', linewidth=2,
                    label='Median: {}'.format(round(median_value, 3)))
    hist_ax.axvline(x=mode_value, color='yellow', linewidth=2,
                    label='Mode: {}'.format(mode_value))
    hist_ax.legend()

    sns.boxplot(x=values, ax=box_ax, color="Brown")
    plt.show()
plot_hist('Age')

plt.figure(figsize=(12,5))
plt.title('top categories')
plt.ylabel('item_price')
titanic.groupby('Embarked')['Fare'].mean().sort_values(ascen
ding=False)[0:15].plot(kind='line', marker='*', color='red',
ms=10)
titanic.groupby('Embarked')['Fare'].mean().sort_values(ascen
ding=False)[0:15].plot(kind='bar',color=sns.color_palette("inf
erno_r", 7))
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
numeric_feature = titanic.dtypes!=object
final_numeric_feature =
titanic.columns[numeric_feature].tolist()

titanic[final_numeric_feature].plot(kind='density',
subplots=True, layout=(2,4), sharex=False, figsize= (20,12))
plt.show()

df.describe().round(2).T.sort_values(by='std' , ascending = Fa
lse)\
.style.background_gradient(cmap='GnBu')\
.bar(subset=["max"], color='#BB0000')\
.bar(subset=["min",], color='green')\
.bar(subset=["mean",], color='Orange')\
.bar(subset=['std'], color='pink')\
.bar(subset=['50%'], color='magenta')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
def highlight_min(s, props=''):
    """Return a per-element CSS array marking the minimum of *s*.

    Elements equal to the NaN-ignoring minimum of ``s`` get ``props``;
    every other element gets an empty string.
    """
    smallest = np.nanmin(s.values)
    is_smallest = s == smallest
    return np.where(is_smallest, props, '')
titanic.describe().style.apply(highlight_min, props='color:yell
ow;background-color:Grey', axis=0)

titanic[titanic["Age"] >= 50].describe().style.backgro

und_gradient(cmap='RdPu')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
# Age distribution: a thin box plot on top, a histogram below with vertical
# lines at the mean, median and mode.
fig, ax = plt.subplots(2, 1, sharex=True,
                       figsize=(17, 10),
                       gridspec_kw={"height_ratios": (.2, .8)})
ax[0].set_title('Age distribution', fontsize=18, pad=20)
sns.boxplot(x='age', data=heart, ax=ax[0])
ax[0].set(yticks=[])
sns.histplot(x='age', data=heart, ax=ax[1])
# The original passed `col`, a name this snippet never defines; the plotted
# column is 'age'.
ax[1].set_xlabel('age', fontsize=16)

age_mean = heart['age'].mean()
age_median = heart['age'].median()
age_mode = heart['age'].mode()[0]
# Draw on the histogram axes explicitly instead of relying on pyplot's
# notion of the current axes.
ax[1].axvline(age_mean, color='darkgreen', linewidth=2.2,
              label='mean=' + str(np.round(age_mean, 1)))
ax[1].axvline(age_median, color='red', linewidth=2.2,
              label='median=' + str(np.round(age_median, 1)))
ax[1].axvline(age_mode, color='purple', linewidth=2.2,
              label='mode=' + str(age_mode))
ax[1].legend(bbox_to_anchor=(1, 1.03), ncol=1, fontsize=17,
             fancybox=True, shadow=True, frameon=True)
plt.tight_layout()
plt.show()

plt.rcParams['font.family'] = 'Lucida Calligraphy'

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.rcParams['font.size'] = 30

heart["age_bins"]= pd.cut(heart["age"] , bins=[29 , 40 , 50 ,

60 , 80] , labels=["adult" , "fortieth" , "old" , "ancient"] )

def count_plot(data, x=None, y=None, figsize=None, title=None,
               color=None, prop=False, rotation_x=0):
    """Draw a frequency-ordered count plot with a value label on every bar.

    Exactly one of ``x`` / ``y`` must be given: ``x`` draws vertical bars,
    ``y`` draws horizontal bars.

    Args:
        data: DataFrame holding the column to count.
        x: Column name for a vertical count plot.
        y: Column name for a horizontal count plot.
        figsize: Figure size; defaults to ``(12, 7)`` when omitted.
        title: Plot title.
        color: Single bar colour forwarded to ``sns.countplot``.
        prop: When true, label bars with percentages instead of raw counts.
        rotation_x: Rotation of the x tick labels (vertical plots only).

    Raises:
        ValueError: If neither or both of ``x`` and ``y`` are supplied.
    """
    # Raise real exceptions: `raise("...")` raises a str, which Python 3
    # rejects with an unrelated TypeError.
    if x is None and y is None:
        raise ValueError("Expected y or x")
    if x is not None and y is not None:
        raise ValueError("Expected y or x not both")

    horizontal = x is None
    count_type = data[y if horizontal else x].value_counts(ascending=False)
    total = count_type.sum()
    type_order = count_type.index

    # The original condition was inverted and always discarded a caller's
    # figsize.
    plt.figure(figsize=figsize if figsize is not None else (12, 7))

    if horizontal:
        sns.countplot(data=data, y=y, color=color, order=type_order)
    else:
        sns.countplot(data=data, x=x, color=color, order=type_order)
        plt.xticks(rotation=rotation_x)

    # Bars follow `type_order`, so bar k sits at categorical position k.
    # Iterating positionally avoids label-based Series indexing (wrong for
    # integer categories) and the tick-label text lookup (fails for
    # non-string categories).
    for position, count in enumerate(count_type.to_numpy()):
        if prop:
            label = "{:0.1f}%".format(100 * count / total)
        else:
            label = "{}".format(count)
        if horizontal:
            plt.text(count + 1, position, label, va="center")
        else:
            plt.text(position, count + 2, label, ha="center")

    plt.title(title)
    plt.show()

Syed Afroz Ali

Learn Data Visualization With Python
Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/

https://t.me/AIMLDeepThaught/625
# Barchart sorted by frequency
base_color = sns.color_palette()[0]
cat_order = train_eda[col_name].value_counts().index
plt.figure(figsize=(15,10))
plt.xticks(rotation = 90)
sns.countplot(data = train_eda, x = col_name, order = cat_order, color =
base_color);

# add annotations
n_points = train_eda.shape[0]
cat_counts = train_eda[col_name].value_counts()
locs, labels = plt.xticks() # get the current tick locations and labels

# loop through each pair of locations and labels

for loc, label in zip(locs, labels):

# get the text property for the label to get the correct count
count = cat_counts[label.get_text()]
pct_string = '{:0.1f}%'.format(100*count/n_points)

# print the annotation just below the top of the bar

plt.text(loc, count+4, pct_string, ha = 'center', color = 'black')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
train = pd.read_csv('train_housing.csv')

# Visualising numerical predictor variables against the target variable
# One scatter plot of SalePrice against each numeric column, on a 12x3 grid.
train_num = train.select_dtypes(include=['int64', 'float64'])
fig, axs = plt.subplots(12, 3, figsize=(20, 80))
# adjust the vertical space between plots
fig.subplots_adjust(hspace=0.6)
for i, ax in zip(train_num.columns, axs.flatten()):
    sns.scatterplot(x=i, y='SalePrice',
                    hue='SalePrice', data=train_num, ax=ax, palette='viridis_r')
    # Label the axes being drawn: plt.xlabel / plt.ylabel act on pyplot's
    # current axes, which is not `ax` while iterating over the grid.
    ax.set_xlabel(i, fontsize=12)
    ax.set_ylabel('SalePrice', fontsize=12)
    #ax.set_yticks(np.arange(0,900001,100000))
    ax.set_title('SalePrice' + ' - ' + str(i), fontweight='bold', size=20)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
train = pd.read_csv('train_housing.csv')
categorical = train.select_dtypes(include=['object'])
## Visualising categorical predictor variables against the target variable
def facetgrid_boxplot(x, y, **kwargs):
    """Draw one facet: a box plot of *y* grouped by *x*, x tick labels vertical.

    Extra keyword arguments are accepted so the function can be used as a
    ``FacetGrid.map`` callback; they are not forwarded to the plot.
    """
    sns.boxplot(y=y, x=x)
    plt.xticks(rotation=90)

# Reshape to long form (one row per SalePrice / categorical variable / value)
# and draw one box-plot facet per categorical variable.
f = pd.melt(train, id_vars=['SalePrice'],
            value_vars=sorted(train[categorical.columns]))
# `height=` is the current name of the facet-size parameter; the original
# `size=` keyword is no longer accepted by seaborn.
g = sns.FacetGrid(f, col="variable", col_wrap=3,
                  sharex=False, sharey=False, height=5)
g = g.map(facetgrid_boxplot, "value", "SalePrice")

import matplotlib.pyplot as plt

import seaborn as sns

sns.scatterplot(x=df.iloc[:,0], y=df.iloc[:,1], hue=y)

plt.annotate("KD65", (df.iloc[64,0], df.iloc[64,1]), (8*1e6, 1),
arrowprops=dict(arrowstyle="->"), fontsize="xx-large",c='red'
)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.annotate("KD99", (df.iloc[98,0], df.iloc[98,1]), (8*1e6, 2*1
e6), arrowprops=dict(arrowstyle="->"), fontsize="xx-large",c=
'red')
plt.annotate("control3", (df.iloc[107,0], df.iloc[107,1]), (8*1e6
, 3*1e6), arrowprops=dict(arrowstyle="->"), fontsize="xx-larg
e",c='red')
plt.annotate("control13", (df.iloc[117,0], df.iloc[117,1]), (8*1e
6, 4*1e6), arrowprops=dict(arrowstyle="->"), fontsize="xx-lar
ge",c='red')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
corr = wine.corr() # We already examined SalePrice
correlations
plt.figure(figsize=(12, 10))

sns.heatmap(corr[(corr >= 0.5) | (corr <= -0.4)],

cmap='viridis', vmax=1.0, vmin=-1.0,
linewidths=0.1,annot=True, annot_kws={"size": 8},
square=True);

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
import dabl
dabl.plot(titanic, 'Fare');

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Mastering Data Visualization Techniques
(Part 4)
Prepared by: Syed Afroz Ali
X = dataset.copy()
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(X)
PCA_ds = pd.DataFrame(pca.transform(X), columns=(["col1","col2", "col3"]))

# A 3D Projection Of Data In The Reduced Dimension

x =PCA_ds["col1"]
y =PCA_ds["col2"]
z =PCA_ds["col3"]
#To plot
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(x, y, z, c="maroon", marker="o" )
ax.set_title("A 3D Projection Of Data In The Reduced Dimension")
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
# A 3D Projection Of Data In The Reduced Dimension
x =PCA_ds["col1"]
y =PCA_ds["col2"]
z =PCA_ds["col3"]

#To plot
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(x, y, z, c=labels, marker="o", cmap="BuGn")
ax.set_title("A 3D Projection Of Data In The Reduced Dimension")
plt.show()

for i in range(0, 10):

fig = plt.figure(figsize=(8, 6))
ax = plt.axes(projection="3d")

ax.scatter(x, y, z, marker='*', color='red')

X, Y = np.meshgrid(x, y)

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Z = theta_0[i]*X + theta_1[i]*Y + theta_2[i]
ax.plot_surface(X, Y, Z, cmap='plasma')

ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
ax.set_title("Thetas: {},{},{}".format(theta_0[i], theta_1[i], theta_2[i]))
plt.show()
print(40*"=")

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
plt.suptitle('Target Variable', size = 20, weight='bold')

song_popularity = df['song_popularity'].map({0:'UnPopular', 1:'Popular'})

a = sns.countplot(data = df, x =song_popularity,palette=theme)

plt.tick_params(axis="x", colors=theme[0],labelsize=15)

for p in a.patches:
    width = p.get_width()
    height = p.get_height()
    x, y = p.get_xy()
    a.annotate(f'{height/df.shape[0]*100} %', (x + width/2, y + height*1.02), ha='center')

plt.show()

cont = ['song_duration_ms', 'acousticness', 'danceability', 'energy',

'instrumentalness', 'liveness', 'loudness',
'speechiness', 'tempo', 'audio_valence']
cat = [ 'key', 'audio_mode', 'time_signature']

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
a = 4 # number of rows
b = 3 # number of columns
c = 1 # initialize plot counter

plt.figure(figsize= (18,18))

for i in cont:
    plt.suptitle('Distribution of Features', size = 20, weight='bold')
    plt.subplot(a, b, c)
    A=sns.kdeplot(data= df, x=i,hue=song_popularity,palette=theme[:-2], linewidth = 1.3,shade=True, alpha=0.35)
    plt.title(i)
    plt.xlabel(" ")
    c=c+1

a = 4 # number of rows
b = 3 # number of columns
c = 1 # initialize plot counter

plt.figure(figsize= (18,18))

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
for i in cat:

plt.suptitle('Count of Features', size = 20, weight='bold')

plt.subplot(a, b, c)
A=sns.countplot(df[i],color=theme[3], alpha=0.5)
plt.title(i)
plt.xlabel(" ")
plt.tick_params(axis="x", colors='black',labelsize=10)
c=c+1

figure = plt.figure(figsize=(30,10))

A = plt.pie(medals['Total'][:10],
labels=medals['Country'][:10],
startangle=90,
labeldistance=1.15,
pctdistance=0.8,
autopct='%1.1f%%')
plt.title("Pie Chart of Top 10 Countries with Medals",size=20,weight='bold')
plt.show();

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
#checking the target variables for distribution
sns.distplot(house['SalePrice'],color=colors[7])
plt.axvline(x=house['SalePrice'].mean(), color=colors[7], linestyle='--', linewidth=2)
plt.title('Sales');

l = df_current['Q3'].value_counts(normalize=True).mul(100).tolist()[1]-df_old['Q2'].value_counts(normalize=True).mul(100).values.tolist()[1]

print(5*'\n',"\033[1;32m Increase in Woman is only\033[1;32m",round(l,2),'%\033[1;32m Over Last Year\033[1;32m',5*'\n')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625
fig, ax = plt.subplots(1, 2, figsize=(20,8))
fig.text(0.1, 0.95, "Visualisation of Gender Distribution for 2022 and 2021", fontsize=15, fontweight='bold')

sns.countplot(x='Q3', data=df_current,palette="Dark2", ax=ax[0]); #Current Year
sns.countplot(x='Q2', data=df_old,palette="Dark2",ax=ax[1]); #Last Year

for i, ax in enumerate(ax.flatten()):
ax.grid(axis='y', linestyle='-', alpha=0.4)
if i==0:t=shape;year = 2022
else:t=shape_21;year =2021
for p in ax.patches:
percentage = f'{100 * p.get_height() / t:.2f}%\n'
ax.annotate(percentage, (p.get_x() + p.get_width() / 2,p.get_height
()), ha='center', va='center')
ax.set_xlabel('Gender');ax.set_title("Gender Wise Distribution in "+
str(year))
if not(0.5 <= p.get_x() < 1.5):
p.set_facecolor('lightgrey')

plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
fig, ax = plt.subplots(1,2, figsize=(20,8))

fig.text(0.1, 0.95, "Age Distribution of Kaggle Users - 2022", fontsize=15, fontweight='bold')

sns.barplot(x=df_current['Q2'].value_counts().index, y=df_current['Q2'].
value_counts().values, ax=ax[0],
edgecolor='black', linewidth=1.5, saturation=1.5)
ax[0].yaxis.set_major_locator(MaxNLocator(nbins=20));ax[0].grid(axis=
'y', linestyle='-', alpha=0.4)
ax[0].set_ylabel('Count', weight='semibold')
ax[0].set_xlabel('Age Group 2022', weight='semibold')
ax[1].set_xlabel('Pie Chart for Age Group 2022', weight='semibold')
for p in ax[0].patches:
    percentage = f'{100 * p.get_height() / t:.1f}%\n'
    ax[0].annotate(percentage, (p.get_x() + p.get_width() / 2,p.get_height()), ha='center', va='center')

ax[1].pie(df_current['Q2'].value_counts(), labels = df_current['Q2'].value_counts().index, autopct='%1.1f%%',
          explode=[0.03 for i in df_current['Q2'].value_counts().index])

plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
df2=titanic.groupby('Pclass')['Age'].mean().sort_values(ascending=False)
plt.figure(figsize = (15,8))
color = [('b' if i < 30 else 'r') for i in df2]
df2.plot.bar(color=color);

col=['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',

'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
'pH', 'sulphates', 'alcohol', 'quality']

fig = plt.figure(figsize=(15,10))

for i in range(len(col)):
plt.subplot(3,4,i+1)
plt.title(col[i])
sns.distplot(df,x=df[col[i]])
plt.tight_layout()
plt.show()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
fig, ax = plt.subplots(1, 1)

plt.xlim(-1,26)
plt.ylim(0,1)
x = np.linspace(f.ppf(0.0000000001, dfn, dfd),f.ppf(0.9999999999, dfn, dfd), 100)
ax.plot(x, f.pdf(x, dfn, dfd), 'r-')
ax.axvline(f.ppf(0.95, dfn, dfd), ls = "--", color = "navy")
print('upper 5%:', f.ppf(0.95, dfn, dfd))

# Free or Paid Courses - Countplot

fig, ax = plt.subplots(figsize=(7,5), dpi=100)
ax = sns.countplot(data=courses, x='is_paid', palette='magma_r')
ax.set_xticklabels(labels=['Free', 'Paid'])
ax.set_xlabel("Free/Paid courses")
ax.set_ylabel("Number of courses")
ax.set_title("Share of Free and Paid Courses on Udemy")
percentage = round(courses['is_paid'].value_counts() * 100 /len(courses), 2)
patches = ax.patches
for i in range(len(patches)):
x = patches[i].get_x() + patches[i].get_width()/2
y = patches[i].get_height()+.05
ax.annotate('{:.2f}%'.format(percentage[i]), (x, y), ha='center')

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
#Creating a stripplot to visualize differences in data distribution between hotels

features = ['lead_time', 'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children', 'babies', 'previous_cancellations', 'previous_bookings_not_canceled', 'booking_changes', 'adr', 'days_in_waiting_list']

n=1

sns.set_style('darkgrid')
sns.set(font_scale = 1.2)
plt.figure(figsize = (14, 18))

for feature in features:

plt.subplot(4,3,n)
sns.stripplot(x = df['hotel'], y = df[feature], palette = 'summer').set(xlabel =
None, ylabel = None)
plt.title(f'{feature} strip plot')
n=n+1
plt.tight_layout()

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
import plotly.graph_objects as go

labels = confirmed_bookings['meal'].unique()
values = confirmed_bookings['meal'].value_counts()
palette = ["#f6bd60", "#f5cac3", "#84a59d", "#f28482"]

fig = go.Figure(data=[go.Pie(labels = labels,

values = values,
hole=.5,
title = 'Meal plans',
legendgroup = True,
pull = [0.1, 0.1, 0.1, 0.1]
)
]
)

fig.update_traces(marker = dict(colors = palette));

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
x = rent_df["Rent"]
y = rent_df["Size"]
colors = rent_df["Size"]
sizes = rent_df["Size"]

plt.figure(figsize = (25, 8))

plt.ticklabel_format(style = 'plain')
plt.scatter(x, y, c = colors, s = sizes, alpha = 0.3, cmap = 'viridis')
plt.colorbar();

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625

Syed Afroz Ali

Follow me on LinkedIn for more: https://www.linkedin.com/in/syed-afroz-70939914/
Mastering Data Visualization Techniques
(Part 5)
Prepared by: Syed Afroz Ali

import plotly.graph_objs as go
values = data['cuisines'].value_counts()[:20]
labels=values.index
text=values.index
fig = go.Figure(data=[go.Pie(values=values,labels=labels,hole=.3)])
fig.update_traces(hoverinfo='label+percent', textinfo='value',
                  textfont_size=20,
                  marker=dict(line=dict(color='#000000', width=3)))
fig.update_layout(title="Most popular cuisines of Bangalore",
                  titlefont={'size': 30},
                  )
fig.show()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
MughlaiFoodcafe = data[data['cuisines'].str.contains('Mughlai',
case=False, regex=True,na=False)]
MughlaiFoodcafe.head()

#pie chart showing % of various Food serving Type cafe

slices=[MughlaiFoodcafe.shape[0],
ChineseFoodcafe.shape[0],
MexicanFoodcafe.shape[0],
NorthIndianFoodcafe.shape[0],
SouthIndianFoodcafe.shape[0],
ItalianFoodcafe.shape[0],
AmericanFoodcafe.shape[0]]
labels=['Mughlai','Chinese','Mexican','North Indian','South Indian','Italian','American']
colors = ['#3333cc','#ffff1a','#ff3333','#c2c2d6','#6699ff','#c4ff4d','#339933']
plt.pie(slices,colors=colors, labels=labels, autopct='%1.0f%%',
pctdistance=.5, labeldistance=1.2,shadow=True)
fig = plt.gcf()
plt.title("Percentage of cafe according to their Food Type",
bbox={'facecolor':'1', 'pad':5})
fig.set_size_inches(12,12)
plt.show()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# Most Liked Dishes in Bangalore
import re
data=data[data['dish_liked'].notnull()]
data.index=range(data.shape[0])
likes=[]
for i in range(data.shape[0]):
splited_array=re.split(',',data['dish_liked'][i])
for item in splited_array:
likes.append(item)

print("Count of Most liked dishes of Bangalore")

favourite_food = pd.Series(likes).value_counts()
favourite_food.head(20)

ax = favourite_food.nlargest(n=20, keep='first').plot(kind='bar',figsize=(15,15),title = 'Top 20 Favourite Food counts ')
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625

#Analysis of biggest food chains

branches = data.groupby(['name']).size().to_frame('count').reset_index().sort_values(['count'],ascending=False)
ax = sns.barplot(x='name', y='count', data=branches[:12])
plt.xlabel('')
plt.ylabel('Branches')
plt.title('Food chains and their counts')
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005,
p.get_height() * 1.005))

fig = plt.gcf()
fig.set_size_inches(25,15)

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
data = df.groupby('name').size()[["Domino's Pizza", "KFC",
"McDonald's",'Subway']].sort_values(ascending = False)

x = data.index
y = data.values

plt.figure(figsize = (15,10))

color = ['red','yellow','green','blue']
ax = plt.bar(x,y,width = 0.4,color = color)
for i in ax:
x_ = i.xy[0] + i.get_width() / 2
y_ = i.get_height()
txt = str(y_)
plt.annotate(
text = txt,
xy = (x_,y_),
xytext = (-17,2.9),
textcoords = 'offset points'
)

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
plt.xlabel('Restraurant')
plt.ylabel('Number of Branches')
plt.title('Number of branches on KFC, MacD, Dominos and Subway')
plt.show()

plt.style.use('seaborn-notebook')
for i, label in enumerate(df.Drug_Type.unique().tolist()):
sns.kdeplot(df2.loc[df2['Drug_Type'] == i+1, 'Na_to_K'],
label=label, shade=True)

plt.title('1. KDE of Na_to_k (based on Drug_Type)',

fontdict=font, pad=15)
plt.xticks(np.arange(0,46,2), rotation=90)
plt.xlim([0,46])
plt.legend()
plt.show()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# draw countplot and pie plot of categorical data
for col in categorical:
fig, axes = plt.subplots(1,2,figsize=(10,4))
# count of col (countplot)
sns.countplot(data=df2, x=col, ax=axes[0])
for container in axes[0].containers:
axes[0].bar_label(container)
# count of col (pie chart)
slices = df2[col].value_counts().values
activities = [f"{i} ({var})" for i, var in
zip(df2[col].value_counts().index,
df[col].value_counts().index)]
axes[1].pie(slices, labels=activities, shadow=True,
autopct='%1.1f%%')
plt.suptitle(f'Count of Unique Value in {col}', y=1.09,
**font)
plt.show()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
for col in ['Sex','BP','Cholesterol']:
ax = sns.countplot(data=df, x='Drug_Type', hue=col)
for container in ax.containers:
ax.bar_label(container)
plt.title(f'Count of Drug (based on {col})', fontdict=font,
pad=15)
plt.show()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
for col in ['Sex', 'BP', 'Cholesterol']:
fig , ax= plt.subplots(1,2, figsize=(10,4))
gp = df.groupby([col])['Na_to_K'].mean().to_frame().reset_index()
sns.barplot(data=gp, x=col, y='Na_to_K', ax=ax[0])
for container in ax[0].containers:
ax[0].bar_label(container)
ax[0].set_title(f'Mean of Na_to_K (based on {col})', y=1.09,
**font)
sns.boxplot(data=df, x=col, y='Na_to_K', ax=ax[1])
ax[1].set_title(f'Boxplot of {col})', y=1.09, **font)

Learn Data Visualization With Python

https://t.me/AIMLDeepThaught/625

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
plt.show()

fig, ax = plt.subplots(2,2,figsize=(14,8))
for i, col in enumerate(['Sex', 'BP', 'Cholesterol',
'Drug_Type']):
sns.scatterplot(data=df, x='Age', y='Na_to_K', hue=col,
ax=ax[i//2, i%2], palette='turbo')
ax[i//2, i%2].set_title(f'Na_to_K vs Age (based on {col}',
y=1.09, **font)
ax[i//2, i%2].legend(loc='upper center',
bbox_to_anchor=(1.2, 0.6),
fancybox=True, shadow=True)

fig.tight_layout()
plt.show()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
fig, ax = plt.subplots(3,2,figsize=(14,12))
sns.swarmplot(data=df, x='Cholesterol', y='Na_to_K',
hue='Drug_Type', ax=ax[0,0])
sns.swarmplot(data=df, x='Cholesterol', y='Age',
hue='Drug_Type', ax=ax[0,1])
sns.swarmplot(data=df, x='BP', y='Na_to_K',
hue='Drug_Type', ax=ax[1,0])
sns.swarmplot(data=df, x='BP', y='Age', hue='Drug_Type',
ax=ax[1,1])
sns.swarmplot(data=df, x='Sex', y='Na_to_K',
hue='Drug_Type', ax=ax[2,0])
sns.swarmplot(data=df, x='Sex', y='Age', hue='Drug_Type',
ax=ax[2,1])
ax[0,0].set_title('Swarmplot of Drug Type vs Na_to_K',y=1.05,
**font)
ax[0,1].set_title('Swarmplot of Drug Type vs Age',y=1.05,
**font)
plt.tight_layout()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625

import itertools
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
# for solve problem of show plotly plots
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)
plt.style.use('_mpl-gallery')
FONT = {'fontsize':20, 'fontstyle':'normal', 'fontfamily':'Times New Roman', 'backgroundcolor':'#145A32', 'color':'orange'} # for plot title

fig = go.Figure()
for col in df:
fig.add_trace(go.Box(x=df[col], name=col))

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
fig.update_layout(
title_text="Box Plot Styling Outliers",
title_font=dict(color='orange', family='newtimeroman',
size=25),
title_x=0.45,
paper_bgcolor='#145A32',
# plot_bgcolor='#DAF7A6',
font=dict(color='#DAF7A6', family='newtimeroman',
size=16),
)
fig.show()

# univariate analysis of categorical data:

sns.set_palette("summer_r")
for i, col in enumerate(discrete_cols1):

fig, axes = plt.subplots(1,2,figsize=(10,4))

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
# count of col (countplot)
sns.countplot(data=df, x=col, ax=axes[0])
for container in axes[0].containers:
axes[0].bar_label(container)
# count of col (pie chart)
slices = df[col].value_counts().sort_index().values
activities = [var for var in
df[col].value_counts().sort_index().index]
axes[1].pie(slices, labels=activities, shadow=True,
autopct='%1.1f%%')
plt.suptitle(f'Count of Unique Value in {col} (Fig {i+1})',
y=1.09, **FONT)
plt.show()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
sns.set_palette(['#1f4a1b','orange','#bbff33','yellow'])
discrete_cols2 = ['Family', 'Education', 'Securities Account',
'CD Account', 'Online', 'CreditCard']
for i, col in enumerate(discrete_cols2):
ax = sns.countplot(data=df, x='Personal Loan', hue=col)
for container in ax.containers:
ax.bar_label(container)
plt.title(f'Count of Personal Loan based on {col} (Fig {i+5})',
fontdict=FONT, pad=15)
plt.show()

for i, col in enumerate(['Income', 'CCAvg','Mortgage']):

print('='*30, f"Mean of {col} in each categorical feature",
'='*30)
for j, cat in enumerate(discrete_cols2):
fig , ax= plt.subplots(1,2, figsize=(10,4))
gp = df.groupby([cat])[col].mean().to_frame().reset_index()

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
sns.barplot(data=gp, x=cat, y=col, ax=ax[0])
for container in ax[0].containers:
ax[0].bar_label(container)
ax[0].set_title(f'Mean of {col} (based on {cat})', y=1.09,
**FONT)

sns.boxplot(data=df, x=cat, y=col, ax=ax[1])

ax[1].set_title(f'Boxplot of {cat} (Fig {i+11}-{j+1})',
y=1.09, **FONT)

plt.show()

continuous_cols = ['Age','Experience','CCAvg','Mortgage']

for i, col in enumerate(continuous_cols):

fig = px.scatter_3d(
data_frame= df,
x=df.Income,
y=df[col],
z=df['Personal Loan'],
color=df['Personal Loan'].astype(str),
color_discrete_map={'1':'orange', '0':'red'},
template='ggplot2',

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu
Learn Data Visualization With Python
https://t.me/AIMLDeepThaught/625
hover_name='Age',
# hover_data=
opacity=0.6,
# symbol='Transmission',
# symbol_map=
# log_x=True,
# log_z=True,
height=700,
title=f'3D scatter of features based on Personal Loan (Fig {i+1})')
fig.update_layout(
title_text="Box Plot Styling Outliers",
title_font=dict(color='orange', family='newtimeroman',
size=25),
title_x=0.45,
paper_bgcolor='#145A32',
# plot_bgcolor='#DAF7A6',
font=dict(color='#DAF7A6', family='newtimeroman', size=16),
)
pio.show(fig)

Syed Afroz Ali (Kaggle Grandmaster)

Follow me on LinkedIn for more: https://lnkd.in/g3W7w_Xu

Pset 6 - Fall2019 - Solutions PDF
100% (3)
Pset 6 - Fall2019 - Solutions PDF
33 pages
Graphical Methods For Data Analysis
No ratings yet
Graphical Methods For Data Analysis
410 pages
Some Notes From The Book - Pairs Trading - Quantitative Methods and Analysis by Ganapathy Vidyamurthy Weatherwax - Vidyamurthy - Notes
No ratings yet
Some Notes From The Book - Pairs Trading - Quantitative Methods and Analysis by Ganapathy Vidyamurthy Weatherwax - Vidyamurthy - Notes
32 pages
To Log or Not To Log: The Distribution of Asset Returns: SAMBA/03/04
No ratings yet
To Log or Not To Log: The Distribution of Asset Returns: SAMBA/03/04
14 pages
E-Tivity 2.2 Tharcisse 217010849
No ratings yet
E-Tivity 2.2 Tharcisse 217010849
7 pages
Mastering Data Visualization Techniques
No ratings yet
Mastering Data Visualization Techniques
159 pages
Mastering Data Visualization Techniques 1728896857
No ratings yet
Mastering Data Visualization Techniques 1728896857
85 pages
Mastering Data Visualization Techniques (Part 1)
No ratings yet
Mastering Data Visualization Techniques (Part 1)
20 pages
Data Visualization
No ratings yet
Data Visualization
70 pages
BDA Project Codes
No ratings yet
BDA Project Codes
20 pages
Python for Machine Learning Visualization 1735231185
No ratings yet
Python for Machine Learning Visualization 1735231185
69 pages
# Load Packages: Pandas Pandas PD PD Numpy Numpy NP NP
No ratings yet
# Load Packages: Pandas Pandas PD PD Numpy Numpy NP NP
17 pages
Python Datavisualization
No ratings yet
Python Datavisualization
69 pages
Heart Failure Prediction
100% (1)
Heart Failure Prediction
41 pages
Ai in HC - 2
No ratings yet
Ai in HC - 2
9 pages
Ide To 6 Classification Algorithms
No ratings yet
Ide To 6 Classification Algorithms
34 pages
Anemia Code
No ratings yet
Anemia Code
33 pages
Heart Attacks Analysis
No ratings yet
Heart Attacks Analysis
10 pages
Roll NO 2020
No ratings yet
Roll NO 2020
8 pages
Heart Dataset Analysis
No ratings yet
Heart Dataset Analysis
24 pages
Logistic Regression With Pyspark
No ratings yet
Logistic Regression With Pyspark
19 pages
Hare Krishna
No ratings yet
Hare Krishna
1 page
Assignment 1 - LP1
No ratings yet
Assignment 1 - LP1
14 pages
Hear Disease
No ratings yet
Hear Disease
45 pages
ML 7
No ratings yet
ML 7
6 pages
Heart Disease Prediction - Jupyter Notebook
100% (1)
Heart Disease Prediction - Jupyter Notebook
9 pages
Data_Analyzer
No ratings yet
Data_Analyzer
10 pages
Print Print Print Print: Import As
No ratings yet
Print Print Print Print: Import As
6 pages
Phase 3 Health Monitoring and Diagnosis
No ratings yet
Phase 3 Health Monitoring and Diagnosis
10 pages
Logistic Regression
No ratings yet
Logistic Regression
12 pages
project code health sleep lifestyle
No ratings yet
project code health sleep lifestyle
4 pages
Python Solution
No ratings yet
Python Solution
30 pages
1728086737277
No ratings yet
1728086737277
26 pages
Diabetes_Prediction_1704256341
No ratings yet
Diabetes_Prediction_1704256341
17 pages
Sleep Health Analysis
No ratings yet
Sleep Health Analysis
20 pages
Heart Disease Indicator Prediction Model
No ratings yet
Heart Disease Indicator Prediction Model
17 pages
Content From Jose Portilla's Udemy Course Learning Python For Data Analysis and Visualization Notes by Michael Brothers, Available On
No ratings yet
Content From Jose Portilla's Udemy Course Learning Python For Data Analysis and Visualization Notes by Michael Brothers, Available On
13 pages
Assignment Instructions:: Import As
No ratings yet
Assignment Instructions:: Import As
1 page
Seaborn
No ratings yet
Seaborn
2 pages
Data Analisis 2
No ratings yet
Data Analisis 2
13 pages
Data Visualization With Python
No ratings yet
Data Visualization With Python
34 pages
B 4 Heart
No ratings yet
B 4 Heart
9 pages
Data science and analtics Laboratory
No ratings yet
Data science and analtics Laboratory
21 pages
Logistic Regression 205
No ratings yet
Logistic Regression 205
8 pages
medical-cost-analysis
No ratings yet
medical-cost-analysis
17 pages
Medical Insurance Analysis ??
No ratings yet
Medical Insurance Analysis ??
17 pages
Seaborn - Plots - Jupyter Notebook
No ratings yet
Seaborn - Plots - Jupyter Notebook
36 pages
AD3411 (2)
No ratings yet
AD3411 (2)
28 pages
Data Analysis Report With EDA
No ratings yet
Data Analysis Report With EDA
71 pages
Heart Disease Risk Factor Data Analysis Midterm Data 2 - Jupyter Notebook
No ratings yet
Heart Disease Risk Factor Data Analysis Midterm Data 2 - Jupyter Notebook
20 pages
DSBDAL - Assignment No 9
No ratings yet
DSBDAL - Assignment No 9
12 pages
Batch-2 Ieee DMT
No ratings yet
Batch-2 Ieee DMT
4 pages
Lab 2
No ratings yet
Lab 2
8 pages
CS-3361-Data-science-lab Manual
No ratings yet
CS-3361-Data-science-lab Manual
36 pages
2358873_MayankBaryal
No ratings yet
2358873_MayankBaryal
9 pages
Experiment No 9
No ratings yet
Experiment No 9
13 pages
R Note
No ratings yet
R Note
56 pages
Formulario - EA
No ratings yet
Formulario - EA
6 pages
Week 4 Laboratory Activity
No ratings yet
Week 4 Laboratory Activity
6 pages
Project_Report
No ratings yet
Project_Report
18 pages
Datascience 2 PDF
No ratings yet
Datascience 2 PDF
24 pages
No Ph.D. Game Design With Three.js
From Everand
No Ph.D. Game Design With Three.js
Nikiforos Kontopoulos
No ratings yet
Python For Beginners
From Everand
Python For Beginners
Célio Azevedo
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet
150+ C Pattern Programs
From Everand
150+ C Pattern Programs
Hernando Abella
No ratings yet
Assignment 8
No ratings yet
Assignment 8
3 pages
Assignment 7
No ratings yet
Assignment 7
3 pages
Chapter-7-IS-OER-PowerPoint-Slides
No ratings yet
Chapter-7-IS-OER-PowerPoint-Slides
8 pages
Ethical hacking LEC-3
No ratings yet
Ethical hacking LEC-3
31 pages
Bozorgzadeh Et Al. (2018) - Comp. Stat. Analysis of Intact Rock Strength For Reliability-Based Design
No ratings yet
Bozorgzadeh Et Al. (2018) - Comp. Stat. Analysis of Intact Rock Strength For Reliability-Based Design
14 pages
Notes and Correspondence Plotting Positions in Extreme Value Analysis
No ratings yet
Notes and Correspondence Plotting Positions in Extreme Value Analysis
7 pages
An Intercomparison of Multiple Statistical Downscaling Methods For Daily Precipitation and Temperature Over China: Present Climate Evaluations
No ratings yet
An Intercomparison of Multiple Statistical Downscaling Methods For Daily Precipitation and Temperature Over China: Present Climate Evaluations
21 pages
DADM S3 Skewness and Transformations To Achieve Normality
No ratings yet
DADM S3 Skewness and Transformations To Achieve Normality
9 pages
How To Draw Q-Q Plot
No ratings yet
How To Draw Q-Q Plot
9 pages
Exploratory Data Analysis
100% (3)
Exploratory Data Analysis
26 pages
Spss Manual
No ratings yet
Spss Manual
27 pages
Geovariances Geostats Software For Improved Resource Estimation
No ratings yet
Geovariances Geostats Software For Improved Resource Estimation
2 pages
Nedim Baruh
No ratings yet
Nedim Baruh
20 pages
Lecture 3
No ratings yet
Lecture 3
42 pages
03 Basic Statistical Data Analysis Using Excel
No ratings yet
03 Basic Statistical Data Analysis Using Excel
141 pages
Test For Normality PDF
No ratings yet
Test For Normality PDF
30 pages
5 1 1-Ridf-Curves
No ratings yet
5 1 1-Ridf-Curves
18 pages
Understanding Q-Q Plots: Latest News
No ratings yet
Understanding Q-Q Plots: Latest News
4 pages
Rotinas Fortran L Moments
No ratings yet
Rotinas Fortran L Moments
39 pages
Musau 4TH Project Management Theses
No ratings yet
Musau 4TH Project Management Theses
167 pages
The Latticeextra Package: R Topics Documented
No ratings yet
The Latticeextra Package: R Topics Documented
24 pages
Jaro-Liko-Jeepney-Arrival-interval
No ratings yet
Jaro-Liko-Jeepney-Arrival-interval
16 pages
All All: % (A) Construct Side-By-Side Stem-And-Leaf Plots
No ratings yet
All All: % (A) Construct Side-By-Side Stem-And-Leaf Plots
34 pages
Package Openair': December 7, 2020
No ratings yet
Package Openair': December 7, 2020
165 pages
Subjective Questions
No ratings yet
Subjective Questions
8 pages
DR - Arunachalam Rajagopal - Time Series Forecasting With R A Beginner's Guide (2020)
No ratings yet
DR - Arunachalam Rajagopal - Time Series Forecasting With R A Beginner's Guide (2020)
93 pages
Ho - Diagnostics Examples 2 in SPSS
No ratings yet
Ho - Diagnostics Examples 2 in SPSS
4 pages
NumXL - Getting Started
No ratings yet
NumXL - Getting Started
34 pages
Unit 4
No ratings yet
Unit 4
30 pages