Data Visualization
Data Visualization
(Part 1)
Prepared by: Syed Afroz Ali
# Bar chart of Cholesterol for each ChestPainType category in `heart`.
plt.figure(figsize=(8, 6))
# Disable scientific notation on the tick labels.
plt.ticklabel_format(style="plain")
sns.barplot(
    x=heart["ChestPainType"],
    y=heart["Cholesterol"],
    palette="tab20",
);
# Density plot for every non-object column of `titanic`, one subplot per column.
numeric_feature = titanic.dtypes != object
final_numeric_feature = titanic.columns[numeric_feature].tolist()
titanic[final_numeric_feature].plot(
    kind="density",
    subplots=True,
    layout=(1, 7),  # a single row of seven axes
    sharex=False,
    figsize=(20, 4),
)
plt.show()
# Pie chart of the three most frequent ChestPainType values.
top_chest_pain = heart["ChestPainType"].value_counts()[:3]
top_chest_pain.plot.pie(figsize=(5, 5), autopct="%1.0f%%")
plt.title("Pie Chart")
plt.xticks(rotation=90)
plt.show()
# Pie chart of ChestPainType frequencies.
# Labels come from the counts' own index: `unique()` returns values in order
# of appearance, which does not match the frequency order of `value_counts()`
# and would attach the wrong label to a wedge.
chest_pain_counts = heart["ChestPainType"].value_counts()
plt.pie(chest_pain_counts, labels=chest_pain_counts.index, autopct="%1.2f%%");
# Count plot of RestingECG with the count written on top of every bar.
plt.figure(figsize=(7, 4))
ax = sns.countplot(x=heart["RestingECG"])
for bars in ax.containers:
    ax.bar_label(bars)
plt.title("Count of RestingECG", fontsize=15, color="Blue");
# Visualizing the distribution of the data for every feature
plt.figure(figsize=(20, 8))
cat = ['Sex','RestingECG']
else:
[axes.set_visible(False) for axes in ax.flatten()[indx + 1:]
]
plt.tight_layout()
plt.show()
# Boxen plot of Cholesterol grouped by RestingECG, with a figure-level heading.
plt.figure(figsize=(11, 5))
plt.gcf().text(
    .55, .95, "Box Plot",
    fontsize=40, color='Red', ha='center', va='center',
)
sns.boxenplot(x=heart['RestingECG'], y=heart['Cholesterol'], palette="Set1")
plt.show()
# Scatter plot of fixed acidity vs. total sulfur dioxide for `wine`:
# marker size is taken from "alcohol" and marker colour from "chlorides".
sns.set_style('white')
wine.plot(
    kind="scatter",
    x="fixed acidity",
    y="total sulfur dioxide",
    alpha=.5,
    s=wine["alcohol"],
    label="alcohol",
    figsize=(10, 7),
    c="chlorides",
    cmap=plt.get_cmap("jet"),
    colorbar=True,
    sharex=False,
)
plt.legend()
plt.show()
#Correlation with Response Variable class
X = wine.drop(['quality'], axis=1)
y = wine['quality']
import matplotlib

# Exploded pie chart of the Sex column.
matplotlib.rcParams.update({'font.size': 20})
sex_counts = heart['Sex'].value_counts()
# NOTE: two explode offsets are given, so this expects exactly two categories.
ax = sex_counts.plot.pie(explode=[0.1, 0.1], autopct='%1.2f%%', shadow=True);
ax.set_title(label="Sex", fontsize=40, color='DarkOrange', font='Lucida Calligraphy');
# Take the legend entries from the counts so they always follow the wedge
# order instead of relying on a hard-coded ['M', 'F'].
plt.legend(labels=list(sex_counts.index))
plt.axis('off');
•• Join my WhatsApp Channel for the latest updates on AI:
https://www.whatsapp.com/channel/0029VavNSDO9mrGWYirxz40G
# Annotated correlation heatmap for `wine`, lower triangle only.
matplotlib.rcParams.update({'font.size': 10})
corr = wine.corr()
# True on and above the diagonal; those cells are hidden by the heatmap.
mask = np.triu(np.ones_like(corr, dtype=bool))

title_style = dict(fontsize=25, color='DarkGreen', font='Times New Roman')
plt.title('Correlation Analysis', **title_style)

heatmap_options = dict(
    mask=mask,
    annot=True,
    lw=0,
    linecolor='white',
    cmap='viridis',
    fmt="0.2f",
)
sns.heatmap(corr, **heatmap_options)

plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()
# Set configuration for charts.
plt.rcParams["figure.figsize"] = [20, 5]
plt.rcParams["font.size"] = 15
plt.rcParams["legend.fontsize"] = "medium"
plt.rcParams["figure.titlesize"] = "medium"

# Bar chart of Age against the first 14 ChestPainType entries, with a boxed
# value label inside every bar. `palette` must be defined by earlier code.
plt.subplots(figsize=(20, 8))
p = sns.barplot(
    x=heart["ChestPainType"][:14],
    y=heart["Age"],
    palette=palette,
    saturation=1,
    edgecolor="#1c1c1c",
    linewidth=2,
)
p.axes.set_title("\n ChestPainType \n", fontsize=25)
plt.ylabel("Total Member", fontsize=20)
plt.xlabel("\n Name", fontsize=20)
plt.xticks(rotation=90)

label_box = {
    "boxstyle": "round",
    "pad": 0.6,
    "facecolor": "orange",
    "edgecolor": "black",
    "alpha": 1,
}
for container in p.containers:
    p.bar_label(
        container,
        label_type="center",
        padding=6,
        size=15,
        color="black",
        rotation=90,
        bbox=label_box,
    )

sns.despine(left=True, bottom=True)
plt.show()
# Grid of count plots split by "Survived", one subplot per entry of `countlist`.
# presumably `countlist` holds (position, column name) pairs — verify where it is built.
plt.figure(figsize=(20, 10))
plt.suptitle("Countplot of Categorical Features", fontsize=18)
for entry in countlist:
    plt.subplot(2, 3, entry[0] + 1)
    sns.countplot(data=titanic, x=entry[1], hue="Survived", palette="rainbow")
    plt.ylabel("")
    plt.legend(
        ['Not Survived', 'Survived'],
        loc='upper center',
        prop={'size': 10},
    )
plt.tight_layout()
plt.show()
# Box plots (slots 1-2) and histograms (slots 3-4) for the numeric features.
numfeature = ["Age", "Fare"]
enumfeat = list(enumerate(numfeature))
plt.figure(figsize=(20, 7))
plt.suptitle("Distribution and Outliers of Numerical Data", fontsize=20)
for position, feature in enumfeat:
    plt.subplot(1, 4, position + 1)
    sns.boxplot(data=titanic[feature], palette="rainbow")
    plt.xlabel(str(feature))
for position, feature in enumfeat:
    # Offset by 3 so the histograms land in the two right-hand slots.
    plt.subplot(1, 4, position + 3)
    sns.histplot(data=titanic[feature], palette="rainbow", bins=15)
    plt.xlabel(str(feature))
plt.tight_layout()
plt.show()
# Kernel density estimate of each feature in `enumfeat`, split by "Survived".
plt.figure(figsize=(15, 5))
plt.suptitle(
    "Probability Distribution of numerical columns according to number of Survived",
    fontsize=20,
)
for position, feature in enumfeat:
    plt.subplot(1, 2, position + 1)
    sns.kdeplot(data=titanic, x=feature, hue="Survived")
plt.tight_layout()
plt.show()
import missingno as msno

# Missing-value matrix for `titanic`, followed by a per-column text summary.
msno.matrix(titanic, color=(0.50, 0.30, 0.80))
plt.show()

x = titanic.isnull().sum()
for a, b in x.items():
    # Only report columns that actually contain missing values.
    if b > 0:
        print(f"There are {b} missing values in column: {a}")
Mastering Data Visualization Techniques
(Part 2)
Prepared by: Syed Afroz Ali
# Print one ranking line per feature column of X_train:
# rank, feature index, importance value.
for f in range(X_train.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
# Plot the feature importances of the forest
def feature_importance_graph(indices, importances, feature_names):
    """Draw a horizontal bar chart of feature importances on a new figure.

    Args:
        indices: Index array selecting and ordering the features to plot;
            it is used to index both ``importances`` and ``feature_names``.
        importances: Importance values, indexable by ``indices``.
        feature_names: Feature labels, indexable by ``indices``.

    The dashed reference line and the "46% Difference" note are drawn at
    hard-coded coordinates.
    """
    plt.figure(figsize=(12, 6))
    plt.title(
        "Determining Feature importances \n with DecisionTreeClassifier",
        fontsize=18,
    )
    plt.barh(
        range(len(indices)),
        importances[indices],
        color='#31B173',
        align="center",
    )
    plt.yticks(
        range(len(indices)),
        feature_names[indices],
        rotation='horizontal',
        fontsize=14,
    )
    # Leave half a bar of padding below the first and above the last bar.
    plt.ylim([-1, len(indices)])
    plt.axhline(
        y=1.0, xmin=0.65, xmax=0.952,
        color='red', linewidth=3, linestyle='--',
    )
    plt.text(
        0.19, 2.8,
        '46% Difference between \n duration and contacts',
        color='Blue', fontsize=15,
    )
y_pred_LR = LRclassifier.predict(X_test)
else:
[axes.set_visible(False) for axes in ax.flatten()[indx + 1:]]
plt.tight_layout()
plt.show()
else:
[axes.set_visible(False) for axes in ax.flatten()[indx + 1:]]
plt.tight_layout()
plt.show()
num = wine.select_dtypes(include="number")
fig, ax = plt.subplots(12, 1, figsize = (14, 35))
for indx, (column, axes) in list(enumerate(list(zip(num, ax.flatten())))):
# Horizontal bars of selling_price per owner, ordered by df_gc's selling_price
# (highest first); `barw` is then applied to the axes.
plt.figure(figsize=(10, 5))
owner_order = df_gc.sort_values('selling_price', ascending=False)['owner']
ax1 = sns.barplot(
    data=raw_df,
    x='selling_price',
    y='owner',
    order=owner_order,
    ci=None,
)
barw(ax1)
plt.show()
# Builds per-cell count and percentage label strings from the matrix `cf` and
# sets the axis labels of the current plot.
# NOTE(review): this excerpt is incomplete and has lost its indentation.
# `blanks` and `stats_text` are used below but never defined in the visible
# lines, and the `if xyticks==False:` branch has no body here. Restore the
# missing statements from the original source before running.
def make_confusion_matrix(cf,
group_names=None,
categories='auto',
count=True,
percent=True,
cbar=True,
xyticks=True,
xyplotlabels=True,
sum_stats=True,
figsize=None,
cmap='Blues',
title=None):
# One "<count>\n" string per cell when `count` is set.
if count:
group_counts = ["{0:0.0f}\n".format(value) for value in cf.flatten()]
else:
group_counts = blanks
# Each cell as a percentage of the sum of all cells when `percent` is set.
if percent:
group_percentages = ["{0:.2%}".format(value) for value in cf.flatten()/np.sum(cf)]
else:
group_percentages = blanks
if xyticks==False:
#Do not show categories if xyticks is False
# With `xyplotlabels` both axes get a label; otherwise only `stats_text` is
# shown under the x axis.
if xyplotlabels:
plt.ylabel('True label', **{'fontfamily':'serif','size':12,'weight':'bold'})
plt.xlabel('Predicted label' + stats_text,**{'fontfamily':'serif','size':12,'weight':'bold'})
else:
plt.xlabel(stats_text,**{'fontfamily':'serif','size':12,'weight':'bold'})
vani_cf_matrix = confusion_matrix(y_test,y_pred_lr)
my_cols = [colors[3],colors[2]]
plt.show()
# Bar chart of Age against the first 14 Pclass entries, with a boxed value
# label inside every bar. `palette` must be defined by earlier code.
plt.subplots(figsize=(20, 8))
p = sns.barplot(
    x=dataset["Pclass"][:14],
    y=dataset["Age"],
    palette=palette,
    saturation=1,
    edgecolor="#1c1c1c",
    linewidth=2,
)
p.axes.set_title("\nTop Anime Community\n", fontsize=25)
plt.ylabel("Total Member", fontsize=20)
plt.xlabel("\nAnime Name", fontsize=20)
plt.xticks(rotation=90)

label_box = {
    "boxstyle": "round",
    "pad": 0.6,
    "facecolor": "orange",
    "edgecolor": "black",
    "alpha": 1,
}
for container in p.containers:
    p.bar_label(
        container,
        label_type="center",
        padding=6,
        size=15,
        color="black",
        rotation=90,
        bbox=label_box,
    )

sns.despine(left=True, bottom=True)
plt.show()
# Box plots (slots 1-2) and histograms (slots 3-4) for the numeric features.
numfeature = ["Age", "Fare"]
enumfeat = list(enumerate(numfeature))
plt.figure(figsize=(20, 9))
plt.suptitle("Distribution and Outliers of Numerical Data", fontsize=20)
for position, feature in enumfeat:
    plt.subplot(1, 4, position + 1)
    sns.boxplot(data=train[feature], palette="rainbow")
    plt.xlabel(str(feature))
for position, feature in enumfeat:
    # Offset by 3 so the histograms land in the two right-hand slots.
    plt.subplot(1, 4, position + 3)
    sns.histplot(data=train[feature], palette="rainbow", bins=15)
    plt.xlabel(str(feature))
plt.tight_layout()
plt.show()
# Histogram with a KDE overlay for each feature in `enumfeat`, stacked vertically.
plt.figure(figsize=(15, 12))
plt.suptitle(
    "Distribution & Kernel Density Estimation of Numerical Features",
    fontsize=20,
)
for position, feature in enumfeat:
    plt.subplot(2, 1, position + 1)
    sns.histplot(x=train[feature], kde=True, bins=30, color=(0.50, 0.20, 0.70))
plt.tight_layout()
plt.show()
# Bar chart of each numeric feature's correlation with "Survived".
plt.figure(figsize=(6, 8))
plt.title("Correlation of Survival column with Independent Features", fontsize=15)
# Restrict to numeric/bool columns first: DataFrame.corr() raises on
# non-numeric columns in current pandas releases.
numeric_train = train.select_dtypes(include=["number", "bool"])
corr = (
    numeric_train.corr()["Survived"]
    .drop("Survived")  # remove the target's correlation with itself
    .sort_values(ascending=False)
)
sns.barplot(x=corr.index, y=corr, color=(0.90, 0.30, 0.50))
plt.tight_layout()
plt.show()
# Kernel density estimate of each feature in `enumfeat`, split by "Survived".
plt.figure(figsize=(15, 5))
plt.suptitle(
    "Probability Distribution of numerical columns according to number of Survived",
    fontsize=20,
)
for position, feature in enumfeat:
    plt.subplot(1, 2, position + 1)
    sns.kdeplot(data=train, x=feature, hue="Survived")
plt.tight_layout()
plt.show()
import missingno as msno

# Missing-value matrix for `train`, followed by a per-column text summary.
msno.matrix(train, color=(0.50, 0.30, 0.80))
plt.show()

x = train.isnull().sum()
for a, b in x.items():
    # Only report columns that actually contain missing values.
    if b > 0:
        print(f"There are {b} missing values in column: {a}")
# Global seaborn theme: yellow figure/axes background, red grid, small axis labels.
rc = {
    'figure.dpi': 150,
    'axes.labelsize': 4,
    'axes.facecolor': '#F6C90E',
    'grid.color': 'Red',
    'figure.figsize': (12, 5),
    'figure.facecolor': '#F6C90E',
}
sns.set_theme(
    context='notebook',
    style='dark',
    palette='deep',
    font='Comic Sans Ms',
    font_scale=1,
    # `color_codes` is a boolean switch; the original 'red' only worked
    # because any non-empty string is truthy.
    color_codes=True,
    rc=rc,
)
# Bar chart of the number of medal rows per Sex, smallest group first,
# saved to 'world regions.png'.
color = ['Green', "Red"]
medals_by_sex = df.groupby('Sex')['Medal'].count().sort_values(ascending=True)
medals_by_sex.plot(kind="bar", color=color, alpha=.5);
plt.title("Sex Vs Medalse", fontsize=17, color='Brown', font='Comic Sans Ms', pad=20);
plt.xlabel("Sex ", fontsize=15, color='#1a4441', font='Comic Sans Ms')
plt.ylabel("Number of Medals", fontsize=15, color='#1a4441', font='Comic Sans Ms');
plt.legend(loc='best');
plt.savefig('world regions.png');
# Horizontal bar chart of the 20 regions with the most medal rows.
medals_per_region = df.groupby('region')['Medal'].count()
region_medal = medals_per_region.nlargest(20).reset_index()
region_medal.head()

sns.barplot(data=region_medal, x='Medal', y='region')
plt.title('medals by regions')
plt.xlabel('medals')
plt.ylabel('regions')
plt.xticks(rotation=45)
plt.show()
# Grouped bar chart of medal counts per type for the 20 regions with the most Gold.
summer_medals = df.groupby(['region', 'Medal']).size().reset_index()
summer_medals.columns = ['region', 'Medal', 'count']

# pandas 2.x only accepts keyword arguments for pivot(); the table is built
# once and reused instead of being pivoted twice.
medal_table = summer_medals.pivot(
    index='region', columns='Medal', values='count'
).fillna(0)
summer_medals_20 = medal_table.sort_values(['Gold'], ascending=False).head(20)

summer_medals_20.plot(kind='bar')
plt.xlabel('Country')
plt.title('Medals by Country- Summer Olympics ')
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.show()
# Bar chart of the `year` series: index on the x axis, values on the y axis.
plt.figure(figsize=(15, 10))
year_labels, athlete_counts = year.index, year.values
sns.barplot(x=year_labels, y=athlete_counts)
plt.title("Number of participants by year")
plt.xlabel("Year")
plt.ylabel("number of athletes")
plt.xticks(rotation=90)
plt.show()
# Pie chart of the five most frequent values of the Sport column.
sport = df['Sport'].value_counts()[:5]
print(sport)
labels, sizes = sport.index, sport.values
pie_options = dict(labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
plt.pie(sizes, **pie_options)
plt.show()
# Horizontal bar chart of the 20 most frequent sports among Summer rows.
summer_rows = df[df['Season'] == 'Summer']
sport_summer = (
    summer_rows['Sport']
    .value_counts()
    .sort_values(ascending=False)
    .head(20)
)
sport_summer

plt.figure(figsize=(15, 10))
sns.barplot(y=sport_summer.index, x=sport_summer.values, palette='magma')
plt.xlabel('Number of events')
plt.ylabel('Sport')
plt.xticks(rotation=90)
plt.title("Number of events in each sport in the summer Olympics")
plt.show()
plt.legend();
# Plotting Outliers
# One box plot per column on a 3x5 grid; only the first 13 columns are drawn,
# matching the original `col < 14` guard.
plt.figure(figsize=(20, 15))
for col, i in enumerate(data.columns[:13], start=1):
    plt.subplot(3, 5, col)
    plt.boxplot(data[i])
    plt.xlabel(i)
ax.axhline(y=550,color='b')
ax.axhline(y=650,color='orange')
ax.axhline(y=1200,color='g')
fig.tight_layout(h_pad = 3)
plt.show()
fig.tight_layout(h_pad = 3)
plt.show()
# 4x2 grid of filled KDE curves, one per column, each in a randomly chosen
# colour from `color_plot`. The unused eighth axes is removed.
fig, ax = plt.subplots(4, 2, figsize=(20, 20))
kde_columns = [
    'HeartDisease', 'Oldpeak',
    'Age', 'FastingBS',
    'RestingBP', 'Cholesterol',
    'MaxHR',
]
# ax.flat walks the grid row by row, the same order the original calls used.
for axis, column in zip(ax.flat, kde_columns):
    sns.kdeplot(
        x=df[column],
        color=np.random.choice(color_plot),
        ax=axis,
        fill=True,  # `shade=` is deprecated in favour of `fill=`
    )
fig.delaxes(ax[3][1])
# Plotly bar chart of the ten most frequent job titles.
z = df['job_title'].value_counts().head(10)
fig = px.bar(
    z,
    x=z.index,
    y=z.values,
    color=z.index,
    text=z.values,
    labels={'index': 'job title', 'y': 'count', 'text': 'count'},
    template='seaborn',
    title='<b> Top 10 Popular Roles in Data Sceince',
)
fig.show()
# Two arrow annotations on `ax`. Both share every setting except the text,
# the point annotated, the text position and the arrow curvature.
annotation_specs = (
    # (text, xy in data coords, xytext in axes fraction, connection style)
    ('Low Correlation', (1.3, 3.5), (0.6, 0.95), 'arc3, rad=0.50'),
    ('High Correlation', (3.3, 7.5), (0.8, 0.4), 'arc3, rad=-0.6'),
)
for note, target, note_position, curve in annotation_specs:
    ax.annotate(
        note,
        fontsize=10, fontweight='bold',
        xy=target, xycoords='data',
        xytext=note_position, textcoords='axes fraction',
        arrowprops=dict(
            facecolor=heatmap[0], shrink=0.025,
            connectionstyle=curve),
        horizontalalignment='left', verticalalignment='top',
    )
plt.show()
song_popularity = df['song_popularity'].map({0:'UnPopular', 1:
'Popular'})
for p in a.patches:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()
plt.show()
a = 4 # number of rows
b = 3 # number of columns
c = 1 # initialize plot counter
plt.figure(figsize= (18,18))
# Side-by-side bivariate KDE plots: energy vs acousticness and energy vs loudness.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9))
fig.suptitle(' Highest and Lowest Correlation ', size=20, weight='bold')

# Left panel
sns.kdeplot(data=df, y='energy', x='acousticness', ax=ax1, color=heatmap[0])
ax1.set_title('Energy vs Acousticness', size=14, weight='bold', pad=20)

# Right panel
sns.kdeplot(data=df, y='energy', x='loudness', ax=ax2, color=heatmap[4])
ax2.set_title('Energy vs Loudness', size=14, weight='bold', pad=20);
# Count plot of case_num with the tallest bar highlighted and counts on the bars.
# The column is passed as `x=`: recent seaborn releases treat the first
# positional argument as `data`.
A = sns.countplot(
    x=train_df['case_num'],
    color=colors[1],
    edgecolor='white',
    linewidth=1.5,
    saturation=1.5,
)

# Highlight the tallest bar (the first one if several tie).
tallest_patch = max(A.patches, key=lambda patch: patch.get_height())
tallest_patch.set_facecolor(colors[3])

# Labels
plt.ylabel('Count', weight='semibold', fontname='Georgia')
plt.xlabel('Cases', weight='semibold', fontname='Georgia')
plt.suptitle(
    'Number of Cases',
    fontname='Georgia', weight='bold', size=18, color=colors[2],
)
A.bar_label(A.containers[0], label_type='edge')
plt.show()
# Frequency table of common_name as a DataFrame, sorted by the 'common_name' column.
# NOTE(review): the columns produced by value_counts().reset_index() differ
# between pandas 1.x and 2.x, so 'common_name' holds counts in one and names
# in the other — confirm which ordering is intended.
Datas = (
    india_df["common_name"]
    .value_counts()
    .reset_index()
    .sort_values(by='common_name')
)
layout = dict(
width = 600,
height= 1000,
plot_bgcolor = "#FFFFFF",
font=dict(family='Arial',
size=12,
color='black'),
# Text annotations added through the `texter` helper (defined elsewhere).
# presumably the arguments are (text, x, y, size, font family) — verify against `texter`.
texter("Indian Birds Species", 0.000, 1.10, 28, "Work Sans")
texter("Birds found in the dataset", 0.000, 1.06, 18, "Source Sans Pro")
texter("heyRobin!", 1.00, -0.06, 16, "Playfair Display")
fig.show()
#first plot
sns.heatmap(train.isna().transpose(),
# Missing data: percentage of missing values per column (two decimals),
# keeping only columns with at least one missing value, smallest first.
missing_pct = (train.isna().sum() / train.shape[0] * 100).round(2)
missing = (
    missing_pct[missing_pct > 0]
    .sort_values()
    .rename_axis('Name')
    .reset_index(name='Percentage')
)
sns.barplot(data=missing, y='Name', x='Percentage', ax=axes[1], color=pal[0])
plt.show()
# Figures 1 and 2: bubble charts whose marker size follows the counts,
# placed in row 1, columns 1 and 2.
bubble_specs = (
    (cnt_srshp, ['#1D7595', '#B9B596', '#864D29'], 1),
    (cnt_srsdes, ['#048B95', '#A1231F', '#602F58'], 2),
)
for counts, marker_colors, column in bubble_specs:
    fig.add_trace(
        go.Scatter(
            x=counts.index,
            y=counts.values,
            mode='markers',
            marker=dict(
                sizemode='diameter',
                sizeref=20,
                size=counts.values,
                color=marker_colors,
            ),
        ),
        1, column,
    )

# Figure 3: histogram of the VIP column in row 2, column 1.
fig.add_trace(
    go.Histogram(x=train["VIP"], marker=dict(color=pal)),
    row=2, col=1,
)

fig.update_layout(
    height=1000,
    width=1000,
    coloraxis=dict(colorscale='Bluered_r'),
    showlegend=False,
    title_x=0.9,
    # `titlefont` is deprecated; `title_font` sets the same layout.title.font.
    title_font=dict(size=2, color='black', family='Space Mono'),
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show()
plt.show()
# Count plot (left) and doughnut chart (right) of the Potability column.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 11))
fig.suptitle(' Potablity of Water Quality ', size=26, color=theme[3], weight='bold')
axs = [ax1, ax2]

# Count plot; the column is passed as `x=` because recent seaborn releases
# treat the first positional argument as `data`.
sns.countplot(x=water_df['Potability'], ax=ax1, palette='husl')

# Doughnut data.
names = ["Not Potable", "Potable"]
# Sort by class value so the counts line up with `names` regardless of
# which class is more frequent.
# assumes Potability is coded 0 = not potable, 1 = potable — TODO confirm
values = water_df['Potability'].value_counts().sort_index()
colors = ["#E68193", "#459E97"]
explode = (0.01, 0.01)

# Doughnut chart: a pie with a white circle drawn over its centre.
ax2.pie(
    x=values, labels=names, colors=colors,
    autopct='%1.0f%%', pctdistance=0.8, explode=explode,
)
centre_circle = plt.Circle((0, 0), 0.62, fc='white')
ax2.add_artist(centre_circle)
ax2.axis('equal')
# Horizontal bar chart of Total medals for the first 30 rows of `medals`,
# with value labels, a medal image and a caption.
plt.figure(figsize=(27, 15));
ax = sns.barplot(
    y='Country',
    x='Total',
    data=medals[:30],
    color=olympics_col[3],
    zorder=2,
    linewidth=0,
    orient='h',
    saturation=1,
    alpha=1,
)

# Value label just right of every bar, vertically near its centre.
for a in ax.patches:
    value = f'{a.get_width():.0f}'
    x = a.get_x() + a.get_width() + 0.60
    y = a.get_y() + a.get_height() / 1.8
    ax.text(
        x, y, value,
        ha='left', va='center', fontsize=12,
        bbox=dict(facecolor='none', edgecolor='black',
                  boxstyle='round', linewidth=0.2),
    )

# Image overlay and caption, both at hard-coded positions.
path = mpimg.imread('../input/font-worksans/medal-crop.png')
imagebox = OffsetImage(path, zoom=1.6)
xy = (0.5, 0.7)
ab = AnnotationBbox(imagebox, xy, frameon=False, pad=1, xybox=(100.5, 16))
ax.add_artist(ab)
ax.text(
    x=92.5, y=22.5, s='Best Performance',
    fontsize=22, weight='bold', color=olympics_col[1],
)
# One horizontal box plot per column of `data1`, each on its own figure.
data1 = mydata[['Parameter 1']]
for i in data1.columns:
    plt.figure(figsize=(15, 6))
    # Passed as `x=`: recent seaborn releases treat the first positional
    # argument as `data`.
    sns.boxplot(x=data1[i])
    plt.xticks(rotation=90)
    plt.show()
# Plot the values for people who don't have heart disease.
plt.scatter(
    df.age[df.target == 0],
    df.thalach[df.target == 0],
    c="lightgreen",
)

# Adding info
plt.title("Heart Disease w.r.t Age and Max Heart Rate")
plt.xlabel("Age")
# NOTE(review): two legend labels are supplied but only the target == 0
# scatter is visible in this excerpt — confirm a "Disease" scatter is drawn
# before it, otherwise the labels are attached to the wrong series.
plt.legend(["Disease", "No Disease"])
plt.ylabel("Max Heart Rate");
pd.crosstab(df.sex, df.fbs)
def plot_hist(feature):
fig, ax = plt.subplots(2, 1, figsize=(17, 12))
# Mean Fare per Embarked value (highest first, at most 15 groups), drawn as
# a red line with star markers and as bars on the same axes.
plt.figure(figsize=(12, 5))
plt.title('top categories')
plt.ylabel('item_price')
mean_fare = (
    titanic.groupby('Embarked')['Fare']
    .mean()
    .sort_values(ascending=False)[0:15]
)
mean_fare.plot(kind='line', marker='*', color='red', ms=10)
mean_fare.plot(kind='bar', color=sns.color_palette("inferno_r", 7))
plt.show()
# Density plot per column of `final_numeric_feature` on a 2x4 grid.
density_options = dict(
    kind='density',
    subplots=True,
    layout=(2, 4),
    sharex=False,
    figsize=(20, 12),
)
titanic[final_numeric_feature].plot(**density_options)
plt.show()
# Styled summary statistics: one row per column, sorted by standard
# deviation (largest first), with a gradient background and in-cell bars.
(
    df.describe().round(2).T.sort_values(by='std', ascending=False)
    .style.background_gradient(cmap='GnBu')
    .bar(subset=["max"], color='#BB0000')
    .bar(subset=["min"], color='green')
    .bar(subset=["mean"], color='Orange')
    .bar(subset=['std'], color='pink')
    .bar(subset=['50%'], color='magenta')
)
https://t.me/AIMLDeepThaught/625
# Count plot of `col_name`, bars ordered by frequency, in a single colour
# (the first colour of the current seaborn palette).
base_color = sns.color_palette()[0]
cat_order = train_eda[col_name].value_counts().index
plt.figure(figsize=(15, 10))
plt.xticks(rotation=90)
countplot_options = dict(data=train_eda, x=col_name, order=cat_order, color=base_color)
sns.countplot(**countplot_options);
# add annotations
n_points = train_eda.shape[0]
cat_counts = train_eda[col_name].value_counts()
locs, labels = plt.xticks() # get the current tick locations and labels
# get the text property for the label to get the correct count
count = cat_counts[label.get_text()]
pct_string = '{:0.1f}%'.format(100*count/n_points)
# Long-format table with one row per (categorical variable, value, SalePrice),
# then one facet per variable drawn with `facetgrid_boxplot` (defined elsewhere).
f = pd.melt(
    train,
    id_vars=['SalePrice'],
    value_vars=sorted(train[categorical.columns]),
)
g = sns.FacetGrid(
    f,
    col="variable",
    col_wrap=3,
    sharex=False,
    sharey=False,
    height=5,  # FacetGrid's `size=` parameter was renamed to `height=`
)
g = g.map(facetgrid_boxplot, "value", "SalePrice")
# Mean Fare per Embarked value (highest first, at most 15 groups), drawn as
# a red line with star markers and as bars on the same axes.
plt.figure(figsize=(12, 5))
plt.title('top categories')
plt.ylabel('item_price')
mean_fare = (
    titanic.groupby('Embarked')['Fare']
    .mean()
    .sort_values(ascending=False)[0:15]
)
mean_fare.plot(kind='line', marker='*', color='red', ms=10)
mean_fare.plot(kind='bar', color=sns.color_palette("inferno_r", 7))
plt.show()
# 3D scatter of x, y, z, coloured by `labels` with the "BuGn" colormap.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")
scatter_options = dict(c=labels, marker="o", cmap="BuGn")
ax.scatter(x, y, z, **scatter_options)
ax.set_title("A 3D Projection Of Data In The Reduced Dimension")
plt.show()
X, Y = np.meshgrid(x, y)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
ax.set_title("Thetas: {},{},{}".format(theta_0[i], theta_1[i], theta_2[i]))
plt.show()
print(40*"=")
# Write each bar's height as a percentage of the number of rows in `df`,
# centred horizontally and slightly above the bar.
for p in a.patches:
    width = p.get_width()
    height = p.get_height()
    x, y = p.get_xy()
    a.annotate(
        f'{height/df.shape[0]*100} %',
        (x + width/2, y + height*1.02),
        ha='center',
    )
plt.show()
# Filled KDE per feature in `cont`, split by `song_popularity`, on an
# a-by-b subplot grid; `c` is the running subplot index.
plt.figure(figsize=(18, 18))
# The figure title does not depend on the feature, so it is set once.
plt.suptitle('Distribution of Features', size=20, weight='bold')
for i in cont:
    plt.subplot(a, b, c)
    A = sns.kdeplot(
        data=df,
        x=i,
        hue=song_popularity,
        palette=theme[:-2],
        linewidth=1.3,
        fill=True,  # `shade=` is deprecated in favour of `fill=`
        alpha=0.35,
    )
    plt.title(i)
    plt.xlabel(" ")
    c = c + 1
a = 4 # number of rows
b = 3 # number of columns
c = 1 # initialize plot counter
plt.figure(figsize= (18,18))
plt.subplot(a, b, c)
A=sns.countplot(df[i],color=theme[3], alpha=0.5)
plt.title(i)
plt.xlabel(" ")
plt.tick_params(axis="x", colors='black',labelsize=10)
c=c+1
# Pie chart of Total medals for the first ten rows of `medals`.
figure = plt.figure(figsize=(30, 10))
A = plt.pie(
    medals['Total'][:10],
    labels=medals['Country'][:10],
    startangle=90,
    labeldistance=1.15,
    pctdistance=0.8,
    autopct='%1.1f%%',
)
plt.title("Pie Chart of Top 10 Countries with Medals", size=20, weight='bold')
plt.show();
# Difference, in percentage points, between the second-most-frequent answer
# share of df_current['Q3'] and that of df_old['Q2'].
l = (
    df_current['Q3'].value_counts(normalize=True).mul(100).tolist()[1]
    - df_old['Q2'].value_counts(normalize=True).mul(100).values.tolist()[1]
)
# For every axes in the grid: add a y grid, pick the denominator `t` and the
# year by position (first axes -> shape / 2022, others -> shape_21 / 2021),
# annotate each bar with its height as a percentage of `t`, and set the
# x label and title.
# NOTE(review): the indentation of this excerpt was lost and two calls are
# split mid-token, so it does not run as shown. It is unclear whether the
# final `if not(0.5 <= p.get_x() < 1.5)` recolouring belongs inside the
# `for p in ax.patches` loop — confirm against the original notebook.
# NOTE(review): the loop variable `ax` rebinds the name of the array being
# iterated.
for i, ax in enumerate(ax.flatten()):
ax.grid(axis='y', linestyle='-', alpha=0.4)
if i==0:t=shape;year = 2022
else:t=shape_21;year =2021
for p in ax.patches:
percentage = f'{100 * p.get_height() / t:.2f}%\n'
ax.annotate(percentage, (p.get_x() + p.get_width() / 2,p.get_height
()), ha='center', va='center')
ax.set_xlabel('Gender');ax.set_title("Gender Wise Distribution in "+
str(year))
if not(0.5 <= p.get_x() < 1.5):
p.set_facecolor('lightgrey')
plt.show()
# Bar chart of df_current['Q2'] frequencies on ax[0], each bar annotated
# with its share of `t` as a percentage.
age_counts = df_current['Q2'].value_counts()
sns.barplot(
    x=age_counts.index,
    y=age_counts.values,
    ax=ax[0],
    edgecolor='black',
    linewidth=1.5,
    saturation=1.5,
)
ax[0].yaxis.set_major_locator(MaxNLocator(nbins=20))
ax[0].grid(axis='y', linestyle='-', alpha=0.4)
ax[0].set_ylabel('Count', weight='semibold')
ax[0].set_xlabel('Age Group 2022', weight='semibold')
ax[1].set_xlabel('Pie Chart for Age Group 2022', weight='semibold')
for p in ax[0].patches:
    percentage = f'{100 * p.get_height() / t:.1f}%\n'
    ax[0].annotate(
        percentage,
        (p.get_x() + p.get_width() / 2, p.get_height()),
        ha='center', va='center',
    )
plt.show()
# Density histogram with a KDE overlay for every column named in `col`,
# on a 3x4 grid.
fig = plt.figure(figsize=(15, 10))
for position, name in enumerate(col, start=1):
    plt.subplot(3, 4, position)
    plt.title(name)
    # `distplot` is deprecated; a density histogram with a KDE overlay is
    # its documented replacement.
    sns.histplot(x=df[name], kde=True, stat="density")
plt.tight_layout()
plt.show()
# Draw the pdf of `f` with (dfn, dfd) degrees of freedom on `ax` and mark
# its 95th percentile.
# presumably `f` is scipy.stats.f — verify against the imports.
plt.xlim(-1, 26)
plt.ylim(0, 1)
x = np.linspace(
    f.ppf(0.0000000001, dfn, dfd),
    f.ppf(0.9999999999, dfn, dfd),
    100,
)
ax.plot(x, f.pdf(x, dfn, dfd), 'r-')
upper_critical = f.ppf(0.95, dfn, dfd)
ax.axvline(upper_critical, ls="--", color="navy")
print('upper 5%:', upper_critical)
# Plot settings and data for the meal chart.
n = 1
sns.set_style('darkgrid')
sns.set(font_scale=1.2)
plt.figure(figsize=(14, 18))
values = confirmed_bookings['meal'].value_counts()
# Labels come from the counts' own index: `unique()` returns values in order
# of appearance, which does not match the frequency order of `value_counts()`.
labels = values.index.to_numpy()
palette = ["#f6bd60", "#f5cac3", "#84a59d", "#f28482"]
import plotly.graph_objs as go

# Plotly doughnut chart of the 20 most frequent cuisines.
values = data['cuisines'].value_counts()[:20]
labels = values.index
text = values.index
fig = go.Figure(data=[go.Pie(values=values, labels=labels, hole=.3)])
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='value',
    textfont_size=20,
    marker=dict(line=dict(color='#000000', width=3)),
)
fig.update_layout(
    title="Most popular cuisines of Bangalore ",
    # `titlefont` is deprecated; `title_font` sets the same layout.title.font.
    title_font={'size': 30},
)
fig.show()
ax = favourite_food.nlargest(n=20,
keep='first').plot(kind='bar',figsize=(15,15),title = 'Top 20
Favourite Food counts ')
for p in ax.patches:
fig = plt.gcf()
fig.set_size_inches(25,15)
# Bar chart of `data` with each bar's height written above it.
x = data.index
y = data.values
plt.figure(figsize=(15, 10))
color = ['red', 'yellow', 'green', 'blue']
ax = plt.bar(x, y, width=0.4, color=color)
for bar in ax:
    x_ = bar.get_x() + bar.get_width() / 2  # horizontal centre of the bar
    y_ = bar.get_height()
    txt = str(y_)
    plt.annotate(
        text=txt,
        xy=(x_, y_),
        xytext=(-17, 2.9),  # offset from the bar top, in points
        textcoords='offset points',
    )
# Filled KDE of Na_to_K for each drug type.
# Newer Matplotlib releases ship the seaborn styles under a "seaborn-v0_8-"
# prefix, so use whichever name is available.
notebook_style = next(
    (
        name
        for name in ('seaborn-v0_8-notebook', 'seaborn-notebook')
        if name in plt.style.available
    ),
    'default',
)
plt.style.use(notebook_style)
# presumably df2 encodes Drug_Type as 1..k in the order of df's unique values — verify.
for i, label in enumerate(df.Drug_Type.unique().tolist()):
    sns.kdeplot(
        df2.loc[df2['Drug_Type'] == i + 1, 'Na_to_K'],
        label=label,
        fill=True,  # `shade=` is deprecated in favour of `fill=`
    )
# 2x2 grid of Na_to_K vs Age scatter plots, each coloured by a different column.
fig, ax = plt.subplots(2, 2, figsize=(14, 8))
hue_columns = ['Sex', 'BP', 'Cholesterol', 'Drug_Type']
# ax.flat walks the grid row by row, equivalent to ax[i//2, i%2].
for axis, col in zip(ax.flat, hue_columns):
    sns.scatterplot(
        data=df, x='Age', y='Na_to_K', hue=col,
        ax=axis, palette='turbo',
    )
    # The closing parenthesis was missing from the title text.
    axis.set_title(f'Na_to_K vs Age (based on {col})', y=1.09, **font)
    axis.legend(
        loc='upper center',
        bbox_to_anchor=(1.2, 0.6),
        fancybox=True, shadow=True,
    )
fig.tight_layout()
plt.show()
import itertools
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
# Work around Plotly figures not being displayed in the notebook
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)
plt.style.use('_mpl-gallery')
# Text properties for plot titles.
FONT = {
    'fontsize': 20,
    'fontstyle': 'normal',
    'fontfamily': 'Times New Roman',
    'backgroundcolor': '#145A32',
    'color': 'orange',
}
# One horizontal Plotly box trace per column of `df`.
fig = go.Figure()
for col in df:
    fig.add_trace(go.Box(x=df[col], name=col))
# A Plotly figure is displayed with its own show(); plt.show() only renders
# Matplotlib figures.
fig.show()
continuous_cols = ['Age','Experience','CCAvg','Mortgage']