0% found this document useful (0 votes)

11 views

Program 2 Hierarchical Clustering

Uploaded by

9738978362.mj

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

11 views

Program 2 Hierarchical Clustering

Uploaded by

9738978362.mj

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 5

import pandas as pd

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the per-country indicator table from a CSV file in the working directory.
df = pd.read_csv("Country-data.csv")

country child_mort exports health imports income inflation life_expec total_fer gdpp

0 Afghanistan 90.2 10.0 7.58 44.9 1610 9.44 56.2 5.82 553

1 Albania 16.6 28.0 6.55 48.6 9930 4.49 76.3 1.65 4090

2 Algeria 27.3 38.4 4.17 31.4 12900 16.10 76.5 2.89 4460

3 Angola 119.0 62.3 2.85 42.9 5900 22.40 60.1 6.16 3530

4 Antigua and Barbuda 10.3 45.5 6.03 58.9 19100 1.44 76.8 2.13 12200

... ... ... ... ... ... ... ... ... ... ...

162 Vanuatu 29.2 46.6 5.25 52.7 2950 2.62 63.0 3.50 2970

163 Venezuela 17.1 28.5 4.91 17.6 16500 45.90 75.4 2.47 13500

164 Vietnam 23.3 72.0 6.84 80.2 4490 12.10 73.1 1.95 1310

165 Yemen 56.3 30.0 5.18 34.4 4480 23.60 67.5 4.67 1310

166 Zambia 83.1 37.0 5.89 30.9 3280 14.00 52.0 5.40 1460

167 rows × 10 columns

# Count the missing values in each column.
df.isnull().sum()

country 0
child_mort 0
exports 0
health 0
imports 0
income 0
inflation 0
life_expec 0
total_fer 0
gdpp 0
dtype: int64

# Show the dtype of every column.
df.dtypes

country object
child_mort float64
exports float64
health float64
imports float64
income int64
inflation float64
life_expec float64
total_fer float64
gdpp int64
dtype: object

# Work on a copy without the 'country' label column; `df` itself is left intact.
df_new = df.drop(columns=['country'])
df_new

child_mort exports health imports income inflation life_expec total_fer gdpp

0 90.2 10.0 7.58 44.9 1610 9.44 56.2 5.82 553

1 16.6 28.0 6.55 48.6 9930 4.49 76.3 1.65 4090

2 27.3 38.4 4.17 31.4 12900 16.10 76.5 2.89 4460

3 119.0 62.3 2.85 42.9 5900 22.40 60.1 6.16 3530

4 10.3 45.5 6.03 58.9 19100 1.44 76.8 2.13 12200

... ... ... ... ... ... ... ... ... ...

162 29.2 46.6 5.25 52.7 2950 2.62 63.0 3.50 2970

163 17.1 28.5 4.91 17.6 16500 45.90 75.4 2.47 13500

164 23.3 72.0 6.84 80.2 4490 12.10 73.1 1.95 1310

165 56.3 30.0 5.18 34.4 4480 23.60 67.5 4.67 1310

166 83.1 37.0 5.89 30.9 3280 14.00 52.0 5.40 1460

167 rows × 9 columns

# Draw one boxplot per feature on a 3x3 grid to inspect outliers.
# Only one figure is created: a second plt.figure() call before plotting
# would just leave an empty extra figure behind.
plt.figure(figsize=(12, 8))
feature_list = df_new.columns
for position, feature in enumerate(feature_list, start=1):
    # Subplot positions are 1-based, hence start=1.
    plt.subplot(3, 3, position)
    sns.boxplot(y=df_new[feature], data=df_new)
    plt.title('Boxplot of {}'.format(feature))
# Adjust the spacing once, after every subplot has been added.
plt.tight_layout()

# Define a function which returns the upper and lower limits used to detect outliers for each feature
def remove_outlier(col):
    """Return the IQR-based outlier limits for one column.

    Despite its name, this function does not remove or modify anything; it
    only computes the limits. The caller decides what to do with values
    that fall outside them.

    Args:
        col: An object whose ``quantile([0.25, 0.75])`` yields the first and
            third quartiles when iterated (e.g. a pandas Series).

    Returns:
        A ``(lower_range, upper_range)`` tuple, where
        ``lower_range = Q1 - 1.5 * IQR`` and ``upper_range = Q3 + 1.5 * IQR``.
    """
    q1, q3 = col.quantile([0.25, 0.75])
    iqr = q3 - q1
    lower_range = q1 - (1.5 * iqr)
    upper_range = q3 + (1.5 * iqr)
    return lower_range, upper_range

# Cap & floor the values beyond the outlier boundaries.
# Each column of df_new is overwritten in place: values above the upper limit
# are set to that limit, then values below the lower limit are set to it.
for column in feature_list:
    lower_limit, upper_limit = remove_outlier(df_new[column])
    df_new[column] = np.where(df_new[column] > upper_limit, upper_limit, df_new[column])
    df_new[column] = np.where(df_new[column] < lower_limit, lower_limit, df_new[column])

# Redraw the per-feature boxplots on a 3x3 grid using the current contents of df_new.
plt.figure(figsize=(12, 8))
feature_list = df_new.columns
for position, feature in enumerate(feature_list, start=1):
    # Subplot positions are 1-based, hence start=1.
    plt.subplot(3, 3, position)
    sns.boxplot(y=df_new[feature], data=df_new)
    plt.title('Boxplot of {}'.format(feature))
# Adjust the spacing once, after every subplot has been added.
plt.tight_layout()
from scipy.cluster.hierarchy import dendrogram, linkage

# Build the hierarchical-clustering linkage matrix using Ward's method.
# NOTE(review): df_new is passed in without any scaling step, so columns with
# large magnitudes (e.g. income, gdpp) will likely dominate the distances --
# confirm whether standardizing the features first is intended.
wardlink = linkage(df_new, method="ward")

# Plot the full dendrogram; `dend` holds the dictionary returned by dendrogram().
dend = dendrogram(wardlink)

# Redraw the dendrogram condensed to the last 10 merged clusters (truncate_mode="lastp").
dend = dendrogram(wardlink, truncate_mode="lastp", p=10)
from scipy.cluster.hierarchy import fcluster

# Method 1: assign every record to one of at most 5 flat clusters.
clusters = fcluster(wardlink, t=5, criterion='maxclust')  # t is the number of clusters
clusters

array([4, 5, 5, 5, 3, 3, 5, 1, 1, 5, 2, 1, 4, 3, 5, 1, 5, 4, 5, 5, 5, 5,
3, 1, 5, 4, 4, 4, 4, 1, 5, 4, 4, 3, 5, 5, 4, 4, 5, 5, 4, 3, 2, 2,
1, 5, 5, 5, 5, 2, 4, 3, 5, 1, 2, 5, 4, 5, 1, 4, 2, 5, 5, 4, 4, 5,
4, 3, 1, 4, 5, 5, 5, 1, 2, 2, 5, 2, 5, 3, 4, 4, 1, 4, 4, 3, 5, 4,
4, 2, 3, 1, 5, 4, 4, 3, 5, 4, 2, 4, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4,
1, 2, 4, 5, 1, 1, 4, 5, 5, 5, 5, 3, 2, 1, 5, 3, 4, 5, 1, 4, 5, 3,
4, 1, 3, 2, 4, 5, 2, 2, 5, 5, 4, 5, 1, 1, 4, 4, 5, 4, 4, 5, 5, 3,
5, 4, 5, 1, 2, 1, 3, 4, 4, 3, 4, 4, 4], dtype=int32)

# Method 2 (based on distance): form flat clusters using a distance threshold of 23.
# The result is kept under its own name so that it does not overwrite the
# 5-cluster labels stored in `clusters`, which are the ones later attached to df.
clusters_by_distance = fcluster(wardlink, 23, criterion="distance")

clusters_by_distance

array([ 79, 117, 156, 137, 53, 52, 129, 10, 18, 144, 36, 9, 60,
40, 145, 11, 110, 69, 130, 131, 113, 160, 39, 4, 146, 86,
77, 61, 56, 12, 138, 72, 71, 54, 114, 152, 83, 75, 133,
162, 57, 55, 31, 21, 20, 153, 115, 109, 123, 26, 84, 43,
125, 14, 30, 142, 80, 126, 13, 97, 34, 149, 127, 91, 87,
139, 81, 41, 15, 101, 107, 147, 157, 16, 35, 28, 111, 29,
120, 45, 62, 64, 4, 63, 104, 50, 143, 58, 76, 27, 47,
1, 155, 85, 74, 46, 150, 70, 22, 98, 140, 92, 106, 121,
161, 128, 73, 105, 112, 68, 17, 33, 78, 134, 5, 7, 103,
141, 124, 116, 132, 42, 23, 4, 148, 49, 88, 135, 8, 59,
158, 48, 89, 1, 44, 24, 65, 151, 25, 32, 108, 154, 95,
163, 19, 2, 66, 67, 159, 94, 90, 136, 119, 51, 118, 82,
122, 3, 28, 6, 37, 102, 93, 38, 99, 100, 96], dtype=int32)

# Attach each row's cluster label to the original frame, which still has the country names.
# NOTE(review): the labels displayed for this cell (4, 5, 5, 5, 3, ...) match the
# 5-cluster maxclust result -- make sure `clusters` holds that result when this runs.
df["clusters"] = clusters
df

country child_mort exports health imports income inflation life_expec total_fer gdpp clusters

0 Afghanistan 90.2 10.0 7.58 44.9 1610 9.44 56.2 5.82 553 4

1 Albania 16.6 28.0 6.55 48.6 9930 4.49 76.3 1.65 4090 5

2 Algeria 27.3 38.4 4.17 31.4 12900 16.10 76.5 2.89 4460 5

3 Angola 119.0 62.3 2.85 42.9 5900 22.40 60.1 6.16 3530 5

4 Antigua and Barbuda 10.3 45.5 6.03 58.9 19100 1.44 76.8 2.13 12200 3

... ... ... ... ... ... ... ... ... ... ... ...

162 Vanuatu 29.2 46.6 5.25 52.7 2950 2.62 63.0 3.50 2970 4

163 Venezuela 17.1 28.5 4.91 17.6 16500 45.90 75.4 2.47 13500 3

164 Vietnam 23.3 72.0 6.84 80.2 4490 12.10 73.1 1.95 1310 4

165 Yemen 56.3 30.0 5.18 34.4 4480 23.60 67.5 4.67 1310 4

166 Zambia 83.1 37.0 5.89 30.9 3280 14.00 52.0 5.40 1460 4

167 rows × 11 columns

# Save the labelled data to a CSV file in the working directory.
# With pandas' default settings the row index is written as the first column.
df.to_csv("country_data_clust.csv")
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js

Math, Grade 6
From Everand
Math, Grade 6
Carson Dellosa Education
5/5 (4)
Lesson 2 Classical Art
No ratings yet
Lesson 2 Classical Art
27 pages
Country - Data (Record) - Jupyter Notebook
No ratings yet
Country - Data (Record) - Jupyter Notebook
5 pages
Tabla de Equivalencias de Valores DCP Vrs CBR
No ratings yet
Tabla de Equivalencias de Valores DCP Vrs CBR
1 page
Forex Trading Plan Adjusted
No ratings yet
Forex Trading Plan Adjusted
5 pages
Group 1
No ratings yet
Group 1
4 pages
Forex Trading Plan Corrected
No ratings yet
Forex Trading Plan Corrected
5 pages
30.12.23 v10
No ratings yet
30.12.23 v10
24 pages
02 Task Performance 1 ARG
No ratings yet
02 Task Performance 1 ARG
2 pages
Progression
No ratings yet
Progression
2 pages
Advance® UKA - Equal: Dimensions and Properties
No ratings yet
Advance® UKA - Equal: Dimensions and Properties
1 page
Angle PDF
No ratings yet
Angle PDF
1 page
Universal Sompo General Insurance Co LTD: Complete Healthcare Insurance UIN: UNIHLIP14003V011314
No ratings yet
Universal Sompo General Insurance Co LTD: Complete Healthcare Insurance UIN: UNIHLIP14003V011314
10 pages
PremiumChart CompleteHealthcareInsurance PDF
No ratings yet
PremiumChart CompleteHealthcareInsurance PDF
10 pages
Mixit Ponedoras
No ratings yet
Mixit Ponedoras
48 pages
SalaryScaleTeachers23 24
No ratings yet
SalaryScaleTeachers23 24
1 page
Universal Sompo General Insurance Co LTD: Complete Healthcare Insurance UIN: UNIHLIP21409V022021
No ratings yet
Universal Sompo General Insurance Co LTD: Complete Healthcare Insurance UIN: UNIHLIP21409V022021
10 pages
Watchlist 23 June 2024
No ratings yet
Watchlist 23 June 2024
76 pages
GIFT SELECT HYBRID
No ratings yet
GIFT SELECT HYBRID
2 pages
Muh - Sofwwan Tahir EOQ
No ratings yet
Muh - Sofwwan Tahir EOQ
4 pages
International Fetal Size Standards in Early Pregnancy: Centiles Gestational Age (Weeks + Days)
No ratings yet
International Fetal Size Standards in Early Pregnancy: Centiles Gestational Age (Weeks + Days)
4 pages
International Fetal Size Standards in Early Pregnancy: Centiles Gestational Age (Weeks + Days)
No ratings yet
International Fetal Size Standards in Early Pregnancy: Centiles Gestational Age (Weeks + Days)
4 pages
International Fetal Size Standards in Early Pregnancy: Centiles Gestational Age (Weeks + Days)
No ratings yet
International Fetal Size Standards in Early Pregnancy: Centiles Gestational Age (Weeks + Days)
4 pages
Oferta Monetaria
No ratings yet
Oferta Monetaria
14 pages
Advanced Math Hard
No ratings yet
Advanced Math Hard
30 pages
Friction Loss.
No ratings yet
Friction Loss.
1 page
Activity 8
No ratings yet
Activity 8
5 pages
WI 2387 5CT Recalcuate Weight GainLoss
No ratings yet
WI 2387 5CT Recalcuate Weight GainLoss
7 pages
Square and Rectangular Sections
No ratings yet
Square and Rectangular Sections
2 pages
Barometric Pressure
No ratings yet
Barometric Pressure
1 page
Waktu (S) Konduktivitas (MS) Waktu (S) Konduktivitas (MS) Waktu (S) Konduktivitas (MS)
No ratings yet
Waktu (S) Konduktivitas (MS) Waktu (S) Konduktivitas (MS) Waktu (S) Konduktivitas (MS)
4 pages
Cs Gmpme Nd06
No ratings yet
Cs Gmpme Nd06
4 pages
Light Lip Pped Channel
No ratings yet
Light Lip Pped Channel
3 pages
Excel Worksheet 1
No ratings yet
Excel Worksheet 1
20 pages
Chapter 4 PR2
No ratings yet
Chapter 4 PR2
3 pages
PembelajaranMesin - Ipynb - Colaboratory
No ratings yet
PembelajaranMesin - Ipynb - Colaboratory
6 pages
S9 Regresión Simple y Múltiple Al - Colaboratory
No ratings yet
S9 Regresión Simple y Múltiple Al - Colaboratory
14 pages
The Engineering Toolbox: Compressed Air - Pressure Drop in Pipelines
No ratings yet
The Engineering Toolbox: Compressed Air - Pressure Drop in Pipelines
6 pages
Universiyt of Benghazi (Autorecovered)
No ratings yet
Universiyt of Benghazi (Autorecovered)
6 pages
Karisma_23011101119_eda_rec
No ratings yet
Karisma_23011101119_eda_rec
88 pages
Learning Curve Estimation
No ratings yet
Learning Curve Estimation
8 pages
Scaling Targets, Laser Academy - Standard
No ratings yet
Scaling Targets, Laser Academy - Standard
2 pages
Country Y: Internet - Use X: GDP - Capita X X-Xbar X 2 y Y-Ybar
No ratings yet
Country Y: Internet - Use X: GDP - Capita X X-Xbar X 2 y Y-Ybar
5 pages
Perfiles Estructurales74de
No ratings yet
Perfiles Estructurales74de
1 page
Variantes de Producto (Product - Product) 2
No ratings yet
Variantes de Producto (Product - Product) 2
6 pages
FDS Solved Slips
100% (1)
FDS Solved Slips
63 pages
Offset (M) Time (MS) : Jarak Dan Waktu Kedalaman
No ratings yet
Offset (M) Time (MS) : Jarak Dan Waktu Kedalaman
17 pages
Offset (M) Time (MS) : Jarak Dan Waktu Kedalaman
No ratings yet
Offset (M) Time (MS) : Jarak Dan Waktu Kedalaman
17 pages
V 6 Dev 8
No ratings yet
V 6 Dev 8
12 pages
Monte Carlo Simulation Example
No ratings yet
Monte Carlo Simulation Example
3 pages
ML Assignment1 Linear Regression
No ratings yet
ML Assignment1 Linear Regression
6 pages
kN/m3 % kN/m3 m kN/m3 m kN/m3 kN/m3: γ1 E γ2 Df γ1sat NAF γ2sat γ h2o
No ratings yet
kN/m3 % kN/m3 m kN/m3 m kN/m3 kN/m3: γ1 E γ2 Df γ1sat NAF γ2sat γ h2o
24 pages
Linear Regression
100% (1)
Linear Regression
4 pages
3% Equity Trading Days Profit Daily: Mind Set
No ratings yet
3% Equity Trading Days Profit Daily: Mind Set
7 pages
Light Lip Channel
No ratings yet
Light Lip Channel
4 pages
General Government Gross Debt
No ratings yet
General Government Gross Debt
6 pages
Taula de Pesos I Mides de Tubs Rectangulars, Rodons I Quadrats
No ratings yet
Taula de Pesos I Mides de Tubs Rectangulars, Rodons I Quadrats
1 page
Calculate The Summary Statistics Listed in Rows 23 Through 30 For Each Set of Data A To E. Round All Answers To 1 Decimal Place
No ratings yet
Calculate The Summary Statistics Listed in Rows 23 Through 30 For Each Set of Data A To E. Round All Answers To 1 Decimal Place
3 pages
Proyecto Nuevo Alausi
No ratings yet
Proyecto Nuevo Alausi
16 pages
Statistics & Big Data For Real Estate: Course Code: Aumreal 639
No ratings yet
Statistics & Big Data For Real Estate: Course Code: Aumreal 639
20 pages
Math Mastery
From Everand
Math Mastery
Patricia Lewis
5/5 (5)
AoF - Darkness Within v3.1.0
No ratings yet
AoF - Darkness Within v3.1.0
31 pages
MIS Draft Assignment
100% (1)
MIS Draft Assignment
25 pages
ETX-5300A: Ethernet Service Aggregation Platform
No ratings yet
ETX-5300A: Ethernet Service Aggregation Platform
4 pages
Double Sale Immovable Property
100% (1)
Double Sale Immovable Property
3 pages
CAPM Proof That Market Portfolio Is Value Weighted
No ratings yet
CAPM Proof That Market Portfolio Is Value Weighted
3 pages
Topic 3 - Working Capital Management
No ratings yet
Topic 3 - Working Capital Management
77 pages
The Casualties in The Caraga Region. (2 PTS)
No ratings yet
The Casualties in The Caraga Region. (2 PTS)
2 pages
(2001) "A Conceptual Study On Brand Valuation"
No ratings yet
(2001) "A Conceptual Study On Brand Valuation"
3 pages
List Thesis Topics Sociology
100% (3)
List Thesis Topics Sociology
6 pages
Cifra Club - Guns N' Roses - November Rain PDF
No ratings yet
Cifra Club - Guns N' Roses - November Rain PDF
12 pages
Prevention of Illegal Working and Right To Work in The UK Policy
No ratings yet
Prevention of Illegal Working and Right To Work in The UK Policy
7 pages
IIQS Membership Guidelines PDF
No ratings yet
IIQS Membership Guidelines PDF
15 pages
Analisis Manajemen Resiko Dalam Penerapan Good Corporate Governance Studi Pada Perusahaan Perbankan Di Indonesia 2
No ratings yet
Analisis Manajemen Resiko Dalam Penerapan Good Corporate Governance Studi Pada Perusahaan Perbankan Di Indonesia 2
15 pages
Export REACH 20221206 11154615
No ratings yet
Export REACH 20221206 11154615
2 pages
Suny Genocide
No ratings yet
Suny Genocide
46 pages
The Diary of A Young Girl Mooyaart - Doubleday PDF: Traducir Esta Página
No ratings yet
The Diary of A Young Girl Mooyaart - Doubleday PDF: Traducir Esta Página
2 pages
004 - Fiduciarie State For Morocco Forbes
No ratings yet
004 - Fiduciarie State For Morocco Forbes
5 pages
Solution Manual For Personal Finance 1st Edition by Walker
No ratings yet
Solution Manual For Personal Finance 1st Edition by Walker
21 pages
Financial Administration of The Rural Local Bodies
No ratings yet
Financial Administration of The Rural Local Bodies
7 pages
Culture and Value PDF
No ratings yet
Culture and Value PDF
29 pages
People v. Concepcion
100% (1)
People v. Concepcion
1 page
INV-002839
No ratings yet
INV-002839
2 pages
ccw331 Business Analytics Unit-Iv Notes
No ratings yet
ccw331 Business Analytics Unit-Iv Notes
112 pages
Defoe Excerpts
No ratings yet
Defoe Excerpts
7 pages
Ethical Decision Making For The 21st Century Counselor 1st Edition Sheperis Test Bank
100% (1)
Ethical Decision Making For The 21st Century Counselor 1st Edition Sheperis Test Bank
42 pages
Https Retail - Onlinesbi.com Retail Mobilenoupdateguidelines
100% (1)
Https Retail - Onlinesbi.com Retail Mobilenoupdateguidelines
1 page
A
No ratings yet
A
46 pages
Boq 1951517
No ratings yet
Boq 1951517
6 pages
A Study On The Effectiveness of Training and Development in Any Organisation
No ratings yet
A Study On The Effectiveness of Training and Development in Any Organisation
36 pages