ML Assignment 01 Code
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.decomposition import PCA
# Step 5 (continued): keep only the leading eigenvalues and their eigenvectors.
# NOTE(review): `sorted_eigenvalues`, `sorted_eigenvectors`,
# `num_components_to_retain` and `normalized_data` are defined in an earlier
# part of this script that is not shown in this extract — confirm against
# the full source.
selected_eigenvalues = sorted_eigenvalues[:num_components_to_retain]
# Eigenvectors are column vectors, so slice the second axis.
selected_eigenvectors = sorted_eigenvectors[:, :num_components_to_retain]

# Step 6: transform the original (normalized) data by projecting it onto the
# eigenvectors corresponding to the selected eigenvalues.
transformed_data = np.dot(normalized_data, selected_eigenvectors)
Code 02
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.decomposition import PCA
def load_iris_data():
    """Load the Iris dataset.

    Returns:
        tuple: a ``pd.DataFrame`` of the four features (columns named after
        ``iris.feature_names``) and the integer class-label array.
    """
    iris = datasets.load_iris()
    data, columns = iris.data, iris.feature_names
    return pd.DataFrame(data, columns=columns), iris.target
def display_correlation_matrix(df):
    """Print the pairwise (Pearson) correlation matrix of *df*'s columns.

    Args:
        df: a pandas DataFrame of numeric features.
    """
    correlation_matrix = df.corr()
    print("Correlation Matrix:")
    print(correlation_matrix)
def normalize_features(df):
    """Z-score each column: subtract its mean and divide by its std.

    Note: pandas ``std()`` uses the sample standard deviation (ddof=1).

    Args:
        df: a pandas DataFrame of numeric features.

    Returns:
        A new DataFrame with each column centered at 0 with unit variance.
    """
    return (df - df.mean()) / df.std()
display_feature_contributions(pca.components_, data.columns)
def plot_before_after_pca(original_data, transformed_data, target):
    """Scatter-plot the first two original features, colored by class label.

    NOTE(review): despite the name and the ``transformed_data`` parameter,
    only the 'Original Data' subplot survives in this extract; the right-hand
    (after-PCA) subplot appears to have been lost during extraction — confirm
    against the full source.

    Args:
        original_data: DataFrame of the raw features.
        transformed_data: PCA-projected data (unused in the visible code).
        target: per-sample class labels used for point colors.
    """
    plt.figure(figsize=(12, 6))

    # Original data in the left half of a 1x2 grid.
    plt.subplot(1, 2, 1)
    plt.scatter(original_data.iloc[:, 0], original_data.iloc[:, 1],
                c=target, cmap='Set1')
    plt.title('Original Data')
    plt.xlabel('Feature 01')
    plt.ylabel('Feature 02')

    plt.savefig('output_plot.png')
    plt.show()
def display_pca_info(pca):
    """Print how many components a fitted PCA kept and their variance ratios.

    Args:
        pca: a fitted object exposing ``n_components_`` and
            ``explained_variance_ratio_`` (e.g. ``sklearn.decomposition.PCA``).
    """
    print(f"\nNumber of components to retain 95% variance: {pca.n_components_}")
    print("Explained variance ratio:", pca.explained_variance_ratio_)
def display_feature_contributions(components, columns):
    """Print and bar-plot each original feature's contribution to every PC.

    Args:
        components: array of shape (n_components, n_features), e.g.
            ``pca.components_``.
        columns: the original feature names (length n_features).
    """
    # Normalize |loadings| row-wise so each PC's contributions sum to 1.
    feature_contributions = (np.abs(components)
                             / np.sum(np.abs(components), axis=1)[:, np.newaxis])
    # NOTE(review): the ``columns=`` argument of this DataFrame was cut off in
    # the extracted source; PC1..PCk labels are the natural reconstruction —
    # confirm against the full source.
    feature_contributions_df = pd.DataFrame(
        feature_contributions.T,
        index=columns,
        columns=[f'PC{i + 1}' for i in range(components.shape[0])],
    )
    print("\nFeature contributions to Principal Components:")
    print(feature_contributions_df)

    # Plotting the feature contributions, one subplot per component.
    plt.figure(figsize=(12, 6))
    for i in range(components.shape[0]):
        plt.subplot(1, components.shape[0], i + 1)
        plt.bar(columns, feature_contributions_df.iloc[:, i])
        plt.title(f'PC{i + 1} Feature Contributions')
        plt.xlabel('Original Features')
        plt.ylabel('Contribution')
    plt.savefig('output_plot_feature_contributions.png')
    plt.show()
def main():
    """Run the assignment pipeline: load, inspect, normalize, then PCA."""
    iris_data, target = load_iris_data()
    display_correlation_matrix(iris_data)
    normalized_data = normalize_features(iris_data)
    # NOTE(review): perform_pca is defined elsewhere in the full source.
    perform_pca(normalized_data, target)


if __name__ == "__main__":
    main()
Code 03
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.decomposition import PCA
# NOTE(review): this fragment relies on `normalized_data`, `iris`, `pca`, and
# `feature_contributions_df` defined earlier in the full Code 03 script, which
# is not visible in this extract.

# Original data scatter in the left half of a 1x2 grid.
plt.subplot(1, 2, 1)
plt.scatter(normalized_data.iloc[:, 0], normalized_data.iloc[:, 1],
            c=iris.target, cmap='Set1')
plt.title('Original Data')
plt.xlabel('Feature 01')
plt.ylabel('Feature 02')
plt.savefig('output_plot.png')
plt.show()

# Display the number of components and explained variance ratio.
print(f"\nNumber of components to retain 95% variance: {pca.n_components_}")
print("Explained variance ratio:", pca.explained_variance_ratio_)

print("\nFeature contributions to Principal Components:")
print(feature_contributions_df)
plt.savefig('output_plot_feature_contributions.png')
plt.show()