
MACHINE LEARNING

LAB PRACTICAL

NAME: Vijay Kumar
ID: 201001001190
BATCH: BCS 4C
Lab Assignment 1: Simple Linear Regression
Perform both simple and multiple linear regression on the given dataset. For
simple regression, use 'lstat' as the independent variable and 'medv' as the
dependent variable; for multiple regression, the dependent variable should
again be 'medv'. Evaluate both models with mean absolute error (MAE), root
mean squared error (RMSE), and the R² score.
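For reference, with actual values $y_i$, predictions $\hat{y}_i$, mean of the actuals $\bar{y}$, and $n$ test samples, the three metrics are:

$$\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n}\lvert y_i - \hat{y}_i\rvert,\qquad \mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2},\qquad R^2 = 1 - \frac{\sum_{i=1}^{n}(y_i - \hat{y}_i)^2}{\sum_{i=1}^{n}(y_i - \bar{y})^2}$$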

Code:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset


data = pd.read_csv('housing_data.csv')
data.fillna(0, inplace=True)
# Separate the data into features (X) and target (y)
X_simple = data[['LSTAT']]
y_simple = data['MEDV']

X_multiple = data.drop(columns=['MEDV'])
y_multiple = data['MEDV']

# Split the data into training and testing sets


X_train_simple, X_test_simple, y_train_simple, y_test_simple = train_test_split(X_simple, y_simple, test_size=0.2, random_state=42)

# Simple Linear Regression


simple_model = LinearRegression()
simple_model.fit(X_train_simple, y_train_simple)
y_pred_simple = simple_model.predict(X_test_simple)

# Evaluation metrics
mae_simple = mean_absolute_error(y_test_simple, y_pred_simple)
rmse_simple = np.sqrt(mean_squared_error(y_test_simple, y_pred_simple))
r2_simple = r2_score(y_test_simple, y_pred_simple)
# Print the metrics for both models
print("Simple Linear Regression Metrics:")
print("MAE:", mae_simple)
print("RMSE:", rmse_simple)
print("R-squared:", r2_simple)

# Plot the results for Simple Linear Regression


plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.scatter(X_test_simple, y_test_simple, color='blue', label='Actual')
plt.plot(X_test_simple, y_pred_simple, color='red', linewidth=2, label='Predicted')
plt.title('Simple Linear Regression')
plt.xlabel('LSTAT')
plt.ylabel('MEDV')
plt.legend()
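As a quick sanity check (not part of the assignment output), the fitted line's parameters can be read directly off the trained model; since higher LSTAT values correspond to lower median home values, the slope should come out negative:

# Inspect the fitted line y = b0 + b1 * LSTAT (sanity check, not required output)
print("Intercept (b0):", simple_model.intercept_)
print("Slope (b1):", simple_model.coef_[0])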

OUTPUT:

Graph:
Lab Assignment 2: Multiple Linear Regression

Code:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset


data = pd.read_csv('housing_data.csv')
data.fillna(0, inplace=True)
X_multiple = data.drop(columns=['MEDV'])
y_multiple = data['MEDV']

# Split the data into training and testing sets


X_train_multiple, X_test_multiple, y_train_multiple, y_test_multiple = train_test_split(X_multiple, y_multiple, test_size=0.2, random_state=42)

# Multiple Linear Regression


multiple_model = LinearRegression()
multiple_model.fit(X_train_multiple, y_train_multiple)
y_pred_multiple = multiple_model.predict(X_test_multiple)

mae_multiple = mean_absolute_error(y_test_multiple, y_pred_multiple)


rmse_multiple = np.sqrt(mean_squared_error(y_test_multiple, y_pred_multiple))
r2_multiple = r2_score(y_test_multiple, y_pred_multiple)

print("\nMultiple Linear Regression Metrics:")


print("MAE:", mae_multiple)
print("RMSE:", rmse_multiple)
print("R-squared:", r2_multiple)

# Plot the results for Multiple Linear Regression


plt.subplot(1, 2, 2)
plt.scatter(y_test_multiple, y_pred_multiple, color='blue')
plt.plot([min(y_test_multiple), max(y_test_multiple)], [min(y_test_multiple), max(y_test_multiple)], linestyle='--', color='red')
plt.title('Multiple Linear Regression')
plt.xlabel('Actual MEDV')
plt.ylabel('Predicted MEDV')
plt.tight_layout()
plt.show()
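To inspect what the model learned, the coefficients can be paired with the column names (a quick addition, not in the original listing; note that coefficients on unscaled features are not directly comparable as importances):

# Pair each feature with its learned coefficient for a quick inspection
coeffs = pd.Series(multiple_model.coef_, index=X_multiple.columns)
print(coeffs.sort_values())
print("Intercept:", multiple_model.intercept_)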

OUTPUT:

Graph:
Lab Assignment 3: Logistic Regression
Code:
#Data Pre-processing Step
# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
#importing datasets
data_set= pd.read_csv('user_data.csv')

#Extracting Independent and dependent Variable


x= data_set.iloc[:, [2,3]].values
y= data_set.iloc[:, 4].values

# Splitting the dataset into training and test set.


from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.25, random_state=0)

#feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)

#Fitting Logistic Regression to the training set


from sklearn.linear_model import LogisticRegression
classifier= LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)

#Predicting the test set result


y_pred= classifier.predict(x_test)

#Creating the Confusion matrix


from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
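# (Added, not in the original listing) the matrix above is computed but never
# displayed; printing it together with the test accuracy makes the evaluation
# visible when the script runs:
from sklearn.metrics import accuracy_score
print(cm)
print("Accuracy:", accuracy_score(y_test, y_pred))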

#Visualizing the training set result


from matplotlib.colors import ListedColormap
x_set, y_set = x_train, y_train
x1, x2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max() + 1, step = 0.01),
                     nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max() + 1, step = 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape),
             alpha = 0.75, cmap = ListedColormap(('purple', 'green')))
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())

for i, j in enumerate(nm.unique(y_set)):
    mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c = ListedColormap(('purple', 'green'))(i), label = j)

mtp.title('Logistic Regression (Training set)')


mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()

Output:
Lab Assignment 4: Support Vector Machine

Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

#Load the dataset


dataset = pd.read_csv('Social_Network_Ads.csv')
#print(dataset)
x = dataset.iloc[:,[2,3]].values
y = dataset.iloc[:,4].values
#print(y)

#Splitting the dataset into the Training and Test set


from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=0)

#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

#Fitting SVM to the training set


from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state =0)
classifier.fit(x_train,y_train)

#Predicting the test set results


y_pred = classifier.predict(x_test)

#Making the confusion matrix


from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test,y_pred)
print(cm)
res = accuracy_score(y_test,y_pred)
print(res)
fig = px.scatter_3d(dataset,x='Gender',y='Age',z='EstimatedSalary',color='Purchased')
fig.show()
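The RBF kernel is one choice among several; as an illustrative experiment (not in the original listing), a linear kernel can be trained on the same scaled split and its accuracy compared:

# Train a linear-kernel SVM on the same split for comparison (illustrative)
linear_clf = SVC(kernel = 'linear', random_state = 0)
linear_clf.fit(x_train, y_train)
print("Linear kernel accuracy:", accuracy_score(y_test, linear_clf.predict(x_test)))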

Output:
ASSIGNMENT 5

PROBLEM STATEMENT:

Perform Naive Bayes on the given dataset.

SOURCE CODE:
import numpy as np
import matplotlib.pyplot as mtp
import pandas as pd
#load the dataset

dataset = pd.read_csv('User_Data.csv')
#print(dataset)
#print(dataset.head())

x = dataset.iloc[:, [2,3]].values
y = dataset.iloc[:, 4].values
#print(x)

# Splitting the dataset into the Training and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=0)

#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

#Fitting Naive Bayes to the training set


from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train,y_train)

#Predicting the test set results


y_pred = classifier.predict(x_test)
#Making the confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test,y_pred)
print(cm)
res = accuracy_score(y_test,y_pred)
print(res)
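Accuracy alone can hide class imbalance; a small addition (not in the original listing) prints per-class precision, recall, and F1:

# Per-class precision/recall/F1 (added for a fuller picture)
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))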

Output:
ASSIGNMENT 6

PROBLEM STATEMENT:

Perform the k-nearest neighbours (KNN) algorithm on the given dataset.

SOURCE CODE:

import numpy as np
#import matplotlib.pyplot as mtp
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import plotly.express as px
import warnings

warnings.filterwarnings('ignore')
df = pd.read_csv('iris.csv')
#print(df)

x = df.drop(['variety'],axis='columns')
y=df['variety']
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=5)
kmn=KNeighborsClassifier(n_neighbors=7)
kmn.fit(x_train,y_train)
res = kmn.score(x_test,y_test)
print(res)

fig = px.scatter_3d(df,x='sepal.length',y='sepal.width',z='petal.length',color='variety')
fig.show()
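The choice n_neighbors=7 is one of many; an illustrative sweep (not in the original listing) scores several values of k on the same held-out split:

# Score several values of k on the held-out split (illustrative sweep)
for k in range(1, 16):
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(x_train, y_train)
    print(k, clf.score(x_test, y_test))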
Output:
ASSIGNMENT 7

PROBLEM STATEMENT:
Perform Decision Tree and K-means on the given dataset.

Decision Tree:

# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
import plotly.express as px
import warnings

warnings.filterwarnings('ignore')
#importing datasets
data_set= pd.read_csv('User_Data.csv')
#Extracting Independent and dependent Variable
x= data_set.iloc[:, [2,3]].values
y= data_set.iloc[:, 4].values

# Splitting the dataset into training and test set.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.25, random_state=0)

#feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
#Fitting Decision Tree classifier to the training set
from sklearn.tree import DecisionTreeClassifier
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(x_train, y_train)

#Predicting the test set result


y_pred= classifier.predict(x_test)
#Creating the Confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm= confusion_matrix(y_test, y_pred)
print(cm)
res=accuracy_score(y_test,y_pred)
print(res)

#Visualizing the training set result

#fig = px.scatter_3d(data_set, x='Gender', y='Age', z='EstimatedSalary', color='Purchased')
#fig.show()

from matplotlib.colors import ListedColormap

x_set, y_set = x_train, y_train
x1, x2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max() + 1, step = 0.01),
                     nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max() + 1, step = 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape),
             alpha = 0.75, cmap = ListedColormap(('purple', 'green')))
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())
for i, j in enumerate(nm.unique(y_set)):
    mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c = ListedColormap(('purple', 'green'))(i), label = j)
mtp.title('Decision Tree Algorithm (Training set)')
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()
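The fitted tree itself can also be drawn; a short addition (not in the original listing) uses sklearn's plot_tree on the trained classifier, assuming columns 2 and 3 are Age and EstimatedSalary as the axis labels above suggest:

# Draw the fitted decision tree (illustrative addition)
from sklearn import tree
mtp.figure(figsize=(12, 8))
tree.plot_tree(classifier, feature_names=['Age', 'EstimatedSalary'], filled=True)
mtp.show()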
Output:
Lab Assignment 8: K-Means Clustering

Code:
# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
import warnings

warnings.filterwarnings('ignore')
# Importing the dataset
dataset = pd.read_csv('Mall_Customers.csv')
x = dataset.iloc[:, [3, 4]].values
#print(x)

#finding optimal number of clusters using the elbow method
from sklearn.cluster import KMeans
wcss_list= [] #Initializing the list for the values of WCSS

#Using a for loop for iterations from 1 to 10
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state= 42)
    kmeans.fit(x)
    wcss_list.append(kmeans.inertia_)
mtp.plot(range(1, 11), wcss_list)
mtp.title('The Elbow Method Graph')
mtp.xlabel('Number of clusters (k)')
mtp.ylabel('wcss_list')
mtp.show()

#training the K-means model on a dataset


kmeans = KMeans(n_clusters=5, init='k-means++', random_state= 42)
y_predict= kmeans.fit_predict(x)
print(y_predict)
#print(x[y_predict])
#visualizing the clusters
mtp.scatter(x[y_predict == 0, 0], x[y_predict == 0, 1], s = 100, c = 'blue', label = 'Cluster 1')     #for first cluster
mtp.scatter(x[y_predict == 1, 0], x[y_predict == 1, 1], s = 100, c = 'green', label = 'Cluster 2')    #for second cluster
mtp.scatter(x[y_predict == 2, 0], x[y_predict == 2, 1], s = 100, c = 'red', label = 'Cluster 3')      #for third cluster
mtp.scatter(x[y_predict == 3, 0], x[y_predict == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')     #for fourth cluster
mtp.scatter(x[y_predict == 4, 0], x[y_predict == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')  #for fifth cluster
mtp.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroid')
mtp.title('Clusters of customers')
mtp.xlabel('Annual Income (k$)')
mtp.ylabel('Spending Score (1-100)')
mtp.legend()
mtp.show()
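The elbow plot is a visual heuristic; as an alternative check (not in the original listing), the silhouette score can be computed for each candidate k, where higher values indicate better-separated clusters:

# Silhouette score per candidate k (illustrative alternative to the elbow plot)
from sklearn.metrics import silhouette_score
for k in range(2, 11):
    labels = KMeans(n_clusters=k, init='k-means++', random_state=42).fit_predict(x)
    print(k, silhouette_score(x, labels))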

Output:
Lab Assignment 9: MLP

Code:
import pandas as pd
import numpy as np

from sklearn.neural_network import MLPClassifier


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

df=pd.read_csv("bank_note_data.csv")
#print(df.head())
#df.shape
#df.describe()
x=df.drop('Class', axis=1)
y=df['Class']
#print(x.head())
#print(y.head())
#print(x.shape)

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 2)
print(X_train.shape)
print(X_test.shape)
mlp=MLPClassifier(max_iter=500,activation='relu')
mlp.fit(X_train,y_train)
y_pred=mlp.predict(X_test)
print(y_pred)
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
from sklearn.metrics import accuracy_score
res=accuracy_score(y_test, y_pred)
print(res)
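MLPClassifier defaults to a single hidden layer of 100 units; the architecture can be set explicitly through hidden_layer_sizes (an illustrative variant, not in the original listing):

# Explicit two-hidden-layer architecture (illustrative variant)
mlp2 = MLPClassifier(hidden_layer_sizes=(32, 16), activation='relu', max_iter=500, random_state=2)
mlp2.fit(X_train, y_train)
print("Two-layer MLP accuracy:", accuracy_score(y_test, mlp2.predict(X_test)))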
Output:
Lab Assignment 10: Polynomial Regression

Code:

# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd

#importing datasets
data_set= pd.read_csv("Position_Salaries.csv")

#Extracting Independent and dependent Variable


x= data_set.iloc[:, 1:2].values
y= data_set.iloc[:, 2].values
#Fitting the Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_regs= LinearRegression()
lin_regs.fit(x,y)

#Fitting the Polynomial regression to the dataset


from sklearn.preprocessing import PolynomialFeatures
poly_regs= PolynomialFeatures(degree= 2)
x_poly= poly_regs.fit_transform(x)
lin_reg_2 =LinearRegression()
lin_reg_2.fit(x_poly, y)

#Visualizing the result for the Linear Regression model
mtp.scatter(x,y,color="blue")
mtp.plot(x,lin_regs.predict(x), color="red")
mtp.title("Bluff detection model (Linear Regression)")
mtp.xlabel("Position Levels")
mtp.ylabel("Salary")
mtp.show()

#Visualizing the result for Polynomial Regression
mtp.scatter(x,y,color="blue")
mtp.plot(x, lin_reg_2.predict(poly_regs.fit_transform(x)), color="red")
mtp.title("Bluff detection model (Polynomial Regression)")
mtp.xlabel("Position Levels")
mtp.ylabel("Salary")
mtp.show()

#Predicting the final result with the Linear Regression model
lin_pred = lin_regs.predict([[6.5]])
print(lin_pred)

#Predicting the final result with the Polynomial Regression model


poly_pred = lin_reg_2.predict(poly_regs.fit_transform([[6.5]]))
print(poly_pred)
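The polynomial degree is the key hyperparameter here; an illustrative variant (not in the original listing) refits with degree 4 to see how the level-6.5 prediction changes:

# Refit with a degree-4 polynomial for comparison (illustrative)
poly4 = PolynomialFeatures(degree= 4)
lin_reg_4 = LinearRegression()
lin_reg_4.fit(poly4.fit_transform(x), y)
print(lin_reg_4.predict(poly4.transform([[6.5]])))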

Output:
