Assignment 2

This document analyzes and models an alloy-composition dataset in Python. It imports libraries for data analysis and machine learning, loads and explores the alloy composition and hardness (HV) data, and examines distributions and correlations. It then fits several linear regression models, including a manual normal-equation calculation, a Scikit-Learn implementation, and gradient-descent optimization, and evaluates their performance on the training and test sets.

import pandas as pd              # To work with data tables
import matplotlib.pyplot as plt  # To visualize data
import numpy as np
import copy

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import make_regression
from sklearn.model_selection import KFold
!gdown 1i5c01hhj04J816siI-6u3ea32vI-HToy
Downloading...
From: https://drive.google.com/uc?id=1i5c01hhj04J816siI-6u3ea32vI-HToy
To: /content/alloy-confp-train-data.csv
100% 7.22k/7.22k [00:00<00:00, 27.3MB/s]

data = pd.read_csv('/content/alloy-confp-train-data.csv')

# Shuffle the rows and overwrite the file with the shuffled order
# (index=False avoids writing a spurious index column back to the CSV)
data = data.sample(frac=1)
data.to_csv('/content/alloy-confp-train-data.csv', index=False)

data.shape
(120, 8)

# Select the composition columns ('C.al', 'C.co', ...); regex=False treats '.' literally
Xcols = data.columns[data.columns.str.contains('C.', regex=False)]
X = data[Xcols]
X

         C.al      C.co      C.cr      C.cu      C.fe      C.ni
102  0.074074  0.000000  0.185185  0.185185  0.185185  0.370370
22   0.250000  0.166667  0.166667  0.083333  0.166667  0.166667
105  0.062500  0.000000  0.312500  0.000000  0.468750  0.156250
106  0.142857  0.285714  0.000000  0.000000  0.285714  0.285714
117  0.264706  0.147059  0.147059  0.147059  0.147059  0.147059
..        ...       ...       ...       ...       ...       ...
60   0.428571  0.142857  0.071429  0.071429  0.071429  0.214286
3    0.208333  0.000000  0.208333  0.208333  0.208333  0.166667
79   0.264706  0.000000  0.147059  0.147059  0.147059  0.294118
34   0.000000  0.250000  0.000000  0.250000  0.250000  0.250000
88   0.166667  0.166667  0.333333  0.000000  0.166667  0.166667

120 rows × 6 columns
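Each row is a set of atomic fractions, and the rows shown above sum to 1. A quick sanity check of that invariant (a sketch, not part of the original notebook):

# Sanity check: compositions should sum to 1 within floating-point tolerance
assert np.allclose(X.sum(axis=1), 1.0), "compositions do not sum to 1"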

# Quick look at the distribution of one composition column
X['C.al'].plot(kind='hist', bins=20, title='C.al')
plt.gca().spines[['top', 'right']].set_visible(False)

y = data['HV']

fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1, ncols=4, figsize=(12, 3.5))

ax1.hist(y.values, bins=20)
ax2.hist(X.values[:, 0], bins=20, label='C.al')
ax3.hist(X.values[:, 1], bins=20, label='C.co')
ax4.hist(X.values[:, 2], bins=20, label='C.cr')

ax1.set_xlabel('HV', fontsize=14)
ax2.set_xlabel('C.al', fontsize=14)
ax3.set_xlabel('C.co', fontsize=14)
ax4.set_xlabel('C.cr', fontsize=14)

ax1.set_ylabel('Frequency', fontsize=14)
ax2.set_ylabel('Frequency', fontsize=14)
ax3.set_ylabel('Frequency', fontsize=14)
ax4.set_ylabel('Frequency', fontsize=14)
# First we define a function to make parity plots. This keeps the later code simpler.

def plot_parity(y_cv_test, y_pred_test, y_cv_train=None, y_pred_train=None, label=None, ylim=[50, 900]):
    """
    Make parity (predicted vs. true) plots, optionally with a second panel for training data.
    """
    rmse_test = np.sqrt(mean_squared_error(y_cv_test, y_pred_test))
    r2_test = r2_score(y_cv_test, y_pred_test)

    if y_cv_train is None:
        fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(5, 4), sharey=True, sharex=True)
    else:
        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True, sharex=True)

    ax1.scatter(y_cv_test, y_pred_test)
    ax1.text(0.95, 0.26, label, transform=ax1.transAxes, ha='right', fontsize=14)
    ax1.text(0.95, 0.18, "RMSE: %.2f" % rmse_test, transform=ax1.transAxes, ha='right', fontsize=14)
    ax1.text(0.95, 0.1, "R$^2$: %.2f" % r2_test, transform=ax1.transAxes, ha='right', fontsize=14)
    ax1.plot(ylim, ylim, '--k')
    ax1.set_xlabel('True y', fontsize=14)
    ax1.set_ylabel('Pred y', fontsize=14)
    ax1.set_xlim(ylim[0], ylim[1])
    ax1.set_ylim(ylim[0], ylim[1])

    if y_cv_train is not None:
        rmse_train = np.sqrt(mean_squared_error(y_cv_train, y_pred_train))
        r2_train = r2_score(y_cv_train, y_pred_train)

        ax2.scatter(y_cv_train, y_pred_train, c='m')
        ax2.text(0.95, 0.26, "Train", transform=ax2.transAxes, ha='right', fontsize=14)
        ax2.text(0.95, 0.18, "RMSE: %.2f" % rmse_train, transform=ax2.transAxes, ha='right', fontsize=14)
        ax2.text(0.95, 0.1, "R$^2$: %.2f" % r2_train, transform=ax2.transAxes, ha='right', fontsize=14)
        ax2.plot(ylim, ylim, '--k')
        ax2.set_xlabel('True y', fontsize=14)
        ax2.set_xlim(ylim[0], ylim[1])
        ax2.set_ylim(ylim[0], ylim[1])

    plt.tight_layout()
    plt.show()

    return None
# Ordinary least squares via the normal equation: w = (X^T X)^{-1} X^T y
X_tras_X_inv = np.linalg.inv(np.dot(X.T, X))
X_tras_y = np.dot(X.T, y)
w_cap_vec = np.dot(X_tras_X_inv, X_tras_y)
y_pred_manual = np.dot(X, w_cap_vec)

plot_parity(y, y_pred_manual, label="Train")
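Explicitly inverting X^T X can be numerically unstable when the matrix is ill-conditioned. An equivalent, more stable alternative (a sketch, not part of the original assignment) is NumPy's least-squares solver:

# Numerically stable least-squares solve; should match w_cap_vec up to floating-point error
w_lstsq, *_ = np.linalg.lstsq(X.values, y.values, rcond=None)
print(w_lstsq)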

lr = LinearRegression(fit_intercept=False)
model = lr.fit(X, y)
lr_model = copy.deepcopy(model)

y_pred = model.predict(X)
plot_parity(y, y_pred, label="Train")
print("Sklearn model: ", lr_model.coef_)
print("Eq. based model: ", w_cap_vec)
Sklearn model: [1589.03703891 154.02145017 647.00169133 279.68594241 204.32826373
-241.42532589]
Eq. based model: [1589.03703891 154.02145017 647.00169133 279.68594241 204.32826373
-241.42532589]
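The two coefficient vectors agree, as expected: with fit_intercept=False, Scikit-Learn minimizes the same least-squares objective the normal equation solves. A quick numerical check (a sketch, not from the original notebook):

# Confirm the closed-form and Scikit-Learn coefficients agree
print(np.allclose(lr_model.coef_, w_cap_vec))  # expected: True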

# Baseline model: always predict the mean of y
y_avg_pred = [y.mean()] * len(y)
print("Root mean squared error: %.2f" % np.sqrt(mean_squared_error(y, y_avg_pred)))

# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y, y_avg_pred))
Root mean squared error: 186.35
Coefficient of determination: 0.00
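An R^2 of exactly 0 is expected here: R^2 = 1 - SS_res/SS_tot, and predicting the mean makes the residual and total sums of squares identical. The baseline RMSE is simply the population standard deviation of HV (a sketch check, not part of the original):

# Baseline RMSE equals the population standard deviation of y
print("%.2f" % y.std(ddof=0))  # expected: 186.35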

# Cross-validation: sweep the number of folds k from 2 to 11
rmse_avg = []
for i in range(2, 12):
    kf = KFold(i)
    rmses = []
    for idx, (train, test) in enumerate(kf.split(X)):
        X_cv_train = X.values[train]
        X_cv_test = X.values[test]

        y_cv_train = y.values[train]
        y_cv_test = y.values[test]

        # Model fit and prediction
        model = lr.fit(X_cv_train, y_cv_train)
        y_pred_test = model.predict(X_cv_test)
        y_pred_train = model.predict(X_cv_train)

        # Computing errors
        rmse_test = np.sqrt(mean_squared_error(y_cv_test, y_pred_test))
        rmse_train = np.sqrt(mean_squared_error(y_cv_train, y_pred_train))

        r2_test = r2_score(y_cv_test, y_pred_test)
        r2_train = r2_score(y_cv_train, y_pred_train)

        # Parity plots (uncomment to inspect each fold)
        # plot_parity(y_cv_test, y_pred_test, y_cv_train, y_pred_train)
        # print("Root mean squared error: %.2f" % rmse_test)
        # print("Coefficient of determination: %.2f" % r2_test)

        rmses.append(rmse_test)

    rmse_avg.append(sum(rmses) / len(rmses))

rmse_avg
[90.90410023685317,
86.94150026120126,
86.89238900991143,
86.44250692124533,
87.09357006766437,
87.02679087151705,
85.82328460961962,
85.80805780460595,
86.6502684030654,
87.13891194799572]
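The average validation RMSE stays within roughly 86-91 HV across all k, so for this dataset the error estimate is not very sensitive to the number of folds.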

x = range(2, 12)

plt.plot(x, rmse_avg)
plt.title('Average Validation RMSE vs. Number of Folds (k)')
plt.xlabel('Number of Folds (k)')
plt.ylabel('Average Validation RMSE')
plt.grid(True)
plt.show()
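The same sweep can be written more compactly with Scikit-Learn's cross_val_score (a sketch using the same lr estimator; not part of the original notebook):

from sklearn.model_selection import cross_val_score

# Scikit-Learn reports negative MSE so that higher scores are better
for k in range(2, 12):
    scores = cross_val_score(lr, X, y, cv=KFold(k), scoring='neg_mean_squared_error')
    print(k, np.sqrt(-scores).mean())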

X.shape[1]
6

# Gradient-descent setup: iteration count, step size, and zero-initialized parameters
num_iterations = 1000
learning_rate = 0.01

weights = np.zeros(X.shape[1])
bias = 0

def cost_function(weights, bias, X, y):
    """Root-mean-squared error of the current linear model."""
    predictions = np.dot(X, weights) + bias
    cost = np.sqrt(np.mean((predictions - y) ** 2))
    return cost

def gradient_descent(weights, bias, X, y, learning_rate, num_iterations):
    """Batch gradient descent on the squared-error loss."""
    for i in range(num_iterations):
        predictions = np.dot(X, weights) + bias
        # The 1/n factor of the mean-squared-error gradient is folded into the learning rate
        gradient_weights = np.dot(X.T, (predictions - y))
        gradient_bias = np.sum(predictions - y)
        weights -= learning_rate * gradient_weights
        bias -= learning_rate * gradient_bias
        if i % 100 == 0:
            print(f"Iteration {i}: Cost = {cost_function(weights, bias, X, y)}")
    return weights, bias

weights, bias = gradient_descent(weights, bias, X, y, learning_rate, num_iterations)

print(f"Optimal weights: {weights}")
print(f"Optimal bias: {bias}")
Iteration 0: Cost = 255.67893485794872
Iteration 100: Cost = 87.65877256028017
Iteration 200: Cost = 82.03876320532915
Iteration 300: Cost = 81.60425558281177
Iteration 400: Cost = 81.55097669962204
Iteration 500: Cost = 81.54245710489937
Iteration 600: Cost = 81.54084599370569
Iteration 700: Cost = 81.5405029810298
Iteration 800: Cost = 81.54042383588254
Iteration 900: Cost = 81.54040461622849
Optimal weights: [1212.94760702 -221.99136401 270.9281074 -96.50510823 -171.89813133
-617.39672497]
Optimal bias: 376.08439070091987
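The gradient-descent weights look different from the closed-form ones only because this model carries a bias term: each composition row sums to 1, so adding the bias to every weight leaves the predictions unchanged. A quick consistency check (a sketch, not part of the original assignment):

# Because rows of X sum to 1, the two parameterizations are equivalent:
# closed-form weights ≈ gradient-descent weights + bias
print(weights + bias)  # expected: close to w_cap_vec, i.e. [1589.04, 154.02, ...]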

weights

array([1212.94760702, -221.99136401,  270.9281074 ,  -96.50510823,
       -171.89813133, -617.39672497])
