Simple Linear regression-LAB4.ipynb - Colaboratory
Simple Linear regression-LAB4.ipynb - Colaboratory
Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Importing the dataset
# Read the MBA salary CSV (relative path, so it is resolved against the
# current working directory) into a DataFrame.
df = pd.read_csv('MBA Salary.csv')
# Preview the first rows; as the last expression of a notebook cell the
# result is displayed automatically.
df.head()
0 1 62.00 270000
1 2 76.33 200000
2 3 72.00 240000
3 4 60.00 250000
4 5 61.00 180000
# Print a concise summary of the DataFrame (index range, columns, dtypes)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
# Print the DataFrame dimensions as (rows, columns)
print(df.shape)
(50, 3)
# Summary statistics for the numeric columns
print(df.describe())
# Choose the predictor (Grade 10 percentage) and the response (salary)
X, y = df['Percentage in Grade 10'], df['Salary']
# Scatter plot of salary against Grade 10 percentage
plt.scatter(X, y, label='Scatter Plot', color='blue')
plt.xlabel('Percentage in Grade 10')
plt.ylabel('Salary')
plt.title('Relationship between Grades and Salary of a person')
plt.legend(loc='lower right')  # same position as numeric location code 4
plt.show()
# Report the shapes of X and y, one per line
print(X.shape, y.shape, sep='\n')
(50,)
(50,)
0 62.00
1 76.33
2 72.00
3 60.00
4 61.00
5 55.00
6 70.00
7 68.00
8 82.80
9 59.00
10 58.00
11 60.00
12 66.00
13 83.00
14 68.00
15 37.33
16 79.00
17 68.40
18 70.00
19 59.00
20 63.00
21 50.00
22 69.00
23 52.00
24 49.00
25 64.60
26 50.00
27 74.00
28 58.00
29 67.00
30 75.00
31 60.00
32 55.00
33 78.00
34 50.08
35 56.00
36 68.00
37 52.00
38 54.00
39 52.00
40 76.00
41 64.80
42 74.40
43 74.50
44 73.50
45 57.58
46 68.00
47 69.00
48 66.00
49 60.80
# Convert X and y to NumPy arrays and reshape each into a single column,
# i.e. shape (n_samples, 1)
X = np.array(X).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
# Report the shapes after reshaping, one per line
print(X.shape, y.shape, sep='\n')
(50, 1)
(50, 1)
# Hold out 30% of the rows as a test set; the rest is used for training.
# A fixed random_state makes the shuffle, and so the split, reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)
# Report the shapes of the four resulting arrays, one per line
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')
(35, 1)
(35, 1)
(15, 1)
(15, 1)
# Build an ordinary least squares model and train it on the training split
# (fit() returns the estimator itself, so the two steps can be chained)
from sklearn.linear_model import LinearRegression
lm = LinearRegression().fit(X_train, y_train)
# Predictions for the held-out test inputs
y_pred = lm.predict(X_test)
# Visualising the Training set results
# Red points are the training observations; the blue line is the model's
# prediction for the same inputs.
plt.scatter(X_train, y_train, color = 'red')
plt.plot(X_train, lm.predict(X_train), color = 'blue')
# Title and axis labels mirror the test-set plot so the two figures read alike
plt.title('Training set results')
plt.xlabel('Grades')
plt.ylabel('Salary')
# Render the figure here so that subsequent plotting calls are not drawn
# onto the same axes as the training data
plt.show()
[<matplotlib.lines.Line2D at 0x22c2d23c430>]
# Test-set plot: observed points in red, model predictions as a blue line
plt.scatter(X_test, y_test, color='red')
plt.plot(X_test, lm.predict(X_test), color='blue')
plt.xlabel('Grades')
plt.ylabel('Salary')
plt.title('Test set results')
plt.show()
# Compute model slope and intercept
# coef_ and intercept_ hold the parameters of the fitted model lm.
slope = lm.coef_
# No trailing comma after intercept_: a trailing comma would wrap the value
# in a 1-tuple and the print below would show "(array([...]),)".
intercept = lm.intercept_
print("Estimated model slope:" , slope)
print("Estimated model intercept:" , intercept)
# Predict the salary for a Grade 10 percentage of 80.
# The input is a nested list so that it is 2-D: one sample with one feature.
X_new = [[80]]
# As the last expression of a notebook cell the prediction is displayed.
lm.predict(X_new)
array([[273197.97007155]])
Colab paid products
-
Cancel contracts here