ML Assignment 1: Linear Regression
ML Assignment 1: Linear Regression
# Load the salary CSV into `df`; the later cells read its 'YearsExperience'
# and 'Salary' columns. `pd` is imported outside this excerpt (presumably pandas).
df = pd.read_csv('/content/sample_data/Salary_Data.csv')
# Bare expression: in a notebook this echoes the loaded table as the cell output.
df
#DISHANT KUMAR YADAV
YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
5 2.9 56642.0
6 3.0 60150.0
7 3.2 54445.0
8 3.2 64445.0
9 3.7 57189.0
10 3.9 63218.0
11 4.0 55794.0
12 4.0 56957.0
13 4.1 57081.0
14 4.5 61111.0
15 4.9 67938.0
16 5.1 66029.0
17 5.3 83088.0
18 5.9 81363.0
19 6.0 93940.0
20 6.8 91738.0
21 7.1 98273.0
22 7.9 101302.0
23 8.2 113812.0
24 8.7 109431.0
25 9.0 105582.0
26 9.5 116969.0
27 9.6 112635.0
28 10.3 122391.0
29 10.5 121872.0
# Pull the two columns out of the frame and take a first look at how salary
# moves with experience. `exp` and `sal` stay at top level because later
# cells reuse them.
exp, sal = df['YearsExperience'], df['Salary']
plt.scatter(x=exp, y=sal)
plt.xlabel(xlabel='Experience')
# Kept as the final expression so a notebook still echoes the returned label.
plt.ylabel(ylabel='Salary')
#DISHANT KUMAR YADAV
Text(0, 0.5, 'Salary')
# Bare tuple expression: reports the shapes of exp_np and sal_np (both defined
# outside this excerpt) so a notebook echoes them as the cell output.
exp_np.shape, sal_np.shape
#DISHANT KUMAR YADAV
((30,), (30,))
# Predict a salary for every experience value in exp_np.
# The 1-D experience vector is reshaped into a single-column 2-D array before
# being handed to predict(); -1 lets the row count follow the data instead of
# being pinned to exactly 30 rows, so the cell keeps working if the dataset
# grows or shrinks.
# NOTE(review): sklearn_model and exp_np are defined outside this excerpt;
# presumably a fitted scikit-learn regressor and a NumPy array - confirm.
sklearn_sal_predictions = sklearn_model.predict(exp_np.reshape(-1, 1))
# Bare expression: echoes the shape of the predictions in a notebook.
sklearn_sal_predictions.shape
#DISHANT KUMAR YADAV
(30,)
# Compare the observed salaries with the model's predictions on one figure.
# The observed points are drawn first and the predictions second; the order is
# kept because it decides which default colour each series receives.
plt.scatter(x=exp, y=sal)
plt.xlabel(xlabel='Experience')
plt.ylabel(ylabel='Salary')
# Final expression, so a notebook echoes the object returned by this call.
plt.scatter(x=exp, y=sklearn_sal_predictions)
#DISHANT KUMAR YADAV
# Bare expression: evaluates predictions_df (built outside this excerpt) so a
# notebook echoes it as the cell output.
predictions_df
#DISHANT KUMAR YADAV
# Step 3: Data analysis - scatter the raw observations to see how salary
# varies with years of experience before any model is fitted.
exp, sal = df['YearsExperience'], df['Salary']
plt.scatter(x=exp, y=sal)
plt.title(label='Distribution of Experience vs. Salary')
plt.xlabel(xlabel='Experience')
plt.ylabel(ylabel='Salary')
# Render the finished figure.
plt.show()
output
▾ LinearRegression
LinearRegression()
# Step 7: Plot the training results - training points in blue, with the
# model's predictions for those same inputs overlaid as a red line.
# X_train, y_train and regression_model are defined outside this excerpt.
plt.scatter(x=X_train, y=y_train, color='blue')
plt.plot(X_train, regression_model.predict(X_train), color='red')
plt.title(label='Training Results: Experience vs. Salary')
plt.xlabel(xlabel='Experience')
plt.ylabel(ylabel='Salary')
# Render the finished figure.
plt.show()