Simple Linear Regression
Simple Linear Regression
ipynb - Colaboratory
# Read the house-price training data (columns: area, price) and display it.
df = pd.read_csv(filepath_or_buffer="homeprices.csv")
df
area price
0 2600 550000
1 3000 565000
2 3200 610000
3 3600 680000
4 4000 725000
%matplotlib inline
plt.xlabel("area (sqr ft)") # adds labels on the x-axis
plt.ylabel("price (US $)")
plt.scatter(df.area, df.price, color="red", marker="+")
<matplotlib.collections.PathCollection at 0x7bbdf44ec1c0>
# Fit a linear model with `area` as the single feature and `price` as the target.
# `fit` returns the estimator itself, so construction and fitting can be chained.
reg = linear_model.LinearRegression().fit(df[["area"]], df["price"])
# Print the coefficients (slope) of the linear equation, then its y-intercept,
# each on its own line.
print(reg.coef_, reg.intercept_, sep="\n")
[135.78767123]
180616.43835616432
%matplotlib inline
plt.xlabel("area (sqr ft)")
plt.ylabel("price (US $)")
plt.scatter(df.area, df.price, color="red", marker="+")
plt.plot(df.area, reg.predict(df[['area']]), color='blue') # plots the Linear Regression Line.
https://colab.research.google.com/drive/19T8cNCsKWIzrDmgmVNTbrb_LOBxMtsp8#scrollTo=XJelj1L0xusa&printMode=true 1/4
3/8/24, 4:48 PM MlYt1.ipynb - Colaboratory
[<matplotlib.lines.Line2D at 0x7bbdf4506aa0>]
[628715.75342466]
[859554.79452055]
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression
warnings.warn(
# Read the areas to price and preview the first three rows.
d = pd.read_csv(filepath_or_buffer="areas.csv")
d.head(n=3)
area
0 1000
1 1500
2 2300
https://colab.research.google.com/drive/19T8cNCsKWIzrDmgmVNTbrb_LOBxMtsp8#scrollTo=XJelj1L0xusa&printMode=true 2/4
3/8/24, 4:48 PM MlYt1.ipynb - Colaboratory
area prices
0 1000 3.164041e+05
1 1500 3.842979e+05
2 2300 4.929281e+05
3 3540 6.613048e+05
4 4120 7.400616e+05
5 4560 7.998082e+05
6 5490 9.260908e+05
7 3460 6.504418e+05
8 4750 8.256079e+05
9 2300 4.929281e+05
10 9000 1.402705e+06
11 8600 1.348390e+06
12 7100 1.144709e+06
# Write `d` to prediction.csv; index=False leaves the row index out of the file.
d.to_csv(path_or_buf="prediction.csv", index=False)
Exercise
# Read the Canada per-capita-income data (columns: year, pci) and preview it.
df1 = pd.read_csv(filepath_or_buffer="/content/canada_per_capita_income.csv")
df1.head()
year pci
0 1970 3399.299037
1 1971 3768.297935
2 1972 4251.175484
3 1973 4804.463248
4 1974 5576.514583
# Scatter per-capita income against year.
plt.ylabel("Per Capita Income")
plt.xlabel("Year")
plt.scatter(x=df1["year"], y=df1["pci"], marker="+", color="red")
<matplotlib.collections.PathCollection at 0x7bbdf4705960>
https://colab.research.google.com/drive/19T8cNCsKWIzrDmgmVNTbrb_LOBxMtsp8#scrollTo=XJelj1L0xusa&printMode=true 3/4
3/8/24, 4:48 PM MlYt1.ipynb - Colaboratory
# Fit a linear model with `year` as the single feature and `pci` as the target.
# `fit` returns the estimator itself, so construction and fitting can be chained.
reg1 = linear_model.LinearRegression().fit(df1[["year"]], df1["pci"])
# Print the coefficients (slope), then the y-intercept, each on its own line.
print(reg1.coef_, reg1.intercept_, sep="\n")
[828.46507522]
-1632210.7578554575
# Draw the yearly observations and overlay the fitted regression line in blue.
plt.xlabel("Year")
plt.ylabel("Per Capita Income")
plt.scatter(df1.year, df1.pci, color='red', marker='+')
plt.plot(df1.year, reg1.predict(df1[['year']]), color='blue')
# Predict per-capita income for 2020. The model was fitted on a DataFrame with
# a `year` column, so the query is passed as a one-row DataFrame with that same
# column name. A bare nested list ([[2020]]) gives the same number but makes
# scikit-learn emit "X does not have valid feature names".
reg1.predict(pd.DataFrame({"year": [2020]}))
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression
warnings.warn(
array([41288.69409442])
https://colab.research.google.com/drive/19T8cNCsKWIzrDmgmVNTbrb_LOBxMtsp8#scrollTo=XJelj1L0xusa&printMode=true 4/4