Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
7 views

Machine Learning Program

Uploaded by

sudhisugumar3006
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
7 views

Machine Learning Program

Uploaded by

sudhisugumar3006
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 12

DATA CLEANING AND PREPROCESSING:

import numpy as np

import pandas as pd

# Load the raw data. The file marks missing entries with the literal text
# "Nan", which pandas does not recognise as missing by default; without
# na_values every column is read as text and dropna() removes nothing.
dataset = pd.read_csv("D:/book1.csv", delimiter=',', na_values=["Nan"])

print(dataset)

dataset.info()

# Show which cells are missing (a bare dataset.isna() discards the result
# when run as a script).
print(dataset.isna())

# Drop every row that has at least one missing value.
dataset_1 = dataset.dropna()

print(dataset_1)

# Build features (first three columns) and target (fourth column) from the
# CLEANED frame. .copy() gives an independent frame so the encoding step
# below does not write into a slice of dataset_1.
x = dataset_1.iloc[:, [0, 1, 2]].copy()

y = dataset_1.iloc[:, [3]]

print(x)

print(y)

from sklearn.preprocessing import LabelEncoder

lEncoder = LabelEncoder()

# Replace the first (categorical) column with integer codes. Assigning by
# column label swaps the whole column, so the result takes the integer dtype
# instead of being forced into the original text column.
first_column = x.columns[0]
x[first_column] = lEncoder.fit_transform(x[first_column])

print(x)

OUTPUT:

= RESTART: C:/Users/AMAR PAAPU/pre1.py


COUNTRY AGE SALARY PURCHASED

0 France 44 72000 no

1 Spain 27 48000 yes

2 Germany 30 64000 no

3 Spain 38 61000 no

4 Germany 40 Nan yes

5 France 38 54000 yes

6 Spain Nan 62000 no

7 France 48 74000 yes

8 Germany 50 83000 no

9 France 37 67000 yes

<class 'pandas.core.frame.DataFrame'>

RangeIndex: 10 entries, 0 to 9

Data columns (total 4 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 COUNTRY 10 non-null object

1 AGE 10 non-null object

2 SALARY 10 non-null object

3 PURCHASED 10 non-null object

dtypes: object(4)

memory usage: 452.0+ bytes

COUNTRY AGE SALARY PURCHASED

0 France 44 72000 no

1 Spain 27 48000 yes

2 Germany 30 64000 no

3 Spain 38 61000 no

4 Germany 40 Nan yes

5 France 38 54000 yes


6 Spain Nan 62000 no

7 France 48 74000 yes

8 Germany 50 83000 no

9 France 37 67000 yes

COUNTRY AGE SALARY

0 France 44 72000

1 Spain 27 48000

2 Germany 30 64000

3 Spain 38 61000

4 Germany 40 Nan

5 France 38 54000

6 Spain Nan 62000

7 France 48 74000

8 Germany 50 83000

9 France 37 67000

PURCHASED

0 no

1 yes

2 no

3 no

4 yes

5 yes

6 no

7 yes

8 no

9 yes

COUNTRY AGE SALARY

0 0 44 72000

1 2 27 48000
2 1 30 64000

3 2 38 61000

4 1 40 Nan

5 0 38 54000

6 2 Nan 62000

7 0 48 74000

8 1 50 83000

9 0 37 67000
Data from database:

import mysql.connector
# Create the connection object
myconn = mysql.connector.connect(host = "localhost", user = "root",passwd = "",database="SampleDB")
try:
    # Creating the cursor object
    cur = myconn.cursor()
    try:
        # Executing the query and fetching every row from the cursor
        cur.execute("select * from students")
        result = cur.fetchall()
    finally:
        # Release the cursor even if the query fails
        cur.close()

    print("Student Details are :")
    # Printing the result, one row per line
    for row in result:
        print(row)
    # No commit is issued: the script only reads data, so there is nothing
    # to persist.
finally:
    # Close the connection on success and on error alike
    myconn.close()
Output:
K-means clustering:

from sklearn.cluster import KMeans

import matplotlib.pyplot as plt

# Sample 2-D points, given as parallel lists of x and y coordinates.
x = [4, 5, 10, 4, 3, 11, 14 , 6, 10, 12]
y = [21, 19, 24, 17, 16, 25, 24, 22, 21, 21]

# Uncomment to inspect the raw points first:
# plt.scatter(x, y)

# Pair the coordinates up into (x, y) samples.
data = list(zip(x, y))

# Elbow method: fit K-means once for each cluster count from 1 to 10 and
# record the inertia of each fitted model.
cluster_counts = range(1, 11)
inertias = [KMeans(n_clusters=k).fit(data).inertia_ for k in cluster_counts]

# Plot inertia against the number of clusters.
plt.plot(cluster_counts, inertias, marker='o')
plt.title('Elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

Output:
K-nearest neighbours:

# Import necessary modules


from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import random

# Load the Iris dataset and keep the class names for readable output.
data_iris = load_iris()
label_target = data_iris.target_names

print()
print("Sample Data from Iris Dataset")
print("*"*30)
# Display ten randomly chosen samples. The index is drawn from the whole
# dataset rather than a hard-coded upper bound, so every row can appear.
for _ in range(10):
    rn = random.randrange(len(data_iris.data))
    print(data_iris.data[rn],"===>",label_target[data_iris.target[rn]])

# Feature matrix and target vector
X = data_iris.data
y = data_iris.target

# Split into training (70%) and test (30%) sets; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1)

print("The Training dataset length: ",len(X_train))
print("The Testing dataset length: ",len(X_test))

try:
    nn = int(input("Enter number of neighbors :"))
    knn = KNeighborsClassifier(nn)
    knn.fit(X_train, y_train)

    # Score the fitted classifier on the held-out test set
    print("The Score is :",knn.score(X_test, y_test))

    # Read one comma-separated sample from the user and convert it to floats
    test_data = [float(value) for value in input("Enter Test Data :").split(",")]
    print()

    v = knn.predict([test_data])
    print("Predicted output is :",label_target[v])
except ValueError:
    # int()/float() raise ValueError on malformed text. scikit-learn's input
    # validation is expected to raise ValueError (or a subclass) as well --
    # confirm for the installed version. Unlike a bare "except:", this no
    # longer hides Ctrl-C or unrelated programming errors.
    print("Please supply valid input......")
Output:
Linear regression:

import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    """Estimate the coefficients of the least-squares line y = b_0 + b_1*x.

    Args:
        x: 1-D array-like of predictor values (NumPy array, list, tuple...).
        y: 1-D array-like of response values, same length as ``x``.

    Returns:
        Tuple ``(b_0, b_1)``: the intercept and the slope.

    Note:
        If every value in ``x`` is identical the slope is 0/0, which NumPy
        evaluates to ``nan`` (with a runtime warning) rather than raising.
    """
    # Work in floating point: this accepts plain Python sequences and keeps
    # integer inputs from silently overflowing in the products below.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # mean of x and y vector
    m_x = np.mean(x)
    m_y = np.mean(y)

    # Cross-deviation and deviation about x, computed from centered values.
    # Algebraically equal to sum(x*y) - n*m_x*m_y and sum(x*x) - n*m_x**2,
    # but avoids subtracting two large, nearly equal numbers.
    dev_x = x - m_x
    SS_xy = np.sum(dev_x * (y - m_y))
    SS_xx = np.sum(dev_x * dev_x)

    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    """Show the observations as a scatter plot with the fitted line on top.

    Args:
        x: predictor values.
        y: observed response values.
        b: indexable pair where ``b[0]`` is the intercept and ``b[1]`` the slope.
    """
    intercept, slope = b[0], b[1]

    # Response predicted by the line at each x
    fitted = intercept + slope*x

    # Observed points in magenta, regression line in green
    plt.scatter(x, y, color="m", marker="o", s=30)
    plt.plot(x, fitted, color="g")

    # Axis labels
    plt.xlabel('x')
    plt.ylabel('y')

    # Display the figure
    plt.show()

def main():
    """Fit a regression line to ten sample points, print it and plot it."""
    # Sample observations: x runs 0..9 with the matching responses in y
    x = np.arange(10)
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # Estimate intercept (b_0) and slope (b_1)
    coefficients = estimate_coef(x, y)
    intercept, slope = coefficients[0], coefficients[1]
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(intercept, slope))

    # Draw the points together with the fitted line
    plot_regression_line(x, y, coefficients)

# Run the demo only when the file is executed as a script, so importing the
# module (e.g. to reuse estimate_coef) does not open a plot window.
if __name__ == "__main__":
    main()
Output:
SVM program:

import numpy as np

import matplotlib.pyplot as plt

# make_blobs lives in sklearn.datasets (plural); "sklearn.dataset" does not
# exist and fails at import time.
from sklearn.datasets import make_blobs

# Generate 500 two-dimensional points in two clusters; y holds the cluster
# label of each point.
x, y = make_blobs(n_samples=500, centers=2, random_state=0, cluster_std=0.40)

# x-positions at which the candidate lines are evaluated
xfit = np.linspace(-1, 3.5)

# Plot the points, coloured by cluster label
plt.scatter(x[:, 0], x[:, 1], c=y, s=50, cmap='spring')

# Draw three hand-picked candidate lines (slope m, intercept b), each with a
# shaded band of half-width d around it.
for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
    yfit = m * xfit + b
    plt.plot(xfit, yfit, '-k')
    plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none', color='#AAAAAA', alpha=0.4)

plt.xlim(-1, 3.5)

plt.show()

You might also like