Machine File
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np

# dictionary of lists
dict = {'First Score': [100, 90, np.nan, 95],
        'Second Score': [30, 45, 56, np.nan],
        'Third Score': [np.nan, 40, 80, 98]}

# creating a dataframe from dictionary
df = pd.DataFrame(dict)

# checking for missing values using isnull()
print(df.isnull())

# filtering data
# displaying data only with Gender = NaN
data = pd.read_csv("employees.csv")
bool_series = pd.isnull(data["Gender"])
data[bool_series]
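Once located, missing values can be filled or dropped; a minimal sketch using the df above (the fill value 0 is an arbitrary choice):

# filling every NaN with a constant (a column mean would also work)
df.fillna(0)
# dropping any row that contains at least one NaN
df.dropna()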
import matplotlib.pyplot as plt
# x axis values
x = [1, 2, 3]
# corresponding y axis values
y = [2, 4, 1]
# plotting the points and displaying the figure
plt.plot(x, y)
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# loading the breast cancer dataset from a local CSV
x = pd.read_csv(r"C:\...\cancer.csv")
a = np.array(x)
y = a[:, 30]  # classes having 0 and 1
x = np.column_stack((x.malignant, x.benign))
x.shape
print(x, y)
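If the local cancer.csv is not at hand, the same data ships with scikit-learn; a minimal sketch (the column layout differs from the CSV above):

from sklearn.datasets import load_breast_cancer

cancer = load_breast_cancer()
X = cancer.data    # 30 numeric features per tumour
y = cancer.target  # classes having 0 and 1
print(X.shape, y.shape)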
Program 3:- Supervised Learning
1. Implementation of Linear Regression
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)
    # mean of x and y vectors
    m_x = np.mean(x)
    m_y = np.mean(y)
    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y*x) - n*m_y*m_x
    SS_xx = np.sum(x*x) - n*m_x*m_x
    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1*m_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)
    # predicted response vector
    y_pred = b[0] + b[1]*x
    # plotting the regression line
    plt.plot(x, y_pred, color="g")
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    # plotting the regression line
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
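As a quick sanity check, NumPy's built-in least-squares fit should agree with estimate_coef; a minimal sketch on the same data:

import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# np.polyfit with degree 1 returns [slope, intercept]
slope, intercept = np.polyfit(x, y, 1)
print("polyfit: b_0 = {}, b_1 = {}".format(intercept, slope))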
Program 4:- Implementation of Logistic Regression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings("ignore")

class LogitRegression():
    def __init__(self, learning_rate, iterations):
        self.learning_rate = learning_rate
        self.iterations = iterations

    # model training
    def fit(self, X, Y):
        # m = number of training examples, n = number of features
        self.m, self.n = X.shape
        # weight initialization
        self.W = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y
        # gradient descent learning
        for i in range(self.iterations):
            self.update_weights()
        return self

    # one step of gradient descent
    def update_weights(self):
        A = 1 / (1 + np.exp(-(self.X.dot(self.W) + self.b)))
        # calculate gradients
        tmp = (A - self.Y.T)
        tmp = np.reshape(tmp, self.m)
        dW = np.dot(self.X.T, tmp) / self.m
        db = np.sum(tmp) / self.m
        # update parameters
        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db
        return self

    # predict 1 when the sigmoid output exceeds 0.5
    def predict(self, X):
        Z = 1 / (1 + np.exp(-(X.dot(self.W) + self.b)))
        Y = np.where(Z > 0.5, 1, 0)
        return Y

def main():
    # importing the dataset
    df = pd.read_csv("diabetes.csv")
    X = df.iloc[:, :-1].values
    Y = df.iloc[:, -1:].values
    # splitting the dataset into train and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=1/3, random_state=0)
    # training our model
    model = LogitRegression(learning_rate=0.01, iterations=1000)
    model.fit(X_train, Y_train)
    # training sklearn's model for comparison
    model1 = LogisticRegression()
    model1.fit(X_train, Y_train)
    # prediction on the test set
    Y_pred = model.predict(X_test)
    Y_pred1 = model1.predict(X_test)
    # measuring performance
    correctly_classified = 0
    correctly_classified1 = 0
    count = np.size(Y_pred)
    for i in range(count):
        if Y_test[i] == Y_pred[i]:
            correctly_classified = correctly_classified + 1
        if Y_test[i] == Y_pred1[i]:
            correctly_classified1 = correctly_classified1 + 1
    print("Accuracy on test set by our model : ", (
        correctly_classified / count) * 100)
    print("Accuracy on test set by sklearn model : ", (
        correctly_classified1 / count) * 100)

if __name__ == "__main__":
    main()
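If diabetes.csv is not available, the same comparison can be exercised on synthetic data; a minimal sketch using the LogitRegression class above (the dataset shape and parameters are assumptions, not the lab data):

from sklearn.datasets import make_classification

# hypothetical stand-in for diabetes.csv: 500 samples, 8 numeric features
X, Y = make_classification(n_samples=500, n_features=8, random_state=0)

model = LogitRegression(learning_rate=0.01, iterations=1000)
# fit() expects the labels as a column vector, as in main() above
model.fit(X, Y.reshape(-1, 1))
print("training accuracy:", (model.predict(X) == Y).mean() * 100)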
# importing pandas package
import pandas as pd
# making data frame from csv file
data = pd.read_csv("employees.csv")
# printing rows 10 to 24 of the data frame for visualization
data[10:25]
Program 5:- Implementation of Decision Tree Classification
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
def importdata():
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-' +
        'databases/balance-scale/balance-scale.data',
        sep=',', header=None)
    # printing dataset shape and a preview
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)
    print("Dataset: ", balance_data.head())
    return balance_data

def splitdataset(balance_data):
    # separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]
    # splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100)
    return X, Y, X_train, X_test, y_train, y_test

def train_using_gini(X_train, X_test, y_train):
    clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                      max_depth=3, min_samples_leaf=5)
    clf_gini.fit(X_train, y_train)
    return clf_gini

def train_using_entropy(X_train, X_test, y_train):
    clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                         max_depth=3, min_samples_leaf=5)
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

def prediction(X_test, clf_object):
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

def cal_accuracy(y_test, y_pred):
    print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))

def main():
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = train_using_entropy(X_train, X_test, y_train)
    print("Results Using Gini Index:")
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)
    print("Results Using Entropy:")
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

if __name__ == "__main__":
    main()
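The fitted trees can also be inspected as plain-text rules; a minimal sketch, to be run inside main() after the classifiers are trained:

from sklearn.tree import export_text

# textual view of the learned decision rules
# (feature names are positional, since the CSV has no header row)
print(export_text(clf_gini))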
Program 6:- Implementation of K-nearest Neighbor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt
# loading the iris dataset
irisData = load_iris()
X = irisData.data
y = irisData.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)
neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    # compute training and test accuracy for each k
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)
plt.plot(neighbors, test_accuracy, label = 'Testing dataset Accuracy')
plt.plot(neighbors, train_accuracy, label = 'Training dataset Accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()
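Beyond a single train/test split, the choice of k can be checked with cross-validation; a minimal sketch on the same iris data:

from sklearn.model_selection import cross_val_score

for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    # 5-fold cross-validated accuracy on the full dataset
    scores = cross_val_score(knn, X, y, cv=5)
    print(k, scores.mean())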
Program 7:- Implementation of Naïve Bayes Classifier Algorithm
import math
import random
import csv
def encode_class(mydata):
    # encode string class labels as integer indices
    classes = []
    for i in range(len(mydata)):
        if mydata[i][-1] not in classes:
            classes.append(mydata[i][-1])
    for i in range(len(classes)):
        for j in range(len(mydata)):
            if mydata[j][-1] == classes[i]:
                mydata[j][-1] = i
    return mydata

def splitting(mydata, ratio):
    # split the data into train and test sets by the given ratio
    train_num = int(len(mydata) * ratio)
    train = []
    test = list(mydata)
    while len(train) < train_num:
        index = random.randrange(len(test))
        train.append(test.pop(index))
    return train, test

def groupUnderClass(mydata):
    # group rows by their class label
    dict = {}
    for i in range(len(mydata)):
        if (mydata[i][-1] not in dict):
            dict[mydata[i][-1]] = []
        dict[mydata[i][-1]].append(mydata[i])
    return dict

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def std_dev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def MeanAndStdDev(mydata):
    # mean and standard deviation per attribute; the class column is dropped
    info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
    del info[-1]
    return info

def MeanAndStdDevForClass(mydata):
    # per-class mean and standard deviation of every attribute
    info = {}
    dict = groupUnderClass(mydata)
    for classValue, instances in dict.items():
        info[classValue] = MeanAndStdDev(instances)
    return info
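The listing stops after MeanAndStdDevForClass; a minimal completion sketch of the usual Gaussian Naive Bayes flow (the functions and the driver below, including the pima-indians-diabetes.csv filename, are a reconstruction, not the original lab code):

# probability density of x under a Gaussian with the given mean and std dev
def calculateGaussianProbability(x, mean, stdev):
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo

# per-class probability of a single test row
def calculateClassProbabilities(info, test):
    probabilities = {}
    for classValue, classSummaries in info.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, std_dev = classSummaries[i]
            probabilities[classValue] *= calculateGaussianProbability(
                test[i], mean, std_dev)
    return probabilities

# predict the class with the highest probability
def predict(info, test):
    probabilities = calculateClassProbabilities(info, test)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(info, test):
    return [predict(info, instance) for instance in test]

def accuracy_rate(test, predictions):
    correct = 0
    for i in range(len(test)):
        if test[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(test))) * 100.0

# driver: the filename below is a placeholder, not from the original listing
filename = 'pima-indians-diabetes.csv'
mydata = list(csv.reader(open(filename, 'rt')))
mydata = encode_class(mydata)
for i in range(len(mydata)):
    mydata[i] = [float(x) for x in mydata[i]]
train_data, test_data = splitting(mydata, 0.7)
info = MeanAndStdDevForClass(train_data)
predictions = getPredictions(info, test_data)
print("Accuracy:", accuracy_rate(test_data, predictions))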
model.compile(loss='binary_crossentropy', optimizer='adam',
metrics=['accuracy'])
model.summary()
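The compile and summary calls above are the tail end of a Keras model build whose earlier lines are missing; a minimal self-contained sketch showing where they fit (the architecture and input size here are assumptions, not the original program):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# hypothetical binary classifier: 8 inputs, one hidden layer
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.summary()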
Program 9:- Implementing K-means
import sys
import math
from random import shuffle, uniform

def ReadData(fileName):
    # read the file line by line, skipping the first (header) line;
    # the last column of each row is treated as a label and dropped
    f = open(fileName, 'r')
    lines = f.read().splitlines()
    f.close()
    items = []
    for i in range(1, len(lines)):
        line = lines[i].split(',')
        itemFeatures = []
        for j in range(len(line) - 1):
            # convert each feature value to float
            v = float(line[j])
            itemFeatures.append(v)
        items.append(itemFeatures)
    shuffle(items)
    return items

def FindColMinMax(items):
    n = len(items[0])
    minima = [sys.maxsize for i in range(n)]
    maxima = [-sys.maxsize - 1 for i in range(n)]
    for item in items:
        for f in range(len(item)):
            if (item[f] < minima[f]):
                minima[f] = item[f]
            if (item[f] > maxima[f]):
                maxima[f] = item[f]
    return minima, maxima

def EuclideanDistance(x, y):
    # straight-line distance between two feature vectors
    S = 0
    for i in range(len(x)):
        S += math.pow(x[i] - y[i], 2)
    return math.sqrt(S)

def InitializeMeans(items, k, cMin, cMax):
    # Initialize means to random numbers between
    # the min and max of each column/feature
    f = len(items[0])  # number of features
    means = [[0 for i in range(f)] for j in range(k)]
    for mean in means:
        for i in range(len(mean)):
            # Set value to a random float
            # (adding +-1 to avoid a wide placement of a mean)
            mean[i] = uniform(cMin[i] + 1, cMax[i] - 1)
    return means

def UpdateMean(n, mean, item):
    # incremental update of a running mean with a new item
    for i in range(len(mean)):
        m = mean[i]
        m = (m * (n - 1) + item[i]) / float(n)
        mean[i] = round(m, 3)
    return mean

def Classify(means, item):
    # Classify item to the mean with minimum distance
    minimum = sys.maxsize
    index = -1
    for i in range(len(means)):
        # Find distance from item to mean
        dis = EuclideanDistance(item, means[i])
        if (dis < minimum):
            minimum = dis
            index = i
    return index

def CalculateMeans(k, items, maxIterations=100000):
    # Find the minima and maxima for columns
    cMin, cMax = FindColMinMax(items)
    # Initialize means at random points
    means = InitializeMeans(items, k, cMin, cMax)
    # Initialize clusters, the array to hold
    # the number of items in a class
    clusterSizes = [0 for i in range(len(means))]
    # An array to hold the cluster an item is in
    belongsTo = [0 for i in range(len(items))]
    # Calculate means
    for e in range(maxIterations):
        # If no change of cluster occurs, halt
        noChange = True
        for i in range(len(items)):
            item = items[i]
            # Classify item into a cluster and update the
            # corresponding means.
            index = Classify(means, item)
            clusterSizes[index] += 1
            cSize = clusterSizes[index]
            means[index] = UpdateMean(cSize, means[index], item)
            # Item changed cluster
            if (index != belongsTo[i]):
                noChange = False
                belongsTo[i] = index
        # Nothing changed, return
        if (noChange):
            break
    return means

def FindClusters(means, items):
    clusters = [[] for i in range(len(means))]  # Init clusters
    for item in items:
        # Classify item into a cluster
        index = Classify(means, item)
        # Add item to cluster
        clusters[index].append(item)
    return clusters
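The listing defines no driver; a minimal usage sketch, assuming a comma-separated data file with a header row as ReadData expects (the filename and k below are placeholders):

if __name__ == "__main__":
    items = ReadData("data.txt")  # placeholder filename
    k = 3                         # assumed number of clusters
    means = CalculateMeans(k, items)
    clusters = FindClusters(means, items)
    print("Means:", means)
    print("Cluster sizes:", [len(c) for c in clusters])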