Warm Normal Strong Warm Same Yes Warm High Strong Warm Same Yes Warm High Strong Cool Change Yes

Find-S Algorithm
In [1]: import pandas as pd

import numpy as np
df = pd.read_csv('enjoysport.csv')
#print(df)
df.loc[df['enjoy_sport'] == 'yes']
#print(df['sky'])
Out[1]: sky air_temp humidity wind water forecast enjoy_sport
0 sunny warm normal strong warm same yes
1 sunny warm high strong warm same yes
3 sunny warm high strong cool change yes
Example code 1
import numpy as np
import csv
a = []
df = pd.read_csv('enjoysport.csv')
print(df)
#df.loc[df['enjoy_sport'] == 'YES']
print()
with open('enjoysport.csv', 'r') as csvfile:
next(csvfile)
for row in csv.reader(csvfile):
a.append(row)
print(a)
print("\nThe total number of training instances are : ",len(a))
num_attribute = len(a[0])-1
print("\nThe initial hypothesis is : ")

hypothesis = ['0']*num_attribute
print(hypothesis)
for i in range(0, len(a)):

if a[i][num_attribute] == 'yes':
print ("\nInstance ", i+1, "is", a[i], " and is Positive Instance")
for j in range(0, num_attribute):
if hypothesis[j] == '0' or hypothesis[j] == a[i][j]:
hypothesis[j] = a[i][j]
else:
hypothesis[j] = '?'
print("The hypothesis for the training instance", i+1, " is: " , hypothesis, "
if a[i][num_attribute] == 'no':
print ("\nInstance ", i+1, "is", a[i], " and is Negative Instance Hence Ignore
print("The hypothesis for the training instance", i+1, " is: " , hypothesis, "
print("\nThe Maximally specific hypothesis for the training instance is ", hypothesis)
sky air_temp humidity wind water forecast enjoy_sport

2 rainy cold high strong warm change no
[['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'], ['sunny', 'warm', 'hig
h', 'strong', 'warm', 'same', 'yes'], ['rainy', 'cold', 'high', 'strong', 'warm', 'cha
nge', 'no'], ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes']]
The total number of training instances are : 4
The initial hypothesis is :

['0', '0', '0', '0', '0', '0']
Instance 1 is ['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'] and is Po

sitive Instance
The hypothesis for the training instance 1 is: ['sunny', 'warm', 'normal', 'strong',
'warm', 'same']
Instance 2 is ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'] and is Posi
tive Instance
The hypothesis for the training instance 2 is: ['sunny', 'warm', '?', 'strong', 'war
m', 'same']
Instance 3 is ['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no'] and is Neg
ative Instance Hence Ignored
The hypothesis for the training instance 3 is: ['sunny', 'warm', '?', 'strong', 'war
m', 'same']
Instance 4 is ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes'] and is Po

sitive Instance
The hypothesis for the training instance 4 is: ['sunny', 'warm', '?', 'strong', '?',
'?']
The Maximally specific hypothesis for the training instance is ['sunny', 'warm', '?',
'strong', '?', '?']
Example code 2
In [5]: import random
import csv
attributes = [['Sunny','Rainy'],
['Warm','Cold'],
['Normal','High'],
['Strong','Weak'],
['Warm','Cool'],
['Same','Change']]
num_attributes = len(attributes)
print (" \n The most general hypothesis : ['?','?','?','?','?','?']\n")

print ("\n The most specific hypothesis : ['0','0','0','0','0','0']\n")
a = []
print("\n The Given Training Data Set \n")
with open('ws.csv', 'r') as csvFile:

reader = csv.reader(csvFile)
for row in reader:
a.append (row)
print(row)
print("\n The initial value of hypothesis: ")

hypothesis = ['0'] * num_attributes
print(hypothesis)
# Comparing with First Training Example

for j in range(0,num_attributes):
hypothesis[j] = a[0][j];
# Comparing with Remaining Training Examples of Given Data Set
print("\n Find S: Finding a Maximally Specific Hypothesis\n")
for i in range(0,len(a)):
if a[i][num_attributes]=='Yes':
for j in range(0,num_attributes):
if a[i][j]!=hypothesis[j]:
hypothesis[j]='?'
else :
hypothesis[j]= a[i][j]
print(" For Training Example No :{0} the hypothesis is ".format(i),hypothesis)
print("\n The Maximally Specific Hypothesis for a given Training Examples :\n")
print(hypothesis)
The most general hypothesis : ['?','?','?','?','?','?']
The most specific hypothesis : ['0','0','0','0','0','0']
The Given Training Data Set
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']

['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']
The initial value of hypothesis:

['0', '0', '0', '0', '0', '0']
Find S: Finding a Maximally Specific Hypothesis
For Training Example No :0 the hypothesis is ['Sunny', 'Warm', 'Normal', 'Strong',

'Warm', 'Same']
For Training Example No :1 the hypothesis is ['Sunny', 'Warm', '?', 'Strong', 'War
m', 'Same']
For Training Example No :2 the hypothesis is ['Sunny', 'Warm', '?', 'Strong', 'War
m', 'Same']
For Training Example No :3 the hypothesis is ['Sunny', 'Warm', '?', 'Strong', '?',
'?']
The Maximally Specific Hypothesis for a given Training Examples :
['Sunny', 'Warm', '?', 'Strong', '?', '?']
Candidate Elimination
attributes = [['Sunny','Rainy'],
['Warm','Cold'],
['Normal','High'],
['Strong','Weak'],
['Warm','Cool'],
['Same','Change']]
df = pd.read_csv('Enjoysport.csv')
print(df)
print()

Example code 1
In [7]: df = df.values
def find_s(h, d):
for i in range(len(d)):
if h[i] != d[i]:
h[i] = '?'
return h
def hipotesa(g, s):

gTemp = []
flags = []
for ds in s:
if ds != '?':
flags.append(ds)
flagIdx = 0
i = 0
for f in flags:
pieces = []
count = 0
idx = 0
for ds in s:
if idx < flagIdx:
continue
if count < 2:
if f == ds:
pieces.append(f)
flagIdx = idx
count += 1
elif f != ds and ds != '?':
pieces.append(ds)
count += 1
else:
pieces.append('?')
else:
break
idx += 1
gTemp.append(pieces)
i += 1
print(pieces)
def main():
print('Candidate elimination\n')
tipeIdx = len(df[0]) - 1
trueCol = len(attributes)
s = df[0]
g = [['?'] * trueCol] * trueCol
count = 0
for data in df:
if data[tipeIdx] == 'yes':
# eliminate g
if count == 0:
idx = 0
for dg in g:
for i in range(len(dg)):
temp = ['?'] * trueCol
if i == idx:
if dg[i] != s[idx]:
temp[idx] = s[idx]
g[i] = temp
idx += 1
else:
# update spesifik
s = find_s(s, data)
idx = 0
for dg in g:
if i == idx:
# jika data general sama dengan specific maka g baru di is
if dg[i] == s[idx]:
temp[idx] = s[idx]
g[i] = temp
idx += 1
else:
idx = 0
for dg in g:

if i == idx:
# Jika data tidak sama maka g di isi nilai s
if dg[i] != data[idx]:
g[i] = temp
temp[idx] = s[idx]
else:
temp[idx] = '?'
g[i] = temp
idx += 1
count += 1
print('=> specific: ', s)

print('=> general: ', g)
print()
# delete baris jika isinya ? semua

gTemp = []
cIdx = 0
for dg in g:
if any(x != '?' for x in dg):
# print('all ?', cIdx)
gTemp.append(dg)
cIdx += 1
g = gTemp
s = [x for x in s if x != 'yes']
print('==============================================================\n')
print('Hasil: ')
print('specific: {}'.format(s))
print('general: {}'.format(g))
print('Hipotesa: ')
hipotesa(g, s)
print('==============================================================\n')
if __name__ == '__main__':
main()
Candidate elimination
=> specific: ['sunny' 'warm' 'normal' 'strong' 'warm' 'same' 'yes']

=> general: [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'],
['?', '?', 'normal', '?', '?', '?'], ['?', '?', '?', 'strong', '?', '?'], ['?', '?',
'?', '?', 'warm', '?'], ['?', '?', '?', '?', '?', 'same']]
=> specific: ['sunny' 'warm' '?' 'strong' 'warm' 'same' 'yes']

['?', '?', '?', '?', '?', '?'], ['?', '?', '?', 'strong', '?', '?'], ['?', '?', '?',
'?', 'warm', '?'], ['?', '?', '?', '?', '?', 'same']]
=> specific: ['sunny' 'warm' '?' 'strong' 'warm' 'same' 'yes']

['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', 'same']]
=> specific: ['sunny' 'warm' '?' 'strong' '?' '?' 'yes']

['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?']]
==============================================================
Hasil:
specific: ['sunny', 'warm', '?', 'strong', '?', '?']
general: [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
Hipotesa:
[]
==============================================================
Example code 2
In [8]: import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#df = pd.read_csv('enjoysport.csv')
#print(df)
data = pd.read_csv('enjoysport.csv')
data.loc[data['enjoy_sport'] == 'yes']
print(data)
concepts = np.array(data.iloc[:,0:-1])
print("\nInstances are:\n",concepts)
target = np.array(data.iloc[:,-1])
print("\nTarget Values are: ",target)
def learn(concepts, target):

specific_h = concepts[0].copy()
print("\nInitialization of specific_h and genearal_h")
print("\nSpecific Boundary: ", specific_h)
general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))
print("\nGeneric Boundary: ",general_h)
for i, h in enumerate(concepts):
print("\nInstance", i+1 , "is ", h)
if target[i] == "yes":
print("Instance is Positive ")
for x in range(len(specific_h)):
if h[x]!= specific_h[x]:
specific_h[x] ='?'
general_h[x][x] ='?'
if target[i] == "no":
print("Instance is Negative ")
for x in range(len(specific_h)):
if h[x]!= specific_h[x]:
general_h[x][x] = specific_h[x]
else:
general_h[x][x] = '?'
print("Specific Bundary after ", i+1, "Instance is ", specific_h)

print("Generic Boundary after ", i+1, "Instance is ", general_h)
print("\n")
indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?
for i in indices:
general_h.remove(['?', '?', '?', '?', '?', '?'])
return specific_h, general_h
s_final, g_final = learn(concepts, target)
print("Final Specific_h: ", s_final, sep="\n")

print("Final General_h: ", g_final, sep="\n")

Instances are:
[['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
['sunny' 'warm' 'high' 'strong' 'warm' 'same']
['rainy' 'cold' 'high' 'strong' 'warm' 'change']
['sunny' 'warm' 'high' 'strong' 'cool' 'change']]
Target Values are: ['yes' 'yes' 'no' 'yes']
Initialization of specific_h and genearal_h
Specific Boundary: ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
Generic Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?']]
Instance 1 is ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']

Instance is Positive
Specific Bundary after 1 Instance is ['sunny' 'warm' 'normal' 'strong' 'warm' 'sam
e']
Generic Boundary after 1 Instance is [['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Instance 2 is ['sunny' 'warm' 'high' 'strong' 'warm' 'same']

Specific Bundary after 2 Instance is ['sunny' 'warm' '?' 'strong' 'warm' 'same']
Generic Boundary after 2 Instance is [['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Instance 3 is ['rainy' 'cold' 'high' 'strong' 'warm' 'change']

Instance is Negative
Specific Bundary after 3 Instance is ['sunny' 'warm' '?' 'strong' 'warm' 'same']
Generic Boundary after 3 Instance is [['sunny', '?', '?', '?', '?', '?'], ['?', 'war
m', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]
Instance 4 is ['sunny' 'warm' 'high' 'strong' 'cool' 'change']

Specific Bundary after 4 Instance is ['sunny' 'warm' '?' 'strong' '?' '?']
Generic Boundary after 4 Instance is [['sunny', '?', '?', '?', '?', '?'], ['?', 'war
m', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Final Specific_h:
['sunny' 'warm' '?' 'strong' '?' '?']
Final General_h:
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
KNN
In [11]: import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
data_set= pd.read_csv('Social_Network_Ads.csv')
#print(data_set)
x= data_set.iloc[:, [2,3]].values
y= data_set.iloc[:, 4].values
# Splitting the dataset into training and test set.

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.25, random_state
#feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
from sklearn.neighbors import KNeighborsClassifier

classifier= KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2 )
classifier.fit(x_train, y_train)
y_pred= classifier.predict(x_test)
from sklearn.metrics import confusion_matrix
cm= confusion_matrix(y_test, y_pred)
print('Confusion Matrix\n',cm)
Confusion Matrix
[[64 4]
[ 3 29]]
Visualization
In [13]: from matplotlib.colors import ListedColormap
x_set, y_set = x_train, y_train
x1, x2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max()
nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max() + 1, step = 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape
alpha = 0.75, cmap = ListedColormap(('red','green' )))
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())
for i, j in enumerate(nm.unique(y_set)):
mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
c = ListedColormap(('red', 'green'))(i), label = j)
mtp.title('K-NN Algorithm (Training set)')
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()
from matplotlib.colors import ListedColormap

x_set, y_set = x_test, y_test
x1, x2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max()
nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max() + 1, step = 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape
alpha = 0.75, cmap = ListedColormap(('red','green' )))
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())
c = ListedColormap(('red', 'green'))(i), label = j)
mtp.title('K-NN algorithm(Test set)')
mtp.xlabel('Age')
mtp.legend()
mtp.show()
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
Example code 2
In [40]: import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
x=np.array([[5,3],[15,12],[24,10],[30,45],[85,70],[71,80],[60,78],[55,52],[80,91]])
In [41]: plt.scatter(x[:,0],x[:,1], label="true position")
Out[41]: <matplotlib.collections.PathCollection at 0x271384847c0>
In [42]: kmeans=KMeans(n_clusters=3)
kmeans.fit(x)
Out[42]: KMeans(n_clusters=3)
In [43]: print(kmeans.cluster_centers_)
[[74. 79.75 ]
[14.66666667 8.33333333]
[42.5 48.5 ]]
In [44]: print(kmeans.labels_)
[1 1 1 2 0 0 0 2 0]
In [45]: plt.scatter(x[:,0],x[:,1],c=kmeans.labels_,cmap='rainbow')
Out[45]: <matplotlib.collections.PathCollection at 0x271376d1eb0>
In [46]: plt.scatter(x[:,0],x[:,1],c=kmeans.labels_,cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:,0],kmeans.cluster_centers_[:,1],color="black")
Out[46]: <matplotlib.collections.PathCollection at 0x271343a6910>
Naive Bayes
In [17]: import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
# Importing the dataset

dataset = pd.read_csv('Social_Network_Ads.csv')
x = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values
# Splitting the dataset into the Training set and Test set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_sta
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix\n',cm)
Confusion Matrix
[[65 3]
[ 7 25]]
Visualization
In [22]: from matplotlib.colors import ListedColormap
x_set, y_set = x_train, y_train
X1, X2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max()
nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max()
mtp.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape
alpha = 0.75, cmap = ListedColormap(('purple', 'green')))
mtp.xlim(X1.min(), X1.max())
mtp.ylim(X2.min(), X2.max())
c = ListedColormap(('purple', 'green'))(i), label = j)
mtp.title('Naive Bayes (Training set)')
mtp.xlabel('Age')
mtp.legend()
mtp.show()
from matplotlib.colors import ListedColormap

x_set, y_set = x_test, y_test
X1, X2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max()
nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max()
mtp.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape
alpha = 0.75, cmap = ListedColormap(('purple', 'green')))
mtp.xlim(X1.min(), X1.max())
mtp.ylim(X2.min(), X2.max())
c = ListedColormap(('purple', 'green'))(i), label = j)
mtp.title('Naive Bayes (test set)')
mtp.xlabel('Age')
mtp.legend()
mtp.show()
Back Propogation
In [23]: import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)

y = np.array(([92], [86], [89]), dtype=float)
X = X/np.amax(X,axis=0) #maximum of X array longitudinally
y = y/100
#Sigmoid Function
def sigmoid (x):
return 1/(1 + np.exp(-x))
#Derivative of Sigmoid Function

def derivatives_sigmoid(x):
return x * (1 - x)
#Variable initialization
epoch=5 #Setting training iterations
lr=0.1 #Setting learning rate
inputlayer_neurons = 2 #number of features in data set

hiddenlayer_neurons = 3 #number of hidden layers neurons
output_neurons = 1 #number of neurons at output layer
#weight and bias initialization
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
bh=np.random.uniform(size=(1,hiddenlayer_neurons))
wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
bout=np.random.uniform(size=(1,output_neurons))
#draws a random range of numbers uniformly of dim x*y

for i in range(epoch):
#Forward Propogation
hinp1=np.dot(X,wh)
hinp=hinp1 + bh
hlayer_act = sigmoid(hinp)
outinp1=np.dot(hlayer_act,wout)
outinp= outinp1+bout
output = sigmoid(outinp)
#Backpropagation
EO = y-output
outgrad = derivatives_sigmoid(output)
d_output = EO * outgrad
EH = d_output.dot(wout.T)
hiddengrad = derivatives_sigmoid(hlayer_act)#how much hidden layer wts contributed
d_hiddenlayer = EH * hiddengrad
wout += hlayer_act.T.dot(d_output) *lr # dotproduct of nextlayererror and curren

wh += X.T.dot(d_hiddenlayer) *lr
print ("-----------Epoch-", i+1, "Starts----------")

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" ,output)
print ("-----------Epoch-", i+1, "Ends----------\n")
print("Input: \n" + str(X))

print("Actual Output: \n" + str(y))
print("Predicted Output: \n" ,output)
-----------Epoch- 1 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92703096]
[0.91694636]
[0.9264059 ]]
-----------Epoch- 1 Ends----------
-----------Epoch- 2 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92693199]
[0.91684592]
[0.92630607]]
-----------Epoch- 2 Ends----------
-----------Epoch- 3 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92683308]
[0.91674556]
[0.92620629]]
-----------Epoch- 3 Ends----------
-----------Epoch- 4 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92673423]
[0.91664527]
[0.92610658]]
-----------Epoch- 4 Ends----------
-----------Epoch- 5 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92663545]
[0.91654506]
[0.92600693]]
-----------Epoch- 5 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92663545]
[0.91654506]
[0.92600693]]
Decision Tree (ID3)

Example Code 1
import numpy as np
import math
data = pd.read_csv('dataset.csv')
print(data)
print()
features = [feat for feat in data]
features.remove("answer")
class Node:
def __init__(self):
self.children = []
self.value = ""
self.isLeaf = False
self.pred = ""
def entropy(examples):
pos = 0.0
neg = 0.0
for _, row in examples.iterrows():
if row["answer"] == "yes":
pos += 1
else:
neg += 1
if pos == 0.0 or neg == 0.0:
return 0.0
else:
p = pos / (pos + neg)
n = neg / (pos + neg)
return -(p * math.log(p, 2) + n * math.log(n, 2))
print("\n",entropy)
def info_gain(examples, attr):

uniq = np.unique(examples[attr])
#print ("\n",uniq)
gain = entropy(examples)
#print ("\n",gain)
for u in uniq:
subdata = examples[examples[attr]== u]
#print ("\n",subdata)
sub_e = entropy(subdata)
gain -= (float(len(subdata)) / float(len(examples))) * sub_e
#print ("\n",gain)
return gain
def ID3(examples, attrs):

root = Node()
max_gain = 0
max_feat = ""
for feature in attrs:
#print ("\n",examples)
gain = info_gain(examples, feature)
if gain > max_gain:
max_gain = gain
max_feat = feature
root.value = max_feat
#print ("\nMax feature attr",max_feat)
uniq = np.unique(examples[max_feat])
#print ("\n",uniq)
for u in uniq:
#print ("\n",u)
subdata = examples[examples[max_feat] == u]
#print ("\n",subdata)
if entropy(subdata) == 0.0:
newNode = Node()
newNode.isLeaf = True
newNode.value = u
newNode.pred = np.unique(subdata["answer"])
root.children.append(newNode)
else:
dummyNode = Node()
dummyNode.value = u
new_attrs = attrs.copy()
new_attrs.remove(max_feat)
child = ID3(subdata, new_attrs)
dummyNode.children.append(child)
root.children.append(dummyNode)
return root
def printTree(root: Node, depth=0):

for i in range(depth):
print("\t", end="")
print(root.value, end="")
if root.isLeaf:
print(" -> ", root.pred)
print()
for child in root.children:
printTree(child, depth + 1)
root = ID3(data, features)

printTree(root)
#tree.plot_tree(root);
outlook temperature humidity wind answer

0 sunny hot high weak no
1 sunny hot high strong no
2 overcast hot high weak yes
3 rain mild high weak yes
4 rain cool normal weak yes
5 rain cool normal strong no
6 overcast cool normal strong yes
7 sunny mild high weak no
8 sunny cool normal weak yes
9 rain mild normal weak yes
10 sunny mild normal strong yes
11 overcast mild high strong yes
12 overcast hot normal weak yes
13 rain mild high strong no
outlook
overcast -> ['yes']
rain
wind
strong -> ['no']
weak -> ['yes']
sunny
humidity
high -> ['no']
normal -> ['yes']
Example code 2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
dataset = pd.read_csv("bill_authentication.csv")
print(dataset.shape)
dataset
(1372, 5)
Out[26]: Variance Skewness Curtosis Entropy Class
0 3.62160 8.66610 -2.8073 -0.44699 0
1 4.54590 8.16740 -2.4586 -1.46210 0
2 3.86600 -2.63830 1.9242 0.10645 0
3 3.45660 9.52280 -4.0112 -3.59440 0
4 0.32924 -4.45520 4.5718 -0.98880 0
... ... ... ... ... ...
1367 0.40614 1.34920 -1.4501 -0.55949 1
1368 -1.38870 -4.87730 6.4774 0.34179 1
1369 -3.75030 -13.45860 17.5932 -2.77710 1
1370 -3.56370 -8.38270 12.3930 -1.28230 1
1371 -2.54190 -0.65804 2.6842 1.19520 1
1372 rows × 5 columns
In [27]: X = dataset.drop('Class', axis=1)

y = dataset['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
from sklearn.tree import DecisionTreeClassifier

classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
print(y_pred)
print(y_test-y_pred)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
[1 0 1 1 1 1 0 0 1 1 1 0 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1 1
0 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 1
1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 1 1 0 0 1 1 1 1 0 1 1 1 0 0 0 0
0 0 0 0 1 1 0 1 1 0 0 0 1 1 1 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 1 0
1 0 0 0 0 0 1 0 0 0 0 1 1 1 0 1 0 0 1 1 1 0 1 0 1 0 1 1 1 1 0 1 1 0 0 0 1
0 0 0 0 1 1 0 1 1 0 1 1 0 0 0 0 1 0 0 1 0 1 0 1 0 1 1 1 1 1 0 0 0 1 1 0 0
0 1 0 1 0 0 0 1 1 0 0 1 1 0 0 0 0 1 1 1 1 1 0 1 1 1 0 0 0 0 1 0 1 1 1 1 1
0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 0]
861 0
175 0
1113 0
822 0
1116 0
..
439 0
1136 0
836 0
238 0
423 0
Name: Class, Length: 275, dtype: int64
[[145 4]
[ 4 122]]
precision recall f1-score support
0 0.97 0.97 0.97 149

1 0.97 0.97 0.97 126
accuracy 0.97 275

macro avg 0.97 0.97 0.97 275
weighted avg 0.97 0.97 0.97 275
In [28]: from matplotlib import pyplot as plt

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
fig = plt.figure(figsize=(25,20))
_ = tree.plot_tree(classifier)
Example code 3
=============================
Data set
In [29]: data_dict = {
'Outlook' : ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast',
,'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Coo
,'Humidity' : ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High
,'Wind': ['False', 'True', 'False', 'False', 'False', 'True', 'True', 'False', 'Fa
,'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',
}
tennis_data = pd.DataFrame(data_dict, columns=data_dict.keys())
tennis_data
Out[29]: Outlook Temperature Humidity Wind PlayTennis
0 Sunny Hot High False No
1 Sunny Hot High True No
2 Overcast Hot High False Yes
3 Rainy Mild High False Yes
4 Rainy Cool Normal False Yes
5 Rainy Cool Normal True No
6 Overcast Cool Normal True Yes
7 Sunny Mild High False No
8 Sunny Cool Normal False Yes
9 Rainy Mild Normal False Yes
10 Sunny Mild Normal True Yes
11 Overcast Mild High True Yes
12 Overcast Hot Normal False Yes
13 Rainy Mild High True No
Finding Entropy
In [30]: def entropy_calculate(prob_list):
entropy = 0
for item in prob_list:
entropy -= item * np.log2(item)
return entropy
In [31]: # Entire system entropy

cases,counts = np.unique(tennis_data.PlayTennis,return_counts=True)
P = [count/len(tennis_data) for count in counts]
print('Probabilities of %s and %s are %.3f, %.3f respectively'%(cases[0],cases[1],P[0]
entropy_entire = entropy_calculate(P)
print('Entire syetems entropy is %.3f bits'%entropy_entire)
Probabilities of No and Yes are 0.357, 0.643 respectively

Entire syetems entropy is 0.940 bits
Outlook
In [32]: cases_outlook,counts_outlook= np.unique(tennis_data.Outlook,return_counts=True)
P_outlook = [count/len(tennis_data) for count in counts_outlook]
print('For outlook:')
for case, prob in zip(cases_outlook,P_outlook):
print('\tProbabality of %s is %.3f'%(case, prob))
For outlook:
Probabality of Overcast is 0.286
Probabality of Rainy is 0.357
Probabality of Sunny is 0.357
In [33]: entropy_outlook={}
total_entropy_outlook=0
for case, prob in zip(cases_outlook,P_outlook):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Outlook==case],return_
P = [count/len(tennis_data[tennis_data.Outlook==case]) for count in counts]
entropy_outlook[case]=entropy_calculate(P)
total_entropy_outlook += entropy_calculate(P)*prob
for case, entropy in entropy_outlook.items():

print('Entropy for %s is %.2f'%(case,entropy))
print('\nEntropy at Outlook decision level is %.3f'%total_entropy_outlook)
print('\nInformation gain is %.3f'%(entropy_entire- total_entropy_outlook))
Entropy for Overcast is 0.00

Entropy for Rainy is 0.97
Entropy for Sunny is 0.97
Entropy at Outlook decision level is 0.694
Information gain is 0.247
For temp
In [34]: cases_temperature,counts_temperature= np.unique(tennis_data.Temperature,return_counts=
P_temperature = [count/len(tennis_data) for count in counts_temperature]
print('For temperature:')
for case, prob in zip(cases_temperature,P_temperature):
For temperature:
Probabality of Cool is 0.286
Probabality of Hot is 0.286
Probabality of Mild is 0.429
In [35]: entropy_temperature={}
total_entropy_temperature=0
for case, prob in zip(cases_temperature,P_temperature):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Temperature==case],ret
P = [count/len(tennis_data[tennis_data.Temperature==case]) for count in counts]
entropy_temperature[case]=entropy_calculate(P)
total_entropy_temperature += entropy_calculate(P)*prob
for case, entropy in entropy_temperature.items():

print('\nEntropy at Temperature decision level is %.3f'%total_entropy_temperature)
print('\nInformation gain is %.3f'%(entropy_entire- total_entropy_temperature))
Entropy for Cool is 0.81

Entropy for Hot is 1.00
Entropy for Mild is 0.92
Entropy at Temperature decision level is 0.911
For wind
In [36]: cases_wind,counts_wind= np.unique(tennis_data.Wind,return_counts=True)
P_wind = [count/len(tennis_data) for count in counts_wind]
print('For wind:')
for case, prob in zip(cases_wind,P_wind):
For wind:
Probabality of False is 0.571
Probabality of True is 0.429
In [37]: entropy_wind={}
total_entropy_wind=0
for case, prob in zip(cases_wind,P_wind):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Wind==case],return_cou
P = [count/len(tennis_data[tennis_data.Wind==case]) for count in counts]
entropy_wind[case]=entropy_calculate(P)
total_entropy_wind += entropy_calculate(P)*prob
for case, entropy in entropy_wind.items():

print('\nEntropy at Wind decision level is %.3f'%total_entropy_wind)
print('\nInformation gain is %.3f'%(entropy_entire- total_entropy_wind))
Entropy for False is 0.81

Entropy for True is 1.00
Entropy at Wind decision level is 0.892
For Humidity
In [38]: cases_humidity,counts_humidity= np.unique(tennis_data.Humidity,return_counts=True)
P_humidity = [count/len(tennis_data) for count in counts_humidity]
print('For humidity:')
for case, prob in zip(cases_humidity,P_humidity):
For humidity:
Probabality of High is 0.500
Probabality of Normal is 0.500
In [39]: entropy_humidity={}
total_entropy_humidity=0
for case, prob in zip(cases_humidity,P_humidity):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Humidity==case],return
P = [count/len(tennis_data[tennis_data.Humidity==case]) for count in counts]
entropy_humidity[case]=entropy_calculate(P)
total_entropy_humidity += entropy_calculate(P)*prob
for case, entropy in entropy_humidity.items():

print('\nEntropy at Humidity decision level is %.3f'%total_entropy_humidity)
print('\nInformation gain is %.3f'%(entropy_entire- total_entropy_humidity))
Entropy for High is 0.99

Entropy for Normal is 0.59
Entropy at Humidity decision level is 0.788

Note:As shown above, by choosing outlook as the first decision/question, the highest reduction in
entropy/randomness is achieved that corresponds to the highest Information Gain

Warm Normal Strong Warm Same Yes Warm High Strong Warm Same Yes Warm High Strong Cool Change Yes

Uploaded by

Document Informationclick to expand document information

Copyright:

Available Formats

Warm Normal Strong Warm Same Yes Warm High Strong Warm Same Yes Warm High Strong Cool Change Yes

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Warm Normal Strong Warm Same Yes Warm High Strong Warm Same Yes Warm High Strong Cool Change Yes

Uploaded by

Copyright:

Available Formats

Find-S Algorithm

In [1]: import pandas as pd

Out[1]: sky air_temp humidity wind water forecast enjoy_sport

0 sunny warm normal strong warm same yes

1 sunny warm high strong warm same yes

3 sunny warm high strong cool change yes

print("\nThe total number of training instances are : ",len(a))

print("\nThe initial hypothesis is : ")

for i in range(0, len(a)):

sky air_temp humidity wind water forecast enjoy_sport

The total number of training instances are : 4

The initial hypothesis is :

Instance 1 is ['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'] and is Po

Instance 4 is ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes'] and is Po

print (" \n The most general hypothesis : ['?','?','?','?','?','?']\n")

with open('ws.csv', 'r') as csvFile:

print("\n The initial value of hypothesis: ")

# Comparing with First Training Example

# Comparing with Remaining Training Examples of Given Data Set

print("\n Find S: Finding a Maximally Specific Hypothesis\n")

The most general hypothesis : ['?','?','?','?','?','?']

The most specific hypothesis : ['0','0','0','0','0','0']

The Given Training Data Set

['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']

The initial value of hypothesis:

Find S: Finding a Maximally Specific Hypothesis

For Training Example No :0 the hypothesis is ['Sunny', 'Warm', 'Normal', 'Strong',

The Maximally Specific Hypothesis for a given Training Examples :

['Sunny', 'Warm', '?', 'Strong', '?', '?']

sky air_temp humidity wind water forecast enjoy_sport

def hipotesa(g, s):

temp = ['?'] * trueCol

print('=> specific: ', s)

# delete baris jika isinya ? semua

=> specific: ['sunny' 'warm' 'normal' 'strong' 'warm' 'same' 'yes']

=> specific: ['sunny' 'warm' '?' 'strong' 'warm' 'same' 'yes']

=> specific: ['sunny' 'warm' '?' 'strong' 'warm' 'same' 'yes']

=> specific: ['sunny' 'warm' '?' 'strong' '?' '?' 'yes']

def learn(concepts, target):

print("Specific Bundary after ", i+1, "Instance is ", specific_h)

s_final, g_final = learn(concepts, target)

print("Final Specific_h: ", s_final, sep="\n")

sky air_temp humidity wind water forecast enjoy_sport

Target Values are: ['yes' 'yes' 'no' 'yes']

Initialization of specific_h and genearal_h

Specific Boundary: ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']

Instance 1 is ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']

Instance 2 is ['sunny' 'warm' 'high' 'strong' 'warm' 'same']

Instance 3 is ['rainy' 'cold' 'high' 'strong' 'warm' 'change']

Instance 4 is ['sunny' 'warm' 'high' 'strong' 'cool' 'change']

# Splitting the dataset into training and test set.

from sklearn.neighbors import KNeighborsClassifier

from matplotlib.colors import ListedColormap

In [41]: plt.scatter(x[:,0],x[:,1], label="true position")

Out[41]: <matplotlib.collections.PathCollection at 0x271384847c0>

Out[45]: <matplotlib.collections.PathCollection at 0x271376d1eb0>

Out[46]: <matplotlib.collections.PathCollection at 0x271343a6910>

# Importing the dataset