Warm Normal Strong Warm Same Yes Warm High Strong Warm Same Yes Warm High Strong Cool Change Yes
Warm Normal Strong Warm Same Yes Warm High Strong Warm Same Yes Warm High Strong Cool Change Yes
Warm Normal Strong Warm Same Yes Warm High Strong Warm Same Yes Warm High Strong Cool Change Yes
df = pd.read_csv('enjoysport.csv')
#print(df)
df.loc[df['enjoy_sport'] == 'yes']
#print(df['sky'])
Example code 1
In [4]: import pandas as pd
import numpy as np
import csv
a = []
df = pd.read_csv('enjoysport.csv')
print(df)
#df.loc[df['enjoy_sport'] == 'YES']
print()
with open('enjoysport.csv', 'r') as csvfile:
next(csvfile)
for row in csv.reader(csvfile):
a.append(row)
print(a)
num_attribute = len(a[0])-1
if a[i][num_attribute] == 'no':
print ("\nInstance ", i+1, "is", a[i], " and is Negative Instance Hence Ignore
print("The hypothesis for the training instance", i+1, " is: " , hypothesis, "
print("\nThe Maximally specific hypothesis for the training instance is ", hypothesis)
[['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'], ['sunny', 'warm', 'hig
h', 'strong', 'warm', 'same', 'yes'], ['rainy', 'cold', 'high', 'strong', 'warm', 'cha
nge', 'no'], ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes']]
Instance 2 is ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'] and is Posi
tive Instance
The hypothesis for the training instance 2 is: ['sunny', 'warm', '?', 'strong', 'war
m', 'same']
Instance 3 is ['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no'] and is Neg
ative Instance Hence Ignored
The hypothesis for the training instance 3 is: ['sunny', 'warm', '?', 'strong', 'war
m', 'same']
The Maximally specific hypothesis for the training instance is ['sunny', 'warm', '?',
'strong', '?', '?']
Example code 2
In [5]: import random
import csv
attributes = [['Sunny','Rainy'],
['Warm','Cold'],
['Normal','High'],
['Strong','Weak'],
['Warm','Cool'],
['Same','Change']]
num_attributes = len(attributes)
a = []
print("\n The Given Training Data Set \n")
for i in range(0,len(a)):
if a[i][num_attributes]=='Yes':
for j in range(0,num_attributes):
if a[i][j]!=hypothesis[j]:
hypothesis[j]='?'
else :
hypothesis[j]= a[i][j]
print(" For Training Example No :{0} the hypothesis is ".format(i),hypothesis)
print("\n The Maximally Specific Hypothesis for a given Training Examples :\n")
print(hypothesis)
Candidate Elimination
In [6]: import pandas as pd
attributes = [['Sunny','Rainy'],
['Warm','Cold'],
['Normal','High'],
['Strong','Weak'],
['Warm','Cool'],
['Same','Change']]
df = pd.read_csv('Enjoysport.csv')
print(df)
print()
Example code 1
In [7]: df = df.values
def find_s(h, d):
for i in range(len(d)):
if h[i] != d[i]:
h[i] = '?'
return h
flagIdx = 0
i = 0
for f in flags:
pieces = []
count = 0
idx = 0
for ds in s:
if idx < flagIdx:
continue
if count < 2:
if f == ds:
pieces.append(f)
flagIdx = idx
count += 1
elif f != ds and ds != '?':
pieces.append(ds)
count += 1
else:
pieces.append('?')
else:
break
idx += 1
gTemp.append(pieces)
i += 1
print(pieces)
def main():
print('Candidate elimination\n')
tipeIdx = len(df[0]) - 1
trueCol = len(attributes)
s = df[0]
g = [['?'] * trueCol] * trueCol
count = 0
for data in df:
if data[tipeIdx] == 'yes':
# eliminate g
if count == 0:
idx = 0
for dg in g:
for i in range(len(dg)):
temp = ['?'] * trueCol
if i == idx:
if dg[i] != s[idx]:
temp[idx] = s[idx]
g[i] = temp
idx += 1
else:
# update spesifik
s = find_s(s, data)
idx = 0
for dg in g:
for i in range(len(dg)):
temp = ['?'] * trueCol
if i == idx:
# jika data general sama dengan specific maka g baru di is
if dg[i] == s[idx]:
temp[idx] = s[idx]
g[i] = temp
idx += 1
else:
idx = 0
for dg in g:
for i in range(len(dg)):
g = gTemp
s = [x for x in s if x != 'yes']
print('==============================================================\n')
print('Hasil: ')
print('specific: {}'.format(s))
print('general: {}'.format(g))
print('Hipotesa: ')
hipotesa(g, s)
print('==============================================================\n')
if __name__ == '__main__':
main()
Candidate elimination
==============================================================
Hasil:
specific: ['sunny', 'warm', '?', 'strong', '?', '?']
general: [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
Hipotesa:
[]
==============================================================
Example code 2
In [8]: import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#df = pd.read_csv('enjoysport.csv')
#print(df)
data = pd.read_csv('enjoysport.csv')
data.loc[data['enjoy_sport'] == 'yes']
print(data)
concepts = np.array(data.iloc[:,0:-1])
print("\nInstances are:\n",concepts)
target = np.array(data.iloc[:,-1])
print("\nTarget Values are: ",target)
for i, h in enumerate(concepts):
print("\nInstance", i+1 , "is ", h)
if target[i] == "yes":
print("Instance is Positive ")
for x in range(len(specific_h)):
if h[x]!= specific_h[x]:
specific_h[x] ='?'
general_h[x][x] ='?'
if target[i] == "no":
print("Instance is Negative ")
for x in range(len(specific_h)):
if h[x]!= specific_h[x]:
general_h[x][x] = specific_h[x]
else:
general_h[x][x] = '?'
indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?
for i in indices:
general_h.remove(['?', '?', '?', '?', '?', '?'])
return specific_h, general_h
Instances are:
[['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
['sunny' 'warm' 'high' 'strong' 'warm' 'same']
['rainy' 'cold' 'high' 'strong' 'warm' 'change']
['sunny' 'warm' 'high' 'strong' 'cool' 'change']]
Generic Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?']]
Final Specific_h:
['sunny' 'warm' '?' 'strong' '?' '?']
Final General_h:
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
KNN
In [11]: import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
data_set= pd.read_csv('Social_Network_Ads.csv')
#print(data_set)
x= data_set.iloc[:, [2,3]].values
y= data_set.iloc[:, 4].values
#feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
Confusion Matrix
[[64 4]
[ 3 29]]
Visualization
In [13]: from matplotlib.colors import ListedColormap
x_set, y_set = x_train, y_train
x1, x2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max()
nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max() + 1, step = 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape
alpha = 0.75, cmap = ListedColormap(('red','green' )))
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())
for i, j in enumerate(nm.unique(y_set)):
mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
c = ListedColormap(('red', 'green'))(i), label = j)
mtp.title('K-NN Algorithm (Training set)')
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
Example code 2
In [40]: import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
x=np.array([[5,3],[15,12],[24,10],[30,45],[85,70],[71,80],[60,78],[55,52],[80,91]])
In [42]: kmeans=KMeans(n_clusters=3)
kmeans.fit(x)
Out[42]: KMeans(n_clusters=3)
In [43]: print(kmeans.cluster_centers_)
[[74. 79.75 ]
[14.66666667 8.33333333]
[42.5 48.5 ]]
In [44]: print(kmeans.labels_)
[1 1 1 2 0 0 0 2 0]
In [45]: plt.scatter(x[:,0],x[:,1],c=kmeans.labels_,cmap='rainbow')
In [46]: plt.scatter(x[:,0],x[:,1],c=kmeans.labels_,cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:,0],kmeans.cluster_centers_[:,1],color="black")
Naive Bayes
In [17]: import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_sta
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
Confusion Matrix
[[65 3]
[ 7 25]]
Visualization
In [22]: from matplotlib.colors import ListedColormap
x_set, y_set = x_train, y_train
X1, X2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max()
nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max()
mtp.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape
alpha = 0.75, cmap = ListedColormap(('purple', 'green')))
mtp.xlim(X1.min(), X1.max())
mtp.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
c = ListedColormap(('purple', 'green'))(i), label = j)
mtp.title('Naive Bayes (Training set)')
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided
as value-mapping will have precedence in case its length matches with *x* & *y*. Plea
se use the *color* keyword-argument or provide a 2-D array with a single row if you in
tend to specify the same RGB or RGBA value for all points.
Back Propogation
In [23]: import numpy as np
#Sigmoid Function
def sigmoid (x):
return 1/(1 + np.exp(-x))
#Variable initialization
epoch=5 #Setting training iterations
lr=0.1 #Setting learning rate
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
bh=np.random.uniform(size=(1,hiddenlayer_neurons))
wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
bout=np.random.uniform(size=(1,output_neurons))
#Backpropagation
EO = y-output
outgrad = derivatives_sigmoid(output)
d_output = EO * outgrad
EH = d_output.dot(wout.T)
hiddengrad = derivatives_sigmoid(hlayer_act)#how much hidden layer wts contributed
d_hiddenlayer = EH * hiddengrad
-----------Epoch- 1 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92703096]
[0.91694636]
[0.9264059 ]]
-----------Epoch- 1 Ends----------
-----------Epoch- 2 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92693199]
[0.91684592]
[0.92630607]]
-----------Epoch- 2 Ends----------
-----------Epoch- 3 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92683308]
[0.91674556]
[0.92620629]]
-----------Epoch- 3 Ends----------
-----------Epoch- 4 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92673423]
[0.91664527]
[0.92610658]]
-----------Epoch- 4 Ends----------
-----------Epoch- 5 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92663545]
[0.91654506]
[0.92600693]]
-----------Epoch- 5 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.92663545]
[0.91654506]
[0.92600693]]
class Node:
def __init__(self):
self.children = []
self.value = ""
self.isLeaf = False
self.pred = ""
def entropy(examples):
pos = 0.0
neg = 0.0
for _, row in examples.iterrows():
if row["answer"] == "yes":
pos += 1
else:
neg += 1
if pos == 0.0 or neg == 0.0:
return 0.0
else:
p = pos / (pos + neg)
n = neg / (pos + neg)
return -(p * math.log(p, 2) + n * math.log(n, 2))
print("\n",entropy)
max_gain = 0
max_feat = ""
for feature in attrs:
#print ("\n",examples)
gain = info_gain(examples, feature)
if gain > max_gain:
max_gain = gain
max_feat = feature
root.value = max_feat
#print ("\nMax feature attr",max_feat)
uniq = np.unique(examples[max_feat])
#print ("\n",uniq)
for u in uniq:
#print ("\n",u)
subdata = examples[examples[max_feat] == u]
#print ("\n",subdata)
if entropy(subdata) == 0.0:
newNode = Node()
newNode.isLeaf = True
newNode.value = u
newNode.pred = np.unique(subdata["answer"])
root.children.append(newNode)
else:
dummyNode = Node()
dummyNode.value = u
new_attrs = attrs.copy()
new_attrs.remove(max_feat)
child = ID3(subdata, new_attrs)
dummyNode.children.append(child)
root.children.append(dummyNode)
return root
outlook
overcast -> ['yes']
rain
wind
strong -> ['no']
sunny
humidity
high -> ['no']
Example code 2
In [26]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
dataset = pd.read_csv("bill_authentication.csv")
print(dataset.shape)
dataset
(1372, 5)
Out[26]: Variance Skewness Curtosis Entropy Class
y_pred = classifier.predict(X_test)
print(y_pred)
print(y_test-y_pred)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
[1 0 1 1 1 1 0 0 1 1 1 0 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1 1
0 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 1
1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 1 1 0 0 1 1 1 1 0 1 1 1 0 0 0 0
0 0 0 0 1 1 0 1 1 0 0 0 1 1 1 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 1 0
1 0 0 0 0 0 1 0 0 0 0 1 1 1 0 1 0 0 1 1 1 0 1 0 1 0 1 1 1 1 0 1 1 0 0 0 1
0 0 0 0 1 1 0 1 1 0 1 1 0 0 0 0 1 0 0 1 0 1 0 1 0 1 1 1 1 1 0 0 0 1 1 0 0
0 1 0 1 0 0 0 1 1 0 0 1 1 0 0 0 0 1 1 1 1 1 0 1 1 1 0 0 0 0 1 0 1 1 1 1 1
0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 0]
861 0
175 0
1113 0
822 0
1116 0
..
439 0
1136 0
836 0
238 0
423 0
Name: Class, Length: 275, dtype: int64
[[145 4]
[ 4 122]]
precision recall f1-score support
Example code 3
=============================
Data set
In [29]: data_dict = {
'Outlook' : ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast',
,'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Coo
,'Humidity' : ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High
,'Wind': ['False', 'True', 'False', 'False', 'False', 'True', 'True', 'False', 'Fa
,'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',
}
tennis_data = pd.DataFrame(data_dict, columns=data_dict.keys())
tennis_data
Finding Entropy
In [30]: def entropy_calculate(prob_list):
entropy = 0
for item in prob_list:
entropy -= item * np.log2(item)
return entropy
entropy_entire = entropy_calculate(P)
Outlook
In [32]: cases_outlook,counts_outlook= np.unique(tennis_data.Outlook,return_counts=True)
P_outlook = [count/len(tennis_data) for count in counts_outlook]
print('For outlook:')
for case, prob in zip(cases_outlook,P_outlook):
print('\tProbabality of %s is %.3f'%(case, prob))
For outlook:
Probabality of Overcast is 0.286
Probabality of Rainy is 0.357
Probabality of Sunny is 0.357
In [33]: entropy_outlook={}
total_entropy_outlook=0
for case, prob in zip(cases_outlook,P_outlook):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Outlook==case],return_
P = [count/len(tennis_data[tennis_data.Outlook==case]) for count in counts]
entropy_outlook[case]=entropy_calculate(P)
total_entropy_outlook += entropy_calculate(P)*prob
For temp
In [34]: cases_temperature,counts_temperature= np.unique(tennis_data.Temperature,return_counts=
P_temperature = [count/len(tennis_data) for count in counts_temperature]
print('For temperature:')
for case, prob in zip(cases_temperature,P_temperature):
print('\tProbabality of %s is %.3f'%(case, prob))
For temperature:
Probabality of Cool is 0.286
Probabality of Hot is 0.286
Probabality of Mild is 0.429
In [35]: entropy_temperature={}
total_entropy_temperature=0
for case, prob in zip(cases_temperature,P_temperature):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Temperature==case],ret
P = [count/len(tennis_data[tennis_data.Temperature==case]) for count in counts]
entropy_temperature[case]=entropy_calculate(P)
total_entropy_temperature += entropy_calculate(P)*prob
For wind
In [36]: cases_wind,counts_wind= np.unique(tennis_data.Wind,return_counts=True)
P_wind = [count/len(tennis_data) for count in counts_wind]
print('For wind:')
for case, prob in zip(cases_wind,P_wind):
print('\tProbabality of %s is %.3f'%(case, prob))
For wind:
Probabality of False is 0.571
Probabality of True is 0.429
In [37]: entropy_wind={}
total_entropy_wind=0
for case, prob in zip(cases_wind,P_wind):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Wind==case],return_cou
P = [count/len(tennis_data[tennis_data.Wind==case]) for count in counts]
entropy_wind[case]=entropy_calculate(P)
total_entropy_wind += entropy_calculate(P)*prob
For Humidity
In [38]: cases_humidity,counts_humidity= np.unique(tennis_data.Humidity,return_counts=True)
P_humidity = [count/len(tennis_data) for count in counts_humidity]
print('For humidity:')
for case, prob in zip(cases_humidity,P_humidity):
print('\tProbabality of %s is %.3f'%(case, prob))
For humidity:
Probabality of High is 0.500
Probabality of Normal is 0.500
In [39]: entropy_humidity={}
total_entropy_humidity=0
for case, prob in zip(cases_humidity,P_humidity):
cases,counts = np.unique(tennis_data.PlayTennis[tennis_data.Humidity==case],return
P = [count/len(tennis_data[tennis_data.Humidity==case]) for count in counts]
entropy_humidity[case]=entropy_calculate(P)
total_entropy_humidity += entropy_calculate(P)*prob