# Tutorial Classification (Python export of the notebook referenced below)
"""Tutorial_Classification.ipynb
https://colab.research.google.com/drive/11oY_mGpqTqFlj7o-iRjsMy3tkQvlcTzc
"""
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
# %matplotlib inline
# %%shell
import pandas as pd

# Load the Iris data set and print the description that ships with it.
iris = load_iris()
print(iris['DESCR'])

# Tabular copy of the feature matrix with the 'target' array as extra column.
iris_data = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
iris_data["target"] = iris['target']

# Work with the first 100 rows and the first two feature columns only.
sepal_len = iris['data'][:100, 0]
sepal_wid = iris['data'][:100, 1]
labels = iris['target'][:100]

# Centre both features around zero.  The slices above are views into
# iris['data'], so the subtraction is done out-of-place: an in-place ``-=``
# would silently rewrite the loaded data set (and anything sharing its buffer).
sepal_len = sepal_len - np.mean(sepal_len)
sepal_wid = sepal_wid - np.mean(sepal_wid)

# Plot Iris
plt.scatter(sepal_len, sepal_wid, c=labels, cmap=plt.cm.Paired)
plt.xlabel("sepal length")
plt.ylabel("sepal width")
def plot_sep(w1, w2, color='green'):
    """Plot the data together with the boundary w1*sepal_len + w2*sepal_wid = 0.

    Draws the module-level ``sepal_len`` / ``sepal_wid`` points coloured by
    ``labels``, then the separating line through the origin and a shaded
    region on one side of it.

    NOTE(review): the original ``def`` line is not present in the source; the
    positional parameters follow the visible calls (``plot_sep(0, 1)``) and
    the default for ``color`` is an assumption -- confirm against the notebook.

    Args:
        w1: weight applied to the sepal-length axis.
        w2: weight applied to the sepal-width axis.
        color: matplotlib colour used for the line and the shaded region.
    """
    plt.scatter(sepal_len, sepal_wid, c=labels, cmap=plt.cm.Paired)
    plt.ylim([-1.5, 1.5])
    plt.xlim([-1.5, 2])
    plt.xlabel("sepal length")
    plt.ylabel("sepal width")
    if w2 != 0:
        # Non-vertical boundary: sepal_wid = m * sepal_len.
        m = -w1 / w2
        # The sign of w2 selects which side of the line is shaded.
        t = 1 if w2 > 0 else -1
        # Plain '-' format: the colour comes from ``color`` alone, which avoids
        # the redundant colour given by the former '-y' format string.
        plt.plot([-1.5, 2.0], [-1.5 * m, 2.0 * m], '-', color=color)
        plt.fill_between(
            [-1.5, 2.0],
            [m * -1.5, m * 2.0],
            [t * 1.5, t * 1.5],
            alpha=0.2,
            color=color)
    else:
        # w2 == 0: the boundary is the vertical line sepal_len = 0.
        t = 1 if w1 > 0 else -1
        plt.plot([0, 0], [-1.5, 1.5], '-', color=color)
        plt.fill_between(
            [0, 2.0 * t],
            [-1.5, -2.0],
            [1.5, 2],
            alpha=0.2,
            color=color)
# Draw two candidate separators: (w1, w2) = (0, 1) and (w1, w2) = (-0.5, 1).
plot_sep(0, 1)
plot_sep(-0.5, 1)
# We're going to hand pick one point and
# analyze that point:
a1 = sepal_len[41]
a2 = sepal_wid[41]
plot_sep(-0.5, 1)
maxlim=2.0):
plt.ylim([-maxlim,maxlim])
plt.xlim([-maxlim,maxlim])
plt.xlabel("w1")
plt.ylabel("w2")
if sepal_wid != 0:
m = -sepal_len/sepal_wid
plt.plot([-maxlim, maxlim],
[-maxlim*m, maxlim*m],
'-y',
color=color)
plt.fill_between([-maxlim, maxlim], # x
color=color)
plt.fill_between([0, 2.0*t],
[-maxlim, -maxlim],
[maxlim, maxlim],
alpha=0.2,
color=color)
# Sample at index 41, used to illustrate one weight update.
a1 = sepal_len[41]
a2 = sepal_wid[41]
plt.plot(-0.5, 1, 'og')
w1 = -0.5  # + ...
w2 = 1  # + ...
# This should bring the point closer to the boundary.
# In this case, the step brought the point into the
# condition boundary.
# old hypothesis
plt.plot(-0.5, 1, 'og')
# gca() targets the axes that is already being drawn on.
plt.gca().set_aspect('equal', 'box')
plot_sep(-0.5 + a1, 1 + a2)
# Bare names below are notebook-cell display leftovers; as script statements
# they have no effect.
sepal_len
sepal_wid
labels
# Signed copy of the labels: every 0 becomes -1, other values are kept.
sgn_labels = labels.copy()
sgn_labels
sgn_labels[sgn_labels == 0] = -1
sgn_labels
def plr2d(x1, x2, t, w0_0, w1_0, w2_0, N, disable_w0):
    """Train a two-input perceptron with the perceptron learning rule.

    For every sample the activation ``z = w0 + w1*x1[i] + w2*x2[i]`` is
    compared with the signed target ``t[i]``; when ``z * t[i] <= 0`` the
    weights are moved by ``t[i]`` times the input.  Training stops after a
    full pass without any update, or after ``N`` passes.  Afterwards every
    still-misclassified index and the total count are printed.

    NOTE(review): the line computing ``z`` is absent from the visible source;
    it is restored here as the linear form implied by the update rule.

    Args:
        x1: first feature, indexable by sample.
        x2: second feature, indexable by sample.
        t: array (must expose ``.size``) of signed targets, e.g. -1 / +1.
        w0_0: initial bias weight.
        w1_0: initial weight for ``x1``.
        w2_0: initial weight for ``x2``.
        N: maximum number of passes over the data.
        disable_w0: when true the bias is never updated.

    Returns:
        ``[w0, w1, w2]`` after training.
    """
    w0 = w0_0
    w1 = w1_0
    w2 = w2_0
    for _ in range(N):
        mismatched = False
        for i in range(t.size):
            z = w0 + w1 * x1[i] + w2 * x2[i]
            # z * t <= 0: wrong side of (or exactly on) the boundary.
            if z * t[i] <= 0:
                mismatched = True
                if not disable_w0:
                    w0 = w0 + t[i] * 1
                w1 = w1 + t[i] * x1[i]
                w2 = w2 + t[i] * x2[i]
        if not mismatched:
            # A clean pass means every sample is classified correctly.
            break
    cnt = 0
    for i in range(t.size):
        z = w0 + w1 * x1[i] + w2 * x2[i]
        if z * t[i] <= 0:
            print("mismatch[", i, "]")
            cnt += 1
    print(cnt, "mismatches")
    return [w0, w1, w2]
# Train on the iris features with the bias disabled (at most 10 passes) and
# draw the resulting separator.
[wt0, wt1, wt2] = plr2d(sepal_len, sepal_wid, sgn_labels, 0, 0, 0, 10, True)
print([wt0, wt1, wt2])
plot_sep(wt1, wt2)
# reference AND
x1 = np.array([0, 0, 1, 1])
x2 = np.array([0, 1, 0, 1])
t = np.array([-1, -1, -1, 1])
[w0, w1, w2] = plr2d(x1, x2, t, 0, 0, 0, 100, False)
print([w0, w1, w2])
# reference OR
x1 = np.array([0, 0, 1, 1])
x2 = np.array([0, 1, 0, 1])
t = np.array([-1, 1, 1, 1])
[w0, w1, w2] = plr2d(x1, x2, t, 0, 0, 0, 100, False)
print([w0, w1, w2])
# reference NOT
x1 = np.array([1, 1])
x2 = np.array([0, 1])
t = np.array([1, -1])
[w0, w1, w2] = plr2d(x1, x2, t, 0, 0, 0, 100, False)
print([w0, w1, w2])
# generate a separable DS
# NOTE(review): the loop header and the sample-generation call are absent from
# the visible source; they are restored to mirror the non-separable generator
# further down in this file -- confirm against the notebook.
separable = False
while not separable:
    # flip_y=0: no labels are randomly flipped.
    samples = sklearn.datasets.make_classification(
        n_samples=100, n_features=2, n_redundant=0, n_informative=1,
        n_clusters_per_class=1, flip_y=0)
    red = samples[0][samples[1] == 0]
    blue = samples[0][samples[1] == 1]
    # Separable here means: along at least one feature the two classes occupy
    # disjoint value ranges.
    separable = any(
        red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max()
        for k in range(2))
plt.show()
x1 = samples[0][:100, 0]
x2 = samples[0][:100, 1]
labels = samples[1]
# Signed copy of the labels: every 0 becomes -1.
sgn_labels = labels.copy()
sgn_labels[sgn_labels == 0] = -1
#plt.scatter(x1,x2,c=labels,cmap=plt.cm.Paired)
[w0, w1, w2] = plr2d(x1, x2, sgn_labels, 0, 0, 0, 100, False)
print(w0, w1, w2)
red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any(
    red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max()
    for k in range(2))
plt.ylim([-4, 4])
plt.xlim([-4, 4])
plt.show()
# Generate a data set whose classes overlap along both features.
separable = True
while separable:
    # flip_y=0 replaces the former flip_y=-1: the documented range is [0, 1],
    # and 0 likewise means that no labels are randomly flipped.
    samples = sklearn.datasets.make_classification(
        n_samples=100, n_features=2, n_redundant=0, n_informative=1,
        n_clusters_per_class=1, flip_y=0)
    red = samples[0][samples[1] == 0]
    blue = samples[0][samples[1] == 1]
    # True when, along at least one feature, the classes occupy disjoint
    # value ranges; the loop repeats until that is no longer the case.
    separable = any(
        red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max()
        for k in range(2))
plt.show()
x1 = samples[0][:100, 0]
x2 = samples[0][:100, 1]
labels = samples[1]
# Signed copy of the labels: every 0 becomes -1.
sgn_labels = labels.copy()
sgn_labels[sgn_labels == 0] = -1
#plt.scatter(x1,x2,c=labels,cmap=plt.cm.Paired)
[w0, w1, w2] = plr2d(x1, x2, sgn_labels, 0, 0, 0, 100, False)
print(w0, w1, w2)
#plot_ds(w1,w2,x1,x2)
red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any(
    red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max()
    for k in range(2))
plt.ylim([-4, 4])
#plt.xlim([-4,4])
plt.show()
# Note: this doesn't converge (it breaks due to limIter) and produces a
# classifier that misses many cases.
import numpy as np
def ptrLCE_ERROR(x1, x2, t, w0_0, w1_0, w2_0, N, disable_w0, alpha):
    """Fit a two-input logistic unit with per-sample gradient steps.

    For each sample the output ``y = 1 / (1 + exp(-z))`` is computed from
    ``z = w0 + w1*x1[i] + w2*x2[i]`` and every weight is immediately moved by
    ``-alpha * (y - t[i])`` times its input.  ``N`` passes are made over the
    data.  Afterwards each sample is classified as 0 when ``y < 0.5`` and 1
    otherwise; mismatching indices and their total are printed.

    NOTE(review): the line computing ``z`` is absent from the visible source;
    it is restored here as the linear form implied by the weight updates.

    Args:
        x1: first feature, indexable by sample.
        x2: second feature, indexable by sample.
        t: array (must expose ``.size``) of 0/1 targets.
        w0_0: initial bias weight.
        w1_0: initial weight for ``x1``.
        w2_0: initial weight for ``x2``.
        N: number of passes over the data.
        disable_w0: when true the bias stays at ``w0_0``.
        alpha: step size.

    Returns:
        ``[w0, w1, w2]`` after training.
    """
    w0 = w0_0
    w1 = w1_0
    w2 = w2_0
    for _ in range(N):
        for i in range(t.size):
            z = w0 + w1 * x1[i] + w2 * x2[i]
            y = 1. / (1 + np.exp(-z))
            # A. dL/dz = y - t
            if disable_w0:
                w0 = w0_0
            else:
                w0 = w0 - alpha * (y - t[i])
            w1 = w1 - alpha * (y - t[i]) * x1[i]
            w2 = w2 - alpha * (y - t[i]) * x2[i]
    cnt = 0
    for i in range(t.size):
        z = w0 + w1 * x1[i] + w2 * x2[i]
        y = 1. / (1 + np.exp(-z))
        # Threshold the output at 0.5 to obtain the predicted class.
        c = 0 if y < 0.5 else 1
        if c != t[i]:
            print("mismatch[", i, "]", c, t[i])
            cnt += 1
    print(cnt, "mismatches")
    return [w0, w1, w2]
def ptrLCE(x1, x2, t, w0_0, w1_0, w2_0, disable_w0, alpha, n_iter=1):
    """Fit a two-input logistic unit with batch gradient steps.

    Each step averages ``(y - t[i])`` times the input over all samples, where
    ``y = 1 / (1 + exp(-z))`` and ``z = w0 + w1*x1[i] + w2*x2[i]``, and moves
    the weights by ``-alpha`` times that average.  Afterwards each sample is
    classified as 0 when ``y < 0.5`` and 1 otherwise; mismatching indices and
    their total are printed.

    NOTE(review): the line computing ``z`` is absent from the visible source;
    it is restored here as the linear form implied by the gradient terms.  The
    visible code performs exactly one step, which remains the default.

    Args:
        x1: first feature, indexable by sample.
        x2: second feature, indexable by sample.
        t: array (must expose ``.size``) of 0/1 targets.
        w0_0: initial bias weight.
        w1_0: initial weight for ``x1``.
        w2_0: initial weight for ``x2``.
        disable_w0: when true the bias stays at ``w0_0``.
        alpha: step size.
        n_iter: number of batch steps to take (default 1).

    Returns:
        ``[w0, w1, w2]`` after the update(s).
    """
    w0 = w0_0
    w1 = w1_0
    w2 = w2_0
    for _ in range(n_iter):
        grad_w1 = 0
        grad_w2 = 0
        grad_b = 0
        for i in range(t.size):
            z = w0 + w1 * x1[i] + w2 * x2[i]
            y = 1. / (1 + np.exp(-z))
            # A. dL/dz = y - t
            grad_w1 += x1[i] * (y - t[i])
            grad_w2 += x2[i] * (y - t[i])
            grad_b += (y - t[i])
        # Average the accumulated gradient over the samples.
        grad_w1 = grad_w1 / t.size
        grad_w2 = grad_w2 / t.size
        grad_b = grad_b / t.size
        if disable_w0:
            w0 = w0_0
        else:
            w0 = w0 - alpha * grad_b
        w1 = w1 - alpha * grad_w1
        w2 = w2 - alpha * grad_w2
    cnt = 0
    for i in range(t.size):
        z = w0 + w1 * x1[i] + w2 * x2[i]
        y = 1. / (1 + np.exp(-z))
        # Threshold the output at 0.5 to obtain the predicted class.
        c = 0 if y < 0.5 else 1
        if c != t[i]:
            print("mismatch[", i, "]", c, t[i])
            cnt += 1
    print(cnt, "mismatches")
    return [w0, w1, w2]
# Batch logistic step on the current samples (0/1 labels), bias enabled,
# step size 0.001.
w0, w1, w2 = ptrLCE(x1, x2, labels, 0, 0, 0, False, 0.001)
print(w0, w1, w2)
# Split the points by class and re-evaluate the per-feature separability flag.
red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any(
    red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max()
    for k in range(2)
)
plt.ylim([-4, 4])
#plt.xlim([-4,4])
plt.show()
# Notebook-cell display leftover; as a script statement it has no effect.
samples[0][:, 1]
# [print artifact] page 6/7 of tutorial_classification.py, printed 2022/4/26 19:10
from sklearn.linear_model import LogisticRegression
import numpy as np
x1 = samples[0][:100, 0]
x2 = samples[0][:100, 1]
# NOTE(review): the statement creating ``model`` is absent from the visible
# source; a default-configured LogisticRegression is assumed -- confirm.
model = LogisticRegression()
model.fit(samples[0], samples[1])
# Accuracy is measured on the same samples the model was fitted on.
y_predict = model.predict(samples[0])
print(accuracy_score(samples[1], y_predict))
coef = model.coef_
intercept_ = model.intercept_
red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any(
    red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max()
    for k in range(2))
plt.ylim([-4, 4])
plt.show()
# [print artifact] page 7/7 of tutorial_classification.py