# -*- coding: utf-8 -*-
"""Tutorial_Classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/11oY_mGpqTqFlj7o-iRjsMy3tkQvlcTzc
"""

# Commented out IPython magic to ensure Python compatibility.
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline

# Commented out IPython magic to ensure Python compatibility.
# %%shell
# jupyter nbconvert --to html /content/Tutorial_Classification_final.ipynb

from sklearn.datasets import load_iris

iris = load_iris()
print(iris['DESCR'])

from pandas.plotting import scatter_matrix
import pandas as pd

iris_data = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
iris_data["target"] = iris['target']
color_wheel = {1: "#0392cf", 2: "#7bc043", 3: "#ee4035"}
colors = iris_data["target"].map(lambda x: color_wheel.get(x + 1))
ax = scatter_matrix(iris_data, color=colors, alpha=0.6, figsize=(15, 15))

# Select first 2 flower classes (~100 rows)
# and first 2 features.
# (.copy() keeps the centering below from mutating iris['data'] in place,
# since basic slicing returns a view.)
sepal_len = iris['data'][:100, 0].copy()
sepal_wid = iris['data'][:100, 1].copy()
labels = iris['target'][:100]

# We will also center the data.
# This is done to make the numbers nice, so that we have no
# need for biases in our classification. (You might not
# be able to remove biases this way in general.)
sepal_len -= np.mean(sepal_len)
sepal_wid -= np.mean(sepal_wid)

# Plot Iris
plt.scatter(sepal_len, sepal_wid, c=labels, cmap=plt.cm.Paired)
plt.xlabel("sepal length")
plt.ylabel("sepal width")

def plot_sep(w1, w2, color='green'):
    '''Plot decision boundary hypothesis
       w1 * sepal_len + w2 * sepal_wid = 0
       in input space, highlighting the hyperplane.'''
    plt.scatter(sepal_len, sepal_wid, c=labels, cmap=plt.cm.Paired)
    plt.title("Separation in Input Space")
    plt.ylim([-1.5, 1.5])
    plt.xlim([-1.5, 2])
    plt.xlabel("sepal length")
    plt.ylabel("sepal width")
    if w2 != 0:
        m = -w1 / w2
        t = 1 if w2 > 0 else -1
        plt.plot([-1.5, 2.0], [-1.5 * m, 2.0 * m], '-', color=color)
        plt.fill_between([-1.5, 2.0],
                         [m * -1.5, m * 2.0],
                         [t * 1.5, t * 1.5],
                         alpha=0.2,
                         color=color)
    if w2 == 0:  # decision boundary is vertical
        t = 1 if w1 > 0 else -1
        plt.plot([0, 0], [-1.5, 2.0], '-', color=color)
        plt.fill_between([0, 2.0 * t],
                         [-1.5, -2.0],
                         [1.5, 2],
                         alpha=0.2,
                         color=color)

plot_sep(0, 1)

plot_sep(-0.5, 1)

# We're going to hand pick one point and
# analyze that point:
a1 = sepal_len[41]
a2 = sepal_wid[41]
print(a1, a2)  # (-0.97, -0.79)

plot_sep(-0.5, 1)
plt.plot(a1, a2, 'ob')  # highlight the point
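
# A quick check (a minimal sketch, using the centered features above): for
# the hypothesis (w1, w2) = (-0.5, 1), a sample is classified by the sign of
# z = w1*x1 + w2*x2. The tutorial treats this point as a positive example,
# so z <= 0 means it is misclassified.
z = -0.5 * a1 + 1 * a2
print("z =", z, "->", "misclassified" if z <= 0 else "correctly classified")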

# Now let's look at weight space
def plot_weight_space(sepal_len, sepal_wid, lab=1, color='steelblue',
                      maxlim=2.0):
    plt.title("Constraint(s) in Weight Space")
    plt.ylim([-maxlim, maxlim])
    plt.xlim([-maxlim, maxlim])
    plt.xlabel("w1")
    plt.ylabel("w2")
    if sepal_wid != 0:
        m = -sepal_len / sepal_wid
        t = 1 * lab if sepal_wid > 0 else -1 * lab
        plt.plot([-maxlim, maxlim],
                 [-maxlim * m, maxlim * m],
                 '-',
                 color=color)
        plt.fill_between([-maxlim, maxlim],          # x
                         [m * -maxlim, m * maxlim],  # y-min
                         [t * maxlim, t * maxlim],   # y-max
                         alpha=0.2,
                         color=color)
    if sepal_wid == 0:  # decision boundary is vertical
        t = 1 * lab if sepal_len > 0 else -1 * lab
        plt.plot([0, 0], [-maxlim, maxlim], '-', color=color)
        plt.fill_between([0, 2.0 * t],
                         [-maxlim, -maxlim],
                         [maxlim, maxlim],
                         alpha=0.2,
                         color=color)

# Plot the constraint for the point identified earlier:
a1 = sepal_len[41]
a2 = sepal_wid[41]
print(a1, a2)

# Do this on the board first by hand.
plot_weight_space(a1, a2, lab=1)

# Below is the hypothesis we plotted earlier.
# Notice it falls outside the feasible (shaded) region.
plt.plot(-0.5, 1, 'og')

w1 = -0.5  # + ...
w2 = 1     # + ...

# The perceptron step should bring the hypothesis closer to the constraint
# boundary; in this case, the step brought the point into the feasible
# region.
plot_weight_space(a1, a2, lab=1)
plt.plot(-0.5 + a1, 1 + a2, 'og')
# old hypothesis
plt.plot(-0.5, 1, 'og')
plt.plot([-0.5, -0.5 + a1], [1, 1 + a2], '-g')
plt.gca().set_aspect('equal', 'box')  # plt.axes() would create a new Axes

# Which means that the point (a1, a2) in input
# space is now correctly classified.
plot_sep(-0.5 + a1, 1 + a2)
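
# A sanity check on the perceptron step (a minimal sketch): for a positive
# example a = (a1, a2), the update (w1, w2) <- (w1 + a1, w2 + a2) raises the
# activation by exactly a1**2 + a2**2 >= 0, which is why repeated updates
# eventually satisfy the constraint w . a > 0 on separable data.
z_before = -0.5 * a1 + 1 * a2
z_after = (-0.5 + a1) * a1 + (1 + a2) * a2
print("z before:", z_before, "z after:", z_after,
      "increase:", z_after - z_before, "(= ||a||^2 =", a1**2 + a2**2, ")")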

sepal_len

sepal_wid

labels

sgn_labels = labels.copy()
sgn_labels

for i in range(0, sgn_labels.size, 1):
    if sgn_labels[i] == 0:
        sgn_labels[i] = -1
sgn_labels
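
# The loop above can also be written as a one-line vectorized mapping; this
# sketch assumes labels is a NumPy array of 0/1 class indices, as here:
sgn_labels = np.where(labels == 0, -1, 1)
sgn_labels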

def plr2d(x1, x2, t, w0_0, w1_0, w2_0, N, disable_w0):
    # Perceptron learning rule in 2D: up to N epochs over the data,
    # updating (w0, w1, w2) on every misclassified sample, then reporting
    # the remaining mismatches.
    w0 = w0_0
    w1 = w1_0
    w2 = w2_0
    for n in range(0, N, 1):
        mismatched = False
        for i in range(0, t.size, 1):
            z = w0 + x1[i] * w1 + x2[i] * w2
            if z * (t[i]) <= 0:
                mismatched = True
                if disable_w0:
                    w0 = w0
                else:
                    w0 = w0 + (t[i]) * 1
                w1 = w1 + (t[i]) * (x1[i])
                w2 = w2 + (t[i]) * (x2[i])
        if mismatched == False:
            print("converged: n=", n)
            break
    print("learning done")

    cnt = 0
    for i in range(0, t.size, 1):
        z = w0 + x1[i] * w1 + x2[i] * w2
        if z * (t[i]) <= 0:
            print("mismatch[", i, "]")
            cnt += 1
    print(cnt, "mismatches")
    return [w0, w1, w2]
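
# The mismatch count inside plr2d can also be computed without an explicit
# loop; a minimal vectorized sketch, assuming x1, x2, t are NumPy arrays as
# used throughout this tutorial:
def count_mismatches(x1, x2, t, w0, w1, w2):
    z = w0 + x1 * w1 + x2 * w2      # activations for all samples at once
    return int(np.sum(z * t <= 0))  # count sign disagreements with targets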

[wt0, wt1, wt2] = plr2d(sepal_len, sepal_wid, sgn_labels, 0, 0, 0, 10, True)
print([wt0, wt1, wt2])

plot_sep(wt1, wt2)

# reference AND
x1 = np.array([0, 0, 1, 1])
x2 = np.array([0, 1, 0, 1])
t = np.array([-1, -1, -1, 1])
[w0, w1, w2] = plr2d(x1, x2, t, 0, 0, 0, 100, False)
print([w0, w1, w2])

# reference OR
x1 = np.array([0, 0, 1, 1])
x2 = np.array([0, 1, 0, 1])
t = np.array([-1, 1, 1, 1])
[w0, w1, w2] = plr2d(x1, x2, t, 0, 0, 0, 100, False)
print([w0, w1, w2])

# reference NOT
x1 = np.array([1, 1])
x2 = np.array([0, 1])
t = np.array([1, -1])
[w0, w1, w2] = plr2d(x1, x2, t, 0, 0, 0, 100, False)
print([w0, w1, w2])
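
# To verify a learned gate, apply the final weights to every input row and
# compare the sign of the activation with the target. A minimal sketch,
# shown for the NOT weights just learned:
for xi1, xi2, ti in zip(x1, x2, t):
    z = w0 + w1 * xi1 + w2 * xi2
    print((xi1, xi2), "->", 1 if z > 0 else -1, "(target:", ti, ")")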

import sklearn.datasets

# generate a separable DS
separable = False
while not separable:
    samples = sklearn.datasets.make_classification(n_samples=100, n_features=2,
                                                   n_redundant=0, n_informative=1,
                                                   n_clusters_per_class=1, flip_y=-1)
    red = samples[0][samples[1] == 0]
    blue = samples[0][samples[1] == 1]
    separable = any([red[:, k].max() < blue[:, k].min() or
                     red[:, k].min() > blue[:, k].max() for k in range(2)])

plt.plot(red[:, 0], red[:, 1], 'r.')
plt.plot(blue[:, 0], blue[:, 1], 'b.')
plt.show()

x1 = samples[0][:100, 0]
x2 = samples[0][:100, 1]
labels = samples[1]

sgn_labels = labels.copy()
for i in range(0, sgn_labels.size, 1):
    if sgn_labels[i] == 0:
        sgn_labels[i] = -1

# plt.scatter(x1, x2, c=labels, cmap=plt.cm.Paired)

[w0, w1, w2] = plr2d(x1, x2, sgn_labels, 0, 0, 0, 100, False)
print(w0, w1, w2)

red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any([red[:, k].max() < blue[:, k].min() or
                 red[:, k].min() > blue[:, k].max() for k in range(2)])

plt.plot(red[:, 0], red[:, 1], 'r.')
plt.plot(blue[:, 0], blue[:, 1], 'b.')
plt.ylim([-4, 4])
plt.xlim([-4, 4])

# Decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = -(w1*x1 + w0) / w2
x_values = [np.min(x1[:]), np.max(x1[:])]
y_values = np.dot((-1. / w2), (np.dot(w1, x_values) + w0))
plt.plot(x_values, y_values, label='Decision Boundary')
plt.show()

# generate a NON separable DS
separable = True
while separable:
    samples = sklearn.datasets.make_classification(n_samples=100, n_features=2,
                                                   n_redundant=0, n_informative=1,
                                                   n_clusters_per_class=1, flip_y=-1)
    red = samples[0][samples[1] == 0]
    blue = samples[0][samples[1] == 1]
    separable = any([red[:, k].max() < blue[:, k].min() or
                     red[:, k].min() > blue[:, k].max() for k in range(2)])

plt.plot(red[:, 0], red[:, 1], 'r.')
plt.plot(blue[:, 0], blue[:, 1], 'b.')
plt.show()

x1 = samples[0][:100, 0]
x2 = samples[0][:100, 1]
labels = samples[1]

sgn_labels = labels.copy()
for i in range(0, sgn_labels.size, 1):
    if sgn_labels[i] == 0:
        sgn_labels[i] = -1

# plt.scatter(x1, x2, c=labels, cmap=plt.cm.Paired)

# w0 stays enabled here (disable_w0=False)
[w0, w1, w2] = plr2d(x1, x2, sgn_labels, 0, 0, 0, 100, False)
print(w0, w1, w2)

# plot_ds(w1, w2, x1, x2)

red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any([red[:, k].max() < blue[:, k].min() or
                 red[:, k].min() > blue[:, k].max() for k in range(2)])

plt.plot(red[:, 0], red[:, 1], 'r.')
plt.plot(blue[:, 0], blue[:, 1], 'b.')
plt.ylim([-4, 4])
# plt.xlim([-4, 4])

x_values = [np.min(x1[:]), np.max(x1[:])]
y_values = np.dot((-1. / w2), (np.dot(w1, x_values) + w0))
plt.plot(x_values, y_values, label='Decision Boundary')
plt.show()
# Note: on non-separable data this does not converge (it stops at the
# iteration limit N) and produces a classifier that misclassifies many
# samples.

import numpy as np

def ptrLCE_ERROR(x1, x2, t, w0_0, w1_0, w2_0, N, disable_w0, alpha):
    # Logistic regression trained with per-sample (stochastic) gradient
    # updates on the cross-entropy loss.
    w0 = w0_0
    w1 = w1_0
    w2 = w2_0
    for n in range(0, N, 1):
        for i in range(0, t.size, 1):
            z = w0 + x1[i] * w1 + x2[i] * w2
            # print(n, z)
            y = 1. / (1 + np.exp(-z))
            # A. dL/dz = y - t
            # B1. dL/db = dL/dz
            if disable_w0:
                w0 = w0_0
            else:
                w0 = w0 - alpha * (y - t[i])
            # B2. dL/dwi = dL/dz * xi
            w1 = w1 - alpha * (y - t[i]) * x1[i]
            w2 = w2 - alpha * (y - t[i]) * x2[i]

    cnt = 0
    for i in range(0, t.size, 1):
        z = w0 + x1[i] * w1 + x2[i] * w2
        y = 1. / (1 + np.exp(-z))
        if y < 0.5:
            c = 0
        else:
            c = 1
        if c != t[i]:
            print("mismatch[", i, "]", c, t[i])
            cnt += 1
    print(cnt, "mismatches")
    return [w0, w1, w2]
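
# The gradient identities used above (dL/dz = y - t, dL/dwi = (y - t)*xi for
# the cross-entropy loss L = -t*log(y) - (1-t)*log(1-y)) can be spot-checked
# numerically. A minimal finite-difference sketch on one sample; the weights,
# sample values, and eps below are arbitrary choices for illustration:
def ce_loss(w0, w1, w2, xi1, xi2, ti):
    y = 1. / (1 + np.exp(-(w0 + w1 * xi1 + w2 * xi2)))
    return -ti * np.log(y) - (1 - ti) * np.log(1 - y)

eps, xi1, xi2, ti = 1e-6, 0.3, -1.2, 1
y = 1. / (1 + np.exp(-(0.1 + 0.2 * xi1 + 0.3 * xi2)))
num_grad_w1 = (ce_loss(0.1, 0.2 + eps, 0.3, xi1, xi2, ti) -
               ce_loss(0.1, 0.2 - eps, 0.3, xi1, xi2, ti)) / (2 * eps)
print("analytic dL/dw1:", (y - ti) * xi1, "numerical:", num_grad_w1)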

def ptrLCE(x1, x2, t, w0_0, w1_0, w2_0, disable_w0, alpha):
    # Logistic regression: a single batch-gradient step on the mean
    # cross-entropy loss, followed by a mismatch report.
    w0 = w0_0
    w1 = w1_0
    w2 = w2_0

    gradEw = 0
    gradEb = 0
    for i in range(0, t.size, 1):
        z = w0 + x1[i] * w1 + x2[i] * w2
        y = 1. / (1 + np.exp(-z))
        gradEw += np.array([x1[i], x2[i]]) * (y - t[i])
        gradEb += (y - t[i])
    gradEw = gradEw / t.size
    gradEb = gradEb / t.size

    # A. dL/dz = y - t
    # B1. dL/db = dL/dz
    if disable_w0:
        w0 = w0_0
    else:
        w0 = w0 - alpha * gradEb
    # B2. dL/dwi = dL/dz * xi
    w1 = w1 - alpha * gradEw[0]
    w2 = w2 - alpha * gradEw[1]

    cnt = 0
    for i in range(0, t.size, 1):
        z = w0 + x1[i] * w1 + x2[i] * w2
        y = 1. / (1 + np.exp(-z))
        if y < 0.5:
            c = 0
        else:
            c = 1
        if c != t[i]:
            print("mismatch[", i, "]", c, t[i])
            cnt += 1
    print(cnt, "mismatches")
    return [w0, w1, w2]

[w0, w1, w2] = ptrLCE(x1, x2, labels, 0, 0, 0, False, 0.001)
print(w0, w1, w2)
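
# ptrLCE takes a single batch-gradient step per call, so one hedged way to
# train further is to chain calls, feeding each result back in as the next
# starting point (note each call also prints its mismatch report); n_steps
# is an arbitrary choice here:
w0s, w1s, w2s = 0, 0, 0
n_steps = 200
for _ in range(n_steps):
    [w0s, w1s, w2s] = ptrLCE(x1, x2, labels, w0s, w1s, w2s, False, 0.001)
print(w0s, w1s, w2s)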

red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any([red[:, k].max() < blue[:, k].min() or
                 red[:, k].min() > blue[:, k].max() for k in range(2)])

plt.plot(red[:, 0], red[:, 1], 'r.')
plt.plot(blue[:, 0], blue[:, 1], 'b.')
plt.ylim([-4, 4])
# plt.xlim([-4, 4])

x_values = [np.min(x1[:]), np.max(x1[:])]
y_values = np.dot((-1. / w2), (np.dot(w1, x_values) + w0))
plt.plot(x_values, y_values, label='Decision Boundary')
plt.show()

samples[0][:, 1]

from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

model = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0,
                           fit_intercept=True, intercept_scaling=1,
                           class_weight=None, random_state=None,
                           solver='liblinear', max_iter=100, multi_class='ovr',
                           verbose=0, warm_start=False, n_jobs=1)

x1 = samples[0][:100, 0]
x2 = samples[0][:100, 1]

model.fit(samples[0], samples[1])
y_predict = model.predict(samples[0])
print(accuracy_score(samples[1], y_predict))
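
# model.coef_ and model.intercept_ define the same linear score
# z = coef . x + intercept used by the manual implementations above, and
# predict_proba applies the logistic function to it. A quick cross-check
# on the first sample (a sketch; any row would do):
z0 = np.dot(model.coef_[0], samples[0][0]) + model.intercept_[0]
print("sigmoid(z0):", 1. / (1 + np.exp(-z0)),
      "predict_proba:", model.predict_proba(samples[0][:1])[0, 1])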

coef = model.coef_
intercept_ = model.intercept_

red = samples[0][samples[1] == 0]
blue = samples[0][samples[1] == 1]
separable = any([red[:, k].max() < blue[:, k].min() or
                 red[:, k].min() > blue[:, k].max() for k in range(2)])

plt.plot(red[:, 0], red[:, 1], 'r.')
plt.plot(blue[:, 0], blue[:, 1], 'b.')
plt.ylim([-4, 4])

x1_plot = np.arange(4, 8, step=0.1)  # unused leftover from the notebook
x_values = [np.min(x1[:]), np.max(x1[:])]
x2_plot = -(np.array(x_values) * coef[0][0] + intercept_) / coef[0][1]
plt.plot(x_values, x2_plot, label='Decision Boundary')
plt.show()

np.array(x_values) * coef[0][0] + intercept_
