Back Propagation - Machine Learning

The document describes experiments with a neural network for digit recognition. In the original assignment, the author found that 1000 epochs and a learning rate of 0.5 produced good results, with low error even with some corrupted pixels. However, there seemed to be overfitting as error was higher for more corrupted pixels. In the extra credit task using ASCII digits, the network overfit more, with error converging quickly during training but being high for more corrupted pixels during testing.


HW5

Results of original assignment

Conclusion/discussion
I experimented with the number of epochs and the learning rate and found 1000 epochs and a rate of 0.5 to work well; even 100 epochs produced acceptable results. Test error is very low, even with 8 corrupted pixels (<0.1 average MSE). However, there seems to be some overfitting, based on the higher MSE for 8 corrupted pixels compared to 4.
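
As a rough illustration of how that experimentation could be automated, the sketch below runs a small grid search over epoch counts and learning rates and reports the average test error at 8 corrupted pixels. It is not part of the original submission: the grid values are arbitrary, and it assumes it lives in the same file as (or otherwise has access to) the fwdComp, backProp, update, and addNoise functions listed under "Backprop code" below.

function sweepHyperparams(P, T)
% Hypothetical helper (not in the original code): grid search over
% training epochs and learning rate, reusing the hw4 helper functions.
% P is the 30x3 pattern matrix and T the 3x3 target matrix built in hw4.
epochGrid = [100 500 1000];
rateGrid = [0.1 0.5 1.0];
nHidden = 4;
for e = epochGrid
    for lr = rateGrid
        % fresh random weights and biases in [-0.5, 0.5] for each trial
        W1 = rand(nHidden, 30) - 0.5;
        b1 = rand(nHidden, 1) - 0.5;
        W2 = rand(size(T,1), nHidden) - 0.5;
        b2 = rand(size(T,1), 1) - 0.5;
        for x = 1:e
            r = randi(size(P,2));
            [out1, out2] = fwdComp(P(:,r), W1, b1, W2, b2);
            [sens1, sens2] = backProp(out1, out2, T(:,r), W2);
            [W1, b1, W2, b2] = update(W1, b1, W2, b2, lr, sens1, sens2, out1, P(:,r));
        end
        % average squared test error over all patterns, 8 corrupted pixels
        sse = 0;
        for t = 1:size(P,2)
            [~, out2] = fwdComp(addNoise(P(:,t), 8), W1, b1, W2, b2);
            sse = sse + sum((T(:,t) - out2).^2);
        end
        fprintf('epochs=%4d  lr=%.2f  avg test error (8 px) = %.4f\n', e, lr, sse/size(P,2));
    end
end
end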
Results of extra credit

Conclusion/discussion
In the ASCII case, the network seemed to overfit more than in the original assignment. We can see this in how quickly the MSE converges during training, while testing still produces a high MSE at 8 corrupted pixels (compared to 4).
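
Another way to check the overfitting claim, beyond comparing MSE values, would be to count how often the thresholded network output exactly matches the target code. The helper below is an addition for illustration only (it is not in the submitted code) and assumes the fwdComp and addNoise functions from the listings below are available to it:

function acc = hitRate(P, T, W1, b1, W2, b2, pixelsChanged, trials)
% Hypothetical diagnostic (not in the original code): fraction of noisy
% test presentations whose thresholded output matches the target exactly.
hits = 0;
for t = 1:size(P,2)
    for y = 1:trials
        [~, out2] = fwdComp(addNoise(P(:,t), pixelsChanged), W1, b1, W2, b2);
        % logsig outputs lie in (0,1); round thresholds them at 0.5
        if isequal(round(out2), T(:,t))
            hits = hits + 1;
        end
    end
end
acc = hits / (size(P,2) * trials);
end

A call like hitRate(P, T, W1, b1, W2, b2, 8, 1000) after training would give a direct classification accuracy to set alongside the average MSE at 8 corrupted pixels.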
Backprop code

function hw4
% define constants
trainingEpochs = 1000;
testingEpochs = 1000;
learningRate = 0.5;
noOfNeurons = 4;
error(1:trainingEpochs) = 0;
error1(1:3*testingEpochs) = 0;
error2(1:3*testingEpochs) = 0;
error3(1:3*testingEpochs) = 0;
noOfIterations(1:trainingEpochs) = 0;

% initial weights and biases (random b/w -0.5 and 0.5)
% hidden layer with noOfNeurons neurons, 3-neuron output layer
W1 = (-0.5 + (0.5+0.5)*rand(30,noOfNeurons))';
b1 = (-0.5 + (0.5+0.5)*rand(1,noOfNeurons))';
W2 = -0.5 + (0.5+0.5)*rand(3,noOfNeurons);
b2 = -0.5 + (0.5+0.5)*rand(3,1);

% inputs p0 = 0, p1 = 1, p2 = 2
p0 = [-1 1 1 1 1 -1 1 -1 -1 -1 -1 1 1 -1 -1 -1 -1 1 1 -1 -1 -1 -1 1 -1 1 1 1 1 -1]';
p1 = [-1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]';
p2 = [1 -1 -1 -1 -1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 -1 1 1 -1 -1 1 -1 -1 -1 -1 -1 1]';
P = [p0 p1 p2];

% targets
t0 = [1 0 0]';
t1 = [0 1 0]';
t2 = [0 0 1]';
T = [t0 t1 t2];

% backprop training for <trainingEpochs> iterations
for x = 1:trainingEpochs
% pick one of the 3 patterns at random
r = randi(3);
% fwdComp, backProp, update
[out1, out2] = fwdComp(P(:,r), W1, b1, W2, b2);
[sens1, sens2] = backProp(out1, out2, T(:,r), W2);
[W1, b1, W2, b2] = update(W1, b1, W2, b2, learningRate, sens1, sens2, out1, P(:,r));

% record iteration number and squared error for this pattern
noOfIterations(x) = x;
error(x) = sum((T(:,r) - out2).^2);
end

% plot backprop results
plot(noOfIterations, error)
title('Backpropagation ANN Training Results')
xlabel('Number of Iterations')
ylabel('Mean Squared Error')

% testing for <testingEpochs> noisy trials per pattern and noise level
for t = 1:3
for y = 1:testingEpochs
idx = (t-1)*testingEpochs + y;
[~, out2] = fwdComp(addNoise(P(:,t),0), W1, b1, W2, b2);
error1(idx) = sum((T(:,t) - out2).^2);
[~, out2] = fwdComp(addNoise(P(:,t),4), W1, b1, W2, b2);
error2(idx) = sum((T(:,t) - out2).^2);
[~, out2] = fwdComp(addNoise(P(:,t),8), W1, b1, W2, b2);
error3(idx) = sum((T(:,t) - out2).^2);
end
end

% plot pixel corruption results with 0, 4, and 8 pixels changed
graphx = 0:4:8;
graphy = [mean(error1) mean(error2) mean(error3)];
bar(graphx,graphy);
title('Backpropagation ANN Testing Results')
xlabel('Number of Corrupted Pixels')
ylabel('Average Mean Squared Error')
end

function out = addNoise(originalDigit, pixelsChanged)
% pick distinct random pixel positions (randperm avoids drawing the same
% pixel twice, which would flip it back to its original value)
r = randperm(30, pixelsChanged);

% for the randomly chosen pixels, flip values
for idx = 1:pixelsChanged
originalDigit(r(idx)) = originalDigit(r(idx)) * -1;
end

out = originalDigit;
end

function [out1,out2] = fwdComp(input, W1, b1, W2, b2)
% forward pass: log-sigmoid hidden layer, log-sigmoid output layer
out1 = logsig(W1 * input + b1);
out2 = logsig(W2 * out1 + b2);
end

function [sens1,sens2] = backProp(out1, out2, target, W2)
% fdots using derivative of logsig: f'(n) = a.*(1 - a)
fdot2 = diag((ones(size(out2))-out2).*out2);
fdot1 = diag((ones(size(out1))-out1).*out1);
% backprop starting w/ second layer
sens2 = -2 * fdot2 * (target - out2);
sens1 = fdot1 * W2' * sens2;
end

function [W1n, b1n, W2n, b2n] = update(W1, b1, W2, b2, learningRate, sens1, sens2, out1, input)
W2n = W2 - (learningRate * sens2 * out1');
b2n = b2 - (learningRate * sens2);
W1n = W1 - (learningRate * sens1 * input');
b1n = b1 - (learningRate * sens1);
end

Extra credit code (same structure, just different inputs, targets, and test/loop parameters)
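
For reference, the seven targets below are the 7-bit ASCII codes of the characters '0' through '6' (for example, '0' is decimal 48, i.e. 0110000 in binary). The sketch below shows how that target matrix could be generated instead of typed by hand; this helper is illustrative and not part of the original code:

function T = asciiTargets()
% Hypothetical helper (not in the original code): build the 7x7 target
% matrix whose k-th column is the 7-bit ASCII code of the digit k-1.
T = zeros(7, 7);
for k = 0:6
    bits = dec2bin(double('0') + k, 7);   % e.g. '0110000' for the character '0'
    T(:, k+1) = (bits - '0')';            % convert the char digits to a 0/1 column
end
end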

function hw4ext
% define constants
trainingEpochs = 1000;
testingEpochs = 1000;
learningRate = 0.5;
noOfNeurons = 4;
error(1:trainingEpochs) = 0;
error1(1:7*testingEpochs) = 0;
error2(1:7*testingEpochs) = 0;
error3(1:7*testingEpochs) = 0;
noOfIterations(1:trainingEpochs) = 0;

% initial weights and biases (random b/w -0.5 and 0.5)
% hidden layer with noOfNeurons neurons, 7-neuron output layer (one per ASCII bit)
W1 = (-0.5 + (0.5+0.5)*rand(30,noOfNeurons))';
b1 = (-0.5 + (0.5+0.5)*rand(1,noOfNeurons))';
W2 = -0.5 + (0.5+0.5)*rand(7,noOfNeurons);
b2 = -0.5 + (0.5+0.5)*rand(7,1);

% p0 = 0, p1 = 1, ... , p6 = 6
p0 = [-1 1 1 1 1 -1 1 -1 -1 -1 -1 1 1 -1 -1 -1 -1 1 1 -1 -1 -1 -1 1 -1 1 1 1 1 -1]';
p1 = [-1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]';
p2 = [1 -1 -1 -1 -1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 -1 1 1 -1 -1 1 -1 -1 -1 -1 -1 1]';
p3 = [-1 -1 -1 -1 -1 -1 -1 1 -1 -1 1 -1 1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 1 -1 1 1 -1]';
p4 = [-1 -1 -1 -1 -1 -1 1 1 1 -1 -1 -1 -1 -1 1 -1 -1 -1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1]';
p5 = [-1 -1 -1 -1 -1 -1 1 1 1 -1 1 -1 1 -1 1 -1 -1 1 1 -1 1 -1 -1 1 1 -1 -1 1 1 -1]';
p6 = [-1 -1 -1 -1 -1 -1 -1 1 1 1 1 -1 1 -1 1 -1 -1 1 1 -1 1 -1 -1 1 1 -1 -1 1 1 -1]';
P = [p0 p1 p2 p3 p4 p5 p6];

% targets: 7-bit ASCII codes of '0'..'6' as column vectors
t0 = [0 1 1 0 0 0 0]';
t1 = [0 1 1 0 0 0 1]';
t2 = [0 1 1 0 0 1 0]';
t3 = [0 1 1 0 0 1 1]';
t4 = [0 1 1 0 1 0 0]';
t5 = [0 1 1 0 1 0 1]';
t6 = [0 1 1 0 1 1 0]';
T = [t0 t1 t2 t3 t4 t5 t6];
% backprop training for <trainingEpochs> iterations
for x = 1:trainingEpochs
% pick one of the 7 patterns at random
r = randi(7);
% fwdComp, backProp, update
[out1, out2] = fwdComp(P(:,r), W1, b1, W2, b2);
[sens1, sens2] = backProp(out1, out2, T(:,r), W2);
[W1, b1, W2, b2] = update(W1, b1, W2, b2, learningRate, sens1, sens2, out1, P(:,r));

% record iteration number and squared error for this pattern
noOfIterations(x) = x;
error(x) = sum((T(:,r) - out2).^2);
end

% plot backprop results
plot(noOfIterations, error)
title('Backprop ASCII ANN Training Results')
xlabel('Number of Iterations')
ylabel('Mean Squared Error')

% testing for <testingEpochs> noisy trials per pattern and noise level
for t = 1:7
for y = 1:testingEpochs
idx = (t-1)*testingEpochs + y;
[~, out2] = fwdComp(addNoise(P(:,t),0), W1, b1, W2, b2);
error1(idx) = sum((T(:,t) - out2).^2);
[~, out2] = fwdComp(addNoise(P(:,t),4), W1, b1, W2, b2);
error2(idx) = sum((T(:,t) - out2).^2);
[~, out2] = fwdComp(addNoise(P(:,t),8), W1, b1, W2, b2);
error3(idx) = sum((T(:,t) - out2).^2);
end
end
end

% plot pixel corruption results with 0, 4, and 8 pixels changed
graphx = 0:4:8;
graphy = [mean(error1) mean(error2) mean(error3)];
bar(graphx,graphy);
title('Backprop ASCII ANN Testing Results')
xlabel('Number of Corrupted Pixels')
ylabel('Average Mean Squared Error')
end

function out = addNoise(originalDigit, pixelsChanged)
% pick distinct random pixel positions (randperm avoids drawing the same
% pixel twice, which would flip it back to its original value)
r = randperm(30, pixelsChanged);

% for the randomly chosen pixels, flip values
for idx = 1:pixelsChanged
originalDigit(r(idx)) = originalDigit(r(idx)) * -1;
end

out = originalDigit;
end
function [out1,out2] = fwdComp(input, W1, b1, W2, b2)
out1 = logsig(W1 * input + b1);
out2 = logsig(W2 * out1 + b2);
end

function [sens1,sens2] = backProp(out1, out2, target, W2)
% fdots using derivative of logsig: f'(n) = a.*(1 - a)
fdot2 = diag((ones(size(out2))-out2).*out2);
fdot1 = diag((ones(size(out1))-out1).*out1);
% backprop starting w/ second layer
sens2 = -2 * fdot2 * (target - out2);
sens1 = fdot1 * W2' * sens2;
end

function [W1n, b1n, W2n, b2n] = update(W1, b1, W2, b2, learningRate, sens1, sens2, out1, input)
W2n = W2 - (learningRate * sens2 * out1');
b2n = b2 - (learningRate * sens2);
W1n = W1 - (learningRate * sens1 * input');
b1n = b1 - (learningRate * sens1);
end
