diff --git a/15-01-01 459_Mont_Lyman.jpg b/15-01-01 459_Mont_Lyman.jpg new file mode 100644 index 0000000..860f2d0 Binary files /dev/null and b/15-01-01 459_Mont_Lyman.jpg differ diff --git a/README.md b/README.md index 7c714a1..7454fb0 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,22 @@ be used with E13B. run_batch.sc is a bash script that runs all of the programs in the directory -The script file will create many files in the folder, ./plots. +The script file will create many files in the folder, /tmp/plots. -Python 3.4 +Python 3.8 Anaconda3 Linux or Windows +cuda 11.2.1 +h5py 2.10.0 +Keras 2.4.3 +Lasagne 0.1 +matplotlib 3.3.2 +numpy 1.19.2 +pytesseract 0.3.7 +sklearn 0.0 +tensorflow 2.4.1 +tesseract 4.1.1 +Theano 1.0.5 +tika 1.24 + diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..4219ba5 --- /dev/null +++ b/license.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Richard Ricker Lyman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
class base_network(object):
    """Training, evaluation and prediction loops shared by the network classes.

    The methods only drive an already-built graph.  They read the following
    attributes, which this class does not create itself: ``_sess``, ``_ph``
    (placeholders, target first), ``_keep_prob``, ``_train_step``,
    ``_merged``, ``_accuracy``, ``_writer``, ``_x_image``,
    ``_correct_prediction``, ``_prediction``, ``_nRows`` and ``_nCols``.
    """

    def fit(self, truthed_data, nEpochs=5000):
        """Train on mini-batches of 100 samples.

        Every 100th step the accuracy on the current batch is measured with
        dropout disabled, written to the summary writer and printed.
        Training stops early once ten such measurements in a row are
        (numerically) perfect.

        Args:
            truthed_data: object providing ``next_batch(n)`` and
                ``num_features``; the batch list must be ordered like the
                placeholders in ``self._ph``.
            nEpochs: maximum number of training steps.
        """
        perfect_count = 10
        for i in range(nEpochs):
            batch = truthed_data.next_batch(100)
            # the batch list is returned in the same order as the placeholders
            feed = {self._ph[j]: batch[j]
                    for j in range(truthed_data.num_features)}

            if i % 100 == 0:
                # Evaluate with a *copy* of the feed so that switching dropout
                # off here cannot leak into the training step below.
                eval_feed = dict(feed)
                eval_feed[self._keep_prob] = 1.0
                summary_str, train_accuracy = self._sess.run(
                    [self._merged, self._accuracy], feed_dict=eval_feed)
                self._writer.add_summary(summary_str, i)
                if train_accuracy <= (1.0 - 1e-5):
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break
                print("step %d, training accuracy %g" % (i, train_accuracy),
                      flush=True)

            feed[self._keep_prob] = 0.5
            self._sess.run(self._train_step, feed_dict=feed)

    def test(self, truthed_data, title=''):
        """Report the mean batch accuracy and show the misclassified images.

        The data is consumed in batches of 100; a trailing partial batch is
        not evaluated.

        Args:
            truthed_data: object providing ``features``, ``next_batch(n)``
                and ``num_features``.
            title: label inserted into the printed line and the montage title.
        """
        wrong_images = [np.empty((0, self._nRows, self._nCols))]
        test_accuracy = 0
        m = 0

        for _ in range(int(len(truthed_data.features[0]) / 100)):
            batch = truthed_data.next_batch(100)
            feed = {self._keep_prob: 1.0}
            for j in range(truthed_data.num_features):
                feed[self._ph[j]] = batch[j]

            accuracy, images, correct = self._sess.run(
                [self._accuracy, self._x_image, self._correct_prediction],
                feed_dict=feed)
            test_accuracy += accuracy
            # keep channel 0 of every image whose prediction was wrong
            wrong_images.append(images[:, :, :, 0][np.logical_not(correct)])
            m += 1

        # one concatenation instead of re-copying the array for every batch
        error_images = np.concatenate(wrong_images, axis=0)

        if m == 0:
            # no full batch was available, so there is nothing to average
            print("test accuracy 1", flush=True)
            ocr_utils.montage(error_images, title='TensorFlow Error Images')
            return

        mean_accuracy = test_accuracy / m
        try:
            print("test accuracy {} for : {}".format(mean_accuracy, title),
                  flush=True)
            ocr_utils.montage(
                error_images,
                title='TensorFlow {} Error Images'.format(title))
        except Exception:
            # best effort: fall back to the untitled report if the titled
            # one could not be produced
            print("test accuracy {}".format(mean_accuracy), flush=True)
            ocr_utils.montage(error_images, title='TensorFlow Error Images')

    def predict(self, truthed_features):
        """Return the predicted class index for every sample.

        Only features from index 1 on are fed; feature 0 (the target) is not
        needed to compute ``self._prediction``.

        Args:
            truthed_features: object providing ``features`` and
                ``num_features``.
        """
        feed = {self._keep_prob: 1.0}
        for j in range(1, truthed_features.num_features):
            feed[self._ph[j]] = truthed_features.features[j]
        result = self._sess.run([self._prediction], feed_dict=feed)
        return result[0]
For instance it may be m_label_one_hot + # the second features[1] is the 'image' that is passed to the convolution layers + # Any additional features bypass the convolution layers and go directly + # into the fully connected layer. + + # The width of the extra features is calculated in order to allocate + # the correct widths of weights, # and inputs + # names are assigned to make the look pretty on the tensorboard graph. + + if i == 0: + nm = 'y_'+nm + else: + nm = 'x_'+nm + if i>1: + extra_features_width += truthed_features.feature_width[i] + + lst.append(tf.placeholder(dtype, shape=[None, truthed_features.feature_width[i]], name=nm)) + + # ph is a named tuple with key names like 'image', 'm_label', and values that + # are tensors. The display name on the Chrome graph are 'y_m_label', 'x_image, + # x_upper_case etc. + + + Place_Holders = namedtuple('Place_Holders', truthed_features.feature_names) + self._ph = Place_Holders(*lst) # unpack placeholders into named Tuple + self._keep_prob = tf.placeholder(dtype,name='keep_prob') + self._nRows = truthed_features.num_rows #image height + self._nCols = truthed_features.num_columns #image width + nFc = 1024 # size of fully connected layer + nConv1 = 32 # size of first convolution layer + nConv2 = 64 # size of second convolution layer + nTarget = truthed_features.feature_width[0] # the number of one_hot features in the target, 'm_label' + n_h_pool2_outputs = int(self._nRows/4) * int(self._nCols/4) * nConv2 # second pooling layer + n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width # fully connected + + """# ============================================================================== + + Build a Multilayer Convolutional Network + + Weight Initialization + + """# ============================================================================== + + def weight_variable(shape, dtype): + initial = tf.truncated_normal(shape, stddev=0.1,dtype=dtype) + return tf.Variable(initial) + + def bias_variable(shape, dtype): + 
initial = tf.constant(0.1, shape=shape, dtype=dtype) + return tf.Variable(initial) + + """# ============================================================================== + + Convolution and Pooling + + keep our code cleaner, let's also abstract those operations into functions. + + """# ============================================================================== + + def conv2d(x, W): + return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + + def max_pool_2x2(x): + return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], padding='SAME') + + """# ============================================================================== + + First Convolutional Layer + + """# ============================================================================== + with tf.name_scope("w_conv1") as scope: + W_conv1 = weight_variable([5, 5, 1, nConv1],dtype) + b_conv1 = bias_variable([nConv1],dtype) + + with tf.name_scope("reshape_x_image") as scope: + self._x_image = tf.reshape(self._ph.image, [-1,self._nCols,self._nRows,1]) + + image_summ = tf.summary.image("x_image", self._x_image) + + """# ============================================================================== + + We then convolve x_image with the weight tensor, add the bias, apply the ReLU function, + and finally max pool. + + """# ============================================================================== + + with tf.name_scope("convolve_1") as scope: + h_conv1 = tf.nn.relu(conv2d(self._x_image, W_conv1) + b_conv1) + + with tf.name_scope("pool_1") as scope: + h_pool1 = max_pool_2x2(h_conv1) + + """# ============================================================================== + + Second Convolutional Layer + + In order to build a deep network, we stack several layers of this type. The second + layer will have 64 features for each 5x5 patch. 
+ + """# ============================================================================== + + with tf.name_scope("convolve_2") as scope: + W_conv2 = weight_variable([5, 5, nConv1, nConv2],dtype) + b_conv2 = bias_variable([64],dtype) + h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) + + with tf.name_scope("pool_2") as scope: + h_pool2 = max_pool_2x2(h_conv2) + + """# ============================================================================== + + Densely Connected Layer + + Now that the image size has been reduced to 7x7, we add a fully-connected layer + with neurons to allow processing on the entire image. We reshape the tensor + from the pooling layer into a batch of vectors, multiply by a weight matrix, add + a bias, and apply a ReLU. + + """# ============================================================================== + + with tf.name_scope("W_fc1_b") as scope: + W_fc1 = weight_variable([n_h_pool2_outputsx, nFc],dtype) + b_fc1 = bias_variable([nFc],dtype) + + h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs]) + + # append the features, the 2nd on, that go directly to the fully connected layer + for i in range(2,truthed_features.num_features ): + print(i) + print(self._ph[i]) + h_pool2_flat = tf.concat([h_pool2_flat, self._ph[i]],1) + h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) + + """# ============================================================================== + + Dropout + + """# ============================================================================== + + + with tf.name_scope("drop") as scope: + h_fc1_drop = tf.nn.dropout(h_fc1, self._keep_prob) + + """# ============================================================================== + + Readout Layer + + """# ============================================================================== + with tf.name_scope("softmax") as scope: + W_fc2 = weight_variable([nFc, nTarget],dtype) + b_fc2 = bias_variable([nTarget],dtype) + y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + 
b_fc2) + + + with tf.name_scope("xent") as scope: + + # 1e-8 added to eliminate the crash of training when taking log of 0 + cross_entropy = -tf.reduce_sum(self._ph[0]*tf.log(y_conv+ 1e-8 )) + ce_summ = tf.summary.scalar("cross entropy", cross_entropy) + + with tf.name_scope("train") as scope: + self._train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) + + with tf.name_scope("test") as scope: + self._correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(self._ph[0],1)) + self._prediction = tf.argmax(y_conv,1) + + self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, dtype)) + accuracy_summary = tf.summary.scalar("accuracy", self._accuracy) + weight_summary = tf.summary.histogram("weights", W_fc2) + """# ============================================================================== + + Start TensorFlow Interactive Session + + """# ============================================================================== + + self._sess.run(tf.initialize_all_variables()) + self._merged = tf.summary.merge_all() + tm = "" + tp = datetime.datetime.now().timetuple() + for i in range(4): + tm += str(tp[i])+'-' + tm += str(tp[4]) + self._writer = tf.summary.FileWriter("/tmp/ds_logs/"+ tm, self._sess.graph) + + def computeSize(s,tens): + sumC = 1 + tShape = tens.get_shape() + nDims = len(tShape) + for i in range(nDims): + sumC *= tShape[i] + print ('\t{}\t{}'.format(s,sumC),flush=True) + return sumC + + print ('network size:',flush=True) + total = computeSize("W_fc1",W_fc1)+ \ + computeSize ("b_fc1",b_fc1) + \ + computeSize ("W_conv1",W_conv1) + \ + computeSize ("b_conv1",b_conv1) + \ + computeSize ("W_conv2",W_conv2) + \ + computeSize ("b_conv2",b_conv2) + \ + computeSize ("W_fc2",W_fc2) + \ + computeSize ("b_fc2",b_fc2) + print('\ttotal\t{}'.format(total),flush=True ) + + + def reset_graph(self): + tf.reset_default_graph() # only necessary when iterating through fonts + self._sess.close() + + + + \ No newline at end of file diff --git a/n1_2cnv2fc.py 
class network(b_network):
    """Classifier with two convolution/pool stages and three hidden dense layers.

    The graph is built with the TensorFlow v1 API inside an
    ``InteractiveSession``: conv(5x5, 32) -> pool -> conv(5x5, 64) -> pool ->
    three 2048-wide ReLU layers -> dropout -> softmax readout.  The training,
    test and prediction loops are inherited from the base class.
    """

    def __init__(self, truthed_features, dtype=np.float32):
        """Build the graph, initialise the variables and open the summary writer.

        Args:
            truthed_features: dataset description; ``feature_names``,
                ``feature_width``, ``num_features``, ``num_rows`` and
                ``num_columns`` are read.  Feature 0 is the target, the
                feature named ``image`` feeds the convolution layers, and
                features from index 2 on bypass the convolutions and are
                appended to the input of the first dense layer.
            dtype: element type of the placeholders and variables.
        """
        self._sess = tf.InteractiveSession()

        # ------------------------------------------------------------------
        # Placeholders: one per dataset feature.  The 'y_' / 'x_' prefixes
        # only make the nodes easier to read in the TensorBoard graph.
        # ------------------------------------------------------------------
        placeholders = []
        extra_features_width = 0    # total width of the features that skip the convolutions
        for i, nm in enumerate(truthed_features.feature_names):
            width = truthed_features.feature_width[i]
            if i > 1:
                extra_features_width += width
            placeholders.append(tf.placeholder(
                dtype, shape=[None, width],
                name=('y_' if i == 0 else 'x_') + nm))

        # named tuple keyed by the original feature names, e.g. 'image'
        Place_Holders = namedtuple('Place_Holders', truthed_features.feature_names)
        self._ph = Place_Holders(*placeholders)
        self._keep_prob = tf.placeholder(dtype, name='keep_prob')
        self._nRows = truthed_features.num_rows      # image height
        self._nCols = truthed_features.num_columns   # image width
        nFc0 = 2048     # size of each fully connected layer
        nFc1 = 2048
        nFc2 = 2048
        nConv1 = 32     # feature maps of the first convolution layer
        nConv2 = 64     # feature maps of the second convolution layer
        nTarget = truthed_features.feature_width[0]  # width of the one-hot target

        def pooled(n):
            # Each 2x2, stride-2 'SAME' pool maps n -> ceil(n / 2); two pools
            # are applied.  (int(n / 4) is only right when n % 4 == 0.)
            return (((n + 1) // 2) + 1) // 2

        n_h_pool2_outputs = int(pooled(self._nRows) * pooled(self._nCols) * nConv2)
        n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width

        # ------------------------------------------------------------------
        # Helpers: variable initialisation, convolution and pooling
        # ------------------------------------------------------------------
        def weight_variable(shape, dtype):
            return tf.Variable(tf.truncated_normal(shape, stddev=0.1, dtype=dtype))

        def bias_variable(shape, dtype):
            return tf.Variable(tf.constant(0, shape=shape, dtype=dtype))

        def conv2d(x, W):
            return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

        def max_pool_2x2(x):
            return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')

        # ------------------------------------------------------------------
        # First convolutional layer
        # ------------------------------------------------------------------
        with tf.name_scope("w_conv1"):
            W_conv1 = weight_variable([5, 5, 1, nConv1], dtype)
            b_conv1 = bias_variable([nConv1], dtype)

        with tf.name_scope("reshape_x_image"):
            self._x_image = tf.reshape(
                self._ph.image, [-1, self._nCols, self._nRows, 1])

        tf.summary.image("x_image", self._x_image)

        with tf.name_scope("convolve_1"):
            h_conv1 = tf.nn.relu(conv2d(self._x_image, W_conv1) + b_conv1)

        with tf.name_scope("pool_1"):
            h_pool1 = max_pool_2x2(h_conv1)

        # ------------------------------------------------------------------
        # Second convolutional layer
        # ------------------------------------------------------------------
        with tf.name_scope("convolve_2"):
            W_conv2 = weight_variable([5, 5, nConv1, nConv2], dtype)
            b_conv2 = bias_variable([nConv2], dtype)
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        with tf.name_scope("pool_2"):
            h_pool2 = max_pool_2x2(h_conv2)

        # ------------------------------------------------------------------
        # Densely connected layer 0: flattened pool output + extra features
        # ------------------------------------------------------------------
        with tf.name_scope("W_fc1_b"):
            W_fc0 = weight_variable([n_h_pool2_outputsx, nFc0], dtype)
            b_fc0 = bias_variable([nFc0], dtype)

            h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs])

            # append the features, the 2nd on, that go directly to the
            # fully connected layer; tf.concat takes (values, axis)
            for i in range(2, truthed_features.num_features):
                h_pool2_flat = tf.concat([h_pool2_flat, self._ph[i]], 1)
            h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0)

        # ------------------------------------------------------------------
        # Densely connected layers 1 and 2
        # ------------------------------------------------------------------
        with tf.name_scope("W_fc1_b"):
            W_fc1 = weight_variable([nFc0, nFc1], dtype)
            b_fc1 = bias_variable([nFc1], dtype)

            h_fc1 = tf.nn.relu(tf.matmul(h_fc0, W_fc1) + b_fc1)

        with tf.name_scope("W_fc2_b"):
            W_fc2 = weight_variable([nFc1, nFc2], dtype)
            b_fc2 = bias_variable([nFc2], dtype)

            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

        # ------------------------------------------------------------------
        # Dropout and readout layer
        # ------------------------------------------------------------------
        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h_fc2, self._keep_prob)

        with tf.name_scope("softmax"):
            W_fc3 = weight_variable([nFc2, nTarget], dtype)
            b_fc3 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)

        with tf.name_scope("xent"):
            # 1e-8 added to eliminate the crash of training when taking log of 0
            cross_entropy = -tf.reduce_sum(self._ph[0] * tf.log(y_conv + 1e-8))
            tf.summary.scalar("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            self._train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            self._correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self._ph[0], 1))
            self._prediction = tf.argmax(y_conv, 1)

        self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, dtype))
        tf.summary.scalar("accuracy", self._accuracy)

        # ------------------------------------------------------------------
        # Initialise variables and open the TensorBoard log
        # ------------------------------------------------------------------
        self._sess.run(tf.initialize_all_variables())
        self._merged = tf.summary.merge_all()
        # log directory name: year-month-day-hour-minute
        tm = '-'.join(str(part) for part in
                      datetime.datetime.now().timetuple()[:5])
        self._writer = tf.summary.FileWriter("/tmp/ds_logs/" + tm, self._sess.graph)

        def computeSize(s, tens):
            # print and return the number of elements of one variable
            sumC = 1
            for dim in tens.get_shape():
                sumC *= dim
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC

        print('network size:', flush=True)
        # every trainable variable exactly once, in graph order
        total = sum(computeSize(label, tens) for label, tens in (
            ("W_conv1", W_conv1), ("b_conv1", b_conv1),
            ("W_conv2", W_conv2), ("b_conv2", b_conv2),
            ("W_fc0", W_fc0), ("b_fc0", b_fc0),
            ("W_fc1", W_fc1), ("b_fc1", b_fc1),
            ("W_fc2", W_fc2), ("b_fc2", b_fc2),
            ("W_fc3", W_fc3), ("b_fc3", b_fc3),
        ))
        print('\ttotal\t{}'.format(total), flush=True)

    def reset_graph(self):
        """Close the session and clear the default graph.

        Only necessary when iterating through fonts, i.e. when several
        networks are built one after another in the same process.
        """
        # close first: resetting the default graph while a session is still
        # active is documented as undefined behaviour
        self._sess.close()
        tf.reset_default_graph()
class network(object):
    """Stand-alone TensorFlow v1 classifier: two conv/pool stages, one dense
    layer, dropout and a softmax readout, with its own fit/test/predict loops.

    To see the summaries, run ``tensorboard --logdir '/tmp/ds_logs/'`` and
    open localhost:6006.
    """

    def __init__(self, truthed_features, dtype=np.float32):
        """Build the graph, start a session and open the summary writer.

        Args:
            truthed_features: dataset description; ``feature_names``,
                ``feature_width``, ``num_features``, ``num_rows`` and
                ``num_columns`` are read.  Feature 0 is the target, the
                feature named ``image`` feeds the convolution layers, and
                features from index 2 on are appended to the dense-layer input.
            dtype: element type of the placeholders and variables.
        """
        self._ph = None
        self._keep_prob = None
        self._train_step = None
        self._accuracy = None
        self._prediction = None
        self._merged = None
        self._writer = None
        self._correct_prediction = None

        # One placeholder per dataset feature; the 'y_' / 'x_' prefixes only
        # make the nodes easier to read in the TensorBoard graph.
        placeholders = []
        extra_features_width = 0    # total width of the features that skip the convolutions
        for i, nm in enumerate(truthed_features.feature_names):
            width = truthed_features.feature_width[i]
            if i > 1:
                extra_features_width += width
            placeholders.append(tf.placeholder(
                dtype, shape=[None, width],
                name=('y_' if i == 0 else 'x_') + nm))

        Place_Holders = namedtuple('Place_Holders', truthed_features.feature_names)
        self._ph = Place_Holders(*placeholders)
        self._keep_prob = tf.placeholder(dtype, name='keep_prob')
        self._nRows = truthed_features.num_rows      # image height
        self._nCols = truthed_features.num_columns   # image width
        nFc = 1024      # size of the fully connected layer
        nConv1 = 32     # feature maps of the first convolution layer
        nConv2 = 64     # feature maps of the second convolution layer
        nTarget = truthed_features.feature_width[0]  # width of the one-hot target

        def pooled(n):
            # Each 2x2, stride-2 'SAME' pool maps n -> ceil(n / 2); two pools
            # are applied.  (int(n / 4) is only right when n % 4 == 0.)
            return (((n + 1) // 2) + 1) // 2

        n_h_pool2_outputs = int(pooled(self._nRows) * pooled(self._nCols) * nConv2)
        n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width

        def weight_variable(shape, dtype):
            return tf.Variable(tf.truncated_normal(shape, stddev=0.1, dtype=dtype))

        def bias_variable(shape, dtype):
            return tf.Variable(tf.constant(0, shape=shape, dtype=dtype))

        def conv2d(x, W):
            return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

        def max_pool_2x2(x):
            return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')

        # Image debugging output
        with tf.name_scope("reshape_x_image"):
            self._x_image = tf.reshape(
                self._ph.image, [-1, self._nCols, self._nRows, 1])

        tf.summary.image("x_image", self._x_image)

        # NOTE(review): the layers between the image reshape and the dropout
        # were missing here although h_fc1 and the W_*/b_* variables are used
        # below, so construction always raised NameError.  They are
        # reconstructed from the two-convolution / one-dense-layer network in
        # n1_2cnv1fc.py, whose variable names match the size report below --
        # confirm this is the intended architecture.
        with tf.name_scope("w_conv1"):
            W_conv1 = weight_variable([5, 5, 1, nConv1], dtype)
            b_conv1 = bias_variable([nConv1], dtype)

        with tf.name_scope("convolve_1"):
            h_conv1 = tf.nn.relu(conv2d(self._x_image, W_conv1) + b_conv1)

        with tf.name_scope("pool_1"):
            h_pool1 = max_pool_2x2(h_conv1)

        with tf.name_scope("convolve_2"):
            W_conv2 = weight_variable([5, 5, nConv1, nConv2], dtype)
            b_conv2 = bias_variable([nConv2], dtype)
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        with tf.name_scope("pool_2"):
            h_pool2 = max_pool_2x2(h_conv2)

        with tf.name_scope("W_fc1_b"):
            W_fc1 = weight_variable([n_h_pool2_outputsx, nFc], dtype)
            b_fc1 = bias_variable([nFc], dtype)

            h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs])

            # append the features, the 2nd on, that go directly to the
            # fully connected layer
            for i in range(2, truthed_features.num_features):
                h_pool2_flat = tf.concat([h_pool2_flat, self._ph[i]], 1)
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

        # Dropout
        with tf.name_scope("drop"):
            h_fc1_drop = tf.nn.dropout(h_fc1, self._keep_prob)

        # Readout layer
        with tf.name_scope("softmax"):
            W_fc2 = weight_variable([nFc, nTarget], dtype)
            b_fc2 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        with tf.name_scope("xent"):
            # 1e-8 added to eliminate the crash of training when taking log of 0
            self._cross_entropy = -tf.reduce_sum(
                self._ph[0] * tf.log(y_conv + 1e-8))
            tf.summary.scalar("cross entropy", self._cross_entropy)

        with tf.name_scope("train"):
            self._train_step = tf.train.AdamOptimizer(1e-4).minimize(
                self._cross_entropy)

        with tf.name_scope("test"):
            self._correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self._ph[0], 1))
            self._prediction = tf.argmax(y_conv, 1)

        self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, dtype))
        tf.summary.scalar("accuracy", self._accuracy)

        # Start TensorFlow session
        self._sess = tf.Session()
        self._sess.run(tf.initialize_all_variables())
        self._merged = tf.summary.merge_all()
        # log directory name: year-month-day-hour-minute
        tm = '-'.join(str(part) for part in
                      datetime.datetime.now().timetuple()[:5])
        self._writer = tf.summary.FileWriter("/tmp/ds_logs/" + tm, self._sess.graph)

        def computeSize(s, tens):
            # print and return the number of elements of one variable
            sumC = 1
            for dim in tens.get_shape():
                sumC *= dim
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC

        print('network size:', flush=True)
        total = sum(computeSize(label, tens) for label, tens in (
            ("W_fc1", W_fc1), ("b_fc1", b_fc1),
            ("W_conv1", W_conv1), ("b_conv1", b_conv1),
            ("W_conv2", W_conv2), ("b_conv2", b_conv2),
            ("W_fc2", W_fc2), ("b_fc2", b_fc2),
        ))
        print('\ttotal\t{}'.format(total), flush=True)

    def __enter__(self):
        """Allow ``with network(...) as net:`` so that ``__exit__`` is usable."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the session and clear the default graph; exceptions propagate."""
        # close first: resetting the default graph while a session is still
        # active is documented as undefined behaviour
        self._sess.close()
        tf.reset_default_graph()   # only necessary when iterating through fonts

    def fit(self, truthed_data, nEpochs=5000):
        """Train on mini-batches of 100 samples.

        Every 100th step the accuracy on the current batch is measured with
        dropout disabled, written to the summary writer and printed.
        Training stops early once ten such measurements in a row are
        (numerically) perfect.

        Args:
            truthed_data: object providing ``next_batch(n)`` and
                ``num_features``; the batch list must be ordered like the
                placeholders in ``self._ph``.
            nEpochs: maximum number of training steps.
        """
        perfect_count = 10
        for i in range(nEpochs):
            batch = truthed_data.next_batch(100)
            # the batch list is returned in the same order as the placeholders
            feed = {self._ph[j]: batch[j]
                    for j in range(truthed_data.num_features)}

            if i % 100 == 0:
                # evaluate with a copy so that disabling dropout here cannot
                # leak into the training step below
                eval_feed = dict(feed)
                eval_feed[self._keep_prob] = 1.0
                summary_str, train_accuracy = self._sess.run(
                    [self._merged, self._accuracy], feed_dict=eval_feed)
                self._writer.add_summary(summary_str, i)
                if train_accuracy <= (1.0 - 1e-5):
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break
                print("step %d, training accuracy %g" % (i, train_accuracy),
                      flush=True)

            feed[self._keep_prob] = 0.5
            # run the training op itself (not its bound ``run`` method)
            self._sess.run(self._train_step, feed_dict=feed)

    def test(self, truthed_features, title=''):
        """Evaluate all features in one pass and show the misclassified images.

        Args:
            truthed_features: object providing ``features`` and
                ``num_features``.
            title: optional label (e.g. a font name) for the printed line and
                the montage title; when empty the untitled report is produced.
        """
        feed = {self._keep_prob: 1.0}
        for j in range(truthed_features.num_features):
            feed[self._ph[j]] = truthed_features.features[j]

        accuracy, images, correct = self._sess.run(
            [self._accuracy, self._x_image, self._correct_prediction],
            feed_dict=feed)
        # keep channel 0 of every image whose prediction was wrong
        error_images = np.append(
            np.empty((0, self._nRows, self._nCols)),
            images[:, :, :, 0][np.logical_not(correct)], axis=0)

        if title:
            print("test accuracy {} for font: {}".format(accuracy, title),
                  flush=True)
            ocr_utils.montage(
                error_images,
                title='TensorFlow {} Error Images'.format(title))
        else:
            print("test accuracy {}".format(accuracy), flush=True)
            ocr_utils.montage(error_images, title='TensorFlow Error Images')

    def predict(self, truthed_features):
        """Return the predicted class index for every sample.

        Only features from index 1 on are fed; feature 0 (the target) is not
        needed to compute ``self._prediction``.

        Args:
            truthed_features: object providing ``features`` and
                ``num_features``.
        """
        feed = {self._keep_prob: 1.0}
        for j in range(1, truthed_features.num_features):
            feed[self._ph[j]] = truthed_features.features[j]
        result = self._sess.run([self._prediction], feed_dict=feed)
        return result[0]
def __init__(self, truthed_features, dtype=np.float32): + self._sess = tf.InteractiveSession() + + lst = [] + extra_features_width = 0 # width of extra features + + """# ============================================================================== + + Placeholders + + Compute the size of various layers + + Create a tensorflow Placeholder for each feature of data returned from the + dataset + + """# ============================================================================== + + for i,nm in enumerate(truthed_features.feature_names): + + # features[0], is always the target. For instance it may be m_label_one_hot + # the second features[1] is the 'image' that is passed to the convolution layers + # Any additional features bypass the convolution layers and go directly + # into the fully connected layer. + + # The width of the extra features is calculated in order to allocate + # the correct widths of weights, # and inputs + # names are assigned to make the look pretty on the tensorboard graph. + + if i == 0: + nm = 'y_'+nm + else: + nm = 'x_'+nm + if i>1: + extra_features_width += truthed_features.feature_width[i] + lst.append(tf.placeholder(dtype, shape=[None, truthed_features.feature_width[i]], name=nm)) + + # ph is a named tuple with key names like 'image', 'm_label', and values that + # are tensors. The display name on the Chrome graph are 'y_m_label', 'x_image, + # x_upper_case etc. 
+ + + Place_Holders = namedtuple('Place_Holders', truthed_features.feature_names) + self._ph = Place_Holders(*lst) # unpack placeholders into named Tuple + self._keep_prob = tf.placeholder(dtype,name='keep_prob') + self._nRows = truthed_features.num_rows #image height + self._nCols = truthed_features.num_columns #image width + nFc0 = 2048 # size of fully connected layer + nFc1 = 100 # size of fully connected layer + nFc2 = self._nRows*self._nCols # size of fully connected layer + nConv1 = 32 # size of first convolution layer + nConv2 = 64 # size of second convolution layer + nTarget = truthed_features.feature_width[0] # the number of one_hot features in the target, 'm_label' + n_h_pool2_outputs = int(self._nRows/4) * int(self._nCols/4) * nConv2 # second pooling layer + n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width # fully connected + + """# ============================================================================== + + Build a Multilayer Convolutional Network + + Weight Initialization + + """# ============================================================================== + + def weight_variable(shape, dtype): + initial = tf.truncated_normal(shape, stddev=0.1,dtype=dtype) + return tf.Variable(initial) + + def bias_variable(shape, dtype): + initial = tf.constant(0, shape=shape, dtype=dtype) + return tf.Variable(initial) + + """# ============================================================================== + + Convolution and Pooling + + keep our code cleaner, let's also abstract those operations into functions. 
+ + """# ============================================================================== + + def conv2d(x, W): + return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + + def max_pool_2x2(x): + return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], padding='SAME') + + """# ============================================================================== + + First Convolutional Layer + + """# ============================================================================== + with tf.name_scope("w_conv1") as scope: + W_conv1 = weight_variable([5, 5, 1, nConv1],dtype) + b_conv1 = bias_variable([nConv1],dtype) + + with tf.name_scope("reshape_x_image") as scope: + self._x_image = tf.reshape(self._ph.image, [-1,self._nCols,self._nRows,1]) + + image_summ = tf.summary.image("x_image", self._x_image) + + """# ============================================================================== + + We then convolve x_image with the weight tensor, add the bias, apply the ReLU function, + and finally max pool. + + """# ============================================================================== + + with tf.name_scope("convolve_1") as scope: + h_conv1 = tf.nn.relu(conv2d(self._x_image, W_conv1) + b_conv1) + + with tf.name_scope("pool_1") as scope: + h_pool1 = max_pool_2x2(h_conv1) + + """# ============================================================================== + + Second Convolutional Layer + + In order to build a deep network, we stack several layers of this type. The second + layer will have 64 features for each 5x5 patch. 
+ + """# ============================================================================== + + with tf.name_scope("convolve_2") as scope: + W_conv2 = weight_variable([5, 5, nConv1, nConv2],dtype) + b_conv2 = bias_variable([64],dtype) + h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) + + with tf.name_scope("pool_2") as scope: + h_pool2 = max_pool_2x2(h_conv2) + + """# ============================================================================== + + Densely Connected Layer + + Now that the image size has been reduced to 7x7, we add a fully-connected layer + with neurons to allow processing on the entire image. We reshape the tensor + from the pooling layer into a batch of vectors, multiply by a weight matrix, add + a bias, and apply a ReLU. + + """# ============================================================================== + + with tf.name_scope("W_fc1_b") as scope: + W_fc0 = weight_variable([n_h_pool2_outputsx, nFc0],dtype) + b_fc0 = bias_variable([nFc0],dtype) + + h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs]) + + # append the features, the 2nd on, that go directly to the fully connected layer + for i in range(2,truthed_features.num_features ): + h_pool2_flat = tf.concat(1, [h_pool2_flat, self._ph[i]]) + h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0) + + """# ============================================================================== + + Densely Connected Layer 1 + + We add a fully-connected layer + with neurons to allow processing on the entire image. We reshape the tensor + from the pooling layer into a batch of vectors, multiply by a weight matrix, add + a bias, and apply a ReLU. 
+ + """# ============================================================================== + + with tf.name_scope("W_fc1_b") as scope: + W_fc1 = weight_variable([nFc0, nFc1],dtype) + b_fc1 = bias_variable([nFc1],dtype) + + h_fc1 = tf.nn.relu(tf.matmul(h_fc0, W_fc1) + b_fc1) + + """# ============================================================================== + + Densely Connected Layer 2 + + We add a fully-connected layer + with neurons to allow processing on the entire image. We reshape the tensor + from the pooling layer into a batch of vectors, multiply by a weight matrix, add + a bias, and apply a ReLU. + + """# ============================================================================== + + with tf.name_scope("W_fc2_b") as scope: + W_fc2 = weight_variable([nFc1, nFc2],dtype) + b_fc2 = bias_variable([nFc2],dtype) + + h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2) + + """# ============================================================================== + + Dropout + + """# ============================================================================== + + + with tf.name_scope("drop") as scope: + h_fc2_drop = tf.nn.dropout(h_fc2, self._keep_prob) + + """# ============================================================================== + + Readout Layer + + """# ============================================================================== + with tf.name_scope("softmax") as scope: + W_fc3 = weight_variable([nFc2, nTarget],dtype) + b_fc3 = bias_variable([nTarget],dtype) + y_conv=tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3) + + with tf.name_scope("xent") as scope: + + # 1e-8 added to eliminate the crash of training when taking log of 0 + self._cross_entropy = -tf.reduce_sum(self._ph[0]*tf.log(y_conv+ 1e-8 )) + #cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( + # logits, labels, name='xentropy') + ce_summ = tf.summary.scalar("cross entropy", self._cross_entropy) + + with tf.name_scope("reshape_x_image2") as scope: + self._x_image2 = 
tf.reshape(self._ph[0], [-1,int(self._nCols/2),int(self._nRows/2),1]) + + image_summ2 = tf.summary.image("x_image2", self._x_image2) + + with tf.name_scope("train") as scope: + self._train_step = tf.train.AdamOptimizer(1e-4).minimize(self._cross_entropy) + #self._train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) + + with tf.name_scope("test") as scope: + self._correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(self._ph[0],1)) + self._prediction = tf.argmax(y_conv,1) + + self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, dtype)) + accuracy_summary = tf.summary.scalar("accuracy", self._accuracy) + """# ============================================================================== + + Start TensorFlow Session + + """# ============================================================================== + + self._sess.run(tf.initialize_all_variables()) + self._merged = tf.summary.merge_all() + tm = "" + tp = datetime.datetime.now().timetuple() + for i in range(4): + tm += str(tp[i])+'-' + tm += str(tp[4]) + self._writer = tf.summary.FileWriter("/tmp/ds_logs/"+ tm, self._sess.graph) + + def computeSize(s,tens): + sumC = 1 + tShape = tens.get_shape() + nDims = len(tShape) + for i in range(nDims): + sumC *= tShape[i] + print ('\t{}\t{}'.format(s,sumC),flush=True) + return sumC + + print ('network size:',flush=True) + total = computeSize("W_fc0",W_fc0)+ \ + computeSize ("b_fc0",b_fc0) + \ + computeSize ("W_conv1",W_conv1) + \ + computeSize ("b_conv1",b_conv1) + \ + computeSize ("W_conv2",W_conv2) + \ + computeSize ("b_conv2",b_conv2) + \ + computeSize ("W_fc0",W_fc0) + \ + computeSize ("b_fc0",b_fc0) + \ + computeSize ("W_fc1",W_fc1) + \ + computeSize ("b_fc1",b_fc1) + \ + computeSize ("W_fc2",W_fc2) + \ + computeSize ("b_fc2",b_fc2) + + print('\ttotal\t{}'.format(total),flush=True) + + def reset_graph(self): + tf.reset_default_graph() # only necessary when iterating through fonts + self._sess.close() + + def test2(self, 
truthed_data, title = ''): + + # assign feature data to each placeholder + + output_images = np.empty((0,int(self._nRows/2),int(self._nCols/2))) + input_images = np.empty((0,int(self._nRows),int(self._nCols))) + test_accuracy=0 + m=0 + + for i in range(int(len(truthed_data.features[0])/100)): + + batch = truthed_data.next_batch(100) + # assign feature data to each placeholder + # the batch list is returned in the same order as the features requested + feed = {self._keep_prob: 1.0} + for j in range(truthed_data.num_features): + feed[self._ph[j]] = batch[j] + + + result = self._sess.run([self._accuracy, self._x_image, self._correct_prediction, self._x_image2], feed_dict=feed) + + test_accuracy += result[0] + input_images = np.append(input_images, result[1][:,:,:,0],axis=0) + output_images = np.append(output_images, result[3][:,:,:,0],axis=0) + m += 1 + try: + print ("test accuracy {} for : {}".format(test_accuracy/m, title),flush=True) + ocr_utils.montage(input_images,title='TensorFlow {} Input Images'.format(title)) + ocr_utils.montage(output_images,title='TensorFlow {} Output Images'.format(title)) + except: + if m==0: + print ("test accuracy 1",flush=True) + else: + print ("test accuracy {}".format(test_accuracy/m),flush=True) + ocr_utils.montage(output_images,title='TensorFlow Output Images') + ocr_utils.montage(input_images,title='TensorFlow Input Images') + + def fit_entropy(self, truthed_data, nEpochs=5000): + + perfect_count=10 + for i in range(nEpochs): + + batch = truthed_data.next_batch(100) + # assign feature data to each placeholder + # the batch list is returned in the same order as the features requested + feed = {self._keep_prob: 0.5} + for j in range(truthed_data.num_features): + feed[self._ph[j]] = batch[j] + + if i%100 == 0: + + feed[self._keep_prob] = 1.0 + result = self._sess.run([self._merged, self._cross_entropy ], feed_dict=feed) + summary_str = result[0] + + self._writer.add_summary(summary_str, i) + train_entropy = result[1] + if 
train_entropy >= (2000 ): + perfect_count=10; + else: + perfect_count -= 1 + if perfect_count==0: + break; + + print ("step %d, training entropy %g"%(i, train_entropy),flush=True) + self._sess.run(self._train_step,feed_dict=feed) + + + + \ No newline at end of file diff --git a/n1_residual3x4.py b/n1_residual3x4.py new file mode 100644 index 0000000..3bf1cc6 --- /dev/null +++ b/n1_residual3x4.py @@ -0,0 +1,230 @@ +''' + +takes an image input and trains it to make an image output + +funnels down to a 'key' and then goes back up to image + + + +''' +from tensorflow.compat import v1 as tf +import numpy as np +from collections import namedtuple +import datetime +import ocr_utils +from n0_network import base_network as b_network + +class network(b_network): + ''' definition of the network + ''' + def __init__(self, truthed_features, dtype=np.float32): + self._sess = tf.InteractiveSession() + + lst = [] + extra_features_width = 0 # width of extra features + + """# ============================================================================== + + Placeholders + + Compute the size of various layers + + Create a tensorflow Placeholder for each feature of data returned from the + dataset + + """# ============================================================================== + + for i,nm in enumerate(truthed_features.feature_names): + + # features[0], is always the target. For instance it may be m_label_one_hot + # the second features[1] is the 'image' that is passed to the convolution layers + # Any additional features bypass the convolution layers and go directly + # into the fully connected layer. + + # The width of the extra features is calculated in order to allocate + # the correct widths of weights, # and inputs + # names are assigned to make the look pretty on the tensorboard graph. 
+ + if i == 0: + nm = 'y_'+nm + else: + nm = 'x_'+nm + if i>1: + extra_features_width += truthed_features.feature_width[i] + lst.append(tf.placeholder(dtype, shape=[None, truthed_features.feature_width[i]], name=nm)) + + # ph is a named tuple with key names like 'image', 'm_label', and values that + # are tensors. The display name on the Chrome graph are 'y_m_label', 'self._x_image, + # x_upper_case etc. + + + Place_Holders = namedtuple('Place_Holders', truthed_features.feature_names) + self._ph = Place_Holders(*lst) # unpack placeholders into named Tuple + self._keep_prob = tf.placeholder(dtype,name='keep_prob') + self._nRows = truthed_features.num_rows #image height + self._nCols = truthed_features.num_columns #image width + nSections = 10 + + in_out_width = self._nRows*self._nCols + internal_width = int(in_out_width/4) + w = list(range(nSections*3)) + b = list(range(nSections*3)) + h = list(range(nSections*3+1)) + nFc1 = 2048 # size of fully connected layer + + nTarget = truthed_features.feature_width[0] # the number of one_hot features in the target, 'm_label' + + """# ============================================================================== + + Build a Multilayer Convolutional Network + + Weight Initialization + + """# ============================================================================== + + def weight_variable(shape, dtype): + initial = tf.truncated_normal(shape, stddev=0.1,dtype=dtype) + return tf.Variable(initial) + + def bias_variable(shape, dtype): + initial = tf.constant(0, shape=shape, dtype=dtype) + return tf.Variable(initial) + + def shapeOuts(n): + print ('n={}, hin={},w={}, b={} ,hout={}\n'.format(n, h[n].shape, w[n].shape, b[n].shape, h[n+1]._shape)) + + def section(n): + with tf.name_scope('section_'+str(n)+'_0') as scope: + w[n]=weight_variable([in_out_width, internal_width],dtype) + b[n]=bias_variable([internal_width],dtype) + h[n+1] = tf.nn.relu(tf.matmul(h[n], w[n]) + b[n]) + shapeOuts(n) + + with 
tf.name_scope('section_'+str(n)+'_1') as scope: + w[n+1]=weight_variable([internal_width, internal_width],dtype) + b[n+1]=bias_variable([internal_width],dtype) + + h[n+2]=tf.nn.relu(tf.matmul(h[n+1], w[n+1]) + b[n+1]) + shapeOuts(n+1) + + with tf.name_scope('section_'+str(n)+'_2') as scope: + w[n+2]=weight_variable([internal_width, in_out_width],dtype) + b[n+2]=bias_variable([in_out_width],dtype) + z= tf.nn.relu(tf.matmul(h[n+2], w[n+2]) + b[n+2]) + h[n+3]= tf.add(z ,h[n]) #n+3 + + print('z shape ={}'.format(z._shape)) + shapeOuts(n+2) + return + + def computeSize(s,tens): + sumC = 1 + tShape = tens.get_shape() + nDims = len(tShape) + for i in range(nDims): + sumC *= tShape[i] + print ('\t{}\t{}'.format(s,sumC),flush=True) + return sumC + + """# ============================================================================== + Build sectional network + + """# ============================================================================== + h[0]= self._ph[1] + for i in range(nSections): + section(3*i) + + """# ============================================================================== + Dropout + + """# ============================================================================== + self._keep_prob = tf.placeholder(dtype,name='keep_prob') + + with tf.name_scope("drop") as scope: + h_fc2_drop = tf.nn.dropout(h[nSections*3], self._keep_prob) + + """# ============================================================================== + + Readout Layer + + """# ============================================================================== + with tf.name_scope("softmax") as scope: + w_fc3 = weight_variable([in_out_width, nTarget],dtype) + b_fc3 = bias_variable([nTarget],dtype) + y_conv=tf.nn.softmax(tf.matmul(h_fc2_drop, w_fc3) + b_fc3) + + print ('network size:',flush=True) + total = 0 + for i in range(nSections*3): + total = total + computeSize("w{}".format(i),w[i]) + total = total + computeSize ("b_fc3",b_fc3) + \ + computeSize ("w_fc3",w_fc3) + + 
print('\ttotal\t{}'.format(total),flush=True) + + + with tf.name_scope("reshape_self._x_image") as scope: + self._x_image = tf.reshape(self._ph.image, [-1,self._nCols,self._nRows,1]) + + with tf.name_scope("xent") as scope: + # 1e-8 added to eliminate the crash of training when taking log of 0 + cross_entropy = -tf.reduce_sum(self._ph[0]*tf.log(y_conv+1e-8)) + ce_summ = tf.summary.scalar("cross entropy", cross_entropy) + + with tf.name_scope("train") as scope: + self._train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) + + with tf.name_scope("test") as scope: + self._correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(self._ph[0],1)) + self._prediction = tf.argmax(y_conv,1) + + self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, dtype)) + accuracy_summary = tf.summary.scalar("accuracy", self._accuracy) + + """# ============================================================================== + + Start TensorFlow Interactive Session + + """# ============================================================================== + + self._sess.run(tf.initialize_all_variables()) + self._merged = tf.summary.merge_all() + tm = "" + tp = datetime.datetime.now().timetuple() + for i in range(4): + tm += str(tp[i])+'-' + tm += str(tp[4]) + + # To see the results in Chrome, + # Run the following in terminal to activate server. 
+ # tensorboard --logdir '/tmp/ds_logs/' + # See results on localhost:6006 + + self._writer = tf.summary.FileWriter("/tmp/ds_logs/"+ tm, self._sess.graph) + + def computeSize(s,tens): + sumC = 1 + tShape = tens.get_shape() + nDims = len(tShape) + for i in range(nDims): + sumC *= tShape[i].value + print ('\t{}\t{}'.format(s,sumC),flush=True) + return sumC + + + + def __exit__(self, exc_type, exc_value, traceback): + tf.reset_default_graph() # only necessary when iterating through fonts + self._sess.close() + + + def reset_graph(self): + tf.reset_default_graph() # only necessary when iterating through fonts + self._sess.close() + +# +# def encode(self): +# +# return key +# +# def decode(self, key): \ No newline at end of file diff --git a/o1_top_secret_cnn.py b/o1_top_secret_cnn.py new file mode 100644 index 0000000..afe5e5a --- /dev/null +++ b/o1_top_secret_cnn.py @@ -0,0 +1,229 @@ +#!/usr/bin/python + + +"""# ========================================================================== + +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +encode a secret message in the angle of rotation of characters + +Train a neural network on rotated versions of characters with the output of +the network being the angle of rotation. + +Thus, given a rotated character, the neural network will yield a value +that is the amount of rotation of the character. 
+ +Encode a test set by applying a secret message with one bit for each character. +Decode the secret message by running the rotated characters through the +neural network, yielding the pattern of bits. + + +@author: richard lyman + +"""# ============================================================================== +import ocr_utils + + +import numpy as np +from PIL import Image, ImageDraw +import io +#import n1_2cnv1fc as nnetwork +#import n1_residual3x4 as nnetwork +import n1_2cnv2fc as nnetwork +import skimage.transform as af +from bitarray import bitarray + + +input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))} +output_feature_list = ['orientation_one_hot','image'] +dtype = np.float32 + +skewRange = np.linspace(-0.2,0.2,2) + +''' + pick up the base character + + make a training set by rotating them through n angles + + train + + pick up the base characters + encode the secret message n bits at a time into the characters + this is the testing set + + test secret message yielding a vector of rotations + + convert the rotation back into bits + + assemble the bits into the secret message. 
+ ''' + + +# pick up the base characters from training_image_file +# produce some sheared versions +# make into a training set +# place in a ocr_utils TruthedCharacters class so we can use the +# one hot and batch functions + +character_size = 100 +white_space=8 + +image_file= '15-01-01 459_Mont_Lyman' +image_file_jpg = image_file+'.jpg' + +df,t1 = ocr_utils.file_to_df(image_file,character_size,title='Characters to Train',white_space=white_space) + +shp = t1.shape +totalN = len(skewRange)*shp[0] + +images=[] +originalH=[] +originalW=[] +tops=[] +lefts=[] +orientation=[] +recognized_label =[] + + + +for j in range(shp[0]): + for i,skew in enumerate(skewRange): + k = i+j*len(skewRange) + + images.append(ocr_utils.shear(t1[j],skew)) + originalH.append(df['originalH'][j]) + tops.append(df['m_top'][j]) + originalW.append(df['originalW'][j]) + lefts.append(df['m_left'][j]) + + orientation.append(skew) + recognized_label.append( df['m_label'][j]) +images=np.array(images) +ocr_utils.montage(images, title='Base Characters Skewed') + +images = np.reshape(images,(images.shape[0],images.shape[1]*images.shape[2])) +df = ocr_utils.make_df(images, character_size, character_size, originalH, originalW, tops, lefts, orientation, recognized_label ) +#df = ocr_utils.make_df(images, character_size, character_size, bottoms, rights, tops, lefts, orientation, recognized_label ) + + +# input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))} +input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))} +output_feature_list = ['orientation_one_hot','image'] +ds = ocr_utils.read_df(df,input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = 0, + engine_type='tensorflow', + dtype=dtype) + +nn = nnetwork.network(ds.train) +"""# ============================================================================== + +Train and Evaluate the Model + +"""# ============================================================================== + 
+nn.fit( ds.train , nEpochs=5000) + +####################################################################################### + +# now that the font is trained, pick up some text and encode a message +image_file= '15-01-01 459_Mont_Lyman' +image_file_jpg = image_file+'.jpg' +df,t1 = ocr_utils.file_to_df(image_file,character_size, title = 'unencrypted file',white_space=white_space) + + +secret_message = "top secret" +a = bitarray() +a.frombytes(secret_message.encode('utf_8')) + +index = 0 +encoded_skews=[] +def convert_to_shear(a): + index = 0 + while True: + if index < len(a): + bits = a[index:index+1].to01() + index += 1 + #c = int(bits,2) + c = int(bits) + yield c + else: + yield -1 + +gen= convert_to_shear(a) + +im = Image.open(image_file_jpg) +img2 = Image.new('L',(im.height,im.width),color=255) +img3 = Image.new('L',(im.height,im.width),color=255) +draw = ImageDraw.Draw(img3) +for i in range(t1.shape[0]): + left = int(df['m_left'][i]) + right = left + int(df['originalW'][i]) + top = int(df['m_top'][i]) + bottom = top + int(df['originalH'][i]) + skew_index = next(gen) + #print ('i={}, skew_index={}, left={}, top={}, right={}, bottom={}'.format(i,skew_index, left,top,right,bottom)) + encoded_skews.append(skew_index) + if skew_index >= 0: + t1[i] = ocr_utils.shear(t1[i], skewRange[skew_index]) + im_clip = Image.fromarray(256.0-t1[i]*256.0) + img2.paste(im_clip, box= (left , top)) + img3.paste(im_clip, box= (left , top)) + + + draw.rectangle((left,top,right+2*white_space,bottom+2*white_space), outline=0) + +gen.close() + +###########################################################################vvvvvvv +image_file= '/tmp/plots/01_encrypted_file' +image_file_jpg = image_file+'.jpg' +img2.save(image_file_jpg) + + +image_file3= '/tmp/plots/01_03_encrypted_file_with_box' +image_file3_jpg = image_file3+'.jpg' +img3.save(image_file3_jpg) + +''' test the new encrptyed file +''' +df,t1 = ocr_utils.file_to_df(image_file,character_size, title = 'Encrypted 
File',white_space=white_space) + +ds = ocr_utils.read_df(df,input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = 1, + engine_type='tensorflow', + dtype=dtype) + +results = nn.predict(ds.test) +correct_characters=[] +incorrect_characters=[] +for i,x in enumerate(df['m_label']): + try: + print('index={}, original character={}, result= {}, skew={}'.format(i, chr(int(x)),results[i], encoded_skews[i]) ) + if encoded_skews[i] >=0: + if results[i] == encoded_skews[i]: + correct_characters.append(chr(int(x))) + else: + incorrect_characters.append(chr(int(x))) + except: + print ('index out of bounds={}'.format(i)) +print ('correct characters={}'.format(correct_characters)) +print ('incorrect characters={}'.format(incorrect_characters)) + +print ('\n########################### No Errors ####################################') + diff --git a/o2_top_secret_lda-tesseract.py b/o2_top_secret_lda-tesseract.py new file mode 100644 index 0000000..b7dcc36 --- /dev/null +++ b/o2_top_secret_lda-tesseract.py @@ -0,0 +1,266 @@ +''' + +Created on Oct, 2016 +T + +@author: richard +''' +import ocr_utils +import numpy as np +from PIL import Image, ImageDraw +import io +from sklearn.linear_model import LogisticRegression +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA +from sklearn.metrics import accuracy_score +from ruamel_yaml.compat import utf8 + +inputs = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghiklnopqrstuvwxyz' +inputs_list = list(ord(x) for x in inputs) +input_filters_dict = {'m_label': inputs_list} +# input_filters_dict={} + +output_feature_list = ['orientation','image'] +dtype = np.float32 + +#if -0.3 whitespace 8 is not enough +#if 0-.2 then whitespace 6 is just enough +character_size = 100 +white_space=6 +skewRange = np.linspace(-0.1,0.1,4) + +''' + pick up the base character via tesseract + + make a training set by shearing them + + save images + + retrieve and unbox with tesseract + + train + + pick up 
the base characters + encode the secret message n bits at a time into the characters + this is the testing set + + test secret message yielding a vector of rotations + + convert the rotation back into bits + + assemble the bits into the secret message. + ''' + + +# pick up the base characters from training_image_file +# produce some skeared versions +# make into a training set +# place in a ocr_utils TruthedCharacters class so we can use the +# one hot and batch functions + + +def encode_and_save_file(input_base, output_base, character_size, white_space, secret_message=''): + input_image_file_jpg = input_base+ocr_utils.extension + ouput_encoded_file = output_base +ocr_utils.extension + output_box_file = output_base +'_box' +ocr_utils.extension + print ('input_base = {}'.format(input_base)) + print ('input_image_file_jpg = {}'.format(input_image_file_jpg)) + print ('ouput_encoded_file = {}'.format(ouput_encoded_file)) + print ('output_box_file = {}'.format(output_box_file)) + + df,t1 = ocr_utils.file_to_df(input_base, character_size, title = 'unencrypted file', white_space=white_space, input_filters_dict=input_filters_dict) + + from bitarray import bitarray + a = bitarray() + a.frombytes(secret_message.encode('utf-8') ) + index = 0 + + def convert_to_shear(a): + index = 0 + while True: + if index < len(a)-1: + bits = a[index:index+2].to01() + index += 2 + c = int(bits,2) + #c = int(bits) + yield c + else: + yield -1 + + def draw_encoded_images(skews_indices, offset=0): + + for i in range(len(t1)): + left = right = top = bottom = 0 + try: + left = int((df['m_left']).iloc[i]) + right = left + int((df['originalW']).iloc[i]) + top = int((df['m_top']).iloc[i]) +offset + bottom = top + int((df['originalH']).iloc[i]) + skew_index = skews_indices[i] + #print ('i={}, skew_index={}, left={}, top={}, right={}, bottom={}'.format(i,skew_index, left,top,right,bottom)) + + if skew_index >= 0: + z = ocr_utils.shear(t1[i], skewRange[skew_index]) + else: + z=t1[i] + im_clip = 
Image.fromarray(256.0-z*256.0) + img2.paste(im_clip, box= (left , top)) + img3.paste(im_clip, box= (left , top)) + draw.rectangle((left,top,right+2*white_space,bottom+2*white_space), outline=0) + except: + print (left,right,top,bottom,df.columns) + return bottom + + im = Image.open(input_image_file_jpg) + + + bottom = 0 + if len(secret_message)==0: + img2 = Image.new('L',(im.width,im.height*3),color=255) + img3 = Image.new('L',(im.width,im.height*3),color=255) + draw = ImageDraw.Draw(img3) + for skew_index in range(len(skewRange)): + skew_indices = [] + for i in range(len(t1)): + skew_indices.append(skew_index) + bottom = draw_encoded_images(skew_indices, offset=bottom+16) + else: + img2 = Image.new('L',(im.width,im.height),color=255) + img3 = Image.new('L',(im.width,im.height),color=255) + draw = ImageDraw.Draw(img3) + gen= convert_to_shear(a) + skew_indices = [] + for i in range(len(t1)): + skew_indices.append(next(gen)) + draw_encoded_images(skew_indices, offset=0) + gen.close() + + img2.save(ouput_encoded_file) + img3.save(output_box_file) + + return output_base,skew_indices + + +###################################################################################### +# us the original document as the source of characters to shear and train +###################################################################################### + +base_file= '15-01-01 459_Mont_Lyman' +next_base = '/tmp/plots/'+base_file+'_training' + +# shear the characters +base_file,skew_indices = encode_and_save_file(base_file, next_base , character_size, white_space) + +# use tesseract to make the boxes around each skewed character. 
+df,t1 = ocr_utils.file_to_df(base_file, character_size,title='Characters to Train',white_space=white_space,input_filters_dict=input_filters_dict) + +ds = ocr_utils.read_df(df,input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = 0, + engine_type='tensorflow', + dtype=dtype) + + + +X_train = ds.train.features[1] +# the characters were written once for each entry in skewrange +# fill in the y_train with the skew_index +y_train = np.zeros(len(X_train), dtype=np.int32) +for i in range(len(X_train)): + y_train[i] = i / (len(X_train)/len(skewRange)) + +print (y_train) +print (y_train.shape) +print (X_train.shape) + +###################################################################################### +# train the characters. The resultant logistic regression is the key to decoded +###################################################################################### + +n_components = 2 +lda = LDA(n_components=n_components) + +X_train_lda = lda.fit_transform(X_train, y_train) + +print('\nLDA components = {}'.format(lda.n_components)) +lr = LogisticRegression() +logistic_fitted = lr.fit(X_train_lda, y_train) + +y_train_pred = logistic_fitted.predict(X_train_lda) + +print('\nLDA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_train, y_train_pred),lda.n_components,lr.coef_.shape)) +# print('LDA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred),lda.n_components,lr.coef_.shape)) + +X_errors_image = X_train[y_train!=y_train_pred] + +X_errors2D=np.reshape(X_errors_image, (X_errors_image.shape[0], character_size, character_size)) +ocr_utils.montage(X_errors2D,title='LDA Error Images, components={}'.format (n_components)) + +# X_combined = np.vstack((X_train_lda, X_test_lda)) + # y_combined = np.hstack((y_train, y_test)) +if X_train_lda.shape[1] > 1: + ocr_utils.plot_decision_regions( + X=X_train_lda, + y=y_train, + classifier=lr, + labels = 
['LDA1','LDA2'] , + title='logistic_regression after 2 component LDA') + +###################################################################################### +# now that the font is trained, pick up some text and encode a message +###################################################################################### + +base_file = '15-01-01 459_Mont_Lyman' +output_base = '/tmp/plots/15-01-01 459_Mont_Lyman_encrypted' +base_file,skew_indices = encode_and_save_file(base_file, output_base, character_size, white_space, secret_message='your first born is mine') +print ('base file to decode = {}'.format(base_file)) + + +df,t1 = ocr_utils.file_to_df(base_file, character_size, title = 'Encrypted File',white_space=white_space,input_filters_dict=input_filters_dict) + +ds = ocr_utils.read_df(df,input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = 0, + engine_type='tensorflow', + dtype=dtype) + +print ('document length in chars={}'.format(len(t1))) +X_train = ds.train.features[1] +X_train_lda = lda.transform(X_train) +results = logistic_fitted.predict(X_train_lda) +correct_characters=[] +incorrect_characters=[] +error_characters=[] +decoded_message = '' +dc = 0 + +for i,x in enumerate(df['m_label']): + try: + + if skew_indices[i] >=0: + dc = dc * 4 + skew_indices[i] + if (i+1) % 4 == 0: + decoded_message = decoded_message + chr(dc) + dc = 0 + + + print('index={}, original character={}, result= {}, skew={}'.format(i, chr(int(x)),results[i], skew_indices[i]) ) + if results[i] == skew_indices[i]: + correct_characters.append(chr(int(x))) + else: + incorrect_characters.append(chr(int(x))) + error_characters.append(X_train[i]) + except: + print ('.',end='') +error_characters = np.array(error_characters) + +error_characters=np.reshape(error_characters, (error_characters.shape[0], character_size, character_size)) +ocr_utils.montage(error_characters,title='LDA Encrption Errors, components={}'.format (n_components)) +print ('\ncorrect 
characters={}'.format(correct_characters)) +print ('incorrect characters={}'.format(incorrect_characters)) +print ("decoded message={}".format(decoded_message)) + +###################################################################################### +# decode the message +###################################################################################### + +print ('\n########################### No Errors ####################################') diff --git a/o3_top_secret_python_box.py b/o3_top_secret_python_box.py new file mode 100644 index 0000000..c330dd1 --- /dev/null +++ b/o3_top_secret_python_box.py @@ -0,0 +1,267 @@ +''' +Created on Oct 23, 2016 + +Created on Jul 12, 2016 +This program shows how Principal Component Analysis removes affine +transformation distortions. + +Parallel lines in an image remain parallel after an affine transformation. +For instance, if an image is rotated or sheared, lines remain parallel. + +PCA and LDA can remove affine transformations. This is shown by making 3 shapes +and then making a number of shear versions of the shapes. Running +Principal Component Analysis reduces the number of features necessary to +recognize the features during Logistic Regression with 100% accuracy, +down to 2 from 400 (20 columns by 20 rows). + +We make three images and then make about 80 copies of each image created by +shearing the original image. + +Since there is very little noise introduced by the shearing, almost all of +the explained variance is due to the shearing. PCA finds eigenvectors +that line up with shearing. + +1) For a couple of shapes, make sheared version. +2) train and print accuracies without PCA +3) repeat, but use PCA first before training. +4) observe the improvement + +Do the same thing for Linear Discriminant Analysis +encode a secret message in the angle of rotation of characters + +Train a neural network on rotated versions of characters with the output of +the network being the angle of rotation. 
+ +Thus, given a rotated character, the neural network will yield a value +that is the amount of rotation of the character. + +Encode a test set by applying a secret message with one bit for each character. +Decode the secret message by running the rotated characters through the +neural network, yielding the pattern of bits. + + pick up the base character + + make a training set by rotating them through n angles + + train + + pick up the base characters + encode the secret message n bits at a time into the characters + this is the testing set + + test secret message yielding a vector of rotations + + convert the rotation back into bits + + assemble the bits into the secret message. + + +@author: richard lyman + + +'''# ============================================================================== + +import ocr_utils + + +import numpy as np +from PIL import Image, ImageDraw +import io +from sklearn.metrics import accuracy_score +from sklearn.decomposition import PCA +from sklearn.metrics import accuracy_score +#from sklearn.model_selection +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split + +# input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))} +# output_feature_list = ['orientation_one_hot','image'] +dtype = np.float32 +character_size = 100 +white_space = 10 +skewRange = np.linspace(-0.2,0.2,4) + +class c_box(object): + def __init__(self, top, left, right, bottom): + self._top = top + self._left = left + self._right = right + self._bottom = bottom + + +def find_min_max(sums): + case = 0 + mins = [] + maxes = [] + for i,sum in enumerate(sums): + ''' + case 0, going through area between characters + if sum ==0 stay in case 0 + if sum != 0 set the top to i and switch to case 1 + case 1, going through a character + if sum ==0 set the bottom to i and drop to case 0 + also append the box to the list using + left = 0, and right 
= the width of the image + if sum !=0 then continue in case 1 + ''' + + if case==0 : + if sum != 0 : + case = 1 + min= i + else: + if sum == 0 : + case = 0 + max= i + mins.append(min) + maxes.append(max) + return mins, maxes + + + +# pick up the base characters from training_image_file +# produce some skeared versions +# make into a training set +# place in a ocr_utils TruthedCharacters class so we can use the +# one hot and batch functions + +im = Image.open('15-01-01 459_Mont_Lyman.png') +#im = Image.open('CourierFont.png') +im = im.convert(mode='L') +data = 255-np.asarray( im, dtype="int32" ) +sums = np.sum(data,axis=1) +mins, maxes = find_min_max(sums) +boxes = [] +for top,bottom in zip(mins,maxes): + line = data[top:bottom] + line_sums = np.sum(line,axis=0) + lefts,rights = find_min_max(line_sums) + for left,right in zip(lefts,rights): + boxes.append(c_box(top,left,right,bottom)) + +images=[] +orientation=[] +recognized_label =[] +for box in boxes: + + img2 = Image.new('L',(character_size,character_size),color=255) + + img = im.crop(box=(box._left, box._top, box._right, box._bottom)) + img2.paste(img,box=(white_space,white_space)) + + imgByteArr = img2.tobytes() + lst = list(imgByteArr) + image = np.array(lst)/255.0 + image = 1.0 - image + images.append(image) + +height = im.height +width = im.width + +t1 = np.array(images) +t1=np.reshape(t1,(t1.shape[0],character_size,character_size)) +ocr_utils.montage(t1, title='characters from file') + +shp = t1.shape +totalN = len(skewRange)*shp[0] +images = [] +import skimage.transform as af + +for j in range(shp[0]): + for i,skew in enumerate(skewRange): + images.append(ocr_utils.shear(t1[j],skew)) + orientation.append(skew) + +images=np.array(images) +ocr_utils.montage(images, title='characters being trained') +images=np.reshape(images,(len(images),character_size*character_size)) +ys = ocr_utils.convert_to_unique(orientation) + + +X_train , X_test, y_train, y_test = train_test_split(images, ys, test_size=0.3, 
random_state=0) +print (y_test.shape) + +lr = LogisticRegression() +lr.fit(X_train, y_train) +y_train_pred = lr.predict(X_train) +y_test_pred = lr.predict(X_test) + +print('\nTrain Accuracy: {:4.6f} coefficients={}'.format(accuracy_score(y_train, y_train_pred), lr.coef_.shape)) +print('Test Accuracy: {:4.6f} coefficients={}'.format(accuracy_score(y_test, y_test_pred), lr.coef_.shape)) + +######################################################################### +# run Principal Component analysis first, then Logistic Regression + +n_components = 2 +pca = PCA(n_components=n_components) + +X_train_pca = pca.fit_transform(X_train) +X_test_pca = pca.transform(X_test) + +print('\nPCA components = {}'.format(pca.components_.shape)) + +lr = LogisticRegression() +logistic_fitted = lr.fit(X_train_pca, y_train) + +y_train_pred = logistic_fitted.predict(X_train_pca) +y_test_pred = logistic_fitted.predict(X_test_pca) + +print('\nPCA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_train, y_train_pred),pca.n_components,lr.coef_.shape)) +print('PCA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred),pca.n_components,lr.coef_.shape)) + +X_errors_image = X_test[y_test!=y_test_pred] +y_errors = y_test[y_test!=y_test_pred] +X_errors_pca = X_test_pca[y_test!=y_test_pred] + +# change to a 2D shape +X_errors2D=np.reshape(X_errors_image, (X_errors_image.shape[0], character_size, character_size)) +ocr_utils.montage(X_errors2D,title='PCA Error Images, components={}'.format (n_components)) + +X_combined = np.vstack((X_train_pca, X_test_pca)) +y_combined = np.hstack((y_train, y_test)) + +ocr_utils.plot_decision_regions( + X=X_combined, + y=y_combined, + classifier=lr, + labels = ['PC1','PC2'] , + title='logistic_regression after 2 component PCA') + + +######################################################################### +# run Linear Discriminant Analysis first then Logistic Regression + + +n_components = 2 
+lda = LinearDiscriminantAnalysis(n_components=n_components) + +X_train_lda = lda.fit_transform(X_train, y_train) +X_test_lda = lda.transform(X_test) + +print('\nLDA components = {}'.format(pca.components_.shape)) +lr = LogisticRegression() +logistic_fitted = lr.fit(X_train_lda, y_train) + +y_train_pred = logistic_fitted.predict(X_train_lda) +y_test_pred = logistic_fitted.predict(X_test_lda) + +print('\nLDA Train Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_train, y_train_pred),lda.n_components,lr.coef_.shape)) +print('LDA Test Accuracy: {:4.6f}, n_components={} coefficients={}'.format(accuracy_score(y_test, y_test_pred),lda.n_components,lr.coef_.shape)) + +X_errors_image = X_test[y_test!=y_test_pred] + +# change to a 2D shape +X_errors2D=np.reshape(X_errors_image, (X_errors_image.shape[0], character_size, character_size)) +ocr_utils.montage(X_errors2D,title='LDA Error Images, components={}'.format (n_components)) + +X_combined = np.vstack((X_train_lda, X_test_lda)) +y_combined = np.hstack((y_train, y_test)) +if X_combined.shape[1] > 1: + ocr_utils.plot_decision_regions( + X=X_combined, + y=y_combined, + classifier=lr, + labels = ['LDA1','LDA2'] , + title='logistic_regression after 2 component LDA') +print ('\n########################### No Errors ####################################') diff --git a/o4_image_to_image.py b/o4_image_to_image.py new file mode 100644 index 0000000..5cce174 --- /dev/null +++ b/o4_image_to_image.py @@ -0,0 +1,110 @@ +"""# ========================================================================== + +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +This sample program is a modified version of the Google mnist convolutional +network tutorial example. See the mnist tutorial in www.tensorflow.org + +This graph has multiple sections 3 layers each, 400 100 400 followed +by a fully connected layer. + +see tensor_flow_graph.png +"""# ============================================================================== +import ocr_utils +import datetime +from collections import namedtuple +import numpy as np +import pandas as pd +import n1_image_to_image as nnetwork +#import n1_residual3x4 as nnetwork +from tensorflow.compat import v1 as tf +dtype = np.float32 +#with tf.device('/GPU:0'): +#with tf.device('/cpu:0'): + + +if True: + # single font train + + # examples + # select only images from 'OCRB' scanned font + # input_filters_dict = {'font': ('OCRA',)} + + # select only images from 'HANDPRINT' font + #input_filters_dict = {'font': ('HANDPRINT',)} + + # select only images from 'OCRA' and 'OCRB' fonts with the 'scanned" fontVariant + # input_filters_dict = {'font': ('OCRA','OCRB'), 'fontVariant':('scanned',)} + + # select everything; all fonts , font variants, etc. 
+ # input_filters_dict = {} + + # select the digits 0 through 9 in the E13B font + # input_filters_dict = {'m_label': range(48,58), 'font': 'E13B'} + + # select the digits 0 and 2in the E13B font + # input_filters_dict = {'m_label': (48,50), 'font': 'E13B'} + + # output the character label, image, italic flag, aspect_ratio and upper_case flag + # output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] + + # output only the character label and the image + # output_feature_list = ['m_label_one_hot','image'] + + # identify the font given the input images + #output_feature_list = ['font_one_hot','image','italic','aspect_ratio','upper_case'] + + # train the digits 0-9 for all fonts + input_filters_dict = {'m_label': [43]+list(range(48,58)),'italic':0,'strength':.4} + #input_filters_dict = {'font':'BANKGOTHIC','m_label': list(range(48,58)),'italic':0,'strength':.7} + #input_filters_dict = {} + output_feature_list = ['low_pass_image','image'] + + """# ============================================================================== + + Train and Evaluate the Model + + """# ============================================================================== + ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = .2, + engine_type='tensorflow',dtype=dtype) + nn = nnetwork.network(ds.train) + nn.fit_entropy( ds.train, nEpochs=5000) + nn.test2(ds.test) + +# train_a_font(input_filters_dict, output_feature_list, nEpochs = 50000) + +else: + # loop through all the fonts and train individually + + # pick up the entire list of fonts and font variants. Train each one. 
+ df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) + + import pprint as pprint + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(df1) + + output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case','font_one_hot'] + + # Change nEpochs to 5000 for better results + for l in df1: + input_filters_dict= {'font': (l[0],)} + train_a_font(input_filters_dict,output_feature_list, nEpochs = 5000) + + +print ('\n########################### No Errors ####################################') + diff --git a/ocr_utils.py b/ocr_utils.py index a3c0238..a130fe9 100644 --- a/ocr_utils.py +++ b/ocr_utils.py @@ -25,8 +25,11 @@ ###################################################### show_plot = False #set True to show plot on screen, set False to save to file +plot_dir = '/tmp/plots' ##################################################### +extension = '.jpg' + ############################################################################## default_zip_file = "fonts.zip" #small data set #default_zip_file = 'fonts_all.zip' #for the big data set @@ -35,11 +38,15 @@ import numpy as np import pandas as pd import math -from pandas.io.common import ZipFile +#from pandas.io.common import ZipFile +from zipfile import ZipFile from matplotlib.colors import ListedColormap import matplotlib.pyplot as plt import sys import os +import subprocess +from PIL import Image +import io def report(blocknr, blocksize, size): @@ -165,8 +172,12 @@ def __init__(self, features, output_feature_list, one_hot_map, engine_type,h,w, self._epochs_completed = 0 self._index_in_epoch = 0 self._feature_names = output_feature_list # list of names of features + + for i,nm in enumerate(self._feature_names): + if nm in self._feature_names[i+1:]: + self._feature_names[i+1] = nm + "_"+ str(i) self._num_features = len(features) - self._one_hot_map = one_hot_map # list >0 for each feature that is one_hot + self._one_hot_map = one_hot_map # list 0 for non one-hots, or # of one_hots self._engine_type= 
engine_type self._dtype = dtype @@ -252,7 +263,7 @@ def next_batch(self, batch_size): A list of npArrays, one for each feature requested """ - + batch_size = min(batch_size, self._num_examples) start = self._index_in_epoch self._index_in_epoch += batch_size if self._index_in_epoch > self._num_examples: @@ -267,7 +278,7 @@ def next_batch(self, batch_size): # Start next epoch start = 0 self._index_in_epoch = batch_size - assert batch_size <= self._num_examples + end = self._index_in_epoch outs = [] for i in range(self._num_features): @@ -313,6 +324,7 @@ def apply_column_filters(df, input_filters_dict ): filtered datafram ''' for key,value in input_filters_dict.items(): + if isinstance(value, str): value = (value,) if hasattr(value, '__iter__')==False: @@ -320,6 +332,10 @@ def apply_column_filters(df, input_filters_dict ): if len(value) > 0: criterion = df[key].map(lambda x: x in value) df = df[criterion] +# try: +# print (df['font'].iloc[0]) +# except: +# print (df) return df def convert_to_unique(t1): @@ -338,7 +354,8 @@ def convert_to_unique(t1): for i,u in enumerate(unique): t2[t1==u]=i return t2 - + + def read_data(fileName=default_zip_file, input_filters_dict={}, output_feature_list=[], @@ -347,6 +364,7 @@ def read_data(fileName=default_zip_file, dtype=np.float32, engine_type='', random_state=None ): + """ Reads data from a given .zip file holding .csv files, filters the data to extract the requested features and @@ -448,10 +466,7 @@ def read_data(fileName=default_zip_file, respective column in the .csv file """ - class DataSets(object): - pass - - data_sets = DataSets() + ''' 1) read in the fonts applying the input filter to extract only the fonts, @@ -465,13 +480,35 @@ class DataSets(object): 5) construct training and test set TruthedCharacters classes and return them ''' - engine_type = engine_type.lower() + df = read_file(fileName, input_filters_dict,random_state=random_state) + + return read_df(df, + input_filters_dict=input_filters_dict, + 
output_feature_list=output_feature_list, + test_size=test_size, + evaluation_size=evaluation_size, + dtype=dtype, + engine_type=engine_type, + random_state=random_state ) + +def read_df(df, + input_filters_dict={}, + output_feature_list=[], + test_size=0.0, + evaluation_size=0.0, + dtype=np.float32, + engine_type='', + random_state=None ): + + class DataSets(object): + pass + print('\nparameter: input_filters_dict\n\t{}'.format(sorted(input_filters_dict.items()))) print('parameter: output_feature_list\n\t{}'.format(output_feature_list)) - - df = read_file(fileName, input_filters_dict,random_state=random_state) + engine_type = engine_type.lower() + data_sets = DataSets() available_columns = [] for key in df.columns: if key=='r0c0': #omit the image @@ -519,7 +556,38 @@ class DataSets(object): elif colName=='image': t1 = np.array(df.loc[:,'r0c0':],dtype=dtype) #extract the images with is everything to the right of row 0 column 0 t1 = np.multiply(t1, 1.0 / 256.0) - feature_name.append(colName) + feature_name.append(colName) + + elif colName=='low_pass_image': + ''' Create unique images for each of the labels, using a single font + These are the concepts. + Make an np array of the original images with the concept images replacing + the originals. 
+ ''' + + boolDF1 = df['fontVariant'] == 'BANKGOTHIC MD BT' + + criterion = df['fontVariant'].map(lambda x: x in 'BANKGOTHIC MD BT') + x = df[criterion] + dx = pd.DataFrame(df) + labels = np.array(x['m_label']) + + for i,label in enumerate(labels): + to_be_replaced = df['m_label']!=label + dx =dx.where(to_be_replaced, other= x.iloc[i], axis=1) + + + t1 = np.array(dx.loc[:,'r0c0':],dtype=dtype) #extract the images with is everything to the right of row 0 column 0 + t1 = np.reshape(t1, (t1.shape[0],h,w)) + t2 = np.zeros((t1.shape[0],h,int(w/2)),dtype=dtype) + for col in range(w): + t2[:,:,int(col/2)] = np.sum(t1[:,:,col:col+2],axis=2) + t3 = np.zeros((t1.shape[0], int(h/2),int(w/2)),dtype=dtype) + for row in range(h): + t3[:,int(row/2),:] = np.sum(t2[:,row:row+2,:],axis=1) + t1 = np.reshape(t3,(t3.shape[0],int(h*w/4)) ) + t1=(t1/4)/256.0 + feature_name.append(colName) elif colName=='m_label_one_hot': t1 = np.array(df['m_label']) @@ -533,6 +601,12 @@ class DataSets(object): one_hot_map[-1] = len(np.unique(t1)) feature_name.append(colName) + elif colName=='orientation_one_hot': + t1 = np.array(df['orientation']) + t1 = convert_to_unique(t1) + one_hot_map[-1] = len(np.unique(t1)) + feature_name.append(colName) + elif colName=='fontVariant_one_hot': t1 = np.array(df['fontVariant'] ) t1 = convert_to_unique(t1) @@ -721,10 +795,135 @@ def compute_column_sum(npx,h,w): see column sum notes under read_data ''' npx = np.reshape(npx,(npx.shape[0],h,w)) - return np.sum(npx,axis=1) # sum of rows in each column + return np.sum(npx,axis=1) # sum of rows in each column + +import skimage.transform as af + +def shear(X, skew): + ''' given a 2D image, shear and return a 2D image + + parameters: + X is the 2D image of shape (nRows, nColumns) + skew is the amount to shear in the range 0 to 1.0 + ''' + + rows = X.shape[0] + cols = X.shape[1] + ratioY = skew*cols/rows + matrix = np.array( [[1, ratioY, 0] ,[0, 1, 0] ,[0, 0, 1 ]]) + tp=af.ProjectiveTransform(matrix=matrix) + #tp = 
tf.AffineTransform(scale=(.3,.3), shear=skew) + f = af.warp(X, tp) + return f +# +# class file_names(object): +# ''' store variants of file a file name with .jpg, .png, .box variations +# ''' +# def __init__(selp, base_name, dir_name = ''): +# base = base_name +# jpeg = base_name + '.jpg' +# png = base_name + '.png' +# box = base_name + '.box' + +def file_to_df(base_file, character_size, title = "", white_space = 0, input_filters_dict={}): + ''' + Given a 2D image file with some characters, uses Tesseract to cut out + the characters, , plots the characters and the boxes that Tesseract + found, then places the characters into a pandas dataframe + and also returns the images in 2D format + parameters: + image_file, string is the path name of the file + character size, integer, is the number of pixels of the return images + in pixels, height and width are equal + title, string is the name to be in the character/box plotted file + white_space is the amount of padding to be placed around the characters + ''' + input_image_file_jpg = base_file + extension + print('input_image_file_jpg = {}'.format(input_image_file_jpg )) + subprocess.run(['tesseract', input_image_file_jpg, base_file, 'batch.nochop', 'makebox']) + #subprocess.run(['tesseract', input_image_file_jpg]) + im = Image.open(input_image_file_jpg) + + images =[] + recognized_label = [] + tops=[] + originalH=[] + lefts=[] + originalW=[] + orientation = 0 + + f = open(base_file+'.box','r') + + for line in f: + coords = line.split(' ') + top = im.height-int(coords[4]) + bottom = im.height-int(coords[2]) + left = int(coords[1]) + right = int(coords[3]) + tops.append(top) + originalW.append(right-left) + lefts.append(left) + originalH.append(bottom-top) + + img2 = Image.new('L',(character_size,character_size),color=255) + img = im.crop(box=(left, top, right, bottom)) + img = img.convert('LA') + img2.paste(img, box=(white_space,white_space)) + + imgByteArr = img2.tobytes() + lst = list(imgByteArr) + image = 
np.array(lst)/255.0 + image = 1.0 - image + images.append(image) + try: + recognized_label.append(ord(coords[0])) + except: + print (coords[0]) + recognized_label.append(ord('_')) + + df = make_df(images, character_size, character_size, originalH, originalW, tops, lefts, orientation, recognized_label ) + df= apply_column_filters(df, input_filters_dict ) + t1 = np.array(df.loc[:,'r0c0':]) #extract the images with is everything to the right of row 0 column 0 + t1 = np.reshape(t1,(t1.shape[0],character_size,character_size )) + montage(t1, title=title) + return df,t1 + +def make_df(images, character_w, character_h, originalH, originalW, tops, lefts, orientation, recognized_label ): + ''' Given an numpy array of images and attributes of each image, place these in a pandas dataframe + ''' + x = np.zeros((len(images), character_w*character_h+8)) + x[:,0] = recognized_label + x[:,1] = orientation + x[:,2] = tops + x[:,3] = lefts + x[:,4] = originalH + x[:,5] = originalW + x[:,6] = character_h + x[:,7] = character_w +# print (' appending images') + x[:,8:] = images +# print (' DONE appending images') + +# images = np.array(images) +# images = np.insert(images, 0, character_w, axis=1) +# images = np.insert(images, 0, character_h, axis=1) +# images = np.insert(images, 0, originalW, axis=1) +# images = np.insert(images, 0, originalH, axis=1) +# images = np.insert(images, 0, lefts, axis=1) +# images = np.insert(images, 0, tops, axis=1) +# images = np.insert(images, 0, orientation, axis=1) +# images = np.insert(images, 0, recognized_label, axis=1) + + columns = ['m_label', 'orientation','m_top','m_left','originalH','originalW', 'h','w'] + for i in range(character_h): + for j in range(character_w): + columns.append('r{}c{}'.format(i,j)) + df = pd.DataFrame(x, columns=columns) -################# Miscellaneous Plot Routines ############################## + return df + +############################################### Miscellaneous Plot Routines 
############################################################ num_fig = 0 # used to give each saved plot a unique name def program_name(): @@ -739,7 +938,7 @@ def show_figures(plt, title="untitled"): If show_plot is true, the the plot is shown on the screen. If show_plot is false, the plot will be saved to a file in the - ./plots folder + /plots folder The files are given unique names based on the plot title args @@ -759,7 +958,7 @@ def show_figures(plt, title="untitled"): if show_plot: plt.show() else: - plot_dir = './plots' + try: os.mkdir(plot_dir) except: @@ -773,6 +972,7 @@ def show_figures(plt, title="untitled"): plt.clf() # savefig does not clear the figure like show does plt.cla() num_fig += 1 +# print ('END PLOT') def scatter_plot(X=None, y=None, legend_entries=[],axis_labels=("",""), title="",xlim=None, ylim=None): ''' @@ -865,9 +1065,13 @@ def plot_decision_regions(X=None, y=None, classifier=None, resolution = .005, te plt.ylim(xx2.min()-d, xx2.max()+d) # plot class samples + for idx, cl in enumerate(np.unique(y)): - plt.scatter(X[y == cl, 0], X[y == cl, 1], - alpha=0.8, c=cmap(idx), + xs = X[y == cl, 0] + ys = X[y == cl, 1] + c =cmap(idx) + plt.scatter(xs, ys, + alpha=0.8, color=c, marker=markers[idx%len(markers)], label=cl) # highlight test samples @@ -892,7 +1096,7 @@ def plot_decision_regions(X=None, y=None, classifier=None, resolution = .005, te show_figures(plt, title) -def montage(X, maxChars = 256, title=''): +def montage(X, maxChars = 2500, title=''): ''' montage displays a square matrix of characters diff --git a/p115_l1_l2_regularization.py b/p115_l1_l2_regularization.py index c685ecd..690ae2f 100644 --- a/p115_l1_l2_regularization.py +++ b/p115_l1_l2_regularization.py @@ -69,7 +69,7 @@ from sklearn.linear_model import LogisticRegression -lr = LogisticRegression(penalty='l1', C=0.1, random_state=0) +lr = LogisticRegression(penalty='l1', C=0.1, random_state=0, solver='liblinear',multi_class='auto') lr.fit(X_train_std, y_train) print('Training 
accuracy-l1 regularization:', lr.score(X_train_std, y_train)) print('Test accuracy-l1 regularization:', lr.score(X_test_std, y_test)) @@ -79,7 +79,7 @@ print('\t{}'.format(lr.coef_)) -lr = LogisticRegression(penalty='l2', C=0.1, random_state=0) +lr = LogisticRegression(penalty='l2', C=0.1, random_state=0, solver='liblinear',multi_class='auto') lr.fit(X_train_std, y_train) print('Training accuracy-l2 regularization:', lr.score(X_train_std, y_train)) print('Test accuracy-l2 regularization:', lr.score(X_test_std, y_test)) @@ -100,8 +100,8 @@ def weight_graph(regularization = 'l1'): weights, params = [], [] - for c in np.arange(-4, 6): - lr = LogisticRegression(penalty=regularization, C=10**c, random_state=0) + for c in np.arange(0, 6): + lr = LogisticRegression(penalty=regularization, C=10**c, random_state=0, solver='liblinear',multi_class='auto') lr.fit(X_train_std, y_train) weights.append(lr.coef_[1]) params.append(10**c) diff --git a/p119_squential_backward_selection.py b/p119_squential_backward_selection.py index 3043008..d226961 100644 --- a/p119_squential_backward_selection.py +++ b/p119_squential_backward_selection.py @@ -49,7 +49,7 @@ y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,49,50) , columns=range(0,20), nChars=1000, random_state=0) -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0) @@ -80,6 +80,7 @@ def __init__(self, estimator, k_features, self.k_features = k_features self.test_size = test_size self.random_state = random_state + def fit(self, X, y): X_train, X_test, y_train, y_test = \ train_test_split(X, y, test_size=self.test_size, diff --git a/p131_principal_component_analysis.py b/p131_principal_component_analysis.py index 8a379b5..c4b8a00 100644 --- a/p131_principal_component_analysis.py +++ b/p131_principal_component_analysis.py @@ -130,7 +130,7 @@ X_train_pca = 
pca.fit_transform(X_train_image) X_test_pca = pca.transform(X_test_image) -lr = LogisticRegression() +lr = LogisticRegression(solver='liblinear',multi_class='auto') logistic_fitted =lr.fit(X_train_pca, y_train) print('\nPCA Train Accuracy: {:4.6f}, n_components={}'.format(accuracy_score(y_train, logistic_fitted.predict(X_train_pca)),pca.n_components)) @@ -149,7 +149,7 @@ X_train_pca = pca.fit_transform(X_train_image) X_test_pca = pca.transform(X_test_image) -lr = LogisticRegression() +lr = LogisticRegression(solver='liblinear',multi_class='auto') logistic_fitted = lr.fit(X_train_pca, y_train) y_train_pred = logistic_fitted.predict(X_train_pca) @@ -191,7 +191,7 @@ X_train_pca = pca.fit_transform(X_train_image) X_test_pca = pca.transform(X_test_image) -lr = LogisticRegression() +lr = LogisticRegression(solver='liblinear',multi_class='auto') logistic_fitted=lr.fit(X_train_pca, y_train) y_train_pred = logistic_fitted.predict(X_train_pca) y_test_pred = logistic_fitted.predict(X_test_pca) diff --git a/p141_linear_descriminant_analsys.py b/p141_linear_descriminant_analsys.py index a43faf6..1d50af8 100644 --- a/p141_linear_descriminant_analsys.py +++ b/p141_linear_descriminant_analsys.py @@ -57,10 +57,11 @@ import numpy as np import ocr_utils import matplotlib.pyplot as plt -from sklearn.lda import LDA - +#from sklearn.lda import LDA +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA print_limit = 20 chars_to_train = range(48,58) +n_classes = len(chars_to_train) columnsXY=range(0,20) column_str = 'column_sum{}'.format(list(columnsXY)) @@ -107,7 +108,7 @@ S_W = np.zeros((d, d)) for label, mv in zip(unique_labels, mean_vecs): class_scatter = np.zeros((d, d)) - for row in X_train_std[[y_train == label]]: + for row in X_train_std[y_train == label]: row, mv = row.reshape(d, 1), mv.reshape(d, 1) class_scatter += (row-mv).dot((row-mv).T) S_W += class_scatter @@ -115,7 +116,7 @@ print('Within-class scatter matrix: {}x{}'.format(S_W.shape[0], 
S_W.shape[1])) print('Class label distribution: %s' - % np.bincount(np.array(y_train,dtype='int32'))[min(y_train):]) + % np.bincount(np.array(y_train,dtype='int32'))[int(min(y_train)):]) d = S_W.shape[1] # number of features S_W = np.zeros((d, d)) @@ -195,7 +196,7 @@ X_test_lda = lda.transform(X_test_std) from sklearn.linear_model import LogisticRegression -lr = LogisticRegression() +lr = LogisticRegression(solver='liblinear', multi_class='auto') lr = lr.fit(X_train_lda, y_train) title = 'Linear Descriminant Analysis Training Set' @@ -208,13 +209,13 @@ ############################################################################### n_components = 10 -lda = LDA(n_components=n_components) +lda = LDA(n_components=min(n_components,n_classes-1)) X_train_lda = lda.fit_transform(X_train_std, y_train) X_test_lda = lda.transform(X_test_std) print ('n_components={}'.format(lda.n_components)) -lr = LogisticRegression() +lr = LogisticRegression(solver='liblinear', multi_class='auto') logistic_fitted = lr.fit(X_train_lda, y_train) from sklearn.metrics import accuracy_score @@ -233,13 +234,13 @@ ############################################################################### n_components = 10 -lda = LDA(n_components=n_components, solver='eigen') +lda = LDA(n_components=n_components-1, solver='eigen') X_train_lda = lda.fit_transform(X_train_std, y_train) X_test_lda = lda.transform(X_test_std) print ('n_components={}'.format(lda.n_components)) -lr = LogisticRegression() +lr = LogisticRegression(solver='liblinear', multi_class='auto') logistic_fitted = lr.fit(X_train_lda, y_train) from sklearn.metrics import accuracy_score diff --git a/p154_pca_nonlinear_mapings.py b/p154_pca_nonlinear_mapings.py index c7da287..3d92e9d 100644 --- a/p154_pca_nonlinear_mapings.py +++ b/p154_pca_nonlinear_mapings.py @@ -83,8 +83,7 @@ def rbf_kernel_pca1(X, gamma, n_components): eigvals, eigvecs = eigh(K) # Collect the top k eigenvectors (projected samples) - X_pc = np.column_stack((eigvecs[:, -i] - for 
i in range(1, n_components + 1))) + X_pc = np.column_stack([eigvecs[:, -i] for i in range(1, n_components + 1)]) return X_pc @@ -269,7 +268,7 @@ def rbf_kernel_pca(X, gamma, n_components): eigvals, eigvecs = eigh(K) # Collect the top k eigenvectors (projected samples) - alphas = np.column_stack((eigvecs[:,-i] for i in range(1,n_components+1))) + alphas = np.column_stack([eigvecs[:,-i] for i in range(1,n_components+1)]) # Collect the corresponding eigenvalues lambdas = [eigvals[-i] for i in range(1,n_components+1)] diff --git a/p177_k_fold_cross_validation.py b/p177_k_fold_cross_validation.py index ca4eba0..6a32210 100644 --- a/p177_k_fold_cross_validation.py +++ b/p177_k_fold_cross_validation.py @@ -1,4 +1,4 @@ -'''k_fold_cross_validation.py +'''k_fold_model_selection.py k fold cross validation splits the training set into n parts and uses a different 1/n of the test set for each iteration. It is good for tuning parameters as all samples are used, reducing the variance of the @@ -48,12 +48,13 @@ import ocr_utils import matplotlib.pyplot as plt import numpy as np -from sklearn.cross_validation import StratifiedKFold -from sklearn.lda import LDA +from sklearn.model_selection import StratifiedKFold +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA if __name__ == '__main__': #charsToTrain=range(48,58) chars_to_train = range(48,58) + n_classes = len(chars_to_train) num_chars = 3000 #limit the number to speed up the calculation @@ -75,7 +76,7 @@ # y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = charsToTrain , columns=range(0,20), nChars=1000, test_size=0.3,random_state=0) from sklearn.linear_model import LogisticRegression - from sklearn.cross_validation import train_test_split + from sklearn.model_selection import train_test_split X_train , X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=0) @@ -99,28 +100,24 @@ for num_PCA in num_planes: print ('number of Principal 
Components = {}'.format(num_PCA)) pipe_lr = Pipeline([('scl', StandardScaler()), - ('pca', PCA(n_components=num_PCA)), - ('clf', LogisticRegression(random_state=1))]) - + ('pca', PCA(n_components=num_PCA, svd_solver='full')), + ('clf', LogisticRegression(random_state=1,multi_class='auto', solver='liblinear'))]) + pipe_lr.fit(X_train, y_train) print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test)) - - - kfold = StratifiedKFold(y=y_train, - n_folds=10, - random_state=1) + kfold = StratifiedKFold(n_splits=10, random_state=1) scores = [] - for k, (train, test) in enumerate(kfold): - pipe_lr.fit(X_train[train], y_train[train]) - score = pipe_lr.score(X_train[test], y_train[test]) + for train_index, test_index in kfold.split(X_train, y_train): + pipe_lr.fit(X_train[train_index], y_train[train_index]) + score = pipe_lr.score(X_train[test_index], y_train[test_index]) scores.append(score) #print ('train {} samples: {}'.format(len(train), train)) #print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train])[list(charsToTrain)], score)) print('\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores))) - from sklearn.cross_validation import cross_val_score + from sklearn.model_selection import cross_val_score scores = cross_val_score(estimator=pipe_lr, X=X_train, @@ -153,21 +150,21 @@ for num_LDA in num_planes: print ('number of Principal Components = {}'.format(num_LDA)) pipe_lr = Pipeline([('scl', StandardScaler()), - ('lda', LDA(n_components=num_LDA)), - ('clf', LogisticRegression(random_state=1))]) - + ('lda', LDA(n_components=min(num_LDA,n_classes-1), solver='eigen')), + ('clf', LogisticRegression(random_state=1,multi_class='auto',solver='liblinear'))]) + + kys = pipe_lr.get_params().keys() + print(kys) +# pipe_lr.set_params(lda__solver='eigen',clf__solver='liblinear',clf__multi_class='auto') pipe_lr.fit(X_train, y_train) print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test)) - - kfold = StratifiedKFold(y=y_train, - n_folds=10, - 
random_state=1) + kfold = StratifiedKFold(n_splits=10, random_state=1) scores = [] - for k, (train, test) in enumerate(kfold): - pipe_lr.fit(X_train[train], y_train[train]) - score = pipe_lr.score(X_train[test], y_train[test]) + for train_index, test_index in kfold.split(X_train, y_train): + pipe_lr.fit(X_train[train_index], y_train[train_index]) + score = pipe_lr.score(X_train[test_index], y_train[test_index]) scores.append(score) #print ('train {} samples: {}'.format(len(train), train)) #print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train])[list(charsToTrain)], score)) diff --git a/p181_learning_curves.py b/p181_learning_curves.py index 281587e..811a9ef 100644 --- a/p181_learning_curves.py +++ b/p181_learning_curves.py @@ -42,7 +42,7 @@ @author: richard lyman ''' import matplotlib.pyplot as plt -from sklearn.learning_curve import learning_curve +from sklearn.model_selection import learning_curve import numpy as np import ocr_utils from sklearn.preprocessing import StandardScaler @@ -55,7 +55,7 @@ y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , columns=(9,17), random_state=0) pipe_lr = Pipeline([('scl', StandardScaler()), - ('clf', LogisticRegression(penalty='l2', random_state=0))]) + ('clf', LogisticRegression(penalty='l2', random_state=0, solver='lbfgs'))]) train_sizes, train_scores, test_scores =\ learning_curve(estimator=pipe_lr, @@ -99,7 +99,7 @@ plt.tight_layout() ocr_utils.show_figures(plt,title) - from sklearn.learning_curve import validation_curve + from sklearn.model_selection import validation_curve param_range = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0] train_scores, test_scores = validation_curve( diff --git a/p186_grid_search.py b/p186_grid_search.py index 344b807..cb2f00a 100644 --- a/p186_grid_search.py +++ b/p186_grid_search.py @@ -35,7 +35,7 @@ import ocr_utils from sklearn.preprocessing import StandardScaler from sklearn.pipeline import Pipeline -from sklearn.grid_search import 
GridSearchCV +from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC if __name__ == '__main__': diff --git a/p189_nested_cross_validation.py b/p189_nested_cross_validation.py index 694f0c4..17d55e6 100644 --- a/p189_nested_cross_validation.py +++ b/p189_nested_cross_validation.py @@ -55,8 +55,8 @@ import ocr_utils from sklearn.preprocessing import StandardScaler from sklearn.pipeline import Pipeline -from sklearn.cross_validation import cross_val_score -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import cross_val_score +from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC if __name__ == '__main__': y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , test_size=0.3, columns=(9,17), random_state=0) diff --git a/p193_model_precision_recall.py b/p193_model_precision_recall.py index 25e8766..5413066 100644 --- a/p193_model_precision_recall.py +++ b/p193_model_precision_recall.py @@ -49,9 +49,9 @@ from sklearn.preprocessing import StandardScaler from sklearn.pipeline import Pipeline from sklearn.svm import SVC -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import GridSearchCV from sklearn.metrics import make_scorer,precision_score, recall_score, f1_score -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split if __name__ == '__main__': y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , columns=(9,17), random_state=0) diff --git a/p194_receiver_operating_characteristic.py b/p194_receiver_operating_characteristic.py index 44d69a0..5bc308f 100644 --- a/p194_receiver_operating_characteristic.py +++ b/p194_receiver_operating_characteristic.py @@ -45,10 +45,12 @@ from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline -from sklearn.cross_validation import StratifiedKFold +from 
sklearn.model_selection import StratifiedKFold from sklearn.decomposition import PCA -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import make_scorer,precision_score,roc_curve, auc +from sklearn.metrics import roc_auc_score, accuracy_score +from sklearn.model_selection import cross_val_score if __name__ == '__main__': @@ -64,32 +66,48 @@ pipe_lr = Pipeline([('scl', StandardScaler()), ('pca', PCA(n_components=2)), - ('clf', LogisticRegression(penalty='l2',random_state=0,C=100.0))]) + ('clf', LogisticRegression(penalty='l2',random_state=0,C=100.0, solver='lbfgs'))]) # X_train2 = X_train[:, [4, 14]] X_train2 = X_train - cv = StratifiedKFold(y_train,n_folds=3,random_state=1) + + kfold = StratifiedKFold(n_splits=3, random_state=1) + +# scores = [] +# for train_index, test_index in kfold.split(X_train, y_train): +# pipe_lr.fit(X_train[train_index], y_train[train_index]) +# score = pipe_lr.score(X_train[test_index], y_train[test_index]) +# scores.append(score) + + + + + + + +# cv = StratifiedKFold(y_train,n_folds=3,random_state=1) fig = plt.figure(figsize=(7, 5)) mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) all_tpr = [] - - for i, (train, test) in enumerate(cv): - probas = pipe_lr.fit(X_train2[train], - y_train[train]).predict_proba(X_train2[test]) + i=0 + for train_index, test_index in kfold.split(X_train, y_train): + probas = pipe_lr.fit(X_train2[train_index], + y_train[train_index]).predict_proba(X_train2[test_index]) - fpr, tpr, thresholds = roc_curve(y_train[test], + fpr, tpr, thresholds = roc_curve(y_train[test_index], probas[:, 1], pos_label=1) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) + i=i+1 plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' - % (i+1, roc_auc)) + % (i, roc_auc)) plt.plot([0, 1], [0, 1], @@ -97,7 +115,7 @@ color=(0.6, 0.6, 0.6), label='random guessing') - mean_tpr /= len(cv) + mean_tpr /= 
kfold.get_n_splits(X_train) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, 'k--', @@ -123,8 +141,6 @@ pipe_lr = pipe_lr.fit(X_train2, y_train) # y_pred2 = pipe_lr.predict(X_test[:, [4, 14]]) y_pred2 = pipe_lr.predict(X_test) - from sklearn.metrics import roc_auc_score, accuracy_score - print('ROC AUC: %.3f' % roc_auc_score(y_true=y_test, y_score=y_pred2)) print('Accuracy: %.3f' % accuracy_score(y_true=y_test, y_pred=y_pred2)) @@ -133,10 +149,10 @@ # for more than 2 classes for GridSearch # i.e. applies a binary scoring technique to multiclasses pos_label=range(48,58) - pre_scorer = make_scorer(score_func=precision_score, - pos_label=pos_label, - greater_is_better=True, - average='micro') +# pre_scorer = make_scorer(score_func=precision_score, +# pos_label=pos_label, +# greater_is_better=True, +# average='micro') from sklearn.svm import SVC y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = pos_label , nChars=4000, columns=(9,17), random_state=0) @@ -149,15 +165,15 @@ {'clf__C': c_gamma_range, 'clf__gamma': c_gamma_range, 'clf__kernel': ['rbf'],}] - from sklearn.grid_search import GridSearchCV + from sklearn.model_selection import GridSearchCV gs = GridSearchCV(estimator=pipe_svc, param_grid=param_grid, - scoring=pre_scorer, + scoring='accuracy', cv=5, n_jobs=-1) - from sklearn.cross_validation import cross_val_score - scores = cross_val_score(gs, X_train, y_train, scoring=pre_scorer, cv=5) + + scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5) print('\nSupport Vector Cross Validation accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores))) gs = gs.fit(X_train, y_train) diff --git a/p206_majority_vote_classifier.py b/p206_majority_vote_classifier.py index 221dc52..e404fa7 100644 --- a/p206_majority_vote_classifier.py +++ b/p206_majority_vote_classifier.py @@ -44,13 +44,12 @@ from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin from 
sklearn.preprocessing import LabelEncoder -from sklearn.externals import six from sklearn.base import clone from sklearn.pipeline import _name_estimators import numpy as np import ocr_utils -from sklearn.cross_validation import train_test_split -from sklearn.cross_validation import cross_val_score +from sklearn.model_selection import train_test_split +from sklearn.model_selection import cross_val_score from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.neighbors import KNeighborsClassifier @@ -59,6 +58,7 @@ from sklearn.metrics import roc_curve from sklearn.metrics import auc import matplotlib.pyplot as plt +from sklearn.model_selection import GridSearchCV class MajorityVoteClassifier(BaseEstimator, ClassifierMixin): @@ -182,8 +182,8 @@ def get_params(self, deep=True): return super(MajorityVoteClassifier, self).get_params(deep=False) else: out = self.named_classifiers.copy() - for name, step in six.iteritems(self.named_classifiers): - for key, value in six.iteritems(step.get_params(deep=True)): + for name, step in self.named_classifiers.items(): + for key, value in step.get_params(deep=True).items(): out['%s__%s' % (name, key)] = value return out @@ -204,7 +204,7 @@ def get_params(self, deep=True): clf1 = LogisticRegression(penalty='l2', C=0.001, - random_state=0) + random_state=0, solver='lbfgs') clf2 = DecisionTreeClassifier(max_depth=1, criterion='entropy', @@ -216,6 +216,7 @@ def get_params(self, deep=True): pipe1 = Pipeline([['sc', StandardScaler()], ['clf', clf1]]) +kys = pipe1.get_params() pipe3 = Pipeline([['sc', StandardScaler()], ['clf', clf3]]) @@ -314,7 +315,7 @@ def get_params(self, deep=True): pprint.pprint(mv_clf.get_params()) print() -from sklearn.grid_search import GridSearchCV + params = {'decisiontreeclassifier__max_depth': [1, 2], 'pipeline-1__clf__C': [0.001, 0.1, 100.0]} @@ -325,9 +326,13 @@ def get_params(self, deep=True): scoring='roc_auc') grid.fit(X_train, y_train) -for params, 
mean_score, scores in grid.grid_scores_: +params=grid.cv_results_['params'] +mean_scores=grid.cv_results_['mean_test_score'] +scores = grid.cv_results_['std_test_score'] + +for i in range(len(params)): print("%0.6f+/-%0.6f %r" - % (mean_score, scores.std() / 2, sorted(params.items()))) + % (mean_scores[i], scores[i] / 2, sorted(params[i].items()))) print('\nBest parameters: %s' % sorted(grid.best_params_.items())) print('Best Accuracy: %.6f' % grid.best_score_) diff --git a/p221_bagging_bootstrap_samples.py b/p221_bagging_bootstrap_samples.py index 1f0a945..cc34df9 100644 --- a/p221_bagging_bootstrap_samples.py +++ b/p221_bagging_bootstrap_samples.py @@ -35,7 +35,7 @@ from sklearn.preprocessing import LabelEncoder import ocr_utils -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split import numpy as np import matplotlib.pyplot as plt if __name__ == '__main__': diff --git a/p229_adaboost.py b/p229_adaboost.py index cf97b71..f4852f0 100644 --- a/p229_adaboost.py +++ b/p229_adaboost.py @@ -41,7 +41,7 @@ from sklearn.ensemble import AdaBoostClassifier import ocr_utils -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split import numpy as np import matplotlib.pyplot as plt from sklearn.preprocessing import LabelEncoder diff --git a/p356_neural_net.py b/p356_neural_net.py index b034f82..b503870 100644 --- a/p356_neural_net.py +++ b/p356_neural_net.py @@ -178,7 +178,7 @@ def _encode_labels(self, y, k): """ onehot = np.zeros((k, y.shape[0])) for idx, val in enumerate(y): - onehot[val, idx] = 1.0 + onehot[int(val), idx] = 1.0 return onehot def _initialize_weights(self): @@ -587,7 +587,7 @@ def _encode_labels(self, y, k): """ onehot = np.zeros((k, y.shape[0])) for idx, val in enumerate(y): - onehot[val, idx] = 1.0 + onehot[int(val), idx] = 1.0 return onehot def _initialize_weights(self): diff --git a/p411_keras.py b/p411_keras.py index 823dd3a..d5216ba 100644 --- 
a/p411_keras.py +++ b/p411_keras.py @@ -50,25 +50,22 @@ def do_keras(X_train,X_test, y_train_ohe, y_train,y_test): model = Sequential() model.add(Dense(input_dim=X_train.shape[1], - output_dim=50, - init='uniform', + units=50, activation='tanh')) model.add(Dense(input_dim=50, - output_dim=50, - init='uniform', + units=50, activation='tanh')) model.add(Dense(input_dim=50, - output_dim=y_train_ohe.shape[1], - init='uniform', + units=y_train_ohe.shape[1], activation='softmax')) sgd = SGD(lr=0.001, decay=1e-7, momentum=.9) model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=["accuracy"]) model.fit(X_train, y_train_ohe, - nb_epoch=50, + epochs=50, batch_size=300, verbose=2, validation_split=0.1 diff --git a/p51_standard_scalar.py b/p51_standard_scalar.py index 1a8cca4..89e8014 100644 --- a/p51_standard_scalar.py +++ b/p51_standard_scalar.py @@ -38,6 +38,7 @@ import ocr_utils from sklearn.preprocessing import StandardScaler from sklearn.linear_model import Perceptron +from sklearn.model_selection import train_test_split ############################################################################# @@ -51,7 +52,7 @@ print('Class labels:', np.unique(y)) -from sklearn.cross_validation import train_test_split + ############################################################################# # standardize the features @@ -63,7 +64,7 @@ X_train_std = sc.transform(X_train) X_test_std = sc.transform(X_test) -ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0) +ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0) ppn.fit(X_train_std, y_train) y_pred = ppn.predict(X_test_std) diff --git a/p62_logistic_regression.py b/p62_logistic_regression.py index b105ba6..6333395 100644 --- a/p62_logistic_regression.py +++ b/p62_logistic_regression.py @@ -56,7 +56,7 @@ import matplotlib.pyplot as plt from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression -from sklearn.cross_validation import train_test_split +from 
sklearn.model_selection import train_test_split y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,49,50) , columns=(9,17),nChars=500) @@ -108,7 +108,7 @@ def cost_0(z): sc.fit(X_train) X_train_std = sc.transform(X_train) X_test_std = sc.transform(X_test) -lr = LogisticRegression(C=1000.0, random_state=0) +lr = LogisticRegression(C=1000.0, random_state=0, solver='lbfgs',multi_class='auto') lr.fit(X_train_std, y_train) X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((y_train, y_test)) @@ -122,8 +122,8 @@ def cost_0(z): weights, params = [], [] -for c in np.arange(-5, 5): - lr = LogisticRegression(C=10**c, random_state=0) +for c in np.arange(0, 5): + lr = LogisticRegression(C=10**c, random_state=0, solver='lbfgs',multi_class='auto') lr.fit(X_train_std, y_train) weights.append(lr.coef_[0]) params.append(10**c) @@ -131,8 +131,8 @@ def cost_0(z): title = 'regression_path' weights, params = [], [] -for c in np.arange(-5, 5): - lr = LogisticRegression(C=10**c, random_state=0) +for c in np.arange(0, 5): + lr = LogisticRegression(C=10**c, random_state=0, solver='lbfgs',multi_class='auto') lr.fit(X_train_std, y_train) weights.append(lr.coef_[1]) params.append(10**c) diff --git a/p73_support_vector_machine.py b/p73_support_vector_machine.py index 4643414..27a03b3 100644 --- a/p73_support_vector_machine.py +++ b/p73_support_vector_machine.py @@ -51,6 +51,7 @@ svm = SVC(kernel='linear', C=1.0, random_state=0) svm.fit(X_train_std, y_train) + ocr_utils.plot_decision_regions(X=X_combined_std, y=y_combined, classifier=svm, diff --git a/q2_tensorflow_mnist.py b/q2_tensorflow_mnist.py index 20a6b8d..a5493da 100644 --- a/q2_tensorflow_mnist.py +++ b/q2_tensorflow_mnist.py @@ -26,307 +26,16 @@ see tensor_flow_graph.png """# ============================================================================== + import ocr_utils import datetime from collections import namedtuple import numpy as np import pandas as pd - -import tensorflow as tf 
-dtype=np.float32 -#with tf.device('/gpu:0'): -#with tf.device('/cpu:0'): -def train_a_font(input_filters_dict,output_feature_list, nEpochs=5000): - - ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, - output_feature_list=output_feature_list, - test_size = .1, - engine_type='tensorflow', - dtype=dtype) - - - """# ============================================================================== - - Start TensorFlow Interactive Session - - """# ============================================================================== - - - sess = tf.InteractiveSession() - - """# ============================================================================== - - Placeholders - - Compute the size of various layers - - Create a tensorflow Placeholder for each feature of data returned from the - dataset - - """# ============================================================================== - - - - lst = [] - extra_features_width = 0 # width of extra features - - for i,nm in enumerate(output_feature_list): - - # features[0], is always the target. For instance it may be m_label_one_hot - # the second features[1] is the 'image' that is passed to the convolution layers - # Any additional features bypass the convolution layers and go directly - # into the fully connected layer. - - # The width of the extra features is calculated in order to allocate - # the correct widths of weights, # and inputs - # names are assigned to make the look pretty on the tensorboard graph. - - if i == 0: - nm = 'y_'+nm - else: - nm = 'x_'+nm - if i>1: - extra_features_width += ds.train.feature_width[i] - lst.append(tf.placeholder(dtype, shape=[None, ds.train.feature_width[i]], name=nm)) - - # ph is a named tuple with key names like 'image', 'm_label', and values that - # are tensors. The display name on the Chrome graph are 'y_m_label', 'x_image, - # x_upper_case etc. 
- Place_Holders = namedtuple('Place_Holders', output_feature_list) - ph = Place_Holders(*lst) # unpack placeholders into named Tuple - - nRows = ds.train.num_rows #image height - nCols = ds.train.num_columns #image width - nFc = 1024 # size of fully connected layer - nConv1 = 32 # size of first convolution layer - nConv2 = 64 # size of second convolution layer - nTarget = ds.train.feature_width[0] # the number of one_hot features in the target, 'm_label' - n_h_pool2_outputs = int(nRows/4) * int(nCols/4) * nConv2 # second pooling layer - n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width # fully connected - - """# ============================================================================== - - Build a Multilayer Convolutional Network - - Weight Initialization - - """# ============================================================================== - - def weight_variable(shape, dtype): - initial = tf.truncated_normal(shape, stddev=0.1,dtype=dtype) - return tf.Variable(initial) - - def bias_variable(shape, dtype): - initial = tf.constant(0.1, shape=shape, dtype=dtype) - return tf.Variable(initial) - - """# ============================================================================== - - Convolution and Pooling - - keep our code cleaner, let's also abstract those operations into functions. 
- - """# ============================================================================== - - def conv2d(x, W): - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') - - def max_pool_2x2(x): - return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], padding='SAME') - - """# ============================================================================== - - First Convolutional Layer - - """# ============================================================================== - with tf.name_scope("w_conv1") as scope: - W_conv1 = weight_variable([5, 5, 1, nConv1],dtype) - b_conv1 = bias_variable([nConv1],dtype) - - with tf.name_scope("reshape_x_image") as scope: - x_image = tf.reshape(ph.image, [-1,nCols,nRows,1]) - - image_summ = tf.image_summary("x_image", x_image) - - """# ============================================================================== - - We then convolve x_image with the weight tensor, add the bias, apply the ReLU function, - and finally max pool. - - """# ============================================================================== - - with tf.name_scope("convolve_1") as scope: - h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) - - with tf.name_scope("pool_1") as scope: - h_pool1 = max_pool_2x2(h_conv1) - - """# ============================================================================== - - Second Convolutional Layer - - In order to build a deep network, we stack several layers of this type. The second - layer will have 64 features for each 5x5 patch. 
- - """# ============================================================================== - - with tf.name_scope("convolve_2") as scope: - W_conv2 = weight_variable([5, 5, nConv1, nConv2],dtype) - b_conv2 = bias_variable([64],dtype) - h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) - - with tf.name_scope("pool_2") as scope: - h_pool2 = max_pool_2x2(h_conv2) - - """# ============================================================================== - - Densely Connected Layer - - Now that the image size has been reduced to 7x7, we add a fully-connected layer - with neurons to allow processing on the entire image. We reshape the tensor - from the pooling layer into a batch of vectors, multiply by a weight matrix, add - a bias, and apply a ReLU. - - """# ============================================================================== - - with tf.name_scope("W_fc1_b") as scope: - W_fc1 = weight_variable([n_h_pool2_outputsx, nFc],dtype) - b_fc1 = bias_variable([nFc],dtype) - - h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs]) - - # append the features, the 2nd on, that go directly to the fully connected layer - for i in range(2,ds.train.num_features ): - h_pool2_flat = tf.concat(1, [h_pool2_flat, ph[i]]) - h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) - - """# ============================================================================== - - Dropout - - """# ============================================================================== - keep_prob = tf.placeholder(dtype,name='keep_prob') - - with tf.name_scope("drop") as scope: - h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) - - """# ============================================================================== - - Readout Layer - - """# ============================================================================== - with tf.name_scope("softmax") as scope: - W_fc2 = weight_variable([nFc, nTarget],dtype) - b_fc2 = bias_variable([nTarget],dtype) - y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + 
b_fc2) - - """# ============================================================================== - - Train and Evaluate the Model - - """# ============================================================================== - - with tf.name_scope("xent") as scope: - # 1e-8 added to eliminate the crash of training when taking log of 0 - cross_entropy = -tf.reduce_sum(ph[0]*tf.log(y_conv+1e-8)) - ce_summ = tf.scalar_summary("cross entropy", cross_entropy) - - with tf.name_scope("train") as scope: - train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) - - with tf.name_scope("test") as scope: - correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(ph[0],1)) - - accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype)) - accuracy_summary = tf.scalar_summary("accuracy", accuracy) - - merged = tf.merge_all_summaries() - tm = "" - tp = datetime.datetime.now().timetuple() - for i in range(4): - tm += str(tp[i])+'-' - tm += str(tp[4]) - writer = tf.train.SummaryWriter("/tmp/ds_logs/"+ tm, sess.graph) - - # To see the results in Chrome, - # Run the following in terminal to activate server. 
- # tensorboard --logdir '/tmp/ds_logs/' - # See results on localhost:6006 - - sess.run(tf.initialize_all_variables()) - - perfect_count=10 - for i in range(nEpochs): - - batch = ds.train.next_batch(100) - # assign feature data to each placeholder - # the batch list is returned in the same order as the features requested - feed = {keep_prob: 0.5} - for j in range(ds.train.num_features): - feed[ph[j]] = batch[j] - - if i%100 == 0: - # sh=h_pool2_flat.get_shape() - feed[keep_prob] = 1.0 - result = sess.run([merged, accuracy ], feed_dict=feed) - summary_str = result[0] - #acc = result[1] - writer.add_summary(summary_str, i) - train_accuracy = accuracy.eval(feed) - if train_accuracy != 1: - perfect_count=10; - else: - perfect_count -= 1 - if perfect_count==0: - break; - - print ("step %d, training accuracy %g"%(i, train_accuracy),flush=True) - train_step.run(feed_dict=feed) - - def computeSize(s,tens): - sumC = 1 - tShape = tens.get_shape() - nDims = len(tShape) - for i in range(nDims): - sumC *= tShape[i].value - print ('\t{}\t{}'.format(s,sumC),flush=True) - return sumC - - print ('network size:',flush=True) - total = computeSize("W_fc1",W_fc1)+ \ - computeSize ("b_fc1",b_fc1) + \ - computeSize ("W_conv1",W_conv1) + \ - computeSize ("b_conv1",b_conv1) + \ - computeSize ("W_conv2",W_conv2) + \ - computeSize ("b_conv2",b_conv2) + \ - computeSize ("W_fc2",W_fc2) + \ - computeSize ("b_fc2",b_fc2) - print('\ttotal\t{}'.format(total),flush=True) - - feed={keep_prob: 1.0} - # assign feature data to each placeholder - error_images = np.empty((0,nRows,nCols)) - - test_accuracy=0 - m=0 - for n in range(0,ds.test.features[0].shape[0],100 ): - for i in range(ds.train.num_features ): - feed[ph[i]] = ds.test.features[i] [n:n+100] - result = sess.run([accuracy, x_image, W_conv1, correct_prediction], feed_dict=feed) - test_accuracy += result[0] - error_images = np.append(error_images, result[1][:,:,:,0][result[3]==False],axis=0) - m += 1 - try: - print ("test accuracy {} for font: 
{}".format(test_accuracy/m, input_filters_dict['font']),flush=True) - ocr_utils.montage(error_images,title='TensorFlow {} Error Images'.format(input_filters_dict['font'])) - except: - print ("test accuracy {}".format(test_accuracy/m),flush=True) - ocr_utils.montage(error_images,title='TensorFlow Error Images') - - tf.reset_default_graph() # only necessary when iterating through fonts - sess.close() - +import n1_2cnv1fc as nnetwork +from tensorflow.compat import v1 as tf +#import tf +dtype = np.float32 if True: # single font train @@ -362,7 +71,13 @@ def computeSize(s,tens): # train the digits 0-9 for all fonts input_filters_dict = {'m_label': range(48,58)} output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] - train_a_font(input_filters_dict, output_feature_list, nEpochs = 5000) + ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = .1, + engine_type='tensorflow',dtype=dtype) + nn = nnetwork.network(ds.train) + nn.fit( ds.train, nEpochs=5000) + nn.test(ds.test) else: # loop through all the fonts and train individually diff --git a/q3_removing_affine_distortion.py b/q3_removing_affine_distortion.py index e46baab..93b7855 100644 --- a/q3_removing_affine_distortion.py +++ b/q3_removing_affine_distortion.py @@ -93,7 +93,7 @@ def shear(X, skew): # run a Logistic Regression on the raw features with 20 rows, 20 columns from sklearn.linear_model import LogisticRegression -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split X_train , X_test, y_train, y_test = train_test_split(images_reshaped, ys, test_size=0.3, random_state=0) @@ -146,7 +146,7 @@ def shear(X, skew): ######################################################################### # run Linear Discriminant Analysis first then Logistic Regression -from sklearn.lda import LDA +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA 
n_components = 2 lda = LDA(n_components=n_components) diff --git a/q4_Theano_mlp.py b/q4_Theano_mlp.py index f0278c7..caed736 100644 --- a/q4_Theano_mlp.py +++ b/q4_Theano_mlp.py @@ -20,6 +20,7 @@ import time import numpy as np import theano +# must comment out downsampling in pool.py because pip install did not include downsample import theano.tensor as T import lasagne import ocr_utils diff --git a/q5_tensorflow_residual.py b/q5_tensorflow_residual.py index 3044b46..c9e0815 100644 --- a/q5_tensorflow_residual.py +++ b/q5_tensorflow_residual.py @@ -33,346 +33,19 @@ from collections import namedtuple import numpy as np import pandas as pd +import n1_2cnv1fc as nnetwork + +# import tensorflow as tf -import tensorflow as tf dtype = np.float32 #with tf.device('/GPU:0'): #with tf.device('/cpu:0'): -def train_a_font(input_filters_dict,output_feature_list, nEpochs=5000): - - ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, - output_feature_list=output_feature_list, - test_size = .1, - engine_type='tensorflow', - dtype=dtype) - - """# ============================================================================== - - Start TensorFlow Interactive Session - - """# ============================================================================== - - sess = tf.InteractiveSession() - - """# ============================================================================== - - Placeholders - - Compute the size of various layers - - Create a tensorflow Placeholder for each feature of data returned from the - dataset - - """# ============================================================================== - - - - lst = [] - extra_features_width = 0 # width of extra features - - for i,nm in enumerate(output_feature_list): - - # features[0], is always the target. 
For instance it may be m_label_one_hot - # the second features[1] is the 'image' that is passed to the convolution layers - # Any additional features bypass the convolution layers and go directly - # into the fully connected layer. - - # The width of the extra features is calculated in order to allocate - # the correct widths of weights, # and inputs - # names are assigned to make the look pretty on the tensorboard graph. - - if i == 0: - nm = 'y_'+nm - else: - nm = 'x_'+nm - if i>1: - extra_features_width += ds.train.feature_width[i] - lst.append(tf.placeholder(dtype, shape=[None, ds.train.feature_width[i]], name=nm)) - - # ph is a named tuple with key names like 'image', 'm_label', and values that - # are tensors. The display name on the Chrome graph are 'y_m_label', 'x_image, - # x_upper_case etc. - Place_Holders = namedtuple('Place_Holders', output_feature_list) - ph = Place_Holders(*lst) # unpack placeholders into named Tuple - - nRows = ds.train.num_rows #image height - nCols = ds.train.num_columns #image width - nFc0 = 2048 # size of fully connected layer - nFc1 = 2048 # size of fully connected layer - nFc2 = 2048 # size of fully connected layer - nConv1 = 32 # size of first convolution layer - nConv2 = 64 # size of second convolution layer - nTarget = ds.train.feature_width[0] # the number of one_hot features in the target, 'm_label' - n_h_pool2_outputs = int(nRows/4) * int(nCols/4) * nConv2 # second pooling layer - n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width # fully connected - - """# ============================================================================== - - Build a Multilayer Convolutional Network - - Weight Initialization - - """# ============================================================================== - - def weight_variable(shape, dtype): - initial = tf.truncated_normal(shape, stddev=0.1,dtype=dtype) - return tf.Variable(initial) - - def bias_variable(shape, dtype): - initial = tf.constant(0.1, shape=shape, dtype=dtype) 
- return tf.Variable(initial) - - """# ============================================================================== - - Convolution and Pooling - - keep our code cleaner, let's also abstract those operations into functions. - - """# ============================================================================== - - def conv2d(x, W): - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') - - def max_pool_2x2(x): - return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], padding='SAME') - - """# ============================================================================== - - First Convolutional Layer - - """# ============================================================================== - with tf.name_scope("w_conv1") as scope: - W_conv1 = weight_variable([5, 5, 1, nConv1],dtype) - b_conv1 = bias_variable([nConv1],dtype) - - with tf.name_scope("reshape_x_image") as scope: - x_image = tf.reshape(ph.image, [-1,nCols,nRows,1]) - - image_summ = tf.image_summary("x_image", x_image) - - """# ============================================================================== - - We then convolve x_image with the weight tensor, add the bias, apply the ReLU function, - and finally max pool. - - """# ============================================================================== - - with tf.name_scope("convolve_1") as scope: - h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) - - with tf.name_scope("pool_1") as scope: - h_pool1 = max_pool_2x2(h_conv1) - - """# ============================================================================== - - Second Convolutional Layer - - In order to build a deep network, we stack several layers of this type. The second - layer will have 64 features for each 5x5 patch. 
- - """# ============================================================================== - - with tf.name_scope("convolve_2") as scope: - W_conv2 = weight_variable([5, 5, nConv1, nConv2],dtype) - b_conv2 = bias_variable([64],dtype) - h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) - - with tf.name_scope("pool_2") as scope: - h_pool2 = max_pool_2x2(h_conv2) - """# ============================================================================== - - Densely Connected Layer 0 - - Now that the image size has been reduced to 7x7, we add a fully-connected layer - with neurons to allow processing on the entire image. We reshape the tensor - from the pooling layer into a batch of vectors, multiply by a weight matrix, add - a bias, and apply a ReLU. - - """# ============================================================================== - - with tf.name_scope("W_fc0_b") as scope: - W_fc0 = weight_variable([n_h_pool2_outputsx, nFc0],dtype) - b_fc0 = bias_variable([nFc0],dtype) - - h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs]) - - # append the features, the 2nd on, that go directly to the fully connected layer - for i in range(2,ds.train.num_features ): - h_pool2_flat = tf.concat(1, [h_pool2_flat, ph[i]]) - h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0) - """# ============================================================================== - - Densely Connected Layer 1 - - We add a fully-connected layer - with neurons to allow processing on the entire image. We reshape the tensor - from the pooling layer into a batch of vectors, multiply by a weight matrix, add - a bias, and apply a ReLU. 
- - """# ============================================================================== - - with tf.name_scope("W_fc1_b") as scope: - W_fc1 = weight_variable([nFc0, nFc1],dtype) - b_fc1 = bias_variable([nFc1],dtype) - - h_fc1 = tf.nn.relu(tf.matmul(h_fc0, W_fc1) + b_fc1) - - """# ============================================================================== - - Densely Connected Layer 2 - - We add a fully-connected layer - with neurons to allow processing on the entire image. We reshape the tensor - from the pooling layer into a batch of vectors, multiply by a weight matrix, add - a bias, and apply a ReLU. - - """# ============================================================================== - - with tf.name_scope("W_fc2_b") as scope: - W_fc2 = weight_variable([nFc1, nFc2],dtype) - b_fc2 = bias_variable([nFc2],dtype) - - h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2) - - """# ============================================================================== - Dropout - - """# ============================================================================== - keep_prob = tf.placeholder(dtype,name='keep_prob') - - with tf.name_scope("drop") as scope: - h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob) - - """# ============================================================================== - - Readout Layer - - """# ============================================================================== - with tf.name_scope("softmax") as scope: - W_fc3 = weight_variable([nFc2, nTarget],dtype) - b_fc3 = bias_variable([nTarget],dtype) - y_conv=tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3) - - """# ============================================================================== - - Train and Evaluate the Model - - """# ============================================================================== - - with tf.name_scope("xent") as scope: - # 1e-8 added to eliminate the crash of training when taking log of 0 - cross_entropy = -tf.reduce_sum(ph[0]*tf.log(y_conv+1e-8)) - ce_summ = 
tf.scalar_summary("cross entropy", cross_entropy) - - with tf.name_scope("train") as scope: - train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) - - with tf.name_scope("test") as scope: - correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(ph[0],1)) - - accuracy = tf.reduce_mean(tf.cast(correct_prediction,dtype)) - accuracy_summary = tf.scalar_summary("accuracy", accuracy) - - merged = tf.merge_all_summaries() - tm = "" - tp = datetime.datetime.now().timetuple() - for i in range(4): - tm += str(tp[i])+'-' - tm += str(tp[4]) - writer = tf.train.SummaryWriter("/tmp/ds_logs/"+ tm, sess.graph) - - # To see the results in Chrome, - # Run the following in terminal to activate server. - # tensorboard --logdir '/tmp/ds_logs/' - # See results on localhost:6006 - - sess.run(tf.initialize_all_variables()) - - perfect_count=10 - for i in range(nEpochs): - - batch = ds.train.next_batch(100) - # assign feature data to each placeholder - # the batch list is returned in the same order as the features requested - feed = {keep_prob: 0.5} - for j in range(ds.train.num_features): - feed[ph[j]] = batch[j] - - if i%100 == 0: - # sh=h_pool2_flat.get_shape() - feed[keep_prob] = 1.0 - result = sess.run([merged, accuracy ], feed_dict=feed) - summary_str = result[0] - #acc = result[1] - writer.add_summary(summary_str, i) - train_accuracy = accuracy.eval(feed) - if train_accuracy != 1: - perfect_count=10; - else: - perfect_count -= 1 - if perfect_count==0: - break; - - print ("step %d, training accuracy %g"%(i, train_accuracy),flush=True) - train_step.run(feed_dict=feed) - - def computeSize(s,tens): - sumC = 1 - tShape = tens.get_shape() - nDims = len(tShape) - for i in range(nDims): - sumC *= tShape[i].value - print ('\t{}\t{}'.format(s,sumC),flush=True) - return sumC - - print ('network size:',flush=True) - total = computeSize("W_fc0",W_fc0)+ \ - computeSize ("b_fc0",b_fc0) + \ - computeSize ("W_conv1",W_conv1) + \ - computeSize ("b_conv1",b_conv1) + \ - computeSize 
("W_conv2",W_conv2) + \ - computeSize ("b_conv2",b_conv2) + \ - computeSize ("W_fc0",W_fc0) + \ - computeSize ("b_fc0",b_fc0) + \ - computeSize ("W_fc1",W_fc1) + \ - computeSize ("b_fc1",b_fc1) + \ - computeSize ("W_fc2",W_fc2) + \ - computeSize ("b_fc2",b_fc2) - - - print('\ttotal\t{}'.format(total),flush=True) - - feed={keep_prob: 1.0} - # assign feature data to each placeholder - error_images = np.empty((0,nRows,nCols)) - - test_accuracy=0 - m=0 - for n in range(0,ds.test.features[0].shape[0],100 ): - for i in range(ds.train.num_features ): - feed[ph[i]] = ds.test.features[i] [n:n+100] - result = sess.run([accuracy, x_image, W_conv1, correct_prediction], feed_dict=feed) - test_accuracy += result[0] - error_images = np.append(error_images, result[1][:,:,:,0][result[3]==False],axis=0) - m += 1 - try: - print ("test accuracy {} for font: {}".format(test_accuracy/m, input_filters_dict['font']),flush=True) - ocr_utils.montage(error_images,title='TensorFlow {} Error Images'.format(input_filters_dict['font'])) - except: - print ("test accuracy {}".format(test_accuracy/m),flush=True) - ocr_utils.montage(error_images,title='TensorFlow Error Images') - - tf.reset_default_graph() # only necessary when iterating through fonts - sess.close() - - -if True: +if False: # single font train - # esamples + # examples # select only images from 'OCRB' scanned font # input_filters_dict = {'font': ('OCRA',)} @@ -404,7 +77,13 @@ def computeSize(s,tens): input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'fontVariant':'scanned'} #input_filters_dict = {} output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] - train_a_font(input_filters_dict, output_feature_list, nEpochs = 20000) + ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = .1, + engine_type='tensorflow',dtype=dtype) + nn = nnetwork.network( ds.train) + nn.fit( ds.train, nEpochs=5000) + 
nn.test(ds.test) else: # loop through all the fonts and train individually @@ -420,9 +99,22 @@ def computeSize(s,tens): # Change nEpochs to 5000 for better results for l in df1: - input_filters_dict= {'font': (l[0],)} - train_a_font(input_filters_dict,output_feature_list, nEpochs = 500) - + #input_filters_dict= {'font': (l[0],)} + input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'font': (l[0],)} + #train_a_font(input_filters_dict,output_feature_list, nEpochs = 500) + + ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = .1, + engine_type='tensorflow',dtype=dtype) + + nn = nnetwork.network(ds.train) + nn.fit( ds.train, nEpochs=5000) + nn.test(ds.test, title = l[0] ) + nn.reset_graph() + + + print ('\n########################### No Errors ####################################') diff --git a/q6_tensorflow_residual3x3.py b/q6_tensorflow_residual3x3.py deleted file mode 100644 index 4a9ea2d..0000000 --- a/q6_tensorflow_residual3x3.py +++ /dev/null @@ -1,371 +0,0 @@ -"""# ========================================================================== - -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -This sample program is a modified version of the Google mnist convolutional -network tutorial example. 
See the mnist tutorial in www.tensorflow.org - -This graph has multiple sections 3 layers each, 400 100 400 followed -by a fully connected layer. - -see tensor_flow_graph.png -"""# ============================================================================== -import ocr_utils -import datetime -from collections import namedtuple -import numpy as np -import pandas as pd - -import tensorflow as tf -dtype = np.float32 -#with tf.device('/GPU:0'): -#with tf.device('/cpu:0'): - -def train_a_font(input_filters_dict,output_feature_list, nEpochs=5000): - - ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, - output_feature_list=output_feature_list, - test_size = .1, - engine_type='tensorflow',dtype=dtype) - - - """# ============================================================================== - - Start TensorFlow Interactive Session - - """# ============================================================================== - - sess = tf.InteractiveSession() - - """# ============================================================================== - - Placeholders - - Compute the size of various layers - - Create a tensorflow Placeholder for each feature of data returned from the - dataset - - """# ============================================================================== - - - - lst = [] - extra_features_width = 0 # width of extra features - - for i,nm in enumerate(output_feature_list): - - # features[0], is always the target. For instance it may be m_label_one_hot - # the second features[1] is the 'image' that is passed to the convolution layers - # Any additional features bypass the convolution layers and go directly - # into the fully connected layer. - - # The width of the extra features is calculated in order to allocate - # the correct widths of weights, # and inputs - # names are assigned to make the look pretty on the tensorboard graph. 
- - if i == 0: - nm = 'y_'+nm - else: - nm = 'x_'+nm - if i>1: - extra_features_width += ds.train.feature_width[i] - print (ds.train.features[i].dtype) - lst.append(tf.placeholder(dtype, shape=[None, ds.train.feature_width[i]], name=nm)) - - # ph is a named tuple with key names like 'image', 'm_label', and values that - # are tensors. The display name on the Chrome graph are 'y_m_label', 'x_image, - # x_upper_case etc. - Place_Holders = namedtuple('Place_Holders', ds.train.feature_names) - ph = Place_Holders(*lst) # unpack placeholders into named Tuple - - nRows = ds.train.num_rows #image height - nCols = ds.train.num_columns #image width - - nSections = 10 - w = list(range(nSections*3)) - b = list(range(nSections*3)) - h = list(range(nSections*3+1)) - - - in_out_width = nRows*nCols - internal_width = int(in_out_width/4) - - -# nFc0 = 2048 # size of fully connected layer - nFc1 = 2048 # size of fully connected layer -# nFc2 = 2048 # size of fully connected layer -# nConv1 = 32 # size of first convolution layer -# nConv2 = 64 # size of second convolution layer - nTarget = ds.train.feature_width[0] # the number of one_hot features in the target, 'm_label' - -# n_h_pool2_outputs = int(nRows/4) * int(nCols/4) * nConv2 # second pooling layer -# n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width # fully connected -# - """# ============================================================================== - - Build a Multilayer Convolutional Network - - Weight Initialization - - """# ============================================================================== - - def weight_variable(shape, dtype): - initial = tf.truncated_normal(shape, stddev=0.1,dtype=dtype) - return tf.Variable(initial) - - def bias_variable(shape, dtype): - initial = tf.constant(0.1, shape=shape, dtype=dtype) - return tf.Variable(initial) - - - """# ============================================================================== - Convolution and Pooling - - keep our code cleaner, let's also 
abstract those operations into functions. - - """# ============================================================================== - - def conv2d(x, W): - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') - - - """# ============================================================================== - - First Convolutional Layers - - """# ============================================================================== - - def shapeOuts(n): - print ('n={}, hin={},w={}, b={} ,hout={}\n'.format(n, h[n]._shape, w[n]._variable._shape, b[n]._variable._shape, h[n+1]._shape)) - - def section(n): - with tf.name_scope('section_'+str(n)+'_0') as scope: - w[n]=weight_variable([in_out_width, internal_width],dtype) - b[n]=bias_variable([internal_width],dtype) - h[n+1] = tf.nn.relu(tf.matmul(h[n], w[n]) + b[n]) - shapeOuts(n) - - with tf.name_scope('section_'+str(n)+'_1') as scope: - w[n+1]=weight_variable([internal_width, internal_width],dtype) - b[n+1]=bias_variable([internal_width],dtype) - - h[n+2]=tf.nn.relu(tf.matmul(h[n+1], w[n+1]) + b[n+1]) - shapeOuts(n+1) - - with tf.name_scope('section_'+str(n)+'_2') as scope: - w[n+2]=weight_variable([internal_width, in_out_width],dtype) - b[n+2]=bias_variable([in_out_width],dtype) - z= tf.nn.relu(tf.matmul(h[n+2], w[n+2]) + b[n+2]) - h[n+3]= tf.add(z ,h[n]) #n+3 - - print('z shape ={}'.format(z._shape)) - shapeOuts(n+2) - return - - def computeSize(s,tens): - sumC = 1 - tShape = tens.get_shape() - nDims = len(tShape) - for i in range(nDims): - sumC *= tShape[i].value - print ('\t{}\t{}'.format(s,sumC),flush=True) - return sumC - - """# ============================================================================== - Build sectional network - - """# ============================================================================== - h[0]= ph[1] - for i in range(nSections): - section(3*i) - - """# ============================================================================== - Dropout - - """# 
============================================================================== - keep_prob = tf.placeholder(dtype,name='keep_prob') - - with tf.name_scope("drop") as scope: - h_fc2_drop = tf.nn.dropout(h[nSections*3], keep_prob) - - """# ============================================================================== - - Readout Layer - - """# ============================================================================== - with tf.name_scope("softmax") as scope: - w_fc3 = weight_variable([in_out_width, nTarget],dtype) - b_fc3 = bias_variable([nTarget],dtype) - y_conv=tf.nn.softmax(tf.matmul(h_fc2_drop, w_fc3) + b_fc3) - - print ('network size:',flush=True) - total = 0 - for i in range(nSections*3): - total = total + computeSize("w{}".format(i),w[i]) - total = total + computeSize ("b_fc3",b_fc3) + \ - computeSize ("w_fc3",w_fc3) - - - print('\ttotal\t{}'.format(total),flush=True) - - """# ============================================================================== - - Train and Evaluate the Model - - """# ============================================================================== - with tf.name_scope("reshape_x_image") as scope: - x_image = tf.reshape(ph.image, [-1,nCols,nRows,1]) - - with tf.name_scope("xent") as scope: - # 1e-8 added to eliminate the crash of training when taking log of 0 - cross_entropy = -tf.reduce_sum(ph[0]*tf.log(y_conv+1e-8)) - ce_summ = tf.scalar_summary("cross entropy", cross_entropy) - - with tf.name_scope("train") as scope: - train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) - - with tf.name_scope("test") as scope: - correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(ph[0],1)) - - accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype)) - accuracy_summary = tf.scalar_summary("accuracy", accuracy) - - merged = tf.merge_all_summaries() - tm = "" - tp = datetime.datetime.now().timetuple() - for i in range(4): - tm += str(tp[i])+'-' - tm += str(tp[4]) - writer = tf.train.SummaryWriter("/tmp/ds_logs/"+ tm, 
sess.graph) - - # To see the results in Chrome, - # Run the following in terminal to activate server. - # tensorboard --logdir '/tmp/ds_logs/' - # See results on localhost:6006 - - sess.run(tf.initialize_all_variables()) - - perfect_count=10 - for i in range(nEpochs): - - batch = ds.train.next_batch(100) - # assign feature data to each placeholder - # the batch list is returned in the same order as the features requested - feed = {keep_prob: 0.5} - for j in range(ds.train.num_features): - feed[ph[j]] = batch[j] - - if i%100 == 0: - # sh=h_pool2_flat.get_shape() - feed[keep_prob] = 1.0 - result = sess.run([merged, accuracy ], feed_dict=feed) - summary_str = result[0] - #acc = result[1] - writer.add_summary(summary_str, i) - train_accuracy = accuracy.eval(feed) - if train_accuracy != 1: - perfect_count=10; - else: - perfect_count -= 1 - if perfect_count==0: - break; - - print ("step %d, training accuracy %g"%(i, train_accuracy),flush=True) - train_step.run(feed_dict=feed) - - feed={keep_prob: 1.0} - # assign feature data to each placeholder - error_images = np.empty((0,nRows,nCols)) - - test_accuracy=0 - m=0 - for n in range(0,ds.test.features[0].shape[0],100 ): - for i in range(ds.train.num_features ): - feed[ph[i]] = ds.test.features[i] [n:n+100] - result = sess.run([accuracy, x_image, correct_prediction], feed_dict=feed) - test_accuracy += result[0] - error_images = np.append(error_images, result[1][:,:,:,0][result[2]==False],axis=0) - m += 1 - try: - print ("test accuracy {} for font: {}".format(test_accuracy/m, input_filters_dict['font']),flush=True) - ocr_utils.montage(error_images,title='TensorFlow {} Error Images'.format(input_filters_dict['font'])) - except: - print ("test accuracy {}".format(test_accuracy/m),flush=True) - ocr_utils.montage(error_images,title='TensorFlow Error Images') - - tf.reset_default_graph() # only necessary when iterating through fonts - sess.close() - - -if True: - # single font train - - # esamples - # select only images from 'OCRB' 
scanned font - # input_filters_dict = {'font': ('OCRA',)} - - # select only images from 'HANDPRINT' font - #input_filters_dict = {'font': ('HANDPRINT',)} - - # select only images from 'OCRA' and 'OCRB' fonts with the 'scanned" fontVariant - # input_filters_dict = {'font': ('OCRA','OCRB'), 'fontVariant':('scanned',)} - - # select everything; all fonts , font variants, etc. - # input_filters_dict = {} - - # select the digits 0 through 9 in the E13B font - # input_filters_dict = {'m_label': range(48,58), 'font': 'E13B'} - - # select the digits 0 and 2in the E13B font - # input_filters_dict = {'m_label': (48,50), 'font': 'E13B'} - - # output the character label, image, italic flag, aspect_ratio and upper_case flag - # output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] - - # output only the character label and the image - # output_feature_list = ['m_label_one_hot','image'] - - # identify the font given the input images - #output_feature_list = ['font_one_hot','image','italic','aspect_ratio','upper_case'] - - # train the digits 0-9 for all fonts - #input_filters_dict = {'m_label': range(48,58)} - input_filters_dict = {'font':'ARIAL','m_label': list(range(48,58))+list(range(65,91))+list(range(97,123))} - #input_filters_dict = {} - output_feature_list = ['m_label_one_hot','image'] - train_a_font(input_filters_dict, output_feature_list, nEpochs = 50000) - -else: - # loop through all the fonts and train individually - - # pick up the entire list of fonts and font variants. Train each one. 
- df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) - - import pprint as pprint - pp = pprint.PrettyPrinter(indent=4) - pp.pprint(df1) - - output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case','font_one_hot'] - - # Change nEpochs to 5000 for better results - for l in df1: - input_filters_dict= {'font': (l[0],)} - train_a_font(input_filters_dict,output_feature_list, nEpochs = 500) - - -print ('\n########################### No Errors ####################################') - diff --git a/q6_tensorflow_residual3x4.py b/q6_tensorflow_residual3x4.py new file mode 100644 index 0000000..d38cb8f --- /dev/null +++ b/q6_tensorflow_residual3x4.py @@ -0,0 +1,109 @@ +"""# ========================================================================== + +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +This sample program is a modified version of the Google mnist convolutional +network tutorial example. See the mnist tutorial in www.tensorflow.org + +This graph has multiple sections 3 layers each, 400 100 400 followed +by a fully connected layer. 
+ +see tensor_flow_graph.png +"""# ============================================================================== +import ocr_utils +import datetime +from collections import namedtuple +import numpy as np +import pandas as pd +import n1_residual3x4 as nnetwork +from tensorflow.compat import v1 as tf +dtype = np.float32 +#with tf.device('/GPU:0'): +#with tf.device('/cpu:0'): + + +if True: + # single font train + + # examples + # select only images from 'OCRB' scanned font + # input_filters_dict = {'font': ('OCRA',)} + + # select only images from 'HANDPRINT' font + #input_filters_dict = {'font': ('HANDPRINT',)} + + # select only images from 'OCRA' and 'OCRB' fonts with the 'scanned" fontVariant + # input_filters_dict = {'font': ('OCRA','OCRB'), 'fontVariant':('scanned',)} + + # select everything; all fonts , font variants, etc. + # input_filters_dict = {} + + # select the digits 0 through 9 in the E13B font + # input_filters_dict = {'m_label': range(48,58), 'font': 'E13B'} + + # select the digits 0 and 2in the E13B font + # input_filters_dict = {'m_label': (48,50), 'font': 'E13B'} + + # output the character label, image, italic flag, aspect_ratio and upper_case flag + # output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] + + # output only the character label and the image + # output_feature_list = ['m_label_one_hot','image'] + + # identify the font given the input images + #output_feature_list = ['font_one_hot','image','italic','aspect_ratio','upper_case'] + + # train the digits 0-9 for all fonts + #input_filters_dict = {'m_label': range(48,58)} + input_filters_dict = {'font':'ARIAL','m_label': list(range(48,58))+list(range(65,91))+list(range(97,123))} + #input_filters_dict = {} + output_feature_list = ['m_label_one_hot','image'] + + """# ============================================================================== + + Train and Evaluate the Model + + """# 
============================================================================== + ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, + output_feature_list=output_feature_list, + test_size = .1, + engine_type='tensorflow',dtype=dtype) + nn = nnetwork.network(ds.train) + nn.fit( ds.train, nEpochs=5000) + nn.test(ds.test) + +# train_a_font(input_filters_dict, output_feature_list, nEpochs = 50000) + +else: + # loop through all the fonts and train individually + + # pick up the entire list of fonts and font variants. Train each one. + df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) + + import pprint as pprint + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(df1) + + output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case','font_one_hot'] + + # Change nEpochs to 5000 for better results + for l in df1: + input_filters_dict= {'font': (l[0],)} + train_a_font(input_filters_dict,output_feature_list, nEpochs = 500) + + +print ('\n########################### No Errors ####################################') + diff --git a/q8_tika.py b/q8_tika.py index bae89cf..1cc29f7 100644 --- a/q8_tika.py +++ b/q8_tika.py @@ -4,11 +4,50 @@ @author: richard lyman ''' + +import pytesseract import tika +from tika import translate, detector, language + + + +filename = '15-01-01 459_Mont_Lyman.jpg' +filename2 = 'img20150901_15233271bw.jpg' + +from PIL import Image + +rawText = pytesseract.image_to_string(Image.open(filename2), lang="rus") +print (rawText) +lines = rawText.split('\n') + +import os +#os.putenv( 'TIKA_VERSION','default') # - set to the version string, e.g., 1.12 or default to current Tika version. +#os.putenv( 'TIKA_SERVER_JAR','/home/richard/.m2/repository/org/apache/tika/tika-server/1.13/tika-server-1.13.jar') #- set to the full URL to the remote Tika server jar to download and cache. +os.putenv( 'TIKA_SERVER_ENDPOINT',' http://localhost:9998') #- set to the host (local or remote) for the running Tika server jar. 
+#os.putenv( 'TIKA_SERVER_ENDPOINT',' http://localhost:9998/language/string') #- set to the host (local or remote) for the running Tika server jar. +#os.putenv( 'TIKA_CLIENT_ONLY','True') #- if set to True, then TIKA_SERVER_JAR is ignored, and relies on the value for TIKA_SERVER_ENDPOINT and treats Tika like a REST client. +#os.putenv( 'TIKA_TRANSLATOR','org/apache/tika/language/translate/') #- set to the fully qualified class name (defaults to Lingo24) for the Tika translator implementation. +#os.putenv( 'TIKA_SERVER_CLASSPATH','/home/richard/.m2/repository/org/apache/tika/tika-server/1.13/tika-server-1.13.jar') #- set to a string (delimited by ':' for each additional path) to prepend to the Tika server jar path. +#os.putenv('TESSDATA_PREFIX','/usr/share/tesseract-ocr/4.00/tessdata/') tika.initVM() from tika import parser -parsed = parser.from_file('15-01-01 459_Mont_Lyman.jpg') -#parsed = parser.from_file('img20150901_15233271bw.jpg') -print (parsed["metadata"]) -print (parsed["content"]) +parsed = parser.from_buffer("comme çi comme ça") +print(parsed["metadata"]) +print(parsed["content"]) +global Verbose +Verbose=True + +result=translate.auto_from_buffer("comme çi comme ça", 'en') +print(result) +result = detector.from_buffer("comme çi comme ça") +print (result) +result = translate.from_buffer("comme çi comme ça",'fr','en') +print (result) +result = language.from_buffer("comme çi comme ça") +print (result) +for line in lines: + if len(line)>0: + result=translate.from_buffer(line, 'ru','en') + print(result) + print ('\n########################### No Errors ####################################') \ No newline at end of file diff --git a/q9_tensorflow_gpu_test.py b/q9_tensorflow_gpu_test.py index dbb7ad1..d5725ce 100644 --- a/q9_tensorflow_gpu_test.py +++ b/q9_tensorflow_gpu_test.py @@ -1,9 +1,22 @@ # Creates a graph. 
import tensorflow as tf +#from tensorflow.compat import v1 as tf + +#sess = tf.InteractiveSession() +@tf.function +def d(a,b): + return tf.matmul(a, b) + a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') -c = tf.matmul(a, b) +#c = tf.matmul(a, b) # Creates a session with log_device_placement set to True. -sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) + # Runs the op. -print (sess.run(c)) \ No newline at end of file + +# tens1 = tf.constant([ [[1,2],[2,3]], [[3,4],[5,6]] ]) +# print (sess.run(tens1)[1,1,0]) +# self._sess.run(tf.initialize_all_variables()) +for i in range(100000): + d(a,b) +print ('\n########################### No Errors ####################################') \ No newline at end of file diff --git a/run_batch.sc b/run_batch.sc index 6ff5449..045a5ae 100755 --- a/run_batch.sc +++ b/run_batch.sc @@ -1,16 +1,16 @@ #!/bin/bash -if [ ! -d "./plots" ]; then - mkdir ./plots +if [ ! 
-d "/tmp/plots" ]; then + mkdir /tmp/plots fi -echo "" > ./plots/run_batch.txt -for i in $(ls -1v ./[p-q]*.py ); do - echo "" |& tee -a ./plots/run_batch.txt - echo "##############################################################" |& tee -a ./plots/run_batch.txt - echo "$i ###############################" |& tee -a ./plots/run_batch.txt - echo "##############################################################" |& tee -a ./plots/run_batch.txt - echo "" |& tee -a ./plots/run_batch.txt - python3 $i |& tee -a ./plots/run_batch.txt +echo "" > /tmp/plots/run_batch.txt +for i in $(ls -1v ./[o-q]*.py ); do + echo "" |& tee -a /tmp/plots/run_batch.txt + echo "##############################################################" |& tee -a /tmp/plots/run_batch.txt + echo "$i ###############################" |& tee -a /tmp/plots/run_batch.txt + echo "##############################################################" |& tee -a /tmp/plots/run_batch.txt + echo "" |& tee -a /tmp/plots/run_batch.txt + python3 $i |& tee -a /tmp/plots/run_batch.txt done