python 2.7 - How to train a caffe model? -


Has anybody successfully trained a Caffe model? I have a training-ready image set that I would like to use to create a Caffe model for use with Google's Deep Dream.

The resources I've been able to find on how to train a model are these:
imagenet tutorial
Edit: here's another, but it's not creating a deploy.prototxt file. When I try to use one from another model, it "works" but isn't correct.
caffe-oxford 102
Can anyone point me in the right direction for training my own model?

I have written a simple example to train a Caffe model on the Iris data set in Python. It also gives the predicted outputs given some user-defined inputs. The network as well as the solver settings need some more tuning, but I just wanted to have a code skeleton to get started. Feel free to edit it to improve it.

(github repository)

iris_tuto.py

'''
Train a small Caffe network on the Iris data set and report its accuracy.

Requirements:
 - Caffe (script to install Caffe and pycaffe on a new Ubuntu 14.04 LTS x64 or Ubuntu 14.10 x64.
   CPU only, multi-threaded Caffe. https://stackoverflow.com/a/31396229/395857)
 - sudo pip install pydot
 - sudo apt-get install -y graphviz

Interesting resources on Caffe:
 - https://github.com/bvlc/caffe/tree/master/examples
 - http://nbviewer.ipython.org/github/joyofdata/joyofdata-articles/blob/master/deeplearning-with-caffe/neural-networks-with-caffe-on-the-gpu.ipynb

Interesting resources on Iris with ANNs:
 - iris data set test bed: http://deeplearning4j.org/iris-flower-dataset-tutorial.html
 - http://se.mathworks.com/help/nnet/examples/iris-clustering.html
 - http://lab.fs.uni-lj.si/lasin/wp/imit_files/neural/doc/seminar8.pdf

Synonyms:
 - output = label = target
 - input = feature
'''

import subprocess
import platform
import copy

from sklearn.datasets import load_iris
import sklearn.metrics
import numpy as np
try:
    from sklearn.cross_validation import StratifiedShuffleSplit
except ImportError:
    # sklearn.cross_validation was removed in newer scikit-learn releases;
    # the class now lives in sklearn.model_selection.
    from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
import h5py
import caffe
import caffe.draw
# Import the submodule explicitly instead of relying on caffe having
# imported it as a side effect.
import google.protobuf.text_format


def load_data():
    '''
    Load the Iris data set.

    Returns a dict with two entries:
     - 'input': the features reshaped to
       (number of samples, 1, 1, number of features)
     - 'output': the one-hot encoded targets,
       shape (number of samples, number of classes)
    '''
    data = load_iris()
    print(data.data)
    print(data.target)

    # One-hot encode the class index of every sample.
    number_of_samples = len(data.target)
    number_of_classes = len(data.target_names)
    targets = np.zeros((number_of_samples, number_of_classes))
    targets[np.arange(number_of_samples), data.target] = 1
    # Printed once, after the encoding is complete (not once per sample).
    print(targets)

    new_data = {}
    # Reshape to 4 dimensions, matching the four input_dim entries
    # (1, 1, 1, 4) declared in the deploy prototxt.
    new_data['input'] = np.reshape(data.data, (-1, 1, 1, data.data.shape[1]))
    new_data['output'] = targets
    return new_data


def save_data_as_hdf5(hdf5_data_filename, data):
    '''
    Save the data set to an HDF5 file.

    HDF5 is one of the data formats Caffe accepts. `data` is a dict with
    the keys 'input' and 'output'; they are written as the float32 data sets
    'data' and 'label' respectively.
    '''
    with h5py.File(hdf5_data_filename, 'w') as f:
        f['data'] = data['input'].astype(np.float32)
        f['label'] = data['output'].astype(np.float32)


def train(solver_prototxt_filename):
    '''
    Train the ANN described by the given solver prototxt (CPU mode).
    '''
    caffe.set_mode_cpu()
    solver = caffe.get_solver(solver_prototxt_filename)
    solver.solve()


def print_network_parameters(net):
    '''
    Print the parameters of the network
    '''
    print(net)
    print('net.inputs: {0}'.format(net.inputs))
    print('net.outputs: {0}'.format(net.outputs))
    print('net.blobs: {0}'.format(net.blobs))
    print('net.params: {0}'.format(net.params))


def get_predicted_output(deploy_prototxt_filename, caffemodel_filename, input, net=None):
    '''
    Get the predicted output, i.e. perform a forward pass.

    If `net` is None the network is loaded from the two given files;
    otherwise the given network is used and the file names are ignored.
    Returns the blob of the first network output.
    '''
    if net is None:
        net = caffe.Net(deploy_prototxt_filename, caffemodel_filename, caffe.TEST)

    # Example input: np.array([[ 5.1,  3.5,  1.4,  0.2]])
    out = net.forward(data=input)
    return out[net.outputs[0]]


def print_network(prototxt_filename, caffemodel_filename):
    '''
    Draw the ANN architecture to '<prototxt_filename>.png'.

    `caffemodel_filename` is not used; it is kept so that existing callers
    keep working.
    '''
    _net = caffe.proto.caffe_pb2.NetParameter()
    # The context manager closes the file once the definition has been read.
    with open(prototxt_filename) as f:
        google.protobuf.text_format.Merge(f.read(), _net)
    caffe.draw.draw_net_to_file(_net, prototxt_filename + '.png')
    print('draw ann done!')


def print_network_weights(prototxt_filename, caffemodel_filename):
    '''
    For each ANN layer, save a weight heatmap and a weight histogram as PNG
    files named after the caffemodel file and the layer.
    '''
    net = caffe.Net(prototxt_filename, caffemodel_filename, caffe.TEST)
    for layer_name in net.params:
        # weights heatmap
        arr = net.params[layer_name][0].data
        plt.clf()
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)
        cax = ax.matshow(arr, interpolation='none')
        fig.colorbar(cax, orientation="horizontal")
        # use format='svg' or 'pdf' for vectorial pictures
        plt.savefig('{0}_weights_{1}.png'.format(caffemodel_filename, layer_name),
                    dpi=100, format='png', bbox_inches='tight')
        plt.close()

        # weights histogram
        plt.clf()
        plt.hist(arr.tolist(), bins=20)
        # use format='svg' or 'pdf' for vectorial pictures
        plt.savefig('{0}_weights_hist_{1}.png'.format(caffemodel_filename, layer_name),
                    dpi=100, format='png', bbox_inches='tight')
        plt.close()


def get_predicted_outputs(deploy_prototxt_filename, caffemodel_filename, inputs):
    '''
    Get several predicted outputs, one per element of `inputs`.

    The network is loaded once and reused for every forward pass.
    '''
    net = caffe.Net(deploy_prototxt_filename, caffemodel_filename, caffe.TEST)
    # Each result is deep-copied before the next forward pass is run.
    return [
        copy.deepcopy(get_predicted_output(deploy_prototxt_filename, caffemodel_filename, sample, net))
        for sample in inputs
    ]


def get_accuracy(true_outputs, predicted_outputs, threshold=0.0):
    '''
    Print, for each output unit, the accuracy and the confusion matrix.

    `true_outputs` is a 2-D array (samples x outputs) of 0/1 targets.
    `predicted_outputs[i][0][j]` is the raw network output j for sample i;
    it is binarized to 0 when below `threshold` and to 1 otherwise.
    Use threshold 0 with SigmoidCrossEntropyLoss and 0.5 with EuclideanLoss.
    '''
    number_of_samples = true_outputs.shape[0]
    number_of_outputs = true_outputs.shape[1]
    for output_number in range(number_of_outputs):
        predicted_output_binary = [
            0 if predicted_outputs[sample_number][0][output_number] < threshold else 1
            for sample_number in range(number_of_samples)
        ]

        print('accuracy: {0}'.format(sklearn.metrics.accuracy_score(true_outputs[:, output_number], predicted_output_binary)))
        print(sklearn.metrics.confusion_matrix(true_outputs[:, output_number], predicted_output_binary))


def main():
    '''
    This is the main function
    '''

    # Set parameters
    solver_prototxt_filename = 'iris_solver.prototxt'
    train_test_prototxt_filename = 'iris_train_test.prototxt'
    deploy_prototxt_filename = 'iris_deploy.prototxt'
    hdf5_train_data_filename = 'iris_train_data.hdf5'
    hdf5_test_data_filename = 'iris_test_data.hdf5'
    caffemodel_filename = 'iris__iter_5000.caffemodel'  # generated by train()

    # Prepare data
    # NOTE: the same samples are used for training and testing (there is no
    # held-out split), so the accuracy printed below is measured on data the
    # network was trained on.
    data = load_data()
    print(data)
    train_data = data
    test_data = data
    save_data_as_hdf5(hdf5_train_data_filename, train_data)
    save_data_as_hdf5(hdf5_test_data_filename, test_data)

    # Train network
    train(solver_prototxt_filename)

    # Print network
    print_network(deploy_prototxt_filename, caffemodel_filename)
    print_network(train_test_prototxt_filename, caffemodel_filename)
    print_network_weights(train_test_prototxt_filename, caffemodel_filename)

    # Compute performance metrics
    # Example: inputs = np.array([[[[ 5.1,  3.5,  1.4,  0.2]]],[[[ 5.9,  3. ,  5.1,  1.8]]]])
    inputs = data['input']
    outputs = get_predicted_outputs(deploy_prototxt_filename, caffemodel_filename, inputs)
    get_accuracy(data['output'], outputs)


if __name__ == "__main__":
    main()
    # cProfile.run('main()')  # if you want to do some profiling

iris_train_test.prototxt:

# Train/test network definition for the Iris example.
# Caffe layer type names and enum values are case-sensitive.
name: "irisnet"

# Training data, read from the HDF5 files listed in iris_train_data.txt.
layer {
  name: "iris"
  type: "HDF5Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  hdf5_data_param {
    source: "iris_train_data.txt"
    batch_size: 1
  }
}

# Test data, read from the HDF5 files listed in iris_test_data.txt.
layer {
  name: "iris"
  type: "HDF5Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  hdf5_data_param {
    source: "iris_test_data.txt"
    batch_size: 1
  }
}

layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "data"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "drop1"
  type: "Dropout"
  bottom: "ip1"
  top: "ip1"
  dropout_param {
    dropout_ratio: 0.5
  }
}

layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "drop2"
  type: "Dropout"
  bottom: "ip2"
  top: "ip2"
  dropout_param {
    dropout_ratio: 0.4
  }
}

layer {
  name: "ip3"
  type: "InnerProduct"
  bottom: "ip2"
  top: "ip3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  name: "drop3"
  type: "Dropout"
  bottom: "ip3"
  top: "ip3"
  dropout_param {
    dropout_ratio: 0.3
  }
}

layer {
  name: "loss"
  type: "SigmoidCrossEntropyLoss"
  # type: "EuclideanLoss"
  # type: "HingeLoss"
  bottom: "ip3"
  bottom: "label"
  top: "loss"
}

iris_deploy.prototxt:

# Deploy (inference) network definition for the Iris example.
# Caffe layer type names are case-sensitive.
name: "irisnet"
input: "data"
input_dim: 1 # batch size
input_dim: 1
input_dim: 1
input_dim: 4

layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "data"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "drop1"
  type: "Dropout"
  bottom: "ip1"
  top: "ip1"
  dropout_param {
    dropout_ratio: 0.5
  }
}

layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "drop2"
  type: "Dropout"
  bottom: "ip2"
  top: "ip2"
  dropout_param {
    dropout_ratio: 0.4
  }
}

layer {
  name: "ip3"
  type: "InnerProduct"
  bottom: "ip2"
  top: "ip3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  name: "drop3"
  type: "Dropout"
  bottom: "ip3"
  top: "ip3"
  dropout_param {
    dropout_ratio: 0.3
  }
}

iris_solver.prototxt:

# The train/test net protocol buffer definition
net: "iris_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
test_iter: 1
# Carry out testing every test_interval training iterations.
test_interval: 1000
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.0001
momentum: 0.001
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 1000 iterations
display: 1000
# The maximum number of iterations
max_iter: 5000
# Snapshot intermediate results
snapshot: 5000
snapshot_prefix: "iris_"
# Solver mode: CPU or GPU
solver_mode: CPU # GPU

FYI: script to install Caffe and pycaffe on Ubuntu.


Comments