Has anyone trained a Caffe model? I have a training-ready image set that I would like to use to create a Caffe model for use with Google's Deep Dream.
The only resources I've been able to find on how to train a model are these:
imagenet tutorial
Edit: here's another, but it's not creating a deploy.prototxt file. When I try to use one from another model, it "works" but the result isn't correct.
caffe-oxford 102
Can anyone point me in the right direction for training my own model?
I have written a simple example that trains a Caffe model on the Iris data set in Python. It also gives the predicted outputs for some user-defined inputs. The network as well as the solver settings need some more tuning, but I wanted to have a code skeleton to get started. Feel free to edit it to improve it.
iris_tuto.py
'''
Train a small Caffe network on the Iris data set and report its accuracy.

Requirements:
 - Caffe (script to install Caffe and pycaffe on a new Ubuntu 14.04 LTS x64 or
   Ubuntu 14.10 x64; CPU only, multi-threaded Caffe:
   https://stackoverflow.com/a/31396229/395857)
 - sudo pip install pydot
 - sudo apt-get install -y graphviz

Interesting resources on Caffe:
 - https://github.com/bvlc/caffe/tree/master/examples
 - http://nbviewer.ipython.org/github/joyofdata/joyofdata-articles/blob/master/deeplearning-with-caffe/neural-networks-with-caffe-on-the-gpu.ipynb

Interesting resources on Iris with ANNs:
 - Iris data set test bed: http://deeplearning4j.org/iris-flower-dataset-tutorial.html
 - http://se.mathworks.com/help/nnet/examples/iris-clustering.html
 - http://lab.fs.uni-lj.si/lasin/wp/imit_files/neural/doc/seminar8.pdf

Synonyms:
 - output = label = target
 - input = feature
'''

import subprocess
import platform
import copy

from sklearn.datasets import load_iris
import sklearn.metrics
import numpy as np
try:
    from sklearn.model_selection import StratifiedShuffleSplit
except ImportError:
    # Older scikit-learn releases only ship the deprecated module.
    from sklearn.cross_validation import StratifiedShuffleSplit
import matplotlib.pyplot as plt
import h5py
import caffe
import caffe.draw
import google.protobuf.text_format


def load_data():
    '''
    Load the Iris data set.

    Returns a dict with two entries:
     - 'input':  the features reshaped to (num_samples, 1, 1, num_features),
                 i.e. the 4-D layout declared by the deploy prototxt
     - 'output': the targets one-hot encoded, shape (num_samples, num_classes)
    '''
    data = load_iris()
    print(data.data)
    print(data.target)

    num_samples, num_features = data.data.shape
    num_classes = len(data.target_names)

    # One-hot encode the integer class labels: one column per class.
    targets = np.zeros((num_samples, num_classes))
    targets[np.arange(num_samples), data.target] = 1
    print(targets)

    new_data = {}
    new_data['input'] = np.reshape(data.data, (num_samples, 1, 1, num_features))
    new_data['output'] = targets
    return new_data


def save_data_as_hdf5(hdf5_data_filename, data):
    '''
    Write `data` to an HDF5 file (HDF5 is one of the data formats Caffe accepts).

    `data['input']` is stored as the 'data' dataset and `data['output']` as the
    'label' dataset, both converted to float32. These names match the `top`
    blobs of the HDF5 data layers in the train/test prototxt.
    '''
    with h5py.File(hdf5_data_filename, 'w') as f:
        f['data'] = data['input'].astype(np.float32)
        f['label'] = data['output'].astype(np.float32)


def train(solver_prototxt_filename):
    '''
    Train the ANN on the CPU using the solver described by
    `solver_prototxt_filename`.
    '''
    caffe.set_mode_cpu()
    solver = caffe.get_solver(solver_prototxt_filename)
    solver.solve()


def print_network_parameters(net):
    '''
    Print the inputs, outputs, blobs and parameters of the network `net`.
    '''
    print(net)
    print('net.inputs: {0}'.format(net.inputs))
    print('net.outputs: {0}'.format(net.outputs))
    print('net.blobs: {0}'.format(net.blobs))
    print('net.params: {0}'.format(net.params))


def get_predicted_output(deploy_prototxt_filename, caffemodel_filename, input, net=None):
    '''
    Get the predicted output for `input`, i.e. perform a forward pass.

    If `net` is None a new network is loaded from the two files; pass an
    already-loaded `net` to avoid reloading it for every sample.
    Returns the blob of the first network output.
    '''
    if net is None:
        net = caffe.Net(deploy_prototxt_filename, caffemodel_filename, caffe.TEST)

    out = net.forward(data=input)
    return out[net.outputs[0]]


def print_network(prototxt_filename, caffemodel_filename):
    '''
    Draw the ANN architecture described by `prototxt_filename` and save it as
    `prototxt_filename + '.png'`.

    `caffemodel_filename` is not used; it is kept so that the signature matches
    the other print_* helpers.
    '''
    _net = caffe.proto.caffe_pb2.NetParameter()
    # Use a context manager so the prototxt file handle is always closed.
    with open(prototxt_filename) as f:
        google.protobuf.text_format.Merge(f.read(), _net)
    caffe.draw.draw_net_to_file(_net, prototxt_filename + '.png')
    print('draw ann done!')


def print_network_weights(prototxt_filename, caffemodel_filename):
    '''
    For each ANN layer that has parameters, save a heatmap and a histogram of
    its weights as PNG files named after `caffemodel_filename` and the layer.
    '''
    net = caffe.Net(prototxt_filename, caffemodel_filename, caffe.TEST)
    for layer_name in net.params:
        # Index 0 holds the weights of the layer.
        arr = net.params[layer_name][0].data

        # Weights heatmap
        plt.clf()
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)
        cax = ax.matshow(arr, interpolation='none')
        fig.colorbar(cax, orientation="horizontal")
        # Use format='svg' or 'pdf' for vectorial pictures.
        plt.savefig('{0}_weights_{1}.png'.format(caffemodel_filename, layer_name),
                    dpi=100, format='png', bbox_inches='tight')
        plt.close()

        # Weights histogram
        plt.clf()
        plt.hist(arr.tolist(), bins=20)
        # Use format='svg' or 'pdf' for vectorial pictures.
        plt.savefig('{0}_weights_hist_{1}.png'.format(caffemodel_filename, layer_name),
                    dpi=100, format='png', bbox_inches='tight')
        plt.close()


def get_predicted_outputs(deploy_prototxt_filename, caffemodel_filename, inputs):
    '''
    Get the predicted outputs for several inputs.

    The network is loaded once and reused for every element of `inputs`.
    Returns a list with one prediction per input.
    '''
    net = caffe.Net(deploy_prototxt_filename, caffemodel_filename, caffe.TEST)
    outputs = []
    for sample in inputs:
        prediction = get_predicted_output(deploy_prototxt_filename, caffemodel_filename, sample, net)
        # Deep-copy so the stored prediction cannot be affected by later
        # forward passes through the same net.
        outputs.append(copy.deepcopy(prediction))
    return outputs


def get_accuracy(true_outputs, predicted_outputs):
    '''
    Print the accuracy and the confusion matrix of each output unit.

    `true_outputs` is a 2-D array (samples x outputs) of 0/1 labels.
    `predicted_outputs` is indexed as [sample][0][output], i.e. one
    batch-of-one prediction per sample.
    '''
    number_of_samples = true_outputs.shape[0]
    number_of_outputs = true_outputs.shape[1]
    # 0 if SigmoidCrossEntropyLoss (the deploy net has no sigmoid layer, so its
    # outputs are raw scores and 0 is the decision boundary); 0.5 if EuclideanLoss.
    threshold = 0.0
    for output_number in range(number_of_outputs):
        predicted_output_binary = [
            0 if predicted_outputs[sample_number][0][output_number] < threshold else 1
            for sample_number in range(number_of_samples)
        ]

        print('accuracy: {0}'.format(sklearn.metrics.accuracy_score(
            true_outputs[:, output_number], predicted_output_binary)))
        print(sklearn.metrics.confusion_matrix(
            true_outputs[:, output_number], predicted_output_binary))


def main():
    '''
    Main function: prepare the data, train the network, draw it and
    compute its accuracy.
    '''
    # Set parameters
    solver_prototxt_filename = 'iris_solver.prototxt'
    train_test_prototxt_filename = 'iris_train_test.prototxt'
    deploy_prototxt_filename = 'iris_deploy.prototxt'
    hdf5_train_data_filename = 'iris_train_data.hdf5'
    hdf5_test_data_filename = 'iris_test_data.hdf5'
    caffemodel_filename = 'iris__iter_5000.caffemodel'  # generated by train()

    # Prepare data. The same samples are written as both the training and the
    # test set, so the accuracy printed below is measured on the training data.
    # NOTE: the data layers in iris_train_test.prototxt read
    # "iris_train_data.txt" / "iris_test_data.txt"; this script does not create
    # those files, so they must already exist next to the prototxt files.
    data = load_data()
    print(data)
    save_data_as_hdf5(hdf5_train_data_filename, data)
    save_data_as_hdf5(hdf5_test_data_filename, data)

    # Train network
    train(solver_prototxt_filename)

    # Print network
    print_network(deploy_prototxt_filename, caffemodel_filename)
    print_network(train_test_prototxt_filename, caffemodel_filename)
    print_network_weights(train_test_prototxt_filename, caffemodel_filename)

    # Compute performance metrics
    # To predict on hand-picked samples instead, use e.g.:
    # inputs = np.array([[[[ 5.1, 3.5, 1.4, 0.2]]],[[[ 5.9, 3. , 5.1, 1.8]]]])
    inputs = data['input']
    outputs = get_predicted_outputs(deploy_prototxt_filename, caffemodel_filename, inputs)
    get_accuracy(data['output'], outputs)


if __name__ == "__main__":
    main()
    #cProfile.run('main()') # if you want to do some profiling

# iris_train_test.prototxt:
# Network definition used for training and testing.
name: "irisnet"

# Training data. `source` is a text file that lists the HDF5 file path(s).
layer {
  name: "iris"
  type: "HDF5Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  hdf5_data_param {
    source: "iris_train_data.txt"
    batch_size: 1
  }
}

# Test data, same layout as the training layer.
layer {
  name: "iris"
  type: "HDF5Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  hdf5_data_param {
    source: "iris_test_data.txt"
    batch_size: 1
  }
}

# First fully connected layer: 4 inputs -> 50 units.
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "data"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "drop1"
  type: "Dropout"
  bottom: "ip1"
  top: "ip1"
  dropout_param {
    dropout_ratio: 0.5
  }
}

# Second fully connected layer: 50 -> 50 units.
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "drop2"
  type: "Dropout"
  bottom: "ip2"
  top: "ip2"
  dropout_param {
    dropout_ratio: 0.4
  }
}

# Output layer: one unit per Iris class.
layer {
  name: "ip3"
  type: "InnerProduct"
  bottom: "ip2"
  top: "ip3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "drop3"
  type: "Dropout"
  bottom: "ip3"
  top: "ip3"
  dropout_param {
    dropout_ratio: 0.3
  }
}

layer {
  name: "loss"
  type: "SigmoidCrossEntropyLoss"
  # type: "EuclideanLoss"
  # type: "HingeLoss"
  bottom: "ip3"
  bottom: "label"
  top: "loss"
}

# iris_deploy.prototxt:
# Network definition used for prediction: same layers as the train/test
# network, but with an explicit input blob instead of the HDF5 data layers
# and without the loss layer.
name: "irisnet"
input: "data"
input_dim: 1 # batch size
input_dim: 1
input_dim: 1
input_dim: 4

layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "data"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "drop1"
  type: "Dropout"
  bottom: "ip1"
  top: "ip1"
  dropout_param {
    dropout_ratio: 0.5
  }
}

layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 50
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "drop2"
  type: "Dropout"
  bottom: "ip2"
  top: "ip2"
  dropout_param {
    dropout_ratio: 0.4
  }
}

layer {
  name: "ip3"
  type: "InnerProduct"
  bottom: "ip2"
  top: "ip3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "drop3"
  type: "Dropout"
  bottom: "ip3"
  top: "ip3"
  dropout_param {
    dropout_ratio: 0.3
  }
}

# iris_solver.prototxt:
# The train/test net protocol buffer definition
net: "iris_train_test.prototxt"

# test_iter specifies how many forward passes the test should carry out.
test_iter: 1

# Carry out testing every test_interval training iterations.
test_interval: 1000

# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.0001
momentum: 0.001
weight_decay: 0.0005

# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75

# Display every 1000 iterations
display: 1000

# The maximum number of iterations
max_iter: 5000

# Snapshot intermediate results
snapshot: 5000
snapshot_prefix: "iris_"

# Solver mode: CPU or GPU
solver_mode: CPU # GPU
Comments
Post a Comment