Creating and training an example from scratch with Caffe
This example uses the MNIST dataset. The guide was written for Ubuntu, but it also works on Windows once Caffe has been installed correctly. Be careful to adapt the paths in the example to your own system.
You can download the example from [here].
File structure
Organize your files as follows:
----example/
|
|----input/
| |----test/
| | |----class_01/
| | | |----img_0001.png
| | | |----img_0002.png
| | | |----...
| | |----class_02/
| | | |----img_0001.png
| | | |----img_0002.png
| | | |----...
| | |----.../
| |
| |----test_lmdb/
| | |----data.mdb
| | |----lock.mdb
| |
| |----train/
| | |----class_01/
| | | |----img_0001.png
| | | |----img_0002.png
| | | |----...
| | |----class_02/
| | | |----img_0001.png
| | | |----img_0002.png
| | | |----...
| | |----.../
| |
| |----train_lmdb/
| | |----data.mdb
| | |----lock.mdb
| |
| |----mean_image.binaryproto
| |----test.txt
| |----train.txt
|
|----log/
| |----INFO_*****.txt (auto generated log files after training)
|
|----models/
| |----model1/
| |----model_deploy.prototxt
| |----model_solver.prototxt
| |----model_train_test.prototxt
| |----train_iter_****.caffemodel (auto generated files after training)
| |----train_iter_****.solverstate (auto generated files after training)
|
|----scripts/ (folder for your optional scripts)
|----generate_list_text.m
|----test_mean.py
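If you prefer to create this skeleton automatically, here is a minimal Python sketch (the base path is this guide's example path; the class folders are named class_0 ... class_9 here, matching the lists generated below):
#!/usr/bin/python
# Create the example's folder skeleton.
import os

base = '/home/dennis/Desktop/example'
folders = ['input/train', 'input/test', 'log', 'models/model1', 'scripts']
# one subfolder per class inside train/ and test/
folders += ['input/%s/class_%d' % (split, c)
            for split in ('train', 'test') for c in range(10)]

for folder in folders:
    path = os.path.join(base, folder)
    if not os.path.exists(path):
        os.makedirs(path)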
Creating test.txt and train.txt files
The file generate_list_text.m is a MATLAB script (placed in the scripts folder) that creates the image lists:
% Start with a folder and get a list of all subfolders.
% Finds and prints names of all PNG, JPG, and TIF images in
% that folder and all of its subfolders.
clc;
format long g;
format compact;
fileID = fopen('/home/dennis/Desktop/example/input/train.txt','w'); %CHANGE HERE!
% Top-level folder to scan.
topLevelFolder = fullfile('/home/dennis/Desktop/example/input/train'); %CHANGE HERE!
% Get list of all subfolders.
allSubFolders = genpath(topLevelFolder);
% Parse into a cell array.
remain = allSubFolders;
listOfFolderNames = {};
while true
    [singleSubFolder, remain] = strtok(remain, ':');
    if isempty(singleSubFolder)
        break;
    end
    listOfFolderNames = [listOfFolderNames singleSubFolder];
end
numberOfFolders = length(listOfFolderNames)
% Process all image files in those folders.
for k = 1 : numberOfFolders
    % Get this folder and print it out.
    thisFolder = listOfFolderNames{k};
    fprintf('Processing folder %s\n', thisFolder);
    % Get PNG files.
    filePattern = sprintf('%s/*.png', thisFolder);
    baseFileNames = dir(filePattern);
    numberOfImageFiles = length(baseFileNames);
    % Now we have a list of all files in this folder.
    % Locate the 'class_' token in the path; the digit after it is the label.
    ccc = strfind(filePattern, 'class_');
    if numberOfImageFiles >= 1
        % Go through all those image files.
        for f = 1 : numberOfImageFiles
            fullFileName = fullfile(thisFolder, baseFileNames(f).name);
            theClass = str2double(fullFileName(ccc+6:ccc+6)); % single-digit label
            fprintf('Processing image file %s %d\n', fullFileName, theClass);
            label = theClass;
            thePath = fullFileName(ccc:ccc+6); % e.g. 'class_0' %CHANGE HERE!
            fprintf(fileID,'%s %d\n',['/train/' thePath '/' baseFileNames(f).name], label); %CHANGE HERE!
        end
    else
        fprintf('Folder %s has no image files in it.\n', thisFolder);
    end
end
fclose(fileID);
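If you don't have MATLAB, the following Python sketch produces the same list format (it assumes the class_<digit> folder naming used by the script above and this guide's example paths):
#!/usr/bin/python
# Write "<relative path> <label>" lines for every PNG under input/train.
import os

input_dir = '/home/dennis/Desktop/example/input'
split = 'train'  # change to 'test' to generate test.txt

with open(os.path.join(input_dir, split + '.txt'), 'w') as out:
    top = os.path.join(input_dir, split)
    for class_name in sorted(os.listdir(top)):
        label = int(class_name.split('_')[1])  # 'class_3' -> 3
        class_dir = os.path.join(top, class_name)
        for name in sorted(os.listdir(class_dir)):
            if name.endswith('.png'):
                out.write('/%s/%s/%s %d\n' % (split, class_name, name, label))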
The resulting test.txt file looks like:
/test/class_0/10.png 0
/test/class_0/1001.png 0
/test/class_0/1009.png 0
/test/class_0/101.png 0
/test/class_0/1034.png 0
/test/class_0/1047.png 0
/test/class_0/1061.png 0
/test/class_0/1084.png 0
/test/class_0/1094.png 0
...
The train.txt file has the same format, but lists the /train/ image paths with their labels.
Creating lmdb and mean_image.binaryproto files
Create the lmdb databases with the convert_imageset tool that ships with Caffe: one database (train_lmdb) from train.txt and one (test_lmdb) from test.txt. The --gray flag loads the images as single-channel and --shuffle randomizes their order. Type in a terminal:
cd /home/dennis/Desktop/example
convert_imageset --shuffle --gray /home/dennis/Desktop/example/input /home/dennis/Desktop/example/input/train.txt /home/dennis/Desktop/example/input/train_lmdb
convert_imageset --shuffle --gray /home/dennis/Desktop/example/input /home/dennis/Desktop/example/input/test.txt /home/dennis/Desktop/example/input/test_lmdb
Create the mean image with the compute_image_mean tool:
cd /home/dennis/Desktop/example
compute_image_mean /home/dennis/Desktop/example/input/train_lmdb /home/dennis/Desktop/example/input/mean_image.binaryproto
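To verify that mean_image.binaryproto was written correctly, you can load it with pycaffe and inspect it; a minimal sketch using this guide's example path:
#!/usr/bin/python
# Sanity check: load the mean image and inspect its shape and value range.
import caffe
import numpy as np

blob = caffe.proto.caffe_pb2.BlobProto()
with open('/home/dennis/Desktop/example/input/mean_image.binaryproto', 'rb') as f:
    blob.ParseFromString(f.read())

mean = caffe.io.blobproto_to_array(blob)  # shape (1, channels, height, width)
print mean.shape           # expect (1, 1, 28, 28) for grayscale MNIST
print mean.min(), mean.max()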
Setting up the model
The file model_solver.prototxt:
# The train/test net protocol buffer definition
net: "models/model1/model_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "models/model1/train"
# solver mode: CPU or GPU
solver_mode: GPU
The file model_train_test.prototxt:
name: "LeNet"
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "input/train_lmdb"
    batch_size: 64
    backend: LMDB
  }
}
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "input/test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
The file model_deploy.prototxt:
name: "LeNet"
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape: { dim: 64 dim: 1 dim: 28 dim: 28 } }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
}
Training and Testing
In order to train in CPU mode, type in a terminal:
cd /home/dennis/Desktop/example
caffe train --solver /home/dennis/Desktop/example/models/model1/model_solver.prototxt
In order to train in GPU mode, type in a terminal:
cd /home/dennis/Desktop/example
caffe train --solver /home/dennis/Desktop/example/models/model1/model_solver.prototxt --gpu 0
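As an alternative to the caffe binary, training can also be driven from Python through pycaffe's solver interface; a minimal sketch (assuming the working directory is the example folder, since the solver file uses relative paths):
#!/usr/bin/python
# Drive training from pycaffe instead of the caffe command-line tool.
import caffe

caffe.set_mode_gpu()      # or caffe.set_mode_cpu()
solver = caffe.SGDSolver('models/model1/model_solver.prototxt')
solver.solve()            # runs until max_iter, snapshotting as configured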
According to the solver settings (snapshot: 5000, max_iter: 10000), a snapshot is written every 5000 iterations. After training, the files train_iter_10000.caffemodel (the trained weights) and train_iter_10000.solverstate (for resuming training), along with the intermediate 5000-iteration snapshot, will be stored in the models/model1 folder.
Note ---------------------------------------------------------
If you want to store the training logs, append 2>&1 | tee <logfile> to the command:
caffe train --solver /home/dennis/Desktop/example/models/model1/model_solver.prototxt 2>&1 | tee /home/dennis/Desktop/example/log/model1_train_test_01.log
caffe train --solver /home/dennis/Desktop/example/models/model1/model_solver.prototxt --gpu 0 2>&1 | tee /home/dennis/Desktop/example/log/model1_train_test_01.log
----------------------------------------------------------------
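A few lines of Python are enough to pull the loss values out of such a log for later plotting; a minimal sketch that simply matches Caffe's "Iteration N ... loss = X" lines (the exact log format may vary between Caffe versions):
#!/usr/bin/python
# Extract (iteration, loss) pairs from a Caffe training log.
import re

pattern = re.compile(r'Iteration (\d+).*loss = ([\d.eE+-]+)')
points = []
with open('/home/dennis/Desktop/example/log/model1_train_test_01.log') as f:
    for line in f:
        m = pattern.search(line)
        if m:
            points.append((int(m.group(1)), float(m.group(2))))

for it, loss in points:
    print it, loss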
Testing using Python script
Using the script test_mean.py (stored in the scripts folder):
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Axel Angel, copyright 2015, license GPLv3.
# added mean subtraction so that, the accuracy can be reported accurately
# just like caffe when doing a mean subtraction
# Seyyed Hossein Hasan Pour
# Coderx7@Gmail.com
# 7/3/2016

import sys
import caffe
import numpy as np
import lmdb
import argparse
from collections import defaultdict
import time

start_time = time.time()

def flat_shape(x):
    "Returns x without singleton dimension, eg: (1,28,28) -> (28,28)"
    return x.reshape(filter(lambda s: s > 1, x.shape))

def lmdb_reader(fpath):
    import lmdb
    lmdb_env = lmdb.open(fpath)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()

    for key, value in lmdb_cursor:
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.uint8)
        yield (key, flat_shape(image), label)

def leveldb_reader(fpath):
    import leveldb
    db = leveldb.LevelDB(fpath)

    for key, value in db.RangeIter():
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.uint8)
        yield (key, flat_shape(image), label)

def npz_reader(fpath):
    npz = np.load(fpath)
    xs = npz['arr_0']
    ls = npz['arr_1']

    for i, (x, l) in enumerate(np.array([xs, ls]).T):
        yield (i, x, l)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--proto', type=str, required=True)
    parser.add_argument('--model', type=str, required=True)
    parser.add_argument('--mean', type=str, required=True)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--lmdb', type=str, default=None)
    group.add_argument('--leveldb', type=str, default=None)
    group.add_argument('--npz', type=str, default=None)
    args = parser.parse_args()

    # Extract mean from the mean image file
    mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
    f = open(args.mean, 'rb')
    mean_blobproto_new.ParseFromString(f.read())
    mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
    f.close()

    count = 0
    correct = 0
    matrix = defaultdict(int)  # (real,pred) -> int
    labels_set = set()

    # CNN reconstruction and loading the trained weights
    net = caffe.Net(args.proto, args.model, caffe.TEST)
    caffe.set_mode_cpu()
    print "args", vars(args)

    if args.lmdb != None:
        reader = lmdb_reader(args.lmdb)
    if args.leveldb != None:
        reader = leveldb_reader(args.leveldb)
    if args.npz != None:
        reader = npz_reader(args.npz)

    for i, image, label in reader:
        image_caffe = image.reshape(1, *image.shape)
        out = net.forward_all(data=np.asarray([image_caffe]) - mean_image)
        plabel = int(out['prob'][0].argmax(axis=0))
        count += 1
        iscorrect = label == plabel
        correct += (1 if iscorrect else 0)
        matrix[(label, plabel)] += 1
        labels_set.update([label, plabel])

        if not iscorrect:
            print("\rError: i=%s, expected %i but predicted %i" \
                  % (i, label, plabel))

        sys.stdout.write("\rAccuracy: %.1f%%" % (100. * correct / count))
        sys.stdout.flush()

    print(", %i/%i corrects" % (correct, count))
    print("--- %s seconds ---" % (time.time() - start_time))
Type in a terminal:
cd /home/dennis/Desktop/example
python /home/dennis/Desktop/example/scripts/test_mean.py --proto /home/dennis/Desktop/example/models/model1/model_deploy.prototxt --model /home/dennis/Desktop/example/models/model1/train_iter_10000.caffemodel --mean /home/dennis/Desktop/example/input/mean_image.binaryproto --lmdb /home/dennis/Desktop/example/input/test_lmdb
Drawing the Model
Using Caffe's draw_net.py script (adjust its path to your Caffe installation):
python /home/dennis/technical/python/draw_net.py /home/dennis/Desktop/example/models/model1/model_deploy.prototxt /home/dennis/Desktop/example/scripts/model1.png
Or:
python /home/dennis/technical/python/draw_net.py /home/dennis/Desktop/example/models/model1/model_train_test.prototxt /home/dennis/Desktop/example/scripts/model1.png
And the image will be saved at /home/dennis/Desktop/example/scripts/model1.png.
Resources
- http://caffe.berkeleyvision.org/gathered/examples/mnist.html
- http://adilmoujahid.com/posts/technical/2016/06/introduction-deep-learning-python-caffe/
- http://shengshuyang.github.io/A-step-by-step-guide-to-Caffe.html
- http://www.panderson.me/images/Caffe.pdf
- http://vision.stanford.edu/teaching/cs231n/slides/2015/caffe_tutorial.pdf